diff --git a/.gitignore b/.gitignore
index 096e385b..085e4f74 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,7 +5,10 @@ build.ninja
 .ninja_deps
 bin/
 lib/
+lib64/
+libexec/
 config/qp_create_ninja.pickle
 src/*/.gitignore
 ezfio_interface.irp.f
 share
+*.swp
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index bec11f5e..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,52 +0,0 @@
-#sudo: true
-#before_script:
-#  - sudo apt-get update -q
-#  - sudo apt-get remove curl
-#  - sudo apt-get remove zlib1g-dev
-#  - sudo apt-get install autoconf
-#  - sudo rm /usr/local/bin/bats
-
-os: linux
-
-dist: bionic
-
-sudo: false
-
-compiler: gfortran
-
-addons:
-  apt:
-    packages:
-    - gfortran
-    - gcc
-    - libatlas-base-dev
-#    - liblapack-dev
-#    - libblas-dev
-    - wget
-
-env:
-  - OPAMROOT=$HOME/.opam
-
-cache:
-  directories:
-  - $HOME/.opam/
-  - $HOME/cache
-
-language: python
-python:
-    - "3.7"
-
-stages:
-  - configuration
-  - compilation
-  - testing
-
-jobs:
-  include:
-    - stage: configuration
-      script: travis/configuration.sh
-    - stage: compilation
-      script: travis/compilation.sh
-    - stage: testing
-      script: travis/testing.sh
-
diff --git a/INSTALL.rst b/INSTALL.rst
index e37d31eb..e9f4aedb 100644
--- a/INSTALL.rst
+++ b/INSTALL.rst
@@ -316,7 +316,7 @@ OCaml
 
   .. code:: bash
 
-      opam install ocamlbuild cryptokit zmq sexplib ppx_sexp_conv ppx_deriving getopt
+      opam install ocamlbuild zmq sexplib ppx_sexp_conv ppx_deriving getopt
 
 
 Docopt
diff --git a/RELEASE_NOTES.org b/RELEASE_NOTES.org
index 7724d1d1..3bd02898 100644
--- a/RELEASE_NOTES.org
+++ b/RELEASE_NOTES.org
@@ -1,5 +1,32 @@
 #+TITLE: Quantum Package Release notes
 
+* Version 2.3
+
+** Changes
+
+  - Introduced DFT-based basis set correction
+  - Use OpamPack for OCaml
+  - Configure adapted for ARM
+  - Added many types of integrals
+  - Accelerated four-index transformation
+  - Added transcorrelated SCF
+  - Added transcorrelated CIPSI
+  - Added CCSD and CCSD(T)
+  - Added MO localization
+  - Changed coupling parameters for ROHF
+  - General Davidson algorithm
+  - Accelerated restore_symmetry
+  - Point charges in the Hamiltonian
+  - Removed cryptokit dependency in OCaml
+  - Using now standard convention in RDM
+  - Added molecular properties
+  - [ ] Added GTOs with complex exponent
+
+*** TODO: take from dev
+  - Updated version of f77-zmq
+  - Started to introduce shells in AOs
+  - Added ECMD UEG functional
+
 * Version 2.2
 
 ** Changes
@@ -32,62 +59,61 @@
   - Fixed bug with non-contiguous MOs in active space and deleter MOs
   - Complete network-free installation
   - Fixed bug in selection when computing full PT2
-  - Updated version of f77-zmq
 
-*** User interface
+** User interface
 
-    - Added ~qp_basis~ script to install a basis set from the ~bse~
-      command-line tool
-    - Introduced ~n_det_qp_edit~, ~psi_det_qp_edit~, and
-      ~psi_coef_qp_edit~ to accelerate the opening of qp_edit with
-      large wave functions
-    - Removed ~etc/ninja.rc~
-    - Added flag to specify if the AOs are normalized
-    - Added flag to specify if the primitive Gaussians are normalized
-    - Added ~lin_dep_cutoff~, the cutoff for linear dependencies
-    - Davidson convergence threshold can be adapted from PT2
-    - In ~density_for_dft~, ~no_core_density~ is now a logical
-    - Default for ~weight_selection~ has changed from 2 to 1
-    - Nullify_small_elements in matrices to keep symmetry
-    - Default of density functional changed from LDA to PBE
-    - Added ~no_vvvv_integrals~ flag
-    - Added ~pt2_min_parallel_tasks~ to control parallelism in PT2
-    - Added ~print_energy~
-    - Added ~print_hamiltonian~
-    - Added input for two body RDM
-    - Added keyword ~save_wf_after_selection~
-    - Added a ~restore_symm~ flag to enforce the restoration of
-      symmetry in matrices
-    - qp_export_as_tgz exports also plugin codes
-    - Added a basis module containing basis set information
-    - Added qp_run truncate_wf
+  - Added ~qp_basis~ script to install a basis set from the ~bse~
+    command-line tool
+  - Introduced ~n_det_qp_edit~, ~psi_det_qp_edit~, and
+    ~psi_coef_qp_edit~ to accelerate the opening of qp_edit with
+    large wave functions
+  - Removed ~etc/ninja.rc~
+  - Added flag to specify if the AOs are normalized
+  - Added flag to specify if the primitive Gaussians are normalized
+  - Added ~lin_dep_cutoff~, the cutoff for linear dependencies
+  - Davidson convergence threshold can be adapted from PT2
+  - In ~density_for_dft~, ~no_core_density~ is now a logical
+  - Default for ~weight_selection~ has changed from 2 to 1
+  - Nullify_small_elements in matrices to keep symmetry
+  - Default of density functional changed from LDA to PBE
+  - Added ~no_vvvv_integrals~ flag
+  - Added ~pt2_min_parallel_tasks~ to control parallelism in PT2
+  - Added ~print_energy~
+  - Added ~print_hamiltonian~
+  - Added input for two body RDM
+  - Added keyword ~save_wf_after_selection~
+  - Added a ~restore_symm~ flag to enforce the restoration of
+    symmetry in matrices
+  - qp_export_as_tgz exports also plugin codes
+  - Added a basis module containing basis set information
+  - Added qp_run truncate_wf
 
-*** Code
+** Code
 
-    - Many bug fixes
-    - Changed electron-nucleus from ~e_n~ to ~n_e~ in names of variables
-    - Changed ~occ_pattern~ to ~configuration~
-    - Replaced ~List.map~ by a tail-recursive version ~Qputils.list_map~
-    - Added possible imaginary part in OCaml MO coefficients
-    - Added ~qp_clean_source_files.sh~ to remove non-ascii characters
-    - Added flag ~is_periodic~ for periodic systems
-    - Possibilities to handle complex integrals and complex MOs
-    - Moved pseuodpotential integrals out of ~ao_one_e_integrals~
-    - Removed Schwarz test and added logical functions
-      ~ao_two_e_integral_zero~ and ~ao_one_e_integral_zero~
-    - Introduced type for ~pt2_data~
-    - Banned excitations are used with far apart localized MOs
-    - S_z2_Sz is now included in S2
-    - S^2 in single precision
-    - Added Shank function
-    - Added utilities for periodic calculations
-    - Added ~V_ne_psi_energy~
-    - Added ~h_core_guess~ routine
-    - Fixed Laplacians in real space (indices)
-    - Added LIB file to add extra libs in plugin
-    - Using Intel IPP for sorting when using Intel compiler
-    - Removed parallelism in sorting
-    - Compute banned_excitations from exchange integrals to accelerate with local MOs
+  - Many bug fixes
+  - Changed electron-nucleus from ~e_n~ to ~n_e~ in names of variables
+  - Changed ~occ_pattern~ to ~configuration~
+  - Replaced ~List.map~ by a tail-recursive version ~Qputils.list_map~
+  - Added possible imaginary part in OCaml MO coefficients
+  - Added ~qp_clean_source_files.sh~ to remove non-ascii characters
+  - Added flag ~is_periodic~ for periodic systems
+  - Possibilities to handle complex integrals and complex MOs
+  - Moved pseuodpotential integrals out of ~ao_one_e_integrals~
+  - Removed Schwarz test and added logical functions
+    ~ao_two_e_integral_zero~ and ~ao_one_e_integral_zero~
+  - Introduced type for ~pt2_data~
+  - Banned excitations are used with far apart localized MOs
+  - S_z2_Sz is now included in S2
+  - S^2 in single precision
+  - Added Shank function
+  - Added utilities for periodic calculations
+  - Added ~V_ne_psi_energy~
+  - Added ~h_core_guess~ routine
+  - Fixed Laplacians in real space (indices)
+  - Added LIB file to add extra libs in plugin
+  - Using Intel IPP for sorting when using Intel compiler
+  - Removed parallelism in sorting
+  - Compute banned_excitations from exchange integrals to accelerate with local MOs
 
 
 
diff --git a/bin/qp_reset b/bin/qp_reset
index 74dd1f78..d94ab24c 100755
--- a/bin/qp_reset
+++ b/bin/qp_reset
@@ -105,6 +105,7 @@ if [[ $mos -eq 1 ]] ; then
       echo "Warning: You will need to re-define the MO classes"
   fi
   rm --recursive --force -- ${ezfio}/mo_basis
+  rm --recursive --force -- ${ezfio}/bi_ortho_mos
   rm --recursive --force -- ${ezfio}/work/mo_ints_*
 fi
 
diff --git a/bin/qp_test b/bin/qp_test
index 67b3ea02..288b7291 100755
--- a/bin/qp_test
+++ b/bin/qp_test
@@ -60,19 +60,14 @@ def main(arguments):
         print("Running tests for %s"%(bats_file))
         print("")
         if arguments["-v"]:
-            p = None
             if arguments["TEST"]:
                 test = "export TEST=%s ; "%arguments["TEST"]
             else:
                 test = ""
-            try:
-                os.system(test+" python3 bats_to_sh.py "+bats_file+
+            os.system(test+" python3 bats_to_sh.py "+bats_file+
                                     "| bash")
-            except:
-                if p:
-                    p.terminate()
         else:
-            subprocess.check_call(["bats", bats_file], env=os.environ)
+            subprocess.check_call(["bats", "--verbose-run", "--trace", bats_file], env=os.environ)
 
 
 
diff --git a/codemeta.json b/codemeta.json
new file mode 100644
index 00000000..9e63b5bc
--- /dev/null
+++ b/codemeta.json
@@ -0,0 +1,66 @@
+{
+    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+    "@type": "SoftwareSourceCode",
+    "license": "https://spdx.org/licenses/AGPL-3.0",
+    "codeRepository": "https://github.com/QuantumPackage/qp2",
+    "dateCreated": "2014-04-01",
+    "datePublished": "2019-06-11",
+    "dateModified": "2020-02-21",
+    "downloadUrl": "https://github.com/QuantumPackage/qp2/releases/tag/2.1.2",
+    "issueTracker": "https://github.com/QuantumPackage/qp2/issues",
+    "name": "Quantum Package",
+    "version": "2.1.2",
+    "identifier": "https://doi.org/10.5281/zenodo.3677565",
+    "description": "Programming environment for wave function methods",
+    "applicationCategory": "Quantum Chemistry",
+    "funding": "ERC_863481, CoE_952165",
+    "developmentStatus": "active",
+    "referencePublication": "https://doi.org/10.1021/acs.jctc.9b00176",
+    "funder": {
+        "@type": "Organization",
+        "name": "CNRS"
+    },
+    "keywords": [
+        "selected configuration interaction",
+        "CIPSI"
+    ],
+    "programmingLanguage": [
+        "Fortran",
+        "IRPF90",
+        "OCaml",
+        "Python",
+        "C"
+    ],
+    "operatingSystem": [
+        "Linux"
+    ],
+    "softwareRequirements": [
+        "ZeroMQ"
+    ],
+    "author": [
+        {
+            "@type": "Person",
+            "@id": "https://orcid.org/0000-0003-4955-7136",
+            "givenName": "Scemama",
+            "familyName": "Anthony",
+            "email": "scemama@irsamc.ups-tlse.fr",
+            "affiliation": {
+                "@type": "Organization",
+                "name": "Laboratoire de chimie et physique quantiques, Toulouse, CNRS"
+            }
+        }
+    ],
+    "contributor": [
+        {
+            "@type": "Person",
+            "givenName": "Emmanuel",
+            "familyName": "Giner",
+            "email": "eginer@lct.jussieu.fr",
+            "affiliation": {
+                "@type": "Organization",
+                "name": "Laboratoire de Chimie Theorique, Paris, CNRS"
+            }
+        }
+    ]
+}
+
diff --git a/config/bull.cfg b/config/bull.cfg
index 6a93fdca..91471473 100644
--- a/config/bull.cfg
+++ b/config/bull.cfg
@@ -6,7 +6,7 @@
 # --align=32                 : Align all provided arrays on a 32-byte boundary
 #
 [COMMON]
-FC           : mpiifort -shared-libgcc -shared-intel -fpic
+FC           : mpiifort -fpic -xCORE-AVX2 
 LAPACK_LIB   : -mkl=parallel 
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DMPI  
@@ -31,7 +31,7 @@ OPENMP  : 1          ; Append OpenMP flags
 # -ftz                       : Flushes denormal results to zero
 #
 [OPT]
-FCFLAGS  : -xCORE-AVX2 -O2 -ip -ftz -g -traceback -qopt-prefetch=5 -qopt-prefetch-issue-excl-hint -unroll-aggressive 
+FCFLAGS  : -O2 -ip -ftz -g -traceback -qopt-prefetch=5 -qopt-prefetch-issue-excl-hint -unroll-aggressive 
 # Profiling flags
 #################
 #
diff --git a/config/cray_gfortran.cfg b/config/cray_gfortran.cfg
new file mode 100644
index 00000000..1d1013b7
--- /dev/null
+++ b/config/cray_gfortran.cfg
@@ -0,0 +1,68 @@
+# On LUMI
+# 
+# export SPACK_USER_PREFIX=$HOME/spack
+# module swap PrgEnv-cray/8.3.3 PrgEnv-gnu/8.3.3
+# module load spack/22.08
+# module load openblas/0.3.17-gcc-omp-xi
+# Common flags
+##############
+#
+# -ffree-line-length-none : Needed for IRPF90 which produces long lines
+# -lblas -llapack         : Link with libblas and liblapack libraries provided by the system
+# -I .                    : Include the curent directory (Mandatory)
+#
+# --ninja                 : Allow the utilisation of ninja. (Mandatory)
+# --align=32              : Align all provided arrays on a 32-byte boundary
+#
+#
+[COMMON]
+FC           : gfortran -ffree-line-length-none -I . -mavx -g -fPIC
+LAPACK_LIB   : -L/appl/lumi/spack/22.08/0.18.1/opt/spack/openblas-0.3.17-xinceno/lib -lopenblas
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 -DSET_NESTED 
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+# 
+[OPTION]
+MODE    : OPT        ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 0          ; Enable cache_compile.py
+OPENMP  : 1          ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -Ofast                  : Disregard strict standards compliance. Enables all -O3 optimizations.
+#                           It also enables optimizations that are not valid
+#                           for all standard-compliant programs.  It turns on
+#                           -ffast-math and the Fortran-specific
+#                           -fno-protect-parens and -fstack-arrays.
+[OPT]
+FCFLAGS : -Ofast -march=native
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC       : -p -g
+FCFLAGS  : -Ofast
+
+# Debugging flags
+#################
+#
+# -fcheck=all  : Checks uninitialized variables,  array subscripts, etc...
+# -g           : Extra debugging information
+#
+[DEBUG]
+FCFLAGS : -fcheck=all -g
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -fopenmp
+IRPF90_FLAGS : --openmp
+
diff --git a/config/cray.cfg b/config/cray_intel.cfg
similarity index 100%
rename from config/cray.cfg
rename to config/cray_intel.cfg
diff --git a/config/gfortran.cfg b/config/gfortran.cfg
index 56bb6ba4..33ce48ba 100644
--- a/config/gfortran.cfg
+++ b/config/gfortran.cfg
@@ -10,7 +10,7 @@
 #
 #
 [COMMON]
-FC           : gfortran -g -ffree-line-length-none -I . -fPIC
+FC           : gfortran -g -ffree-line-length-none -I . -fPIC -march=native
 LAPACK_LIB   : -lblas -llapack
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED
@@ -22,7 +22,7 @@ IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED
 # 0 : Deactivate
 #
 [OPTION]
-MODE    : DEBUG      ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+MODE    : OPT        ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
 CACHE   : 0          ; Enable cache_compile.py
 OPENMP  : 1          ; Append OpenMP flags
 
diff --git a/config/gfortran_armpl.cfg b/config/gfortran_armpl.cfg
new file mode 100644
index 00000000..fb5ee1cc
--- /dev/null
+++ b/config/gfortran_armpl.cfg
@@ -0,0 +1,65 @@
+# Common flags
+##############
+# module load arm
+# module load gnu
+# module load acfl
+#
+# -ffree-line-length-none : Needed for IRPF90 which produces long lines
+# -lblas -llapack         : Link with libblas and liblapack libraries provided by the system
+# -I .                    : Include the curent directory (Mandatory)
+#
+# --ninja                 : Allow the utilisation of ninja. (Mandatory)
+# --align=32              : Align all provided arrays on a 32-byte boundary
+#
+#
+[COMMON]
+FC           : gfortran -g -ffree-line-length-none -I . -fPIC -march=native
+LAPACK_LIB   : -larmpl_lp64
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+#
+[OPTION]
+MODE    : OPT      ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 0        ; Enable cache_compile.py
+OPENMP  : 1        ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -Ofast                  : Disregard strict standards compliance. Enables all -O3 optimizations.
+#                           It also enables optimizations that are not valid
+#                           for all standard-compliant programs.  It turns on
+#                           -ffast-math and the Fortran-specific
+#                           -fno-protect-parens and -fstack-arrays.
+[OPT]
+FCFLAGS : -Ofast
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC      : -p -g
+FCFLAGS : -Ofast
+
+# Debugging flags
+#################
+#
+# -fcheck=all  : Checks uninitialized variables,  array subscripts, etc...
+# -g           : Extra debugging information
+#
+[DEBUG]
+FCFLAGS : -g -march=native  -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized  -fbacktrace -ffpe-trap=zero,overflow,underflow -finit-real=nan
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -fopenmp
+IRPF90_FLAGS : --openmp
+
diff --git a/config/gfortran_openblas.cfg b/config/gfortran_openblas.cfg
new file mode 100644
index 00000000..ab67d8c3
--- /dev/null
+++ b/config/gfortran_openblas.cfg
@@ -0,0 +1,62 @@
+# Common flags
+##############
+#
+# -ffree-line-length-none : Needed for IRPF90 which produces long lines
+# -lblas -llapack         : Link with libblas and liblapack libraries provided by the system
+# -I .                    : Include the curent directory (Mandatory)
+#
+# --ninja                 : Allow the utilisation of ninja. (Mandatory)
+# --align=32              : Align all provided arrays on a 32-byte boundary
+#
+#
+[COMMON]
+FC           : gfortran -g -ffree-line-length-none -I . -fPIC -march=native
+LAPACK_LIB   : -lopenblas
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+#
+[OPTION]
+MODE    : OPT      ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 0        ; Enable cache_compile.py
+OPENMP  : 1        ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -Ofast                  : Disregard strict standards compliance. Enables all -O3 optimizations.
+#                           It also enables optimizations that are not valid
+#                           for all standard-compliant programs.  It turns on
+#                           -ffast-math and the Fortran-specific
+#                           -fno-protect-parens and -fstack-arrays.
+[OPT]
+FCFLAGS : -Ofast
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC      : -p -g
+FCFLAGS : -Ofast
+
+# Debugging flags
+#################
+#
+# -fcheck=all  : Checks uninitialized variables,  array subscripts, etc...
+# -g           : Extra debugging information
+#
+[DEBUG]
+FCFLAGS : -g -march=native  -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized  -fbacktrace -ffpe-trap=zero,overflow,underflow -finit-real=nan
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -fopenmp
+IRPF90_FLAGS : --openmp
+
diff --git a/config/ifort_2019_avx.cfg b/config/ifort_2019_avx.cfg
index 661a0e8f..c5bed0d8 100644
--- a/config/ifort_2019_avx.cfg
+++ b/config/ifort_2019_avx.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel 
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_NESTED
 
diff --git a/config/ifort_2019_avx_mpi.cfg b/config/ifort_2019_avx_mpi.cfg
index 2d212db5..5b4d2922 100644
--- a/config/ifort_2019_avx_mpi.cfg
+++ b/config/ifort_2019_avx_mpi.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL -DSET_NESTED
 
diff --git a/config/ifort_2019_debug.cfg b/config/ifort_2019_debug.cfg
new file mode 100644
index 00000000..cb14f467
--- /dev/null
+++ b/config/ifort_2019_debug.cfg
@@ -0,0 +1,66 @@
+# Common flags
+##############
+#
+# -mkl=[parallel|sequential] : Use the MKL library
+# --ninja                 : Allow the utilisation of ninja. It is mandatory !
+# --align=32                 : Align all provided arrays on a 32-byte boundary
+#
+[COMMON]
+FC           : ifort -fpic
+LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL -DSET_NESTED
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+# 
+[OPTION]
+MODE    : DEBUG      ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 0          ; Enable cache_compile.py
+OPENMP  : 1          ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -xHost                     : Compile a binary optimized for the current architecture
+# -O2                        : O3 not better than O2.
+# -ip                        : Inter-procedural optimizations
+# -ftz                       : Flushes denormal results to zero
+#
+[OPT]
+FC       : -traceback
+FCFLAGS  : -msse4.2 -O2 -ip -ftz -g 
+
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC       : -p -g
+FCFLAGS  : -msse4.2 -O2 -ip -ftz 
+
+
+# Debugging flags
+#################
+#
+# -traceback   : Activate backtrace on runtime
+# -fpe0        : All floating point exaceptions
+# -C           : Checks uninitialized variables,  array subscripts, etc...
+# -g           : Extra debugging information
+# -msse4.2     : Valgrind needs a very simple x86 executable
+#
+[DEBUG]
+FC      : -g -traceback
+FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0  -implicitnone
+
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -qopenmp
+IRPF90_FLAGS : --openmp
+
diff --git a/config/ifort_2019_mpi_rome.cfg b/config/ifort_2019_mpi_rome.cfg
index 171219e6..054d4d7d 100644
--- a/config/ifort_2019_mpi_rome.cfg
+++ b/config/ifort_2019_mpi_rome.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_NESTED
 
diff --git a/config/ifort_2019_rome.cfg b/config/ifort_2019_rome.cfg
index e923a1dd..a18a0acb 100644
--- a/config/ifort_2019_rome.cfg
+++ b/config/ifort_2019_rome.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_NESTED
 
diff --git a/config/ifort_2019_sse4.cfg b/config/ifort_2019_sse4.cfg
index a3aa7cbd..2cdbc2c5 100644
--- a/config/ifort_2019_sse4.cfg
+++ b/config/ifort_2019_sse4.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_NESTED
 
diff --git a/config/ifort_2019_sse4_mpi.cfg b/config/ifort_2019_sse4_mpi.cfg
index 6959d176..d20cd2a2 100644
--- a/config/ifort_2019_sse4_mpi.cfg
+++ b/config/ifort_2019_sse4_mpi.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL -DSET_NESTED
 
diff --git a/config/ifort_2019_xHost.cfg b/config/ifort_2019_xHost.cfg
index 22d28803..59c6146b 100644
--- a/config/ifort_2019_xHost.cfg
+++ b/config/ifort_2019_xHost.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=64 -DINTEL -DSET_NESTED
 
diff --git a/config/ifort_2021_avx.cfg b/config/ifort_2021_avx.cfg
index 6f657052..6c34cf47 100644
--- a/config/ifort_2021_avx.cfg
+++ b/config/ifort_2021_avx.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL 
 
diff --git a/config/ifort_2021_avx_mpi.cfg b/config/ifort_2021_avx_mpi.cfg
index c991a4a9..4c893c73 100644
--- a/config/ifort_2021_avx_mpi.cfg
+++ b/config/ifort_2021_avx_mpi.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL 
 
diff --git a/config/ifort_2021_mpi_rome.cfg b/config/ifort_2021_mpi_rome.cfg
index 8413d23d..e47a466e 100644
--- a/config/ifort_2021_mpi_rome.cfg
+++ b/config/ifort_2021_mpi_rome.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL 
 
diff --git a/config/ifort_2021_rome.cfg b/config/ifort_2021_rome.cfg
index b3023186..504438c9 100644
--- a/config/ifort_2021_rome.cfg
+++ b/config/ifort_2021_rome.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL 
 
diff --git a/config/ifort_2021_sse4.cfg b/config/ifort_2021_sse4.cfg
index a6299665..07c3ebb8 100644
--- a/config/ifort_2021_sse4.cfg
+++ b/config/ifort_2021_sse4.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL 
 
diff --git a/config/ifort_2021_sse4_mpi.cfg b/config/ifort_2021_sse4_mpi.cfg
index 6ae56d2a..f3fa0eaa 100644
--- a/config/ifort_2021_sse4_mpi.cfg
+++ b/config/ifort_2021_sse4_mpi.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL 
 
diff --git a/config/ifort_2021_xHost.cfg b/config/ifort_2021_xHost.cfg
index 1e76a69d..1161833b 100644
--- a/config/ifort_2021_xHost.cfg
+++ b/config/ifort_2021_xHost.cfg
@@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+LAPACK_LIB   : -mkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=64 -DINTEL 
 
diff --git a/configure b/configure
index 5bbff907..5b50d0d7 100755
--- a/configure
+++ b/configure
@@ -19,19 +19,9 @@ git submodule update
 # Update ARM or x86 dependencies
 ARCHITECTURE=$(uname -m)
 cd ${QP_ROOT}/external/qp2-dependencies
+git checkout master
+git pull
 echo "Architecture: $ARCHITECTURE"
-case $ARCHITECTURE in
-  aarch64)
-    git checkout arm64
-    ;;
-  x86_64) 
-    git checkout x86
-    ;;
-  *)
-    echo "Unknown architecture. Using x86_64."
-    git checkout x86
-    ;;
-esac
 cd ${QP_ROOT}
 
 
@@ -118,7 +108,7 @@ PACKAGES=$(echo $PACKAGES | xargs)
 
 echo "export QP_ROOT=\"$QP_ROOT\"" > ${QP_ROOT}/etc/00.qp_root.rc
 
-source quantum_package.rc
+source ${QP_ROOT}/quantum_package.rc
 
 
 
@@ -209,7 +199,7 @@ for PACKAGE in ${PACKAGES} ; do
 
             execute << EOF
               rm -f "\${QP_ROOT}"/bin/ninja
-              tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/ninja.tar.gz 
+              tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz 
               mv ninja "\${QP_ROOT}"/bin/
 EOF
 
@@ -256,7 +246,7 @@ EOF
             source "${QP_ROOT}"/quantum_package.rc
             rm -rf "${QP_ROOT}"/external/opampack
             cd "${QP_ROOT}"/external/
-            tar --gunzip --extract --file qp2-dependencies/opampack.tar.gz
+            tar --gunzip --extract --file qp2-dependencies/${ARCHITECTURE}/opampack.tar.gz
             cd "${QP_ROOT}"/external/opampack
             ./install.sh
             export OPAMROOT="${QP_ROOT}"/external/opampack/opamroot
@@ -268,7 +258,7 @@ EOF
             execute << EOF
               cd "\${QP_ROOT}"/external
               tar --gunzip --extract --file qp2-dependencies/bse-v0.8.11.tar.gz
-              pip install -e basis_set_exchange-*
+              python3 -m pip install -e basis_set_exchange-*
 EOF
 
     elif [[ ${PACKAGE} = zlib ]] ; then
@@ -303,8 +293,8 @@ EOF
 
             execute << EOF
               cd "\${QP_ROOT}"/external
-              tar -zxf qp2-dependencies/bats-v1.1.0.tar.gz
-              ( cd bats-core-1.1.0/ ; ./install.sh \${QP_ROOT})
+              tar -zxf qp2-dependencies/bats-v1.7.0.tar.gz
+              ( cd bats-core-1.7.0/ ; ./install.sh \${QP_ROOT})
 EOF
 
     else
@@ -315,7 +305,7 @@ EOF
 
 done
 
-source quantum_package.rc
+source ${QP_ROOT}/quantum_package.rc
 
 NINJA=$(find_exe ninja)
 if [[ ${NINJA} = $(not_found) ]] ; then
@@ -416,7 +406,7 @@ else
       echo ""
       echo "${QP_ROOT}/build.ninja does not exist,"
       echo "you need to specify the COMPILATION configuration file."
-      echo "See  ./configure --help  for more details."
+      echo "See  ./configure -h  for more details."
       echo ""
 fi
 
diff --git a/data/basis/cc-pcvdz b/data/basis/cc-pcvdz
index d874fb06..76985d4a 100644
--- a/data/basis/cc-pcvdz
+++ b/data/basis/cc-pcvdz
@@ -991,4 +991,266 @@ D   1
 1         1.3743000              1.0000000
 D   1
 1         0.0537000              1.00000000
-$END
\ No newline at end of file
+
+COPPER
+S   20
+1         5.430321E+06           7.801026E-06
+2         8.131665E+05           6.065666E-05
+3         1.850544E+05           3.188964E-04
+4         5.241466E+04           1.344687E-03
+5         1.709868E+04           4.869050E-03
+6         6.171994E+03           1.561013E-02
+7         2.406481E+03           4.452077E-02
+8         9.972584E+02           1.103111E-01
+9         4.339289E+02           2.220342E-01
+10        1.962869E+02           3.133739E-01
+11        9.104280E+01           2.315121E-01
+12        4.138425E+01           7.640920E-02
+13        1.993278E+01           1.103818E-01
+14        9.581891E+00           1.094372E-01
+15        4.234516E+00           1.836311E-02
+16        1.985814E+00          -6.043084E-04
+17        8.670830E-01           5.092245E-05
+18        1.813390E-01          -5.540730E-05
+19        8.365700E-02           3.969482E-05
+20        3.626700E-02          -1.269538E-05
+S   20
+1         5.430321E+06          -4.404706E-06
+2         8.131665E+05          -3.424801E-05
+3         1.850544E+05          -1.801238E-04
+4         5.241466E+04          -7.600455E-04
+5         1.709868E+04          -2.759348E-03
+6         6.171994E+03          -8.900970E-03
+7         2.406481E+03          -2.579378E-02
+8         9.972584E+02          -6.623861E-02
+9         4.339289E+02          -1.445927E-01
+10        1.962869E+02          -2.440110E-01
+11        9.104280E+01          -2.504837E-01
+12        4.138425E+01           2.852577E-02
+13        1.993278E+01           5.115874E-01
+14        9.581891E+00           4.928061E-01
+15        4.234516E+00           8.788437E-02
+16        1.985814E+00          -5.820281E-03
+17        8.670830E-01           2.013508E-04
+18        1.813390E-01          -5.182553E-04
+19        8.365700E-02           3.731503E-04
+20        3.626700E-02          -1.193171E-04
+S   20
+1         5.430321E+06           9.704682E-07
+2         8.131665E+05           7.549245E-06
+3         1.850544E+05           3.968892E-05
+4         5.241466E+04           1.677200E-04
+5         1.709868E+04           6.095101E-04
+6         6.171994E+03           1.978846E-03
+7         2.406481E+03           5.798049E-03
+8         9.972584E+02           1.534158E-02
+9         4.339289E+02           3.540484E-02
+10        1.962869E+02           6.702098E-02
+11        9.104280E+01           8.026945E-02
+12        4.138425E+01          -1.927231E-02
+13        1.993278E+01          -3.160129E-01
+14        9.581891E+00          -4.573162E-01
+15        4.234516E+00           1.550841E-01
+16        1.985814E+00           7.202872E-01
+17        8.670830E-01           3.885122E-01
+18        1.813390E-01           1.924326E-02
+19        8.365700E-02          -7.103807E-03
+20        3.626700E-02           3.272906E-03
+S   20
+1         5.430321E+06          -1.959354E-07
+2         8.131665E+05          -1.523472E-06
+3         1.850544E+05          -8.014808E-06
+4         5.241466E+04          -3.383992E-05
+5         1.709868E+04          -1.231191E-04
+6         6.171994E+03          -3.992085E-04
+7         2.406481E+03          -1.171900E-03
+8         9.972584E+02          -3.096141E-03
+9         4.339289E+02          -7.171993E-03
+10        1.962869E+02          -1.356621E-02
+11        9.104280E+01          -1.643989E-02
+12        4.138425E+01           4.107628E-03
+13        1.993278E+01           6.693964E-02
+14        9.581891E+00           1.028221E-01
+15        4.234516E+00          -4.422945E-02
+16        1.985814E+00          -2.031191E-01
+17        8.670830E-01          -2.230022E-01
+18        1.813390E-01           2.517975E-01
+19        8.365700E-02           5.650091E-01
+20        3.626700E-02           3.247243E-01
+S   20
+1         5.430321E+06          -7.508267E-07
+2         8.131665E+05          -5.972018E-06
+3         1.850544E+05          -3.039682E-05
+4         5.241466E+04          -1.340405E-04
+5         1.709868E+04          -4.615778E-04
+6         6.171994E+03          -1.601064E-03
+7         2.406481E+03          -4.330942E-03
+8         9.972584E+02          -1.265434E-02
+9         4.339289E+02          -2.586864E-02
+10        1.962869E+02          -5.835428E-02
+11        9.104280E+01          -5.132322E-02
+12        4.138425E+01          -1.908953E-02
+13        1.993278E+01           3.586116E-01
+14        9.581891E+00           3.885818E-01
+15        4.234516E+00          -3.057106E-01
+16        1.985814E+00          -2.069896E+00
+17        8.670830E-01           2.431774E+00
+18        1.813390E-01          -2.121974E-02
+19        8.365700E-02          -1.820251E+00
+20        3.626700E-02           1.434585E+00
+S   20
+1         5.430321E+06          -3.532229E-07
+2         8.131665E+05          -2.798812E-06
+3         1.850544E+05          -1.432517E-05
+4         5.241466E+04          -6.270946E-05
+5         1.709868E+04          -2.179490E-04
+6         6.171994E+03          -7.474316E-04
+7         2.406481E+03          -2.049271E-03
+8         9.972584E+02          -5.885203E-03
+9         4.339289E+02          -1.226885E-02
+10        1.962869E+02          -2.683147E-02
+11        9.104280E+01          -2.479261E-02
+12        4.138425E+01          -5.984746E-03
+13        1.993278E+01           1.557124E-01
+14        9.581891E+00           1.436683E-01
+15        4.234516E+00           8.374103E-03
+16        1.985814E+00          -7.460711E-01
+17        8.670830E-01           1.244367E-01
+18        1.813390E-01           1.510110E+00
+19        8.365700E-02          -3.477122E-01
+20        3.626700E-02          -9.774169E-01
+S   1
+1         3.626700E-02           1.000000E+00
+S   1
+1         0.0157200              1.0000000
+P   16
+1         2.276057E+04           4.000000E-05
+2         5.387679E+03           3.610000E-04
+3         1.749945E+03           2.083000E-03
+4         6.696653E+02           9.197000E-03
+5         2.841948E+02           3.266000E-02
+6         1.296077E+02           9.379500E-02
+7         6.225415E+01           2.082740E-01
+8         3.092964E+01           3.339930E-01
+9         1.575827E+01           3.324930E-01
+10        8.094211E+00           1.547280E-01
+11        4.046921E+00           2.127100E-02
+12        1.967869E+00          -1.690000E-03
+13        9.252950E-01          -1.516000E-03
+14        3.529920E-01          -2.420000E-04
+15        1.273070E-01           2.300000E-05
+16        4.435600E-02          -9.000000E-06
+P   16
+1         2.276057E+04          -1.500000E-05
+2         5.387679E+03          -1.310000E-04
+3         1.749945E+03          -7.550000E-04
+4         6.696653E+02          -3.359000E-03
+5         2.841948E+02          -1.208100E-02
+6         1.296077E+02          -3.570300E-02
+7         6.225415E+01          -8.250200E-02
+8         3.092964E+01          -1.398900E-01
+9         1.575827E+01          -1.407290E-01
+10        8.094211E+00           3.876600E-02
+11        4.046921E+00           3.426950E-01
+12        1.967869E+00           4.523100E-01
+13        9.252950E-01           2.770540E-01
+14        3.529920E-01           4.388500E-02
+15        1.273070E-01          -2.802000E-03
+16        4.435600E-02           1.152000E-03
+P   16
+1         2.276057E+04           5.000000E-06
+2         5.387679E+03           4.900000E-05
+3         1.749945E+03           2.780000E-04
+4         6.696653E+02           1.253000E-03
+5         2.841948E+02           4.447000E-03
+6         1.296077E+02           1.337000E-02
+7         6.225415E+01           3.046900E-02
+8         3.092964E+01           5.344700E-02
+9         1.575827E+01           5.263900E-02
+10        8.094211E+00          -1.688100E-02
+11        4.046921E+00          -1.794480E-01
+12        1.967869E+00          -2.095880E-01
+13        9.252950E-01          -3.963300E-02
+14        3.529920E-01           5.021300E-01
+15        1.273070E-01           5.811110E-01
+16        4.435600E-02           4.566600E-02
+P   16
+1         2.276057E+04           1.100000E-05
+2         5.387679E+03           9.600000E-05
+3         1.749945E+03           5.900000E-04
+4         6.696653E+02           2.484000E-03
+5         2.841948E+02           9.463000E-03
+6         1.296077E+02           2.645300E-02
+7         6.225415E+01           6.568900E-02
+8         3.092964E+01           1.027320E-01
+9         1.575827E+01           1.370410E-01
+10        8.094211E+00          -7.096100E-02
+11        4.046921E+00          -5.047080E-01
+12        1.967869E+00          -4.780560E-01
+13        9.252950E-01           9.428920E-01
+14        3.529920E-01           5.446990E-01
+15        1.273070E-01          -8.327660E-01
+16        4.435600E-02          -1.084160E-01
+P   16
+1         2.276057E+04           3.000000E-06
+2         5.387679E+03           2.500000E-05
+3         1.749945E+03           1.470000E-04
+4         6.696653E+02           6.560000E-04
+5         2.841948E+02           2.351000E-03
+6         1.296077E+02           7.004000E-03
+7         6.225415E+01           1.613100E-02
+8         3.092964E+01           2.777000E-02
+9         1.575827E+01           2.756700E-02
+10        8.094211E+00          -1.011500E-02
+11        4.046921E+00          -8.100900E-02
+12        1.967869E+00          -1.104090E-01
+13        9.252950E-01          -7.173200E-02
+14        3.529920E-01           1.879300E-01
+15        1.273070E-01           5.646290E-01
+16        4.435600E-02           4.070000E-01
+P   1
+1         4.435600E-02           1.000000E+00
+P   1
+1         0.0154500              1.0000000
+D   8
+1         1.738970E+02           2.700000E-03
+2         5.188690E+01           2.090900E-02
+3         1.934190E+01           8.440800E-02
+4         7.975720E+00           2.139990E-01
+5         3.398230E+00           3.359800E-01
+6         1.409320E+00           3.573010E-01
+7         5.488580E-01           2.645780E-01
+8         1.901990E-01           1.039720E-01
+D   8
+1         1.738970E+02          -3.363000E-03
+2         5.188690E+01          -2.607900E-02
+3         1.934190E+01          -1.082310E-01
+4         7.975720E+00          -2.822170E-01
+5         3.398230E+00          -3.471900E-01
+6         1.409320E+00           2.671100E-02
+7         5.488580E-01           4.920470E-01
+8         1.901990E-01           4.384220E-01
+D   8
+1         1.738970E+02           4.133000E-03
+2         5.188690E+01           3.308500E-02
+3         1.934190E+01           1.383360E-01
+4         7.975720E+00           3.901660E-01
+5         3.398230E+00           1.698420E-01
+6         1.409320E+00          -6.830180E-01
+7         5.488580E-01          -2.657970E-01
+8         1.901990E-01           8.380630E-01
+D   1
+1         1.901990E-01           1.000000E+00
+D   1
+1         0.0659100              1.0000000
+F   1
+1         5.082100E+00           1.000000E+00
+F   1
+1         1.279700E+00           1.000000E+00
+F   1
+1         0.4617200              1.0000000
+G   1
+1         3.483500E+00           1.0000000
+G   1
+1         1.4597900              1.0000000
+$END
diff --git a/data/basis/cc-pv5z_ecp_bfd b/data/basis/cc-pv5z_ecp_bfd
index a19ce9d8..84b0300e 100644
--- a/data/basis/cc-pv5z_ecp_bfd
+++ b/data/basis/cc-pv5z_ecp_bfd
@@ -555,7 +555,7 @@ g 1 1.00
 g 1 1.00
 1 0.457496     1.000000
 
-MAGNESIUM 
+MAGNESIUM
 s 9 1.00
 1 0.030975     0.165290
 2 0.062959     0.506272
diff --git a/external/.gitignore b/external/.gitignore
index 676c79b7..241e560d 100644
--- a/external/.gitignore
+++ b/external/.gitignore
@@ -1,2 +1,2 @@
-#*
+*
 
diff --git a/external/qp2-dependencies b/external/qp2-dependencies
index b8cd5815..ce14f57b 160000
--- a/external/qp2-dependencies
+++ b/external/qp2-dependencies
@@ -1 +1 @@
-Subproject commit b8cd5815bce14c9b880e3c5ea3d5fc2652f5955c
+Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4
diff --git a/include/.gitignore b/include/.gitignore
index d52be2d2..72e8ffc0 100644
--- a/include/.gitignore
+++ b/include/.gitignore
@@ -1,7 +1 @@
-zmq.h
-gmp.h
-zconf.h
-zconf.h
-zlib.h
-zmq_utils.h
-f77_zmq_free.h
+*
diff --git a/ocaml/Makefile b/ocaml/Makefile
index 8853a991..c03be131 100644
--- a/ocaml/Makefile
+++ b/ocaml/Makefile
@@ -43,7 +43,7 @@ $(QP_ROOT)/data/executables: remake_executables element_create_db.byte Qptypes.m
 	$(QP_ROOT)/ocaml/element_create_db.byte
 
 external_libs:
-	opam install cryptokit sexplib
+	opam install sexplib
 
 qpackage.odocl: $(MLIFILES)
 	ls $(MLIFILES) | sed "s/\.mli//" > qpackage.odocl
diff --git a/ocaml/To_md5.ml b/ocaml/To_md5.ml
index bc6608f9..1485678c 100644
--- a/ocaml/To_md5.ml
+++ b/ocaml/To_md5.ml
@@ -4,8 +4,8 @@ open Sexplib
 let to_md5 sexp_of_t t =
   sexp_of_t t
   |> Sexp.to_string
-  |> Cryptokit.hash_string (Cryptokit.Hash.md5 ())
-  |> Cryptokit.transform_string (Cryptokit.Hexa.encode ())
+  |> Digest.string 
+  |> Digest.to_hex
   |> MD5.of_string
 ;;
 
diff --git a/ocaml/_tags b/ocaml/_tags
index 55b1c681..0ff23d59 100644
--- a/ocaml/_tags
+++ b/ocaml/_tags
@@ -1,4 +1,4 @@
-true: package(cryptokit,zarith,zmq,str,sexplib,ppx_sexp_conv,ppx_deriving,getopt)
+true: package(zarith,zmq,str,sexplib,ppx_sexp_conv,ppx_deriving,getopt)
 true: thread
 false: profile
 <*byte>   : linkdep(c_bindings.o), custom
diff --git a/scripts/.gitignore b/scripts/.gitignore
index b44ac5a2..103b3ae9 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -2,3 +2,6 @@
 *.pyo
 docopt.py
 resultsFile/
+verif_omp/a.out
+src/*/Makefile
+src/*/*/
diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja
index c0ba8c6a..aad85778 100755
--- a/scripts/compilation/qp_create_ninja
+++ b/scripts/compilation/qp_create_ninja
@@ -99,9 +99,20 @@ def ninja_create_env_variable(pwd_config_file):
     l_string = ["builddir = {0}".format(os.path.dirname(ROOT_BUILD_NINJA)),
                 ""]
 
+
     for flag in ["FC", "FCFLAGS", "IRPF90", "IRPF90_FLAGS"]:
         str_ = "{0} = {1}".format(flag, get_compilation_option(pwd_config_file,
                                                                flag))
+        for directory in [real_join(QP_SRC, m) for m in sorted(os.listdir(QP_SRC))]:
+            includefile = real_join(directory, flag)
+            try:
+                content = ""
+                with open(includefile,'r') as f:
+                    content = f.read()
+                    str_ += " "+content
+            except IOError:
+                pass
+
         l_string.append(str_)
 
     lib_lapack = get_compilation_option(pwd_config_file, "LAPACK_LIB")
@@ -110,17 +121,20 @@ def ninja_create_env_variable(pwd_config_file):
     str_lib = " ".join([lib_lapack, EZFIO_LIB, ZMQ_LIB, LIB, lib_usr])
 
     # Read all LIB files in modules
-    libfile = "LIB"
-    try:
-        content = ""
-        with open(libfile,'r') as f:
-            content = f.read()
-            str_lib += " "+content
-    except IOError:
-        pass
+    for directory in [real_join(QP_SRC, m) for m in sorted(os.listdir(QP_SRC))]:
+        libfile = real_join(directory, "LIB")
+        try:
+            content = ""
+            with open(libfile,'r') as f:
+                content = f.read().replace('\n','')
+                str_lib += " "+content
+        except IOError:
+            pass
 
     l_string.append("LIB = {0} ".format(str_lib))
 
+
+    l_string.append("CONFIG_FILE = {0}".format(pwd_config_file))
     l_string.append("")
 
     return l_string
diff --git a/scripts/ezfio_interface/ezfio_generate_provider.py b/scripts/ezfio_interface/ezfio_generate_provider.py
index 6b49955b..a282b834 100755
--- a/scripts/ezfio_interface/ezfio_generate_provider.py
+++ b/scripts/ezfio_interface/ezfio_generate_provider.py
@@ -52,7 +52,7 @@ BEGIN_PROVIDER [ %(type)s, %(name)s %(size)s ]
     %(test_null_size)s
     call ezfio_has_%(ezfio_dir)s_%(ezfio_name)s(has)
     if (has) then
-      write(6,'(A)') '.. >>>>> [ IO READ: %(name)s ] <<<<< ..'
+!      write(6,'(A)') '.. >>>>> [ IO READ: %(name)s ] <<<<< ..'
       call ezfio_get_%(ezfio_dir)s_%(ezfio_name)s(%(name)s)
     else
       print *, '%(ezfio_dir)s/%(ezfio_name)s not found in EZFIO file'
@@ -117,7 +117,7 @@ END_PROVIDER
         output = self.output
         name = self.name
         l_write = ["",
-                   "  call write_time(%(output)s)",
+                   "!  call write_time(%(output)s)",
                    ""]
 
         self.write = "\n".join(l_write) % locals()
@@ -129,7 +129,7 @@ END_PROVIDER
                 write = self.write_correspondance[self.type]
 
                 l_write = ["",
-                          "  call write_time(%(output)s)",
+                          "!  call write_time(%(output)s)",
                           "  call %(write)s(%(output)s, %(name)s, &",
                           "     '%(name)s')",
                           ""]
diff --git a/scripts/get_fci_conv.sh b/scripts/get_fci_conv.sh
new file mode 100755
index 00000000..8b0f5ac2
--- /dev/null
+++ b/scripts/get_fci_conv.sh
@@ -0,0 +1,7 @@
+file=$1
+grep "N_det             =" $1 | cut -d "=" -f 2 > N_det_tmp
+grep "E               =" $file | cut -d "=" -f 2 > E_tmp
+grep "E+PT2           =" $file | cut -d "=" -f 2 | cut -d "+" -f 1 > E+PT2_tmp
+grep "E+rPT2          =" $file | cut -d "=" -f 2 | cut -d "+" -f 1 > E+rPT2_tmp
+paste N_det_tmp E_tmp E+PT2_tmp E+rPT2_tmp | column -s ' ' -t > $file.conv_fci
+rm N_det_tmp E_tmp E+PT2_tmp E+rPT2_tmp
diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py
new file mode 100755
index 00000000..8f0b1f3c
--- /dev/null
+++ b/scripts/qp_extract_cipsi_data.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+
+import re
+import sys
+
+# Read output file
+with open(sys.argv[1], 'r') as file:
+  output = file.read()
+
+
+def extract_data(output):
+    lines = output.split("\n")
+    data = []
+
+    n_det = None
+    e = None
+    pt2 = None
+    err_pt2 = None
+    rpt2 = None
+    err_rpt2 = None
+    e_ex = None
+
+
+    reading = False
+    for iline, line in enumerate(lines):
+        if not reading and line.startswith(" N_det "):
+            n_det = int(re.search(r"N_det\s+=\s+(\d+)", line).group(1))
+            reading = True
+
+        if reading:
+            if line.startswith(" E "):
+                e = float(re.search(r"E\s+=\s+(-?\d+\.\d+)", line).group(1))
+            elif line.startswith(" PT2 "):
+                pt2 = float(re.search(r"PT2\s+=\s+(-?\d+\.\d+E?.\d*)", line).group(1))
+                err_pt2 = float(re.search(r"\+/-\s+(-?\d+\.\d+E?.\d*)", line).group(1))
+            elif line.startswith(" rPT2 "):
+                rpt2 = float(re.search(r"rPT2\s+=\s+(-?\d+\.\d+E?.\d*)", line).group(1))
+                err_rpt2 = float(re.search(r"\+/-\s+(-?\d+\.\d+E?.\d*)", line).group(1))
+            elif "minimum PT2 Extrapolated energy" in line:
+                e_ex_line = lines[iline+2]
+                e_ex = float(e_ex_line.split()[1])
+                reading = False
+
+                data.append((n_det, e, pt2, err_pt2, rpt2, err_rpt2, e_ex))
+                n_det = e = pt2 = err_pt2 = rpt2 = err_rpt2 = e_ex = None
+
+    return data
+
+data = extract_data(output)
+
+for item in data:
+    print(" ".join(str(x) for x in item))
+
+
diff --git a/scripts/script_fci_tc.sh b/scripts/script_fci_tc.sh
new file mode 100755
index 00000000..26ef2aaf
--- /dev/null
+++ b/scripts/script_fci_tc.sh
@@ -0,0 +1,33 @@
+ source ~/qp2/quantum_package.rc  
+ alpha=1.8
+ input=O
+ basis=cc-pvdz
+ mult=3
+ output=${input}_${basis}_al_${alpha}
+ qp create_ezfio -b ${basis} ${input}.xyz -m $mult
+ qp run scf 
+ qp set perturbation pt2_max 0.0001
+ qp set_frozen_core 
+
+########## FCI CALCULATION FOR REFERENCE 
+ qp run fci | tee ${EZFIO_FILE}.fci.out
+ qp run sort_wf 
+ mv ${EZFIO_FILE}.wf_sorted ${EZFIO_FILE}_fci.wf_sorted
+########### TC SCF CALCULATION
+ qp reset -d 
+ qp set ao_two_e_erf_ints mu_erf 0.87 
+ qp set tc_keywords j1b_type 3
+ qp set tc_keywords j1b_pen "[${alpha}]"
+ qp set tc_keywords bi_ortho True 
+ qp set tc_keywords test_cycle_tc True
+ qp set tc_keywords write_tc_integ True
+ qp set tc_keywords read_tc_integ False
+ qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out
+ qp set tc_keywords write_tc_integ False
+ qp set tc_keywords read_tc_integ True
+############ TC-FCI CALCULATION
+ qp run fci_tc_bi_ortho | tee ${EZFIO_FILE}.fci_tc_bi_ortho.out
+ grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" ${EZFIO_FILE}.fci_tc_bi_ortho.out | cut -d "=" -f 2 > data_al_$alpha
+ qp run sort_wf 
+ mv ${EZFIO_FILE}.wf_sorted ${EZFIO_FILE}_tc_fci.wf_sorted
+ 
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 00000000..6353c21a
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,11 @@
+*
+!README.rst
+!*/
+*/*
+!*/*.*
+*/*.o
+*/build.ninja
+*/ezfio_interface.irp.f
+*/.gitignore
+*/*.swp
+
diff --git a/src/ao_many_one_e_ints/NEED b/src/ao_many_one_e_ints/NEED
new file mode 100644
index 00000000..0d08442c
--- /dev/null
+++ b/src/ao_many_one_e_ints/NEED
@@ -0,0 +1,5 @@
+ao_one_e_ints
+ao_two_e_ints
+becke_numerical_grid
+mo_one_e_ints
+dft_utils_in_r
diff --git a/src/ao_many_one_e_ints/README.rst b/src/ao_many_one_e_ints/README.rst
new file mode 100644
index 00000000..6d2c083f
--- /dev/null
+++ b/src/ao_many_one_e_ints/README.rst
@@ -0,0 +1,25 @@
+==================
+ao_many_one_e_ints
+==================
+
+This module contains A LOT of one-electron integrals of the type 
+A_ij( r ) = \int dr' phi_i(r') w(r,r') phi_j(r') 
+where r is a point in real space. 
+
++) ao_gaus_gauss.irp.f: w(r,r') is a exp(-(r-r')^2)  , and can be multiplied by x/y/z
++) ao_erf_gauss.irp.f : w(r,r') is a exp(-(r-r')^2) erf(mu * |r-r'|)/|r-r'| , and can be multiplied by x/y/z
++) ao_erf_gauss_grad.irp.f: w(r,r') is a exp(-(r-r')^2) erf(mu * |r-r'|)/|r-r'| , and can be multiplied by x/y/z, but evaluated with also one gradient of an AO function. 
+
+Fit of a Slater function and corresponding integrals
+----------------------------------------------------
+The file fit_slat_gauss.irp.f contains many useful providers/routines to fit a Slater function with 20 gaussian. 
++) coef_fit_slat_gauss : coefficients of the gaussians to fit e^(-x)
++) expo_fit_slat_gauss : exponents of the gaussians to fit e^(-x)
+
+Integrals involving Slater functions : stg_gauss_int.irp.f
+
+Taylor expansion of full correlation factor
+-------------------------------------------
+In taylor_exp.irp.f you might find interesting integrals of the type 
+\int dr' exp( e^{-alpha |r-r|' - beta |r-r'|^2}) phi_i(r') phi_j(r') 
+evaluated as a Taylor expansion of the exponential. 
diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
new file mode 100644
index 00000000..3d7fbe50
--- /dev/null
+++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
@@ -0,0 +1,1113 @@
+
+! ---
+
+subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints)
+ implicit none
+ BEGIN_DOC
+! xyz_ints(1/2/3) = int dr phi_j(r) [erf(mu  |r - C|)/|r-C|]  x/y/z phi_i(r)
+!
+! where phi_i and phi_j are AOs
+ END_DOC
+ integer, intent(in) :: i,j
+ double precision, intent(in) :: mu_in, C_center(3)
+ double precision, intent(out):: xyz_ints(3)
+ integer :: num_A,power_A(3), num_b, power_B(3),power_B_tmp(3)
+ double precision :: alpha, beta, A_center(3), B_center(3),contrib,NAI_pol_mult_erf
+ integer :: n_pt_in,l,m,mm
+ xyz_ints = 0.d0
+ if(ao_overlap_abs(j,i).lt.1.d-12)then
+  return
+ endif
+ n_pt_in = n_pt_max_integrals
+ ! j
+ num_A = ao_nucl(j)
+ power_A(1:3)= ao_power(j,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ ! i
+ num_B = ao_nucl(i)
+ power_B(1:3)= ao_power(i,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+
+ do l=1,ao_prim_num(j)
+  alpha = ao_expo_ordered_transp(l,j)
+  do m=1,ao_prim_num(i)
+    beta = ao_expo_ordered_transp(m,i)
+    do mm = 1, 3
+     ! (x phi_i ) * phi_j
+     ! x * (x - B_x)^b_x = b_x (x - B_x)^b_x + 1 * (x - B_x)^{b_x+1}
+     !
+     ! first contribution :: B_x (x - B_x)^b_x :: usual integral multiplied by B_x
+     power_B_tmp = power_B
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
+     xyz_ints(mm) += contrib * B_center(mm) * ao_coef_normalized_ordered_transp(l,j)             &
+                                            * ao_coef_normalized_ordered_transp(m,i)
+     ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1
+     power_B_tmp(mm) += 1
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
+     xyz_ints(mm) += contrib * 1.d0        * ao_coef_normalized_ordered_transp(l,j)             &
+                                           * ao_coef_normalized_ordered_transp(m,i)
+    enddo
+  enddo
+ enddo
+end
+
+! ---
+
+double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center)
+
+  BEGIN_DOC
+  ! phi_j_erf_mu_r_phi  = int dr phi_j(r) [erf(mu  |r - C|)/|r-C|]  phi_i(r)
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i,j
+  double precision, intent(in) :: mu_in, C_center(3)
+
+  integer          :: num_A, power_A(3), num_b, power_B(3)
+  integer          :: n_pt_in, l, m
+  double precision :: alpha, beta, A_center(3), B_center(3), contrib
+
+  double precision :: NAI_pol_mult_erf
+
+  phi_j_erf_mu_r_phi = 0.d0
+
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  n_pt_in = n_pt_max_integrals
+
+  ! j
+  num_A         = ao_nucl(j)
+  power_A(1:3)  = ao_power(j,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+
+  ! i
+  num_B         = ao_nucl(i)
+  power_B(1:3)  = ao_power(i,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  do l = 1, ao_prim_num(j)
+   alpha = ao_expo_ordered_transp(l,j)
+   do m = 1, ao_prim_num(i)
+     beta = ao_expo_ordered_transp(m,i)
+
+     contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+
+     phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) *  ao_coef_normalized_ordered_transp(m,i)
+   enddo
+  enddo
+
+end function phi_j_erf_mu_r_phi
+
+! ---
+
+subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints)
+ implicit none
+ BEGIN_DOC
+  ! gauss_ints(m) =   \int dr exp(-delta (r - C)^2 ) x/y/z * ( 1 - erf(mu |r-r'|))/ |r-r'| * AO_i(r') * AO_j(r')
+  !
+  ! with m = 1 ==> x, m = 2, m = 3 ==> z
+  !
+  !      m = 4 ==> no x/y/z
+ END_DOC
+ integer, intent(in) :: i,j
+ double precision, intent(in) :: mu, C_center(3),delta
+ double precision, intent(out):: gauss_ints(4)
+
+ integer :: num_A,power_A(3), num_b, power_B(3)
+ double precision :: alpha, beta, A_center(3), B_center(3),contrib,NAI_pol_mult_erf
+ double precision :: xyz_ints(4)
+ integer :: n_pt_in,l,m,mm
+ gauss_ints = 0.d0
+ if(ao_overlap_abs(j,i).lt.1.d-12)then
+  return
+ endif
+ n_pt_in = n_pt_max_integrals
+ ! j
+ num_A = ao_nucl(j)
+ power_A(1:3)= ao_power(j,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ ! i
+ num_B = ao_nucl(i)
+ power_B(1:3)= ao_power(i,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+
+ gauss_ints = 0.d0
+ do l=1,ao_prim_num(j)
+  alpha = ao_expo_ordered_transp(l,j)
+  do m=1,ao_prim_num(i)
+    beta = ao_expo_ordered_transp(m,i)
+    call erfc_mu_gauss_xyz(C_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in,xyz_ints)
+    do mm = 1, 4
+     gauss_ints(mm) += xyz_ints(mm)  * ao_coef_normalized_ordered_transp(l,j)             &
+                                     * ao_coef_normalized_ordered_transp(m,i)
+    enddo
+  enddo
+ enddo
+end
+
+! ---
+
+subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints)
+
+  BEGIN_DOC
+  !
+  ! gauss_ints = \int dr exp(-delta (r - C)^2) * erf(mu |r-C|) / |r-C| * AO_i(r) * AO_j(r)
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, j
+  double precision, intent(in)  :: mu, C_center(3), delta
+  double precision, intent(out) :: gauss_ints
+
+  integer                       :: n_pt_in, l, m
+  integer                       :: num_A, power_A(3), num_b, power_B(3)
+  double precision              :: alpha, beta, A_center(3), B_center(3), coef
+  double precision              :: integral
+
+  double precision              :: erf_mu_gauss
+
+  gauss_ints = 0.d0
+
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  n_pt_in = n_pt_max_integrals
+
+  ! j
+  num_A         = ao_nucl(j)
+  power_A(1:3)  = ao_power(j,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+
+  ! i
+  num_B         = ao_nucl(i)
+  power_B(1:3)  = ao_power(i,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  do l = 1, ao_prim_num(j)
+   alpha = ao_expo_ordered_transp(l,j)
+   do m = 1, ao_prim_num(i)
+     beta = ao_expo_ordered_transp(m,i)
+     coef = ao_coef_normalized_ordered_transp(l,j) * ao_coef_normalized_ordered_transp(m,i)
+
+     if(dabs(coef) .lt. 1.d-12) cycle
+
+     integral = erf_mu_gauss(C_center, delta, mu, A_center, B_center, power_A, power_B, alpha, beta, n_pt_in)
+
+     gauss_ints += integral * coef
+   enddo
+  enddo
+
+end subroutine erf_mu_gauss_ij_ao
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: mu_in, C_center(3)
+  double precision, intent(out) :: ints(3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3), m
+  double precision              :: A_center(3), B_center(3), integral, alpha, beta, coef
+
+  double precision              :: NAI_pol_mult_erf
+
+  ints = 0.d0
+
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      power_xA = power_A
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        beta = ao_expo_ordered_transp(j,j_ao)
+        coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+        ints(m) += integral * coef
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+        ints(m) += A_center(m) * integral * coef
+
+      enddo
+    enddo
+  enddo
+
+end subroutine NAI_pol_x_mult_erf_ao
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, LD_C, LD_ints, n_points
+  double precision, intent(in)  :: mu_in, C_center(LD_C,3)
+  double precision, intent(out) :: ints(LD_ints,3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in
+  integer                       :: power_xA(3), m, ipoint
+  double precision              :: A_center(3), B_center(3), alpha, beta, coef
+  double precision, allocatable :: integral(:)
+
+  ints(1:LD_ints,1:3) = 0.d0
+
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  allocate(integral(n_points))
+  integral = 0.d0
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA = power_A
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        beta = ao_expo_ordered_transp(j,j_ao)
+        coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += A_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_v0
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, LD_C, LD_ints, n_points(3)
+  double precision, intent(in)  :: mu_in, C_center(LD_C,3,3)
+  double precision, intent(out) :: ints(LD_ints,3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, LD_integral
+  integer                       :: power_xA(3), m, ipoint, n_points_m
+  double precision              :: A_center(3), B_center(3), alpha, beta, coef
+  double precision, allocatable :: integral(:)
+
+  ints(1:LD_ints,1:3) = 0.d0
+
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  LD_integral = max(max(n_points(1), n_points(2)), n_points(3))
+  allocate(integral(LD_integral))
+  integral = 0.d0
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+      n_points_m = n_points(m)
+
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA = power_A
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        beta = ao_expo_ordered_transp(j,j_ao)
+        coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        call NAI_pol_mult_erf_v( A_center, B_center, power_xA, power_B, alpha, beta & 
+                               , C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m)
+        do ipoint = 1, n_points_m
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_v( A_center, B_center, power_A, power_B, alpha, beta &
+                               , C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m)
+        do ipoint = 1, n_points_m
+          ints(ipoint,m) += A_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_v
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_x(i_ao, j_ao, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: mu_in, C_center(3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3)
+  double precision              :: A_center(3), B_center(3), integral, alpha, beta, coef
+
+  double precision              :: NAI_pol_mult_erf
+
+  NAI_pol_x_mult_erf_ao_x = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return
+
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  power_xA = power_A
+  power_xA(1) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      beta = ao_expo_ordered_transp(j,j_ao)
+      coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+      ! First term = (x-Ax)**(ax+1)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_x += integral * coef
+
+      ! Second term = Ax * (x-Ax)**(ax)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_x += A_center(1) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_x
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_y(i_ao, j_ao, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: mu_in, C_center(3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3)
+  double precision              :: A_center(3), B_center(3), integral, alpha, beta, coef
+
+  double precision              :: NAI_pol_mult_erf
+
+  NAI_pol_x_mult_erf_ao_y = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return
+
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  power_xA = power_A
+  power_xA(2) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      beta = ao_expo_ordered_transp(j,j_ao)
+      coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+      ! First term = (x-Ax)**(ax+1)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_y += integral * coef
+
+      ! Second term = Ax * (x-Ax)**(ax)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_y += A_center(2) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_y
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_z(i_ao, j_ao, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: mu_in, C_center(3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3)
+  double precision              :: A_center(3), B_center(3), integral, alpha, beta, coef
+
+  double precision              :: NAI_pol_mult_erf
+
+  NAI_pol_x_mult_erf_ao_z = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return
+
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  power_xA = power_A
+  power_xA(3) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      beta = ao_expo_ordered_transp(j,j_ao)
+      coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+      ! First term = (x-Ax)**(ax+1)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_z += integral * coef
+
+      ! Second term = Ax * (x-Ax)**(ax)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_z += A_center(3) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_z
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_with1s_x(i_ao, j_ao, beta, B_center, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: beta, B_center(3), mu_in, C_center(3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3)
+  double precision              :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi
+
+  double precision, external    :: NAI_pol_mult_erf_with1s
+  double precision, external    :: NAI_pol_x_mult_erf_ao_x
+
+  ASSERT(beta .ge. 0.d0)
+  if(beta .lt. 1d-10) then
+    NAI_pol_x_mult_erf_ao_with1s_x = NAI_pol_x_mult_erf_ao_x(i_ao, j_ao, mu_in, C_center)
+    return
+  endif
+
+  NAI_pol_x_mult_erf_ao_with1s_x = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then
+    return
+  endif
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  power_xA     = power_Ai
+  power_xA(1) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      alphaj = ao_expo_ordered_transp                   (j,j_ao)
+      coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao) 
+
+      ! First term = (x-Ax)**(ax+1)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_x += integral * coef
+
+      ! Second term = Ax * (x-Ax)**(ax)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_x += Ai_center(1) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_with1s_x
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_with1s_y(i_ao, j_ao, beta, B_center, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: beta, B_center(3), mu_in, C_center(3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3)
+  double precision              :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi
+
+  double precision, external    :: NAI_pol_mult_erf_with1s
+  double precision, external    :: NAI_pol_x_mult_erf_ao_y
+
+  ASSERT(beta .ge. 0.d0)
+  if(beta .lt. 1d-10) then
+    NAI_pol_x_mult_erf_ao_with1s_y = NAI_pol_x_mult_erf_ao_y(i_ao, j_ao, mu_in, C_center)
+    return
+  endif
+
+  NAI_pol_x_mult_erf_ao_with1s_y = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then
+    return
+  endif
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  power_xA     = power_Ai
+  power_xA(2) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      alphaj = ao_expo_ordered_transp                   (j,j_ao)
+      coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao) 
+
+      ! First term = (x-Ax)**(ax+1)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_y += integral * coef
+
+      ! Second term = Ax * (x-Ax)**(ax)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_y += Ai_center(2) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_with1s_y
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_with1s_z(i_ao, j_ao, beta, B_center, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: beta, B_center(3), mu_in, C_center(3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3)
+  double precision              :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi
+
+  double precision, external    :: NAI_pol_mult_erf_with1s
+  double precision, external    :: NAI_pol_x_mult_erf_ao_z
+
+  ASSERT(beta .ge. 0.d0)
+  if(beta .lt. 1d-10) then
+    NAI_pol_x_mult_erf_ao_with1s_z = NAI_pol_x_mult_erf_ao_z(i_ao, j_ao, mu_in, C_center)
+    return
+  endif
+
+  NAI_pol_x_mult_erf_ao_with1s_z = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then
+    return
+  endif
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  power_xA     = power_Ai
+  power_xA(3) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      alphaj = ao_expo_ordered_transp                   (j,j_ao)
+      coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao) 
+
+      ! First term = (x-Ax)**(ax+1)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_z += integral * coef
+
+      ! Second term = Ax * (x-Ax)**(ax)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_z += Ai_center(3) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_with1s_z
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: beta, B_center(3), mu_in, C_center(3)
+  double precision, intent(out) :: ints(3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m
+  double precision              :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi
+
+  double precision, external    :: NAI_pol_mult_erf_with1s
+
+  ASSERT(beta .ge. 0.d0)
+  if(beta .lt. 1d-10) then
+    call NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
+    return
+  endif
+
+  ints = 0.d0
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA     = power_Ai
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        alphaj = ao_expo_ordered_transp                   (j,j_ao)
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        integral = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in)
+        ints(m) += integral * coef
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        integral = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in)
+        ints(m) += Ai_center(m) * integral * coef
+
+      enddo
+    enddo
+  enddo
+
+end subroutine NAI_pol_x_mult_erf_ao_with1s
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao_with1s_v0(i_ao, j_ao, beta, B_center, LD_B, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, LD_B, LD_C, LD_ints, n_points
+  double precision, intent(in)  :: beta, mu_in
+  double precision, intent(in)  :: B_center(LD_B,3), C_center(LD_C,3)
+  double precision, intent(out) :: ints(LD_ints,3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m
+  double precision              :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi
+
+  integer                       :: ipoint
+  double precision, allocatable :: integral(:)
+
+  if(beta .lt. 1d-10) then
+    call NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+    return
+  endif
+
+  ints(1:LD_ints,1:3) = 0.d0
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  allocate(integral(n_points))
+  integral = 0.d0
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA     = power_Ai
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        alphaj = ao_expo_ordered_transp                   (j,j_ao)
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta &
+                                      , B_center(1:LD_B,1:3), LD_B, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points)
+
+        do ipoint = 1, n_points
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta &
+                                      , B_center(1:LD_B,1:3), LD_B, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_with1s_v0
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, LD_B, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, LD_B, LD_C, LD_ints, n_points(3)
+  double precision, intent(in)  :: beta, mu_in
+  double precision, intent(in)  :: B_center(LD_B,3,3), C_center(LD_C,3,3)
+  double precision, intent(out) :: ints(LD_ints,3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m
+  double precision              :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi
+
+  integer                       :: ipoint, n_points_m, LD_integral
+  double precision, allocatable :: integral(:)
+
+  if(beta .lt. 1d-10) then
+    print *, 'small beta', i_ao, j_ao
+    call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+    return
+  endif
+
+  ints(1:LD_ints,1:3) = 0.d0
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  LD_integral = max(max(n_points(1), n_points(2)), n_points(3))
+  allocate(integral(LD_integral))
+  integral = 0.d0
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+      n_points_m = n_points(m)
+
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA     = power_Ai
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        alphaj = ao_expo_ordered_transp                   (j,j_ao)
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta &
+                                      , B_center(1:LD_B,1:3,m), LD_B, C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m)
+
+        do ipoint = 1, n_points_m
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta &
+                                      , B_center(1:LD_B,1:3,m), LD_B, C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m)
+        do ipoint = 1, n_points_m
+          ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_with1s_v
+
+! ---
+
+subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints)
+ implicit none
+  BEGIN_DOC
+  ! Computes the following integral :
+  ! $\int_{-\infty}^{infty} dr X(m) * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! if m == 1 X(m) = x, m == 1 X(m) = y, m == 1 X(m) = z
+  END_DOC
+ include 'utils/constants.include.F'
+ integer, intent(in) :: i_ao,j_ao,m
+ double precision, intent(in) :: mu_in, C_center(3)
+ double precision, intent(out):: ints
+ double precision               :: A_center(3), B_center(3),integral, alpha,beta
+ double precision               :: NAI_pol_mult_erf
+ integer                        :: i,j,num_A,num_B, power_A(3), power_B(3), n_pt_in, power_xA(3)
+ ints = 0.d0
+ if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12)then
+  return
+ endif
+ num_A = ao_nucl(i_ao)
+ power_A(1:3)= ao_power(i_ao,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ num_B = ao_nucl(j_ao)
+ power_B(1:3)= ao_power(j_ao,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+ n_pt_in = n_pt_max_integrals
+
+ do i = 1, ao_prim_num(i_ao)
+  alpha = ao_expo_ordered_transp(i,i_ao)
+    power_xA = power_A
+    ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+    power_xA(m) += 1
+    do j = 1, ao_prim_num(j_ao)
+      beta = ao_expo_ordered_transp(j,j_ao)
+      ! First term = (x-Ax)**(ax+1)
+      integral =  NAI_pol_mult_erf(A_center,B_center,power_xA,power_B,alpha,beta,C_center,n_pt_in,mu_in)
+      ints += integral * ao_coef_normalized_ordered_transp(j,j_ao)*ao_coef_normalized_ordered_transp(i,i_ao)
+      ! Second term = Ax * (x-Ax)**(ax)
+      integral =  NAI_pol_mult_erf(A_center,B_center,power_A,power_B,alpha,beta,C_center,n_pt_in,mu_in)
+      ints += A_center(m) * integral * ao_coef_normalized_ordered_transp(j,j_ao)*ao_coef_normalized_ordered_transp(i,i_ao)
+    enddo
+ enddo
+end
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/ao_erf_gauss_grad.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss_grad.irp.f
new file mode 100644
index 00000000..8a32c38a
--- /dev/null
+++ b/src/ao_many_one_e_ints/ao_erf_gauss_grad.irp.f
@@ -0,0 +1,150 @@
+subroutine phi_j_erf_mu_r_dxyz_phi(i,j,mu_in, C_center, dxyz_ints)
+ implicit none
+ BEGIN_DOC
+! dxyz_ints(1/2/3) = int dr phi_i(r) [erf(mu  |r - C|)/|r-C|]  d/d(x/y/z) phi_i(r)
+ END_DOC
+ integer, intent(in) :: i,j
+ double precision, intent(in) :: mu_in, C_center(3)
+ double precision, intent(out):: dxyz_ints(3)
+ integer :: num_A,power_A(3), num_b, power_B(3),power_B_tmp(3)
+ double precision :: alpha, beta, A_center(3), B_center(3),contrib,NAI_pol_mult_erf,coef,thr
+ integer :: n_pt_in,l,m,mm
+ thr = 1.d-12
+ dxyz_ints = 0.d0
+ if(ao_overlap_abs(j,i).lt.thr)then
+  return
+ endif
+
+ n_pt_in = n_pt_max_integrals
+ ! j 
+ num_A = ao_nucl(j)
+ power_A(1:3)= ao_power(j,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ ! i 
+ num_B = ao_nucl(i)
+ power_B(1:3)= ao_power(i,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+
+ do l=1,ao_prim_num(j)
+  alpha = ao_expo_ordered_transp(l,j)
+  do m=1,ao_prim_num(i)
+    beta = ao_expo_ordered_transp(m,i)
+    coef = ao_coef_normalized_ordered_transp(l,j) * ao_coef_normalized_ordered_transp(m,i) 
+    if(dabs(coef).lt.thr)cycle
+    do mm = 1, 3
+     ! (d/dx phi_i ) * phi_j 
+     ! d/dx * (x - B_x)^b_x exp(-beta * (x -B_x)^2)= [b_x * (x - B_x)^(b_x - 1) - 2 beta * (x - B_x)^(b_x + 1)] exp(-beta * (x -B_x)^2)
+     !
+     ! first contribution :: b_x (x - B_x)^(b_x-1) :: integral with b_x=>b_x-1 multiplied by b_x
+     power_B_tmp = power_B
+     power_B_tmp(mm) += -1
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)  
+     dxyz_ints(mm) += contrib * dble(power_B(mm)) * coef 
+                                                  
+     ! second contribution ::  - 2 beta * (x - B_x)^(b_x + 1) :: integral with b_x=> b_x+1 multiplied by -2 * beta
+     power_B_tmp = power_B
+     power_B_tmp(mm) += 1
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)  
+     dxyz_ints(mm) += contrib * (-2.d0 * beta )  * coef 
+                                                 
+    enddo
+  enddo
+ enddo
+end
+
+
+
+
+subroutine phi_j_erf_mu_r_dxyz_phi_bis(i,j,mu_in, C_center, dxyz_ints)
+ implicit none
+ BEGIN_DOC
+! dxyz_ints(1/2/3) = int dr phi_j(r) [erf(mu  |r - C|)/|r-C|]  d/d(x/y/z) phi_i(r)
+ END_DOC
+ integer, intent(in) :: i,j
+ double precision, intent(in) :: mu_in, C_center(3)
+ double precision, intent(out):: dxyz_ints(3)
+ integer :: num_A,power_A(3), num_b, power_B(3),power_B_tmp(3)
+ double precision :: alpha, beta, A_center(3), B_center(3),contrib,NAI_pol_mult_erf
+ double precision :: thr, coef
+ integer :: n_pt_in,l,m,mm,kk
+ thr = 1.d-12
+ dxyz_ints = 0.d0
+ if(ao_overlap_abs(j,i).lt.thr)then
+  return
+ endif
+
+ n_pt_in = n_pt_max_integrals
+ ! j == A 
+ num_A = ao_nucl(j)
+ power_A(1:3)= ao_power(j,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ ! i == B
+ num_B = ao_nucl(i)
+ power_B(1:3)= ao_power(i,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+
+ dxyz_ints = 0.d0
+ do l=1,ao_prim_num(j)
+  alpha = ao_expo_ordered_transp(l,j)
+  do m=1,ao_prim_num(i)
+    beta = ao_expo_ordered_transp(m,i)
+    do kk = 1, 2 ! loop over the extra terms induced by the d/dx/y/z * AO(i)
+     do mm = 1, 3
+      power_B_tmp = power_B
+      power_B_tmp(mm) = power_ord_grad_transp(kk,mm,i)
+      coef = ao_coef_normalized_ordered_transp(l,j) * ao_coef_ord_grad_transp(kk,mm,m,i) 
+      if(dabs(coef).lt.thr)cycle
+      contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)  
+      dxyz_ints(mm) += contrib * coef 
+     enddo
+    enddo
+  enddo
+ enddo
+end
+
+subroutine phi_j_erf_mu_r_xyz_dxyz_phi(i,j,mu_in, C_center, dxyz_ints)
+ implicit none
+ BEGIN_DOC
+! dxyz_ints(1/2/3) = int dr phi_j(r) x/y/z [erf(mu  |r - C|)/|r-C|]  d/d(x/y/z) phi_i(r)
+ END_DOC
+ integer, intent(in) :: i,j
+ double precision, intent(in) :: mu_in, C_center(3)
+ double precision, intent(out):: dxyz_ints(3)
+ integer :: num_A,power_A(3), num_b, power_B(3),power_B_tmp(3)
+ double precision :: alpha, beta, A_center(3), B_center(3),contrib,NAI_pol_mult_erf
+ double precision :: thr, coef
+ integer :: n_pt_in,l,m,mm,kk
+ thr = 1.d-12
+ dxyz_ints = 0.d0
+ if(ao_overlap_abs(j,i).lt.thr)then
+  return
+ endif
+
+ n_pt_in = n_pt_max_integrals
+ ! j == A 
+ num_A = ao_nucl(j)
+ power_A(1:3)= ao_power(j,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ ! i == B
+ num_B = ao_nucl(i)
+ power_B(1:3)= ao_power(i,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+
+ dxyz_ints = 0.d0
+ do l=1,ao_prim_num(j)
+  alpha = ao_expo_ordered_transp(l,j)
+  do m=1,ao_prim_num(i)
+    beta = ao_expo_ordered_transp(m,i)
+    do kk = 1, 4 ! loop over the extra terms induced by the x/y/z * d dx/y/z AO(i)
+     do mm = 1, 3
+      power_B_tmp = power_B
+      power_B_tmp(mm) = power_ord_xyz_grad_transp(kk,mm,i)
+      coef = ao_coef_normalized_ordered_transp(l,j) * ao_coef_ord_xyz_grad_transp(kk,mm,m,i) 
+      if(dabs(coef).lt.thr)cycle
+      contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)  
+      dxyz_ints(mm) += contrib * coef 
+     enddo
+    enddo
+  enddo
+ enddo
+end
diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
new file mode 100644
index 00000000..d2115d9e
--- /dev/null
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@@ -0,0 +1,426 @@
+! ---
+
+subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints)
+
+ implicit none
+ BEGIN_DOC
+! gauss_ints(m) = \int dr AO_i(r) AO_j(r) x/y/z e^{-delta |r-D_center|^2}
+!
+! with m == 1 ==> x, m == 2 ==> y, m == 3 ==> z
+ END_DOC
+ integer, intent(in) :: i,j
+ double precision, intent(in)  :: D_center(3), delta
+ double precision, intent(out) :: gauss_ints(3)
+
+ integer :: num_a,num_b,power_A(3), power_B(3),l,k,m
+ double precision :: A_center(3), B_center(3),overlap_gauss_r12,alpha,beta,gauss_ints_tmp(3)
+ gauss_ints = 0.d0
+ if(ao_overlap_abs(j,i).lt.1.d-12)then
+  return
+ endif
+ num_A = ao_nucl(i)
+ power_A(1:3)= ao_power(i,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ num_B = ao_nucl(j)
+ power_B(1:3)= ao_power(j,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+ do l=1,ao_prim_num(i)
+  alpha = ao_expo_ordered_transp(l,i)
+  do k=1,ao_prim_num(j)
+   beta = ao_expo_ordered_transp(k,j)
+   call overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints_tmp)
+   do m = 1, 3
+    gauss_ints(m) += gauss_ints_tmp(m) *  ao_coef_normalized_ordered_transp(l,i)             &
+                                       *  ao_coef_normalized_ordered_transp(k,j)
+   enddo
+  enddo
+ enddo
+
+end
+
+
+
+double precision function overlap_gauss_xyz_r12_ao_specific(D_center,delta,i,j,mx)
+ implicit none
+ BEGIN_DOC
+! \int dr AO_i(r) AO_j(r) x/y/z e^{-delta |r-D_center|^2}
+!
+! with mx == 1 ==> x, mx == 2 ==> y, mx == 3 ==> z
+ END_DOC
+ integer, intent(in) :: i,j,mx
+ double precision, intent(in)  :: D_center(3), delta
+
+ integer :: num_a,num_b,power_A(3), power_B(3),l,k
+ double precision :: gauss_int
+ double precision :: A_center(3), B_center(3),overlap_gauss_r12,alpha,beta
+ double precision :: overlap_gauss_xyz_r12_specific
+ overlap_gauss_xyz_r12_ao_specific = 0.d0
+ if(ao_overlap_abs(j,i).lt.1.d-12)then
+  return
+ endif
+ num_A = ao_nucl(i)
+ power_A(1:3)= ao_power(i,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ num_B = ao_nucl(j)
+ power_B(1:3)= ao_power(j,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+ do l=1,ao_prim_num(i)
+  alpha = ao_expo_ordered_transp(l,i)
+  do k=1,ao_prim_num(j)
+   beta = ao_expo_ordered_transp(k,j)
+   gauss_int = overlap_gauss_xyz_r12_specific(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,mx)
+   overlap_gauss_xyz_r12_ao_specific = gauss_int *  ao_coef_normalized_ordered_transp(l,i)             &
+                                                 *  ao_coef_normalized_ordered_transp(k,j)
+  enddo
+ enddo
+end
+
+
+subroutine overlap_gauss_r12_all_ao(D_center,delta,aos_ints)
+ implicit none
+ double precision, intent(in) :: D_center(3), delta
+ double precision, intent(out):: aos_ints(ao_num,ao_num)
+
+ integer :: num_a,num_b,power_A(3), power_B(3),l,k,i,j
+ double precision :: A_center(3), B_center(3),overlap_gauss_r12,alpha,beta,analytical_j
+ aos_ints = 0.d0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(ao_overlap_abs(j,i).lt.1.d-12)cycle
+   num_A = ao_nucl(i)
+   power_A(1:3)= ao_power(i,1:3)
+   A_center(1:3) = nucl_coord(num_A,1:3)
+   num_B = ao_nucl(j)
+   power_B(1:3)= ao_power(j,1:3)
+   B_center(1:3) = nucl_coord(num_B,1:3)
+   do l=1,ao_prim_num(i)
+    alpha = ao_expo_ordered_transp(l,i)
+    do k=1,ao_prim_num(j)
+     beta = ao_expo_ordered_transp(k,j)
+     analytical_j = overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+     aos_ints(j,i) += analytical_j *  ao_coef_normalized_ordered_transp(l,i)             &
+                                   *  ao_coef_normalized_ordered_transp(k,j)
+    enddo
+   enddo
+  enddo
+ enddo
+end
+
+! ---
+
+! TODO :: PUT CYCLES IN LOOPS
+double precision function overlap_gauss_r12_ao(D_center, delta, i, j)
+
+  BEGIN_DOC
+  ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: D_center(3), delta
+
+  integer                      :: power_A(3), power_B(3), l, k
+  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j
+
+  double precision, external   :: overlap_gauss_r12
+
+  overlap_gauss_r12_ao = 0.d0
+
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+
+      if(dabs(coef) .lt. 1d-12) cycle
+
+      analytical_j = overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
+      overlap_gauss_r12_ao += coef * analytical_j
+    enddo
+  enddo
+
+end function overlap_gauss_r12_ao
+
+! --
+
+double precision function overlap_abs_gauss_r12_ao(D_center, delta, i, j)
+
+  BEGIN_DOC
+  ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: D_center(3), delta
+
+  integer                      :: power_A(3), power_B(3), l, k
+  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j
+
+  double precision, external   :: overlap_abs_gauss_r12
+
+  overlap_abs_gauss_r12_ao = 0.d0
+
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+
+      if(dabs(coef) .lt. 1d-12) cycle
+
+      analytical_j = overlap_abs_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
+      overlap_abs_gauss_r12_ao += dabs(coef * analytical_j)
+    enddo
+  enddo
+
+end function overlap_gauss_r12_ao
+
+! --
+
+subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
+
+  BEGIN_DOC
+  !
+  ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}
+  !
+  ! n_points: nb of integrals <= min(LD_D, LD_resv)
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, j, LD_D, LD_resv, n_points
+  double precision, intent(in)  :: D_center(LD_D,3), delta
+  double precision, intent(out) :: resv(LD_resv)
+
+  integer                       :: ipoint
+  integer                       :: power_A(3), power_B(3), l, k
+  double precision              :: A_center(3), B_center(3), alpha, beta, coef, coef1 
+  double precision, allocatable :: analytical_j(:)
+
+  resv(:) = 0.d0
+  if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  allocate(analytical_j(n_points))
+
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+
+      if(dabs(coef) .lt. 1d-12) cycle
+
+      call overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points, n_points)
+
+      do ipoint = 1, n_points
+        resv(ipoint) = resv(ipoint) + coef * analytical_j(ipoint)
+      enddo
+
+    enddo
+  enddo
+
+  deallocate(analytical_j)
+
+end subroutine overlap_gauss_r12_ao_v
+
+! ---
+
+double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, delta, i, j)
+
+  BEGIN_DOC
+  !
+  ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2}
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: B_center(3), beta, D_center(3), delta
+
+  integer                      :: power_A1(3), power_A2(3), l, k
+  double precision             :: A1_center(3), A2_center(3), alpha1, alpha2, coef1, coef12, analytical_j
+  double precision             :: G_center(3), gama, fact_g, gama_inv
+
+  double precision, external   :: overlap_gauss_r12, overlap_gauss_r12_ao
+
+  if(beta .lt. 1d-10) then
+    overlap_gauss_r12_ao_with1s = overlap_gauss_r12_ao(D_center, delta, i, j)
+    return
+  endif
+
+  overlap_gauss_r12_ao_with1s = 0.d0
+
+  if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+    return
+  endif
+
+  ! e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2}
+
+  gama        = beta + delta
+  gama_inv    = 1.d0 / gama
+  G_center(1) = (beta * B_center(1) + delta * D_center(1)) * gama_inv
+  G_center(2) = (beta * B_center(2) + delta * D_center(2)) * gama_inv
+  G_center(3) = (beta * B_center(3) + delta * D_center(3)) * gama_inv
+  fact_g      = beta * delta * gama_inv * ( (B_center(1) - D_center(1)) * (B_center(1) - D_center(1)) &
+                                          + (B_center(2) - D_center(2)) * (B_center(2) - D_center(2)) &
+                                          + (B_center(3) - D_center(3)) * (B_center(3) - D_center(3)) )
+  if(fact_g .gt. 10d0) return
+  fact_g = dexp(-fact_g)
+
+  ! ---
+
+  power_A1(1:3) = ao_power(i,1:3)
+  power_A2(1:3) = ao_power(j,1:3)
+
+  A1_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  do l = 1, ao_prim_num(i)
+    alpha1 = ao_expo_ordered_transp                    (l,i)
+    coef1  = fact_g * ao_coef_normalized_ordered_transp(l,i)
+    if(dabs(coef1) .lt. 1d-12) cycle
+
+    do k = 1, ao_prim_num(j)
+      alpha2 = ao_expo_ordered_transp                   (k,j)
+      coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j)
+      if(dabs(coef12) .lt. 1d-12) cycle
+
+      analytical_j = overlap_gauss_r12(G_center, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2)
+
+      overlap_gauss_r12_ao_with1s += coef12 * analytical_j
+    enddo
+  enddo
+
+end function overlap_gauss_r12_ao_with1s
+
+! ---
+
+subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
+
+  BEGIN_DOC
+  !
+  ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2}
+  ! using an array of D_centers.
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, j, n_points, LD_D, LD_resv
+  double precision, intent(in)  :: B_center(3), beta, D_center(LD_D,3), delta
+  double precision, intent(out) :: resv(LD_resv)
+
+  integer                       :: ipoint
+  integer                       :: power_A1(3), power_A2(3), l, k
+  double precision              :: A1_center(3), A2_center(3), alpha1, alpha2, coef1
+  double precision              :: coef12, coef12f
+  double precision              :: gama, gama_inv
+  double precision              :: bg, dg, bdg
+  double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:)
+
+  if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+    return
+  endif
+
+  ASSERT(beta .gt. 0.d0)
+
+  if(beta .lt. 1d-10) then
+    call overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
+    return
+  endif
+
+  resv(:) = 0.d0
+
+  ! e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2}
+
+  gama     = beta + delta
+  gama_inv = 1.d0 / gama
+
+  power_A1(1:3) = ao_power(i,1:3)
+  power_A2(1:3) = ao_power(j,1:3)
+
+  A1_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  allocate(fact_g(n_points), G_center(n_points,3), analytical_j(n_points))
+
+  bg  = beta  * gama_inv
+  dg  = delta * gama_inv
+  bdg = bg * delta 
+
+  do ipoint = 1, n_points
+
+    G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1)
+    G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2)
+    G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3)
+    fact_g(ipoint) = bdg * ( (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1)) &
+                           + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2)) &
+                           + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
+
+    if(fact_g(ipoint) < 10d0) then
+      fact_g(ipoint) = dexp(-fact_g(ipoint))
+    else
+      fact_g(ipoint) = 0.d0
+    endif
+
+  enddo
+
+  do l = 1, ao_prim_num(i)
+    alpha1 = ao_expo_ordered_transp           (l,i)
+    coef1  = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      alpha2 = ao_expo_ordered_transp                   (k,j)
+      coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j)
+      if(dabs(coef12) .lt. 1d-12) cycle
+
+      call overlap_gauss_r12_v(G_center, n_points, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points, n_points)
+
+      do ipoint = 1, n_points
+        coef12f = coef12 * fact_g(ipoint)
+        resv(ipoint) += coef12f * analytical_j(ipoint)
+      enddo
+    enddo
+  enddo
+
+  deallocate(fact_g, G_center, analytical_j)
+
+end subroutine overlap_gauss_r12_ao_with1s_v
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/fit_slat_gauss.irp.f b/src/ao_many_one_e_ints/fit_slat_gauss.irp.f
new file mode 100644
index 00000000..052ad072
--- /dev/null
+++ b/src/ao_many_one_e_ints/fit_slat_gauss.irp.f
@@ -0,0 +1,94 @@
+ BEGIN_PROVIDER [integer, n_max_fit_slat]
+ implicit none
+ BEGIN_DOC
+! number of gaussian to fit exp(-x)
+!
+! I took 20 gaussians from the program bassto.f
+ END_DOC
+ n_max_fit_slat = 20
+ END_PROVIDER
+
+ BEGIN_PROVIDER [double precision, coef_fit_slat_gauss, (n_max_fit_slat)]
+&BEGIN_PROVIDER [double precision, expo_fit_slat_gauss, (n_max_fit_slat)]
+ implicit none
+  include 'constants.include.F'
+ BEGIN_DOC
+ ! fit the exp(-x) as 
+ !
+ ! \sum_{i = 1, n_max_fit_slat} coef_fit_slat_gauss(i) * exp(-expo_fit_slat_gauss(i) * x**2)
+ !
+ ! The coefficient are taken from the program bassto.f
+ END_DOC
+
+
+      expo_fit_slat_gauss(01)=30573.77073000000
+      coef_fit_slat_gauss(01)=0.00338925525
+      expo_fit_slat_gauss(02)=5608.45238100000
+      coef_fit_slat_gauss(02)=0.00536433869
+      expo_fit_slat_gauss(03)=1570.95673400000
+      coef_fit_slat_gauss(03)=0.00818702846
+      expo_fit_slat_gauss(04)=541.39785110000
+      coef_fit_slat_gauss(04)=0.01202047655
+      expo_fit_slat_gauss(05)=212.43469630000
+      coef_fit_slat_gauss(05)=0.01711289568
+      expo_fit_slat_gauss(06)=91.31444574000
+      coef_fit_slat_gauss(06)=0.02376001022
+      expo_fit_slat_gauss(07)=42.04087246000
+      coef_fit_slat_gauss(07)=0.03229121736
+      expo_fit_slat_gauss(08)=20.43200443000
+      coef_fit_slat_gauss(08)=0.04303646818
+      expo_fit_slat_gauss(09)=10.37775161000
+      coef_fit_slat_gauss(09)=0.05624657578
+      expo_fit_slat_gauss(10)=5.46880754500
+      coef_fit_slat_gauss(10)=0.07192311571
+      expo_fit_slat_gauss(11)=2.97373529200
+      coef_fit_slat_gauss(11)=0.08949389001
+      expo_fit_slat_gauss(12)=1.66144190200
+      coef_fit_slat_gauss(12)=0.10727599240
+      expo_fit_slat_gauss(13)=0.95052560820
+      coef_fit_slat_gauss(13)=0.12178961750
+      expo_fit_slat_gauss(14)=0.55528683970
+      coef_fit_slat_gauss(14)=0.12740141870
+      expo_fit_slat_gauss(15)=0.33043360020
+      coef_fit_slat_gauss(15)=0.11759168160
+      expo_fit_slat_gauss(16)=0.19982303230
+      coef_fit_slat_gauss(16)=0.08953504394
+      expo_fit_slat_gauss(17)=0.12246840760
+      coef_fit_slat_gauss(17)=0.05066721317
+      expo_fit_slat_gauss(18)=0.07575825322
+      coef_fit_slat_gauss(18)=0.01806363869
+      expo_fit_slat_gauss(19)=0.04690146243
+      coef_fit_slat_gauss(19)=0.00305632563
+      expo_fit_slat_gauss(20)=0.02834749861
+      coef_fit_slat_gauss(20)=0.00013317513
+
+
+
+END_PROVIDER 
+
+double precision function slater_fit_gam(x,gam)
+ implicit none
+ double precision, intent(in) :: x,gam
+ BEGIN_DOC
+! fit of the function exp(-gam * x) with gaussian functions 
+ END_DOC
+ integer :: i
+ slater_fit_gam = 0.d0
+ do i = 1, n_max_fit_slat
+  slater_fit_gam += coef_fit_slat_gauss(i) * dexp(-expo_fit_slat_gauss(i) * gam * gam * x * x)
+ enddo
+end
+
+subroutine expo_fit_slater_gam(gam,expos)
+ implicit none
+ BEGIN_DOC
+! returns the array of the exponents of the gaussians to fit exp(-gam*x)
+ END_DOC
+ double precision, intent(in)  :: gam
+ double precision, intent(out) :: expos(n_max_fit_slat)
+ integer :: i
+ do i = 1, n_max_fit_slat
+  expos(i) = expo_fit_slat_gauss(i) * gam * gam
+ enddo
+end
+
diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
new file mode 100644
index 00000000..8e253d75
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -0,0 +1,560 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+  double precision              :: int_gauss, dsqpi_3_2, int_j1b
+  double precision              :: factor_ij_1s, beta_ij, center_ij_1s(3), sq_pi_3_2 
+  double precision, allocatable :: int_fit_v(:)
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2_test ...'
+
+  sq_pi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
+  call wall_time(wall0)
+
+  int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                              &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,                                     &
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s) &
+     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_thr_b3_size,                   &
+     !$OMP          final_grid_points_transp, ng_fit_jast,                                                  &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,                                             &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,                                           &
+     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,              &
+     !$OMP          ao_overlap_abs,sq_pi_3_2)
+ !$OMP DO SCHEDULE(dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+          cycle
+        endif
+
+        ! --- --- ---
+        ! i_1s = 1
+        ! --- --- ---
+
+        int_j1b = ao_abs_comb_b3_j1b(1,j,i)
+        do i_fit = 1, ng_fit_jast
+          expo_fit = expo_gauss_1_erf_x_2(i_fit)
+          coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit)
+          if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.1.d-10)cycle
+          int_gauss = overlap_gauss_r12_ao(r, expo_fit, i, j)
+          int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss 
+        enddo
+
+        ! --- --- ---
+        ! i_1s > 1
+        ! --- --- ---
+  
+        do i_1s = 2, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+  
+          do i_fit = 1, ng_fit_jast
+            expo_fit = expo_gauss_1_erf_x_2(i_fit)
+            !DIR$ FORCEINLINE
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+            coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+!           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+            if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle
+!           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+!                 expo_fit, i, j, int_fit_v, n_points_final_grid)
+            int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+            int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss 
+          enddo
+        enddo
+
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao_num, n_points_final_grid)]
+!
+!  BEGIN_DOC
+!  !
+!  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+!  !
+!  END_DOC
+!
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+
+  double precision, allocatable :: int_fit_v(:),big_array(:,:,:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2_test_v ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  call wall_time(wall0)
+
+ double precision :: int_j1b
+ big_array(:,:,:) = 0.d0
+ allocate(big_array(n_points_final_grid,ao_num, ao_num))
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
+     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size,&
+     !$OMP          final_grid_points_transp, ng_fit_jast,               &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
+     !$OMP          List_comb_thr_b3_cent, big_array,&
+     !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
+!
+ allocate(int_fit_v(n_points_final_grid))
+ !$OMP DO SCHEDULE(dynamic)
+ do i = 1, ao_num
+   do j = i, ao_num
+
+     if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+       cycle
+     endif
+
+      do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+!         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+       do i_fit = 1, ng_fit_jast
+
+         expo_fit = expo_gauss_1_erf_x_2(i_fit)
+         coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+
+         call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, size(final_grid_points_transp,1),&
+               expo_fit, i, j, int_fit_v, size(int_fit_v,1),n_points_final_grid)
+
+         do ipoint = 1, n_points_final_grid
+           big_array(ipoint,j,i) += coef_fit * int_fit_v(ipoint)
+         enddo
+
+       enddo
+
+     enddo
+   enddo
+ enddo
+ !$OMP END DO
+ deallocate(int_fit_v)
+ !$OMP END PARALLEL
+ do i = 1, ao_num
+   do j = i, ao_num
+    do ipoint = 1, n_points_final_grid
+     int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,j,i)
+    enddo
+   enddo
+  enddo
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,i,j)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_v', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3), tmp
+  double precision              :: wall0, wall1,int_j1b
+
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2
+
+  print*, ' providing int2_u2_j1b2_test ...'
+
+  sq_pi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u2_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b,factor_ij_1s,beta_ij,center_ij_1s)          & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp = 0.d0
+
+        ! --- --- ---
+        ! i_1s = 1
+        ! --- --- ---
+
+        int_j1b = ao_abs_comb_b3_j1b(1,j,i)
+        if(dabs(int_j1b).lt.1.d-10) cycle
+        do i_fit = 1, ng_fit_jast
+          expo_fit = expo_gauss_j_mu_x_2(i_fit)
+          coef_fit = coef_gauss_j_mu_x_2(i_fit)
+          if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.1.d-10)cycle
+          int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
+          tmp += coef_fit * int_fit
+        enddo
+
+        ! --- --- ---
+        ! i_1s > 1
+        ! --- --- ---
+
+        do i_1s = 2, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+          do i_fit = 1, ng_fit_jast
+            expo_fit = expo_gauss_j_mu_x_2(i_fit)
+            coef_fit = coef_gauss_j_mu_x_2(i_fit)
+            !DIR$ FORCEINLINE
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+!            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle
+            int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+            tmp += coef * coef_fit * int_fit
+          enddo
+        enddo
+
+        int2_u2_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u2_j1b2_test(j,i,ipoint) = int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s, i_fit
+  double precision :: r(3), int_fit(3), expo_fit, coef_fit
+  double precision :: coef, beta, B_center(3), dist
+  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
+  double precision :: tmp_x, tmp_y, tmp_z, int_j1b
+  double precision :: wall0, wall1, sq_pi_3_2,sq_alpha
+
+  print*, ' providing int2_u_grad1u_x_j1b2_test ...'
+
+  sq_pi_3_2 = dacos(-1.D0)**(1.d0)
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u_grad1u_x_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
+ !$OMP          tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha)                        & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2)
+ !$OMP DO
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          do i_fit = 1, ng_fit_jast
+    
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+    
+            dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                        + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                        + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s 
+
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv)
+!            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version
+            if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle
+            
+            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+
+            tmp_x += coef_tmp * int_fit(1)
+            tmp_y += coef_tmp * int_fit(2)
+            tmp_z += coef_tmp * int_fit(3)
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = tmp_x
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = tmp_y
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,1)
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,2)
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_x_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+  double precision :: j12_mu_r12,int_j1b
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+  double precision :: beta_ij,center_ij_1s(3),factor_ij_1s
+
+  print*, ' providing int2_u_grad1u_j1b2_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent
+  call wall_time(wall0)
+
+
+  int2_u_grad1u_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
+ !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10) cycle
+
+        r(1) = final_grid_points(1,ipoint)
+        r(2) = final_grid_points(2,ipoint)
+        r(3) = final_grid_points(3,ipoint)
+
+        tmp = 0.d0
+
+        ! --- --- ---
+        ! i_1s = 1
+        ! --- --- ---
+
+        int_j1b = ao_abs_comb_b3_j1b(1,j,i)
+        if(dabs(int_j1b).lt.1.d-10) cycle
+        do i_fit = 1, ng_fit_jast
+          expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+          if(dabs(int_j1b)*dsqpi_3_2*expo_fit**(-1.5d0).lt.1.d-15) cycle
+          coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+          int_fit  = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r,  1.d+9, r)
+          tmp += coef_fit * int_fit
+        enddo
+
+        ! --- --- ---
+        ! i_1s > 1
+        ! --- --- ---
+
+        do i_1s = 2, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+          do i_fit = 1, ng_fit_jast
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 20.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-08) cycle
+
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+        enddo
+
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
new file mode 100644
index 00000000..8196614f
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -0,0 +1,420 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2 ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  int2_grad1u2_grad2u2_j1b2 = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,         &
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
+ !$OMP          List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp = 0.d0
+        do i_fit = 1, ng_fit_jast
+
+          expo_fit = expo_gauss_1_erf_x_2(i_fit)
+          coef_fit = coef_gauss_1_erf_x_2(i_fit)
+
+          ! ---
+
+          int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
+          tmp += -0.25d0 * coef_fit * int_fit
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
+
+          ! ---
+
+          do i_1s = 2, List_all_comb_b3_size
+
+            coef        = List_all_comb_b3_coef  (i_1s)
+            beta        = List_all_comb_b3_expo  (i_1s)
+            B_center(1) = List_all_comb_b3_cent(1,i_1s)
+            B_center(2) = List_all_comb_b3_cent(2,i_1s)
+            B_center(3) = List_all_comb_b3_cent(3,i_1s)
+
+            int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += -0.25d0 * coef * coef_fit * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2 =', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3), tmp
+  double precision              :: wall0, wall1
+
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_u2_j1b2 ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  int2_u2_j1b2 = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
+ !$OMP          List_all_comb_b3_cent, int2_u2_j1b2)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp = 0.d0
+        do i_fit = 1, ng_fit_jast
+
+          expo_fit = expo_gauss_j_mu_x_2(i_fit)
+          coef_fit = coef_gauss_j_mu_x_2(i_fit)
+
+          ! ---
+
+          int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
+          tmp += coef_fit * int_fit
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
+
+          ! ---
+
+          do i_1s = 2, List_all_comb_b3_size
+         
+            coef        = List_all_comb_b3_coef  (i_1s)
+            beta        = List_all_comb_b3_expo  (i_1s)
+            B_center(1) = List_all_comb_b3_cent(1,i_1s)
+            B_center(2) = List_all_comb_b3_cent(2,i_1s)
+            B_center(3) = List_all_comb_b3_cent(3,i_1s)
+
+            int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u2_j1b2(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u2_j1b2(j,i,ipoint) = int2_u2_j1b2(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u2_j1b2', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s, i_fit
+  double precision :: r(3), int_fit(3), expo_fit, coef_fit
+  double precision :: coef, beta, B_center(3), dist
+  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
+  double precision :: tmp_x, tmp_y, tmp_z
+  double precision :: wall0, wall1
+
+  print*, ' providing int2_u_grad1u_x_j1b2 ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  int2_u_grad1u_x_j1b2 = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
+ !$OMP          tmp_x, tmp_y, tmp_z)                                & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
+ !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
+ !$OMP DO
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_fit = 1, ng_fit_jast
+
+          expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+          coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+          ! ---
+
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r, int_fit)
+          tmp_x += coef_fit * int_fit(1)
+          tmp_y += coef_fit * int_fit(2)
+          tmp_z += coef_fit * int_fit(3)
+!          if( dabs(coef_fit)*(dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
+
+          ! ---
+
+          do i_1s = 2, List_all_comb_b3_size
+          
+            coef        = List_all_comb_b3_coef  (i_1s)
+            beta        = List_all_comb_b3_expo  (i_1s)
+            B_center(1) = List_all_comb_b3_cent(1,i_1s)
+            B_center(2) = List_all_comb_b3_cent(2,i_1s)
+            B_center(3) = List_all_comb_b3_cent(3,i_1s)
+            dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                        + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                        + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s 
+
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+!            if(dabs(coef_tmp) .lt. 1d-12) cycle
+            
+            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+
+            tmp_x += coef_tmp * int_fit(1)
+            tmp_y += coef_tmp * int_fit(2)
+            tmp_z += coef_tmp * int_fit(3)
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = tmp_x
+        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = tmp_y
+        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_x_j1b2 = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+
+  print*, ' providing int2_u_grad1u_j1b2 ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  int2_u_grad1u_j1b2 = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
+ !$OMP          List_all_comb_b3_cent, int2_u_grad1u_j1b2)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = i, ao_num
+        r(1) = final_grid_points(1,ipoint)
+        r(2) = final_grid_points(2,ipoint)
+        r(3) = final_grid_points(3,ipoint)
+
+        tmp = 0.d0
+        do i_fit = 1, ng_fit_jast
+
+          expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+          coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+          ! ---
+
+          int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r)
+!          if(dabs(coef_fit)*dabs(int_fit) .lt. 1d-12) cycle
+
+          tmp += coef_fit * int_fit
+
+          ! ---
+
+          do i_1s = 2, List_all_comb_b3_size
+
+            coef        = List_all_comb_b3_coef  (i_1s)
+            beta        = List_all_comb_b3_expo  (i_1s)
+            B_center(1) = List_all_comb_b3_cent(1,i_1s)
+            B_center(2) = List_all_comb_b3_cent(2,i_1s)
+            B_center(3) = List_all_comb_b3_cent(3,i_1s)
+            dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                        + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                        + (B_center(3) - r(3)) * (B_center(3) - r(3))
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s 
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 80.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-12) cycle
+
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_j1b2(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2(j,i,ipoint) = int2_u_grad1u_j1b2(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
new file mode 100644
index 00000000..21927371
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
@@ -0,0 +1,453 @@
+!
+!! ---
+!
+!BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)]
+!
+!  BEGIN_DOC
+!  !
+!  ! -\frac{1}{4} int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+!  !
+!  END_DOC
+!
+!  implicit none
+!  integer                       :: i, j, ipoint, i_1s, i_fit
+!  integer                       :: i_mask_grid
+!  double precision              :: r(3), expo_fit, coef_fit
+!  double precision              :: coef, beta, B_center(3)
+!  double precision              :: wall0, wall1
+!
+!  integer,          allocatable :: n_mask_grid(:)
+!  double precision, allocatable :: r_mask_grid(:,:)
+!  double precision, allocatable :: int_fit_v(:)
+!
+!  print*, ' providing int2_grad1u2_grad2u2_j1b2'
+!
+!  provide mu_erf final_grid_points_transp j1b_pen
+!  call wall_time(wall0)
+!
+!  int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0
+!
+! !$OMP PARALLEL DEFAULT (NONE)                                     &
+! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+! !$OMP          coef_fit, expo_fit, int_fit_v, n_mask_grid,        &
+! !$OMP          i_mask_grid, r_mask_grid)                          &
+! !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,&
+! !$OMP          final_grid_points_transp, n_max_fit_slat,          &
+! !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,        &
+! !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,      &
+! !$OMP          List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,  &
+! !$OMP          ao_overlap_abs)
+!
+!  allocate(int_fit_v(n_points_final_grid))
+!  allocate(n_mask_grid(n_points_final_grid))
+!  allocate(r_mask_grid(n_points_final_grid,3))
+!
+! !$OMP DO SCHEDULE(dynamic)
+!  do i = 1, ao_num
+!    do j = i, ao_num
+!
+!      if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+!        cycle
+!      endif
+!
+!      do i_fit = 1, n_max_fit_slat
+!
+!        expo_fit = expo_gauss_1_erf_x_2(i_fit)
+!        coef_fit = coef_gauss_1_erf_x_2(i_fit) * (-0.25d0)
+!
+!        ! ---
+!
+!        call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+!
+!        i_mask_grid = 0    ! dim
+!        n_mask_grid = 0    ! ind
+!        r_mask_grid = 0.d0 ! val
+!        do ipoint = 1, n_points_final_grid
+!
+!          int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
+!
+!          if(dabs(int_fit_v(ipoint)) .gt. 1d-10) then
+!            i_mask_grid += 1
+!            n_mask_grid(i_mask_grid  ) = ipoint
+!            r_mask_grid(i_mask_grid,1) = final_grid_points_transp(ipoint,1)
+!            r_mask_grid(i_mask_grid,2) = final_grid_points_transp(ipoint,2)
+!            r_mask_grid(i_mask_grid,3) = final_grid_points_transp(ipoint,3)
+!          endif
+!
+!        enddo
+!
+!        if(i_mask_grid .eq. 0) cycle
+!
+!        ! ---
+!
+!        do i_1s = 2, List_all_comb_b3_size
+!
+!          coef        = List_all_comb_b3_coef  (i_1s) * coef_fit
+!          beta        = List_all_comb_b3_expo  (i_1s)
+!          B_center(1) = List_all_comb_b3_cent(1,i_1s)
+!          B_center(2) = List_all_comb_b3_cent(2,i_1s)
+!          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+!
+!          call overlap_gauss_r12_ao_with1s_v(B_center, beta, r_mask_grid, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, i_mask_grid)
+!
+!          do ipoint = 1, i_mask_grid
+!            int2_grad1u2_grad2u2_j1b2(j,i,n_mask_grid(ipoint)) += coef * int_fit_v(ipoint)
+!          enddo
+!
+!        enddo
+!
+!        ! ---
+!
+!      enddo
+!    enddo
+!  enddo
+! !$OMP END DO
+!
+!  deallocate(n_mask_grid)
+!  deallocate(r_mask_grid)
+!  deallocate(int_fit_v)
+!
+! !$OMP END PARALLEL
+!
+!  do ipoint = 1, n_points_final_grid
+!    do i = 2, ao_num
+!      do j = 1, i-1
+!        int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
+!      enddo
+!    enddo
+!  enddo
+!
+!  call wall_time(wall1)
+!  print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0
+!
+!END_PROVIDER
+!
+!! ---
+!
+!BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)]
+!
+!  BEGIN_DOC
+!  !
+!  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+!  !
+!  END_DOC
+!
+!  implicit none
+!  integer                       :: i, j, ipoint, i_1s, i_fit
+!  integer                       :: i_mask_grid
+!  double precision              :: r(3), expo_fit, coef_fit
+!  double precision              :: coef, beta, B_center(3), tmp
+!  double precision              :: wall0, wall1
+!
+!  integer,          allocatable :: n_mask_grid(:)
+!  double precision, allocatable :: r_mask_grid(:,:)
+!  double precision, allocatable :: int_fit_v(:)
+!
+!  print*, ' providing int2_u2_j1b2'
+!
+!  provide mu_erf final_grid_points_transp j1b_pen
+!  call wall_time(wall0)
+!
+!  int2_u2_j1b2(:,:,:) = 0.d0
+!
+! !$OMP PARALLEL DEFAULT (NONE)                                      &
+! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+! !$OMP          coef_fit, expo_fit, int_fit_v,                      &
+! !$OMP          i_mask_grid, n_mask_grid, r_mask_grid )             &
+! !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, &
+! !$OMP          final_grid_points_transp, n_max_fit_slat,           &
+! !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+! !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       &
+! !$OMP          List_all_comb_b3_cent, int2_u2_j1b2)
+!
+!  allocate(n_mask_grid(n_points_final_grid))
+!  allocate(r_mask_grid(n_points_final_grid,3))
+!  allocate(int_fit_v(n_points_final_grid))
+!
+! !$OMP DO SCHEDULE(dynamic)
+!  do i = 1, ao_num
+!    do j = i, ao_num
+!
+!      do i_fit = 1, n_max_fit_slat
+!
+!        expo_fit = expo_gauss_j_mu_x_2(i_fit)
+!        coef_fit = coef_gauss_j_mu_x_2(i_fit)
+!
+!        ! ---
+!
+!        call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+!
+!        i_mask_grid = 0    ! dim
+!        n_mask_grid = 0    ! ind
+!        r_mask_grid = 0.d0 ! val
+!
+!        do ipoint = 1, n_points_final_grid
+!          int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
+!
+!          if(dabs(int_fit_v(ipoint)) .gt. 1d-10) then
+!            i_mask_grid += 1
+!            n_mask_grid(i_mask_grid  ) = ipoint
+!            r_mask_grid(i_mask_grid,1) = final_grid_points_transp(ipoint,1)
+!            r_mask_grid(i_mask_grid,2) = final_grid_points_transp(ipoint,2)
+!            r_mask_grid(i_mask_grid,3) = final_grid_points_transp(ipoint,3)
+!          endif
+!        enddo
+!
+!        if(i_mask_grid .eq. 0) cycle
+!
+!        ! ---
+!
+!        do i_1s = 2, List_all_comb_b3_size
+!
+!          coef        = List_all_comb_b3_coef  (i_1s) * coef_fit
+!          beta        = List_all_comb_b3_expo  (i_1s)
+!          B_center(1) = List_all_comb_b3_cent(1,i_1s)
+!          B_center(2) = List_all_comb_b3_cent(2,i_1s)
+!          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+!
+!          call overlap_gauss_r12_ao_with1s_v(B_center, beta, r_mask_grid, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, i_mask_grid)
+!
+!          do ipoint = 1, i_mask_grid
+!            int2_u2_j1b2(j,i,n_mask_grid(ipoint)) += coef * int_fit_v(ipoint)
+!          enddo
+!
+!        enddo
+!
+!        ! ---
+!
+!      enddo
+!    enddo
+!  enddo
+! !$OMP END DO
+!
+!  deallocate(n_mask_grid)
+!  deallocate(r_mask_grid)
+!  deallocate(int_fit_v)
+!
+! !$OMP END PARALLEL
+!
+!  do ipoint = 1, n_points_final_grid
+!    do i = 2, ao_num
+!      do j = 1, i-1
+!        int2_u2_j1b2(j,i,ipoint) = int2_u2_j1b2(i,j,ipoint)
+!      enddo
+!    enddo
+!  enddo
+!
+!  call wall_time(wall1)
+!  print*, ' wall time for int2_u2_j1b2', wall1 - wall0
+!
+!END_PROVIDER
+!
+!! ---
+!
+!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)]
+!
+!  BEGIN_DOC
+!  !
+!  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+!  !
+!  END_DOC
+!
+!  implicit none
+!
+!  integer                       :: i, j, ipoint, i_1s, i_fit
+!  integer                       :: i_mask_grid1, i_mask_grid2, i_mask_grid3, i_mask_grid(3)
+!  double precision              :: x, y, z, expo_fit, coef_fit
+!  double precision              :: coef, beta, B_center(3)
+!  double precision              :: alpha_1s, alpha_1s_inv, expo_coef_1s
+!  double precision              :: wall0, wall1
+!
+!  integer,          allocatable :: n_mask_grid(:,:)
+!  double precision, allocatable :: r_mask_grid(:,:,:)
+!  double precision, allocatable :: int_fit_v(:,:), dist(:,:), centr_1s(:,:,:)
+!
+!  print*, ' providing int2_u_grad1u_x_j1b2'
+!
+!  provide mu_erf final_grid_points_transp j1b_pen
+!  call wall_time(wall0)
+!
+!  int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0
+!
+! !$OMP PARALLEL DEFAULT (NONE)                                          &
+! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, x, y, z, coef, beta,         &
+! !$OMP          coef_fit, expo_fit, int_fit_v, alpha_1s, dist, B_center,&
+! !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s,                   &
+! !$OMP          i_mask_grid1, i_mask_grid2, i_mask_grid3, i_mask_grid,  &
+! !$OMP          n_mask_grid, r_mask_grid)                               &
+! !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,     &
+! !$OMP          final_grid_points_transp, n_max_fit_slat,               &
+! !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,           &
+! !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,           &
+! !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
+!
+!  allocate(dist(n_points_final_grid,3))
+!  allocate(centr_1s(n_points_final_grid,3,3))
+!  allocate(n_mask_grid(n_points_final_grid,3))
+!  allocate(r_mask_grid(n_points_final_grid,3,3))
+!  allocate(int_fit_v(n_points_final_grid,3))
+!
+! !$OMP DO SCHEDULE(dynamic)
+!  do i = 1, ao_num
+!    do j = i, ao_num
+!      do i_fit = 1, n_max_fit_slat
+!
+!        expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+!        coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+!
+!        ! ---
+!
+!        call NAI_pol_x_mult_erf_ao_with1s_v0(i, j, expo_fit, final_grid_points_transp, n_points_final_grid, 1.d+9, final_grid_points_transp, n_points_final_grid, int_fit_v, n_points_final_grid, n_points_final_grid)
+!
+!        i_mask_grid1 = 0    ! dim
+!        i_mask_grid2 = 0    ! dim
+!        i_mask_grid3 = 0    ! dim
+!        n_mask_grid  = 0    ! ind
+!        r_mask_grid  = 0.d0 ! val
+!        do ipoint = 1, n_points_final_grid
+!
+!          ! ---
+!
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,1) += coef_fit * int_fit_v(ipoint,1)
+!
+!          if(dabs(int_fit_v(ipoint,1)) .gt. 1d-10) then
+!            i_mask_grid1 += 1
+!            n_mask_grid(i_mask_grid1,  1) = ipoint
+!            r_mask_grid(i_mask_grid1,1,1) = final_grid_points_transp(ipoint,1)
+!            r_mask_grid(i_mask_grid1,2,1) = final_grid_points_transp(ipoint,2)
+!            r_mask_grid(i_mask_grid1,3,1) = final_grid_points_transp(ipoint,3)
+!          endif
+!
+!          ! ---
+!
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,2) += coef_fit * int_fit_v(ipoint,2)
+!
+!          if(dabs(int_fit_v(ipoint,2)) .gt. 1d-10) then
+!            i_mask_grid2 += 1
+!            n_mask_grid(i_mask_grid2,  2) = ipoint
+!            r_mask_grid(i_mask_grid2,1,2) = final_grid_points_transp(ipoint,1)
+!            r_mask_grid(i_mask_grid2,2,2) = final_grid_points_transp(ipoint,2)
+!            r_mask_grid(i_mask_grid2,3,2) = final_grid_points_transp(ipoint,3)
+!          endif
+!
+!          ! ---
+!
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,3) += coef_fit * int_fit_v(ipoint,3)
+!
+!          if(dabs(int_fit_v(ipoint,3)) .gt. 1d-10) then
+!            i_mask_grid3 += 1
+!            n_mask_grid(i_mask_grid3,  3) = ipoint
+!            r_mask_grid(i_mask_grid3,1,3) = final_grid_points_transp(ipoint,1)
+!            r_mask_grid(i_mask_grid3,2,3) = final_grid_points_transp(ipoint,2)
+!            r_mask_grid(i_mask_grid3,3,3) = final_grid_points_transp(ipoint,3)
+!          endif
+!
+!          ! ---
+!
+!        enddo
+!
+!        if((i_mask_grid1+i_mask_grid2+i_mask_grid3) .eq. 0) cycle
+!
+!        i_mask_grid(1) = i_mask_grid1
+!        i_mask_grid(2) = i_mask_grid2
+!        i_mask_grid(3) = i_mask_grid3
+!
+!        ! ---
+!
+!        do i_1s = 2, List_all_comb_b3_size
+!
+!          coef        = List_all_comb_b3_coef  (i_1s) * coef_fit
+!          beta        = List_all_comb_b3_expo  (i_1s)
+!          B_center(1) = List_all_comb_b3_cent(1,i_1s)
+!          B_center(2) = List_all_comb_b3_cent(2,i_1s)
+!          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+!
+!          alpha_1s     = beta + expo_fit
+!          alpha_1s_inv = 1.d0 / alpha_1s
+!          expo_coef_1s = beta * expo_fit * alpha_1s_inv 
+!
+!          do ipoint = 1, i_mask_grid1
+!
+!            x = r_mask_grid(ipoint,1,1)
+!            y = r_mask_grid(ipoint,2,1)
+!            z = r_mask_grid(ipoint,3,1)
+!
+!            centr_1s(ipoint,1,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x)
+!            centr_1s(ipoint,2,1) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y)
+!            centr_1s(ipoint,3,1) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z)
+!
+!            dist(ipoint,1) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z)
+!          enddo
+!
+!          do ipoint = 1, i_mask_grid2
+!
+!            x = r_mask_grid(ipoint,1,2)
+!            y = r_mask_grid(ipoint,2,2)
+!            z = r_mask_grid(ipoint,3,2)
+!
+!            centr_1s(ipoint,1,2) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x)
+!            centr_1s(ipoint,2,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y)
+!            centr_1s(ipoint,3,2) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z)
+!
+!            dist(ipoint,2) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z)
+!          enddo
+!
+!          do ipoint = 1, i_mask_grid3
+!
+!            x = r_mask_grid(ipoint,1,3)
+!            y = r_mask_grid(ipoint,2,3)
+!            z = r_mask_grid(ipoint,3,3)
+!
+!            centr_1s(ipoint,1,3) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x)
+!            centr_1s(ipoint,2,3) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y)
+!            centr_1s(ipoint,3,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z)
+!
+!            dist(ipoint,3) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z)
+!          enddo
+!
+!          call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s, n_points_final_grid, 1.d+9, r_mask_grid, n_points_final_grid, int_fit_v, n_points_final_grid, i_mask_grid)
+!
+!          do ipoint = 1, i_mask_grid1
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,1),1) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1)
+!          enddo
+!
+!          do ipoint = 1, i_mask_grid2
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,2),2) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2)
+!          enddo
+!
+!          do ipoint = 1, i_mask_grid3
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,3),3) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3)
+!          enddo
+!
+!        enddo
+!
+!        ! ---
+!
+!      enddo
+!    enddo
+!  enddo
+! !$OMP END DO
+!
+!  deallocate(dist)
+!  deallocate(centr_1s)
+!  deallocate(n_mask_grid)
+!  deallocate(r_mask_grid)
+!  deallocate(int_fit_v)
+!
+! !$OMP END PARALLEL
+!
+!  do ipoint = 1, n_points_final_grid
+!    do i = 2, ao_num
+!      do j = 1, i-1
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3)
+!      enddo
+!    enddo
+!  enddo
+!
+!  call wall_time(wall1)
+!  print*, ' wall time for int2_u_grad1u_x_j1b2 =', wall1 - wall0
+!
+!END_PROVIDER
+!
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
new file mode 100644
index 00000000..5c9f81e9
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -0,0 +1,396 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R| - 1) / |r - R|
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_mu, int_coulomb
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp,int_j1b
+  double precision           :: wall0, wall1
+  double precision, external :: NAI_pol_mult_erf_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+
+  print*, ' providing v_ij_erf_rk_cst_mu_j1b_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                         &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp, int_j1b)& 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b,  &
+ !$OMP          v_ij_erf_rk_cst_mu_j1b_test, mu_erf,                                   &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+          ! TODO :: cycle on the 1 - erf(mur12)
+          int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
+          int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
+
+          tmp += coef * (int_mu - int_coulomb)
+        enddo
+
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s
+  double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
+  double precision :: tmp_x, tmp_y, tmp_z
+  double precision :: wall0, wall1
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s
+
+  print*, ' providing x_v_ij_erf_rk_cst_mu_j1b_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide expo_erfc_mu_gauss ao_prod_sigma ao_prod_center
+  call wall_time(wall0)
+
+  x_v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
+ !$OMP          int_j1b, tmp_x, tmp_y, tmp_z,factor_ij_1s,beta_ij,center_ij_1s)       & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
+ !$OMP          x_v_ij_erf_rk_cst_mu_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma)
+! !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+!          if(ao_prod_center(1,j,i).ne.10000.d0)then
+!           ! approximate 1 - erf(mu r12) by a gaussian * 10
+!           !DIR$ FORCEINLINE
+!           call gaussian_product(expo_erfc_mu_gauss,r,     &
+!                ao_prod_sigma(j,i),ao_prod_center(1,j,i),  & 
+!                factor_ij_1s,beta_ij,center_ij_1s)
+!           if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-1.5d0)).lt.1.d-10)cycle 
+!          endif
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
+
+          tmp_x += coef * (ints(1) - ints_coulomb(1))
+          tmp_y += coef * (ints(2) - ints_coulomb(2))
+          tmp_z += coef * (ints(3) - ints_coulomb(3))
+        enddo
+
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = tmp_x
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = tmp_y
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+! TODO analytically
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12)
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s, i_fit
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+  double precision           :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3), coeftot
+  double precision           :: sigma_ij, dist_ij_ipoint, dsqpi_3_2, int_j1b
+
+  double precision, external :: overlap_gauss_r12_ao
+  double precision, external :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing v_ij_u_cst_mu_j1b_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,                        & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size, & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b,   &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+
+        ! --- --- ---
+        ! i_1s = 1
+        ! --- --- ---
+
+        int_j1b = ao_abs_comb_b2_j1b(1,j,i)
+        if(dabs(int_j1b).lt.1.d-10) cycle
+        do i_fit = 1, ng_fit_jast
+          expo_fit = expo_gauss_j_mu_x(i_fit)
+          coef_fit = coef_gauss_j_mu_x(i_fit)
+          if(ao_overlap_abs_grid(j,i).lt.1.d-15) cycle
+          int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
+          tmp += coef_fit * int_fit
+        enddo
+
+        ! --- --- ---
+        ! i_1s > 1
+        ! --- --- ---
+
+        do i_1s = 2, List_comb_thr_b2_size(j,i)
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+          do i_fit = 1, ng_fit_jast
+            expo_fit = expo_gauss_j_mu_x(i_fit)
+            coef_fit = coef_gauss_j_mu_x(i_fit)
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+            tmp += coef * coef_fit * int_fit
+          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) with u(mu,r12) \approx 1/2 mu e^{-2.5 * mu (r12)^2}
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+  double precision           :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3), coeftot
+  double precision           :: sigma_ij, dist_ij_ipoint, dsqpi_3_2, int_j1b
+
+  double precision, external :: overlap_gauss_r12_ao
+  double precision, external :: overlap_gauss_r12_ao_with1s
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_ng_1_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s,  r, coef, beta, B_center,       &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,                        & 
+ !$OMP          final_grid_points, expo_good_j_mu_1gauss,coef_good_j_mu_1gauss,        &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,                                  &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,    & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b, &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+
+        ! --- --- ---
+        ! i_1s = 1
+        ! --- --- ---
+
+        int_j1b = ao_abs_comb_b2_j1b(1,j,i)
+        if(dabs(int_j1b).lt.1.d-10) cycle
+        expo_fit = expo_good_j_mu_1gauss
+        int_fit  = overlap_gauss_r12_ao(r, expo_fit, i, j)
+        tmp += int_fit
+
+        ! --- --- ---
+        ! i_1s > 1
+        ! --- --- ---
+
+        do i_1s = 2, List_comb_thr_b2_size(j,i)
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+!          do i_fit = 1, ng_fit_jast
+            expo_fit = expo_good_j_mu_1gauss
+            coef_fit = 1.d0
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+            tmp += coef * coef_fit * int_fit
+!          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_ng_1_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_ng_1_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
new file mode 100644
index 00000000..fc30cd83
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
@@ -0,0 +1,300 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R| - 1) / |r - R|
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_mu, int_coulomb
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+  double precision, external :: NAI_pol_mult_erf_ao_with1s
+
+  print *, ' providing v_ij_erf_rk_cst_mu_j1b ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  v_ij_erf_rk_cst_mu_j1b = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                         &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp) & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points, &
+ !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent,   &
+ !$OMP          v_ij_erf_rk_cst_mu_j1b, mu_erf)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp = 0.d0
+
+        ! ---
+
+        coef        = List_all_comb_b2_coef  (1)
+        beta        = List_all_comb_b2_expo  (1)
+        B_center(1) = List_all_comb_b2_cent(1,1)
+        B_center(2) = List_all_comb_b2_cent(2,1)
+        B_center(3) = List_all_comb_b2_cent(3,1)
+
+        int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
+        int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
+!        if(dabs(coef)*dabs(int_mu - int_coulomb) .lt. 1d-12) cycle
+
+        tmp += coef * (int_mu - int_coulomb)
+
+        ! ---
+
+        do i_1s = 2, List_all_comb_b2_size
+
+          coef        = List_all_comb_b2_coef  (i_1s)
+          beta        = List_all_comb_b2_expo  (i_1s)
+          B_center(1) = List_all_comb_b2_cent(1,i_1s)
+          B_center(2) = List_all_comb_b2_cent(2,i_1s)
+          B_center(3) = List_all_comb_b2_cent(3,i_1s)
+
+          int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
+          int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
+
+          tmp += coef * (int_mu - int_coulomb)
+        enddo
+
+        ! ---
+
+        v_ij_erf_rk_cst_mu_j1b(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_erf_rk_cst_mu_j1b(j,i,ipoint) = v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_erf_rk_cst_mu_j1b', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s
+  double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
+  double precision :: tmp_x, tmp_y, tmp_z
+  double precision :: wall0, wall1
+
+  print*, ' providing x_v_ij_erf_rk_cst_mu_j1b ...'
+  call wall_time(wall0)
+
+  x_v_ij_erf_rk_cst_mu_j1b = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
+ !$OMP          tmp_x, tmp_y, tmp_z)                                                  & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,&
+ !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent,  &
+ !$OMP          x_v_ij_erf_rk_cst_mu_j1b, mu_erf)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+
+        ! ---
+
+        coef        = List_all_comb_b2_coef  (1)
+        beta        = List_all_comb_b2_expo  (1)
+        B_center(1) = List_all_comb_b2_cent(1,1)
+        B_center(2) = List_all_comb_b2_cent(2,1)
+        B_center(3) = List_all_comb_b2_cent(3,1)
+
+        call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
+        call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
+
+!        if( dabs(coef)*(dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
+
+        tmp_x += coef * (ints(1) - ints_coulomb(1))
+        tmp_y += coef * (ints(2) - ints_coulomb(2))
+        tmp_z += coef * (ints(3) - ints_coulomb(3))
+
+        ! ---
+
+        do i_1s = 2, List_all_comb_b2_size
+
+          coef        = List_all_comb_b2_coef  (i_1s)
+          beta        = List_all_comb_b2_expo  (i_1s)
+          B_center(1) = List_all_comb_b2_cent(1,i_1s)
+          B_center(2) = List_all_comb_b2_cent(2,i_1s)
+          B_center(3) = List_all_comb_b2_cent(3,i_1s)
+
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
+
+          tmp_x += coef * (ints(1) - ints_coulomb(1))
+          tmp_y += coef * (ints(2) - ints_coulomb(2))
+          tmp_z += coef * (ints(3) - ints_coulomb(3))
+        enddo
+
+        ! ---
+
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = tmp_x
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = tmp_y
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b =', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+! TODO analytically
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12)
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s, i_fit
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+
+  double precision, external :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing v_ij_u_cst_mu_j1b ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  v_ij_u_cst_mu_j1b = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo,       & 
+ !$OMP          List_all_comb_b2_cent, v_ij_u_cst_mu_j1b)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp = 0.d0
+        do i_fit = 1, ng_fit_jast
+
+          expo_fit = expo_gauss_j_mu_x(i_fit)
+          coef_fit = coef_gauss_j_mu_x(i_fit)
+
+          ! ---
+
+          coef        = List_all_comb_b2_coef  (1)
+          beta        = List_all_comb_b2_expo  (1)
+          B_center(1) = List_all_comb_b2_cent(1,1)
+          B_center(2) = List_all_comb_b2_cent(2,1)
+          B_center(3) = List_all_comb_b2_cent(3,1)
+
+          int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+!          if(dabs(int_fit*coef) .lt. 1d-12) cycle
+
+          tmp += coef * coef_fit * int_fit
+
+          ! ---
+
+          do i_1s = 2, List_all_comb_b2_size
+
+            coef        = List_all_comb_b2_coef  (i_1s)
+            beta        = List_all_comb_b2_expo  (i_1s)
+            B_center(1) = List_all_comb_b2_cent(1,i_1s)
+            B_center(2) = List_all_comb_b2_cent(2,i_1s)
+            B_center(3) = List_all_comb_b2_cent(3,i_1s)
+
+            int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        v_ij_u_cst_mu_j1b(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b(j,i,ipoint) = v_ij_u_cst_mu_j1b(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad_related_ints.irp.f b/src/ao_many_one_e_ints/grad_related_ints.irp.f
new file mode 100644
index 00000000..8624e7b8
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad_related_ints.irp.f
@@ -0,0 +1,437 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R| - 1) / |r - R|
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: r(3)
+  double precision :: int_mu, int_coulomb
+  double precision :: wall0, wall1
+
+  double precision :: NAI_pol_mult_erf_ao
+
+  print*, ' providing v_ij_erf_rk_cst_mu ...'
+
+  provide mu_erf final_grid_points 
+  call wall_time(wall0)
+
+  v_ij_erf_rk_cst_mu = 0.d0
+
+ !$OMP PARALLEL                                       &
+ !$OMP DEFAULT (NONE)                                 &
+ !$OMP PRIVATE (i, j, ipoint, r, int_mu, int_coulomb) & 
+ !$OMP SHARED  (ao_num, n_points_final_grid, v_ij_erf_rk_cst_mu, final_grid_points, mu_erf)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        int_mu      = NAI_pol_mult_erf_ao(i, j, mu_erf, r)
+        int_coulomb = NAI_pol_mult_erf_ao(i, j,  1.d+9, r)
+
+        v_ij_erf_rk_cst_mu(j,i,ipoint) = int_mu - int_coulomb
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+ 
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_erf_rk_cst_mu(j,i,ipoint) = v_ij_erf_rk_cst_mu(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_erf_rk_cst_mu = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_transp, (n_points_final_grid, ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R| - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: r(3)
+  double precision :: int_mu, int_coulomb
+  double precision :: wall0, wall1
+  double precision :: NAI_pol_mult_erf_ao
+
+  print *, ' providing v_ij_erf_rk_cst_mu_transp ...'
+
+  provide mu_erf final_grid_points 
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                                  &
+ !$OMP DEFAULT (NONE)                            &
+ !$OMP PRIVATE (i,j,ipoint,r,int_mu,int_coulomb) & 
+ !$OMP SHARED (ao_num,n_points_final_grid,v_ij_erf_rk_cst_mu_transp,final_grid_points,mu_erf)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        int_mu      = NAI_pol_mult_erf_ao(i, j, mu_erf, r)
+        int_coulomb = NAI_pol_mult_erf_ao(i, j,  1.d+9, r)
+
+        v_ij_erf_rk_cst_mu_transp(ipoint,j,i) = int_mu - int_coulomb
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do i = 2, ao_num
+    do j = 1, i-1
+      do ipoint = 1, n_points_final_grid
+        v_ij_erf_rk_cst_mu_transp(ipoint,j,i) = v_ij_erf_rk_cst_mu_transp(ipoint,i,j)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: r(3), ints(3), ints_coulomb(3)
+  double precision :: wall0, wall1
+
+  print*, ' providing x_v_ij_erf_rk_cst_mu_tmp ...'
+
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                                 &
+ !$OMP DEFAULT (NONE)                           &
+ !$OMP PRIVATE (i,j,ipoint,r,ints,ints_coulomb) & 
+ !$OMP SHARED (ao_num,n_points_final_grid,x_v_ij_erf_rk_cst_mu_tmp,final_grid_points,mu_erf)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        call NAI_pol_x_mult_erf_ao(i, j, mu_erf, r, ints        )
+        call NAI_pol_x_mult_erf_ao(i, j, 1.d+9 , r, ints_coulomb)
+
+        x_v_ij_erf_rk_cst_mu_tmp(1,j,i,ipoint) = ints(1) - ints_coulomb(1)
+        x_v_ij_erf_rk_cst_mu_tmp(2,j,i,ipoint) = ints(2) - ints_coulomb(2)
+        x_v_ij_erf_rk_cst_mu_tmp(3,j,i,ipoint) = ints(3) - ints_coulomb(3)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+ 
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        x_v_ij_erf_rk_cst_mu_tmp(1,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp(1,i,j,ipoint)
+        x_v_ij_erf_rk_cst_mu_tmp(2,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp(2,i,j,ipoint)
+        x_v_ij_erf_rk_cst_mu_tmp(3,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp(3,i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: wall0, wall1
+
+  print *, ' providing x_v_ij_erf_rk_cst_mu ...'
+
+  call wall_time(wall0)
+
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, ao_num
+        x_v_ij_erf_rk_cst_mu(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp(1,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp(2,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp(3,j,i,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp, (ao_num, ao_num,3,n_points_final_grid)]
+
+  BEGIN_DOC
+  ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: wall0, wall1
+
+  print *, ' providing x_v_ij_erf_rk_cst_mu_transp ...'
+
+  call wall_time(wall0)
+
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, ao_num
+        x_v_ij_erf_rk_cst_mu_transp(j,i,1,ipoint) = x_v_ij_erf_rk_cst_mu_tmp(1,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_transp(j,i,2,ipoint) = x_v_ij_erf_rk_cst_mu_tmp(2,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_transp(j,i,3,ipoint) = x_v_ij_erf_rk_cst_mu_tmp(3,j,i,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_final_grid, ao_num, ao_num, 3)]
+
+  BEGIN_DOC
+  ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: wall0, wall1
+
+  print *, ' providing x_v_ij_erf_rk_cst_mu_transp_bis ...'
+
+  call wall_time(wall0)
+
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,j,i,1) = x_v_ij_erf_rk_cst_mu_tmp(1,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,j,i,2) = x_v_ij_erf_rk_cst_mu_tmp(2,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,j,i,3) = x_v_ij_erf_rk_cst_mu_tmp(3,j,i,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp_bis = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_final_grid, ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! d_dx_v_ij_erf_rk_cst_mu_tmp(m,R,j,i) = int dr phi_j(r)) (erf(mu(R) |r - R|) - 1)/|r - R| d/dx (phi_i(r) 
+  !
+  ! with m == 1 -> d/dx , m == 2 -> d/dy , m == 3 -> d/dz
+  END_DOC
+
+ implicit none
+ integer          :: i, j, ipoint
+ double precision :: r(3), ints(3), ints_coulomb(3)
+ double precision :: wall0, wall1
+
+  print *, ' providing d_dx_v_ij_erf_rk_cst_mu_tmp ...'
+
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                                 &
+ !$OMP DEFAULT (NONE)                           &
+ !$OMP PRIVATE (i,j,ipoint,r,ints,ints_coulomb) & 
+ !$OMP SHARED (ao_num,n_points_final_grid,d_dx_v_ij_erf_rk_cst_mu_tmp,final_grid_points,mu_erf)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+        call phi_j_erf_mu_r_dxyz_phi(j, i, mu_erf, r, ints)
+        call phi_j_erf_mu_r_dxyz_phi(j, i,  1.d+9, r, ints_coulomb)
+
+        d_dx_v_ij_erf_rk_cst_mu_tmp(1,ipoint,j,i) = ints(1) - ints_coulomb(1)
+        d_dx_v_ij_erf_rk_cst_mu_tmp(2,ipoint,j,i) = ints(2) - ints_coulomb(2)
+        d_dx_v_ij_erf_rk_cst_mu_tmp(3,ipoint,j,i) = ints(3) - ints_coulomb(3)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid, ao_num, ao_num, 3)]
+
+  BEGIN_DOC
+  !
+  ! d_dx_v_ij_erf_rk_cst_mu_tmp(j,i,R,m) = int dr phi_j(r)) (erf(mu(R) |r - R|) - 1)/|r - R| d/dx (phi_i(r) 
+  !
+  ! with m == 1 -> d/dx , m == 2 -> d/dy , m == 3 -> d/dz
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: wall0, wall1
+
+  print *, ' providing d_dx_v_ij_erf_rk_cst_mu ...'
+
+  call wall_time(wall0)
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        d_dx_v_ij_erf_rk_cst_mu(ipoint,j,i,1) = d_dx_v_ij_erf_rk_cst_mu_tmp(1,ipoint,j,i)
+        d_dx_v_ij_erf_rk_cst_mu(ipoint,j,i,2) = d_dx_v_ij_erf_rk_cst_mu_tmp(2,ipoint,j,i)
+        d_dx_v_ij_erf_rk_cst_mu(ipoint,j,i,3) = d_dx_v_ij_erf_rk_cst_mu_tmp(3,ipoint,j,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_final_grid, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! x_d_dx_v_ij_erf_rk_cst_mu_tmp(m,j,i,R) = int dr x phi_j(r)) (erf(mu(R) |r - R|) - 1)/|r - R| d/dx (phi_i(r) 
+  !
+  ! with m == 1 -> d/dx , m == 2 -> d/dy , m == 3 -> d/dz
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: r(3), ints(3), ints_coulomb(3)
+  double precision :: wall0, wall1
+
+  print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu_tmp ...'
+
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                                 &
+ !$OMP DEFAULT (NONE)                           &
+ !$OMP PRIVATE (i,j,ipoint,r,ints,ints_coulomb) & 
+ !$OMP SHARED (ao_num,n_points_final_grid,x_d_dx_v_ij_erf_rk_cst_mu_tmp,final_grid_points,mu_erf)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+        call phi_j_erf_mu_r_xyz_dxyz_phi(j, i, mu_erf, r, ints)
+        call phi_j_erf_mu_r_xyz_dxyz_phi(j, i,  1.d+9, r, ints_coulomb)
+
+        x_d_dx_v_ij_erf_rk_cst_mu_tmp(1,ipoint,j,i) = ints(1) - ints_coulomb(1)
+        x_d_dx_v_ij_erf_rk_cst_mu_tmp(2,ipoint,j,i) = ints(2) - ints_coulomb(2)
+        x_d_dx_v_ij_erf_rk_cst_mu_tmp(3,ipoint,j,i) = ints(3) - ints_coulomb(3)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid,ao_num, ao_num,3)]
+
+  BEGIN_DOC
+  !
+  ! x_d_dx_v_ij_erf_rk_cst_mu_tmp(j,i,R,m) = int dr x phi_j(r)) (erf(mu(R) |r - R|) - 1)/|r - R| d/dx (phi_i(r) 
+  !
+  ! with m == 1 -> d/dx , m == 2 -> d/dy , m == 3 -> d/dz
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: wall0, wall1
+
+  print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu ...'
+
+  call wall_time(wall0)
+
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        x_d_dx_v_ij_erf_rk_cst_mu(ipoint,j,i,1) = x_d_dx_v_ij_erf_rk_cst_mu_tmp(1,ipoint,j,i)
+        x_d_dx_v_ij_erf_rk_cst_mu(ipoint,j,i,2) = x_d_dx_v_ij_erf_rk_cst_mu_tmp(2,ipoint,j,i)
+        x_d_dx_v_ij_erf_rk_cst_mu(ipoint,j,i,3) = x_d_dx_v_ij_erf_rk_cst_mu_tmp(3,ipoint,j,i)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+
diff --git a/src/ao_many_one_e_ints/list_grid.irp.f b/src/ao_many_one_e_ints/list_grid.irp.f
new file mode 100644
index 00000000..d5d88007
--- /dev/null
+++ b/src/ao_many_one_e_ints/list_grid.irp.f
@@ -0,0 +1,59 @@
+ BEGIN_PROVIDER [ integer, n_pts_grid_ao_prod, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_n_pts_grid_ao_prod]
+ implicit none
+ integer :: i,j,ipoint
+ double precision :: overlap, r(3),thr, overlap_abs_gauss_r12_ao,overlap_gauss_r12_ao
+ double precision :: sigma,dist,center_ij(3),fact_gauss, alpha, center(3)
+ n_pts_grid_ao_prod = 0
+ thr = 1.d-11
+ print*,' expo_good_j_mu_1gauss = ',expo_good_j_mu_1gauss
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, r, overlap, fact_gauss, alpha, center,dist,sigma,center_ij) &
+ !$OMP SHARED  (n_points_final_grid, ao_num, thr, ao_overlap_abs_grid,n_pts_grid_ao_prod,expo_good_j_mu_1gauss,&
+ !$OMP          final_grid_points,ao_prod_center,ao_prod_sigma,ao_nucl)
+ !$OMP DO
+ do i = 1, ao_num
+! do i = 3,3
+  do j = 1, ao_num
+! do i = 22,22
+!  do j = 9,9
+   center_ij(1:3) = ao_prod_center(1:3,j,i)
+   sigma = ao_prod_sigma(j,i)
+   sigma *= sigma
+   sigma = 0.5d0 /sigma
+!   if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+   do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    dist  = (center_ij(1) - r(1))*(center_ij(1) - r(1))
+    dist += (center_ij(2) - r(2))*(center_ij(2) - r(2))
+    dist += (center_ij(3) - r(3))*(center_ij(3) - r(3))
+    dist = dsqrt(dist)
+    call gaussian_product(sigma, center_ij, expo_good_j_mu_1gauss, r, fact_gauss, alpha, center)
+!    print*,''
+!    print*,j,i,ao_overlap_abs_grid(j,i),ao_overlap_abs(j,i)
+!    print*,r
+!    print*,dist,sigma
+!    print*,fact_gauss
+    if( fact_gauss*ao_overlap_abs_grid(j,i).lt.1.d-11)cycle
+    if(ao_nucl(i) == ao_nucl(j))then
+     overlap = overlap_abs_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    else
+     overlap = overlap_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    endif
+!    print*,overlap
+    if(dabs(overlap).lt.thr)cycle
+    n_pts_grid_ao_prod(j,i) += 1
+   enddo
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(n_pts_grid_ao_prod(:,i))
+ enddo
+ max_n_pts_grid_ao_prod = maxval(list)
+END_PROVIDER
diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f
new file mode 100644
index 00000000..4698cb27
--- /dev/null
+++ b/src/ao_many_one_e_ints/listj1b.irp.f
@@ -0,0 +1,237 @@
+
+! ---
+
+BEGIN_PROVIDER [ integer, List_all_comb_b2_size]
+
+  implicit none
+
+  List_all_comb_b2_size = 2**nucl_num
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ integer, List_all_comb_b2, (nucl_num, List_all_comb_b2_size)]
+
+  implicit none
+  integer :: i, j
+
+  if(nucl_num .gt. 32) then
+    print *, ' nucl_num = ', nucl_num, '> 32'
+    stop
+  endif
+
+  List_all_comb_b2 = 0
+
+  do i = 0, List_all_comb_b2_size-1
+    do j = 0, nucl_num-1
+      if (btest(i,j)) then
+        List_all_comb_b2(j+1,i+1) = 1
+      endif
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, List_all_comb_b2_coef, (   List_all_comb_b2_size)]
+&BEGIN_PROVIDER [ double precision, List_all_comb_b2_expo, (   List_all_comb_b2_size)]
+&BEGIN_PROVIDER [ double precision, List_all_comb_b2_cent, (3, List_all_comb_b2_size)]
+
+  implicit none
+  integer          :: i, j, k, phase
+  double precision :: tmp_alphaj, tmp_alphak
+  double precision :: tmp_cent_x, tmp_cent_y, tmp_cent_z
+
+  provide j1b_pen
+
+  List_all_comb_b2_coef = 0.d0
+  List_all_comb_b2_expo = 0.d0
+  List_all_comb_b2_cent = 0.d0
+
+  do i = 1, List_all_comb_b2_size
+
+    tmp_cent_x = 0.d0
+    tmp_cent_y = 0.d0
+    tmp_cent_z = 0.d0
+    do j = 1, nucl_num
+      tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j)
+      List_all_comb_b2_expo(i) += tmp_alphaj
+      tmp_cent_x               += tmp_alphaj * nucl_coord(j,1)
+      tmp_cent_y               += tmp_alphaj * nucl_coord(j,2)
+      tmp_cent_z               += tmp_alphaj * nucl_coord(j,3)
+    enddo
+
+    if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle
+
+    List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) 
+    List_all_comb_b2_cent(2,i) = tmp_cent_y / List_all_comb_b2_expo(i)
+    List_all_comb_b2_cent(3,i) = tmp_cent_z / List_all_comb_b2_expo(i)
+  enddo
+
+  ! ---
+
+  do i = 1, List_all_comb_b2_size
+
+    do j = 2, nucl_num, 1
+      tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j)
+      do k = 1, j-1, 1
+        tmp_alphak = dble(List_all_comb_b2(k,i)) * j1b_pen(k)
+
+        List_all_comb_b2_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) &
+                                                              + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) &
+                                                              + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) )
+      enddo
+    enddo
+
+    if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle
+
+    List_all_comb_b2_coef(i) = List_all_comb_b2_coef(i) / List_all_comb_b2_expo(i)
+  enddo
+
+  ! ---
+
+  do i = 1, List_all_comb_b2_size
+
+    phase = 0
+    do j = 1, nucl_num
+      phase += List_all_comb_b2(j,i)
+    enddo
+
+    List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i))
+  enddo
+
+  !print *, ' coeff, expo & cent of list b2'
+  !do i = 1, List_all_comb_b2_size
+  !  print*, i, List_all_comb_b2_coef(i), List_all_comb_b2_expo(i)
+  !  print*, List_all_comb_b2_cent(1,i), List_all_comb_b2_cent(2,i), List_all_comb_b2_cent(3,i)
+  !enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ integer, List_all_comb_b3_size]
+
+  implicit none
+
+  List_all_comb_b3_size = 3**nucl_num
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ integer, List_all_comb_b3, (nucl_num, List_all_comb_b3_size)]
+
+  implicit none
+  integer              :: i, j, ii, jj
+  integer, allocatable :: M(:,:), p(:)
+
+  if(nucl_num .gt. 32) then
+    print *, ' nucl_num = ', nucl_num, '> 32'
+    stop
+  endif
+
+  List_all_comb_b3(:,:)                     = 0
+  List_all_comb_b3(:,List_all_comb_b3_size) = 2
+
+  allocate(p(nucl_num))
+  p = 0
+
+  do i = 2, List_all_comb_b3_size-1
+    do j = 1, nucl_num
+
+      ii = 0
+      do jj = 1, j-1, 1
+        ii = ii + p(jj) * 3**(jj-1)
+      enddo
+      p(j) = modulo(i-1-ii, 3**j) / 3**(j-1)
+
+      List_all_comb_b3(j,i) = p(j)
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, List_all_comb_b3_coef, (   List_all_comb_b3_size)]
+&BEGIN_PROVIDER [ double precision, List_all_comb_b3_expo, (   List_all_comb_b3_size)]
+&BEGIN_PROVIDER [ double precision, List_all_comb_b3_cent, (3, List_all_comb_b3_size)]
+
+  implicit none
+  integer          :: i, j, k, phase
+  double precision :: tmp_alphaj, tmp_alphak, facto
+
+  provide j1b_pen
+
+  List_all_comb_b3_coef = 0.d0
+  List_all_comb_b3_expo = 0.d0
+  List_all_comb_b3_cent = 0.d0
+
+  do i = 1, List_all_comb_b3_size
+
+    do j = 1, nucl_num
+      tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j)
+      List_all_comb_b3_expo(i)   += tmp_alphaj
+      List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1)
+      List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2)
+      List_all_comb_b3_cent(3,i) += tmp_alphaj * nucl_coord(j,3)
+
+    enddo
+
+    if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle
+    ASSERT(List_all_comb_b3_expo(i) .gt. 0d0)
+
+    List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) 
+    List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i)
+    List_all_comb_b3_cent(3,i) = List_all_comb_b3_cent(3,i) / List_all_comb_b3_expo(i)
+  enddo
+
+  ! ---
+
+  do i = 1, List_all_comb_b3_size
+
+    do j = 2, nucl_num, 1
+      tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j)
+      do k = 1, j-1, 1
+        tmp_alphak = dble(List_all_comb_b3(k,i)) * j1b_pen(k)
+
+        List_all_comb_b3_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) &
+                                                              + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) &
+                                                              + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) )
+      enddo
+    enddo
+
+    if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle
+
+    List_all_comb_b3_coef(i) = List_all_comb_b3_coef(i) / List_all_comb_b3_expo(i)
+  enddo
+
+  ! ---
+
+  do i = 1, List_all_comb_b3_size
+
+    facto = 1.d0
+    phase = 0
+    do j = 1, nucl_num
+      tmp_alphaj = dble(List_all_comb_b3(j,i)) 
+
+      facto *= 2.d0 / (gamma(tmp_alphaj+1.d0) * gamma(3.d0-tmp_alphaj))
+      phase += List_all_comb_b3(j,i)
+    enddo
+
+    List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i))
+  enddo
+
+  !print *, ' coeff, expo & cent of list b3'
+  !do i = 1, List_all_comb_b3_size
+  !  print*, i, List_all_comb_b3_coef(i), List_all_comb_b3_expo(i)
+  !  print*, List_all_comb_b3_cent(1,i), List_all_comb_b3_cent(2,i), List_all_comb_b3_cent(3,i)
+  !enddo
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
new file mode 100644
index 00000000..bf493fbb
--- /dev/null
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -0,0 +1,191 @@
+
+ BEGIN_PROVIDER [ integer, List_comb_thr_b2_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size]
+ implicit none
+ integer :: i_1s,i,j,ipoint
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ List_comb_thr_b2_size = 0
+ do i = 1, ao_num
+  do j = i, ao_num
+   do i_1s = 1, List_all_comb_b2_size
+     coef        = List_all_comb_b2_coef  (i_1s)
+     if(dabs(coef).lt.1.d-15)cycle
+     beta        = List_all_comb_b2_expo  (i_1s)
+     beta = max(beta,1.d-12)
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      List_comb_thr_b2_size(j,i) += 1
+     endif
+   enddo
+  enddo 
+ enddo
+ do i = 1, ao_num
+  do j = 1, i-1
+    List_comb_thr_b2_size(j,i) = List_comb_thr_b2_size(i,j)
+  enddo
+ enddo
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(List_comb_thr_b2_size(:,i))
+ enddo
+ max_List_comb_thr_b2_size = maxval(list) 
+ 
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b2_coef, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_expo, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_cent, (3, max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)]
+ implicit none
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ ao_abs_comb_b2_j1b = 10000000.d0
+ do i = 1, ao_num
+  do j = i, ao_num
+   icount = 0
+   do i_1s = 1, List_all_comb_b2_size
+     coef        = List_all_comb_b2_coef  (i_1s)
+     if(dabs(coef).lt.1.d-12)cycle
+     beta        = List_all_comb_b2_expo  (i_1s)
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b2_coef(icount,j,i) = coef
+      List_comb_thr_b2_expo(icount,j,i) = beta
+      List_comb_thr_b2_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b2_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+
+ do i = 1, ao_num
+  do j = 1, i-1
+    do icount = 1, List_comb_thr_b2_size(j,i)
+     List_comb_thr_b2_coef(icount,j,i) = List_comb_thr_b2_coef(icount,i,j)
+     List_comb_thr_b2_expo(icount,j,i) = List_comb_thr_b2_expo(icount,i,j)
+     List_comb_thr_b2_cent(1:3,icount,j,i) = List_comb_thr_b2_cent(1:3,icount,i,j)
+    enddo
+  enddo
+ enddo
+ 
+END_PROVIDER 
+
+
+ BEGIN_PROVIDER [ integer, List_comb_thr_b3_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size]
+ implicit none
+ integer :: i_1s,i,j,ipoint
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ List_comb_thr_b3_size = 0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     beta        = List_all_comb_b3_expo  (i_1s)
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     if(dabs(coef).lt.thr)cycle
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      List_comb_thr_b3_size(j,i) += 1
+     endif
+   enddo
+  enddo 
+ enddo
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    List_comb_thr_b3_size(j,i) = List_comb_thr_b3_size(i,j)
+!  enddo
+! enddo
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(List_comb_thr_b3_size(:,i))
+ enddo
+ max_List_comb_thr_b3_size = maxval(list) 
+ print*,'max_List_comb_thr_b3_size =  ',max_List_comb_thr_b3_size
+ 
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b3_coef, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_expo, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_cent, (3, max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)]
+ implicit none
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ ao_abs_comb_b3_j1b = 10000000.d0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   icount = 0
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     beta        = List_all_comb_b3_expo  (i_1s)
+     beta = max(beta,1.d-12)
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     if(dabs(coef).lt.thr)cycle
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b3_coef(icount,j,i) = coef
+      List_comb_thr_b3_expo(icount,j,i) = beta
+      List_comb_thr_b3_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b3_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    do icount = 1, List_comb_thr_b3_size(j,i)
+!     List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j)
+!     List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j)
+!     List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j)
+!    enddo
+!  enddo
+! enddo
+ 
+END_PROVIDER 
+
diff --git a/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f
new file mode 100644
index 00000000..641d25fe
--- /dev/null
+++ b/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f
@@ -0,0 +1,195 @@
+double precision function NAI_pol_mult_erf_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  BEGIN_DOC
+  ! Computes the following integral R^3 :
+  !
+  ! .. math::
+  ! 
+  !   \int dr  (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$ exp(-delta (r - D)^2 ).
+  !
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D" 
+ double precision, intent(in)    :: C_center(3),mu      ! coulomb center "C" and "mu" in the erf(mu*x)/x function
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3)
+
+ double precision  :: NAI_pol_mult_erf
+ ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+ double precision  :: A_new(0:max_dim,3)! new polynom 
+ double precision  :: A_center_new(3)   ! new center
+ integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+ double precision  :: alpha_new         ! new exponent
+ double precision  :: fact_a_new        ! constant factor
+ double precision  :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
+ integer           :: d(3),i,lx,ly,lz,iorder_tmp(3)
+ thr = 1.d-10
+ d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+ ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+ accu = 0.d0
+ do lx = 0, iorder_a_new(1)
+  coefx = A_new(lx,1)
+  if(dabs(coefx).lt.thr)cycle
+  iorder_tmp(1) = lx
+  do ly = 0, iorder_a_new(2)
+   coefy = A_new(ly,2)
+   coefxy = coefx * coefy 
+   if(dabs(coefxy).lt.thr)cycle
+   iorder_tmp(2) = ly
+   do lz = 0, iorder_a_new(3)
+    coefz = A_new(lz,3)
+    coefxyz = coefxy * coefz 
+    if(dabs(coefxyz).lt.thr)cycle
+    iorder_tmp(3) = lz
+    accu += coefxyz * NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B,alpha_new,beta,C_center,n_pt_max_integrals,mu)
+   enddo
+  enddo
+ enddo
+ NAI_pol_mult_erf_gauss_r12 = fact_a_new * accu 
+end
+
+subroutine erfc_mu_gauss_xyz(D_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in,xyz_ints)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math::
+  ! 
+  !   \int dr exp(-delta (r - D)^2 ) x/y/z * (1 - erf(mu |r-r'|))/ |r-r'| * (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  !   xyz_ints(1) = x , xyz_ints(2) = y, xyz_ints(3) = z, xyz_ints(4) = x^0 
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), delta,mu  ! pure gaussian "D" and mu parameter
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3),n_pt_in
+ double precision, intent(out)   :: xyz_ints(4)
+
+ double precision  :: NAI_pol_mult_erf
+ ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+ double precision  :: A_new(0:max_dim,3)! new polynom 
+ double precision  :: A_center_new(3)   ! new center
+ integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+ double precision  :: alpha_new         ! new exponent
+ double precision  :: fact_a_new        ! constant factor
+ double precision  :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr,contrib,contrib_inf,mu_inf
+ integer           :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1,mm
+ integer           :: power_B_tmp(3)
+ dim1=100
+ mu_inf = 1.d+10
+ thr = 1.d-10
+ d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+ ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+ xyz_ints = 0.d0
+ do lx = 0, iorder_a_new(1)
+  coefx = A_new(lx,1)
+  if(dabs(coefx).lt.thr)cycle
+  iorder_tmp(1) = lx
+  do ly = 0, iorder_a_new(2)
+   coefy = A_new(ly,2)
+   coefxy = coefx * coefy 
+   if(dabs(coefxy).lt.thr)cycle
+   iorder_tmp(2) = ly
+   do lz = 0, iorder_a_new(3)
+    coefz = A_new(lz,3)
+    coefxyz = coefxy * coefz 
+    if(dabs(coefxyz).lt.thr)cycle
+    iorder_tmp(3) = lz
+     power_B_tmp = power_B
+     contrib = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B_tmp,alpha_new,beta,D_center,n_pt_in,mu)  
+     contrib_inf = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B_tmp,alpha_new,beta,D_center,n_pt_in,mu_inf)  
+     xyz_ints(4) += (contrib_inf - contrib) * coefxyz ! usual term with no x/y/z 
+                                      
+     do mm = 1, 3 
+      ! (x phi_i ) * phi_j 
+      ! x * (x - B_x)^b_x = B_x (x - B_x)^b_x + 1 * (x - B_x)^{b_x+1}
+      
+      !
+      ! first contribution :: B_x (x - B_x)^b_x :: usual integral multiplied by B_x
+      power_B_tmp = power_B
+      contrib_inf = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B_tmp,alpha_new,beta,D_center,n_pt_in,mu_inf)  
+      contrib = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B_tmp,alpha_new,beta,D_center,n_pt_in,mu)  
+      xyz_ints(mm) += (contrib_inf - contrib) * B_center(mm) * coefxyz 
+                                                       
+      !
+      ! second contribution :: (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1 
+      power_B_tmp(mm) += 1
+      contrib = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B_tmp,alpha_new,beta,D_center,n_pt_in,mu)  
+      contrib_inf = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B_tmp,alpha_new,beta,D_center,n_pt_in,mu_inf)  
+      xyz_ints(mm) += (contrib_inf -  contrib) * coefxyz     
+     enddo
+   enddo
+  enddo
+ enddo
+ xyz_ints *= fact_a_new 
+end
+
+
+double precision function erf_mu_gauss(D_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math::
+  ! 
+  !   \int dr exp(-delta (r - D)^2 ) erf(mu*|r-r'|)/ |r-r'| * (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), delta,mu  ! pure gaussian "D" and mu parameter
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3),n_pt_in
+
+ double precision  :: NAI_pol_mult_erf
+ ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+ double precision  :: A_new(0:max_dim,3)! new polynom 
+ double precision  :: A_center_new(3)   ! new center
+ integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+ double precision  :: alpha_new         ! new exponent
+ double precision  :: fact_a_new        ! constant factor
+ double precision  :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr,contrib,contrib_inf,mu_inf
+ integer           :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1,mm
+ dim1=100
+ mu_inf = 1.d+10
+ thr = 1.d-10
+ d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+ ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+ erf_mu_gauss = 0.d0
+ do lx = 0, iorder_a_new(1)
+  coefx = A_new(lx,1)
+  if(dabs(coefx).lt.thr)cycle
+  iorder_tmp(1) = lx
+  do ly = 0, iorder_a_new(2)
+   coefy = A_new(ly,2)
+   coefxy = coefx * coefy 
+   if(dabs(coefxy).lt.thr)cycle
+   iorder_tmp(2) = ly
+   do lz = 0, iorder_a_new(3)
+    coefz = A_new(lz,3)
+    coefxyz = coefxy * coefz 
+    if(dabs(coefxyz).lt.thr)cycle
+    iorder_tmp(3) = lz
+    contrib = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B,alpha_new,beta,D_center,n_pt_in,mu)
+    erf_mu_gauss += contrib * coefxyz     
+   enddo
+  enddo
+ enddo
+ erf_mu_gauss *= fact_a_new 
+end
+
diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
new file mode 100644
index 00000000..54c2d95b
--- /dev/null
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@@ -0,0 +1,340 @@
+! ---
+
+double precision function overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)          :: power_A(3),power_B(3)
+
+  double precision             :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision             :: A_new(0:max_dim,3)! new polynom
+  double precision             :: A_center_new(3)   ! new center
+  integer                      :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision             :: alpha_new         ! new exponent
+  double precision             :: fact_a_new        ! constant factor
+  double precision             :: accu, coefx, coefy, coefz, coefxy, coefxyz, thr
+  integer                      :: d(3), i, lx, ly, lz, iorder_tmp(3), dim1
+
+  dim1 = 100
+  thr  = 1.d-10
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_gauss_r12 = 0.d0
+
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
+  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)*fact_a_new
+    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+
+    do ly = 0, iorder_a_new(2)
+      coefy  = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy) .lt. thr) cycle
+      iorder_tmp(2) = ly
+
+      do lz = 0, iorder_a_new(3)
+        coefz   = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz) .lt. thr) cycle
+        iorder_tmp(3) = lz
+
+        call overlap_gaussian_xyz( A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B &
+                                 , overlap_x, overlap_y, overlap_z, overlap, dim1)
+
+        accu += coefxyz * overlap
+      enddo
+    enddo
+  enddo
+  overlap_gauss_r12 = accu
+end
+
+!---
+double precision function overlap_abs_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) |(x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )|
+  !
+  END_DOC
+
+  implicit none
+  include 'constants.include.F'
+  double precision, intent(in)   :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in)   :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)            :: power_A(3),power_B(3)
+
+  double precision               :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision               :: A_new(0:max_dim,3)! new polynom
+  double precision               :: A_center_new(3)   ! new center
+  integer                        :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision               :: alpha_new         ! new exponent
+  double precision               :: fact_a_new        ! constant factor
+  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr,dx,lower_exp_val
+  integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
+  dim1=50
+  lower_exp_val = 40.d0
+  thr = 1.d-12
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_abs_gauss_r12 = 0.d0
+
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
+  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)*fact_a_new
+!    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+    do ly = 0, iorder_a_new(2)
+      coefy = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy).lt.thr)cycle
+      iorder_tmp(2) = ly
+      do lz = 0, iorder_a_new(3)
+        coefz = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz).lt.thr)cycle
+        iorder_tmp(3) = lz
+        call overlap_x_abs(A_center_new(1),B_center(1),alpha_new,beta,iorder_tmp(1),power_B(1),overlap_x,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(2),B_center(2),alpha_new,beta,iorder_tmp(2),power_B(2),overlap_y,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(3),B_center(3),alpha_new,beta,iorder_tmp(3),power_B(3),overlap_z,lower_exp_val,dx,dim1)
+        accu += dabs(coefxyz * overlap_x * overlap_y * overlap_z)
+      enddo
+    enddo
+  enddo
+  overlap_abs_gauss_r12= accu
+end
+
+!---
+
+! TODO apply Gaussian product three times first
+subroutine overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_A, power_B, alpha, beta, rvec, LD_rvec, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  !   \int dr exp(-delta (r - D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2)
+  !   using an array of D_centers
+  !
+  ! n_points: nb of integrals
+  !
+  END_DOC
+
+  implicit none
+
+  include 'constants.include.F'
+
+  integer,          intent(in)  :: LD_D, LD_rvec, n_points
+  integer,          intent(in)  :: power_A(3), power_B(3)
+  double precision, intent(in)  :: D_center(LD_D,3), delta
+  double precision, intent(in)  :: A_center(3), B_center(3), alpha, beta 
+  double precision, intent(out) :: rvec(LD_rvec)
+
+  integer                       :: maxab
+  integer                       :: d(3), i, lx, ly, lz, iorder_tmp(3), ipoint
+  double precision              :: overlap_x, overlap_y, overlap_z
+  double precision              :: alpha_new
+  double precision              :: accu, thr, coefxy
+  integer,          allocatable :: iorder_a_new(:)
+  double precision, allocatable :: overlap(:)
+  double precision, allocatable :: A_new(:,:,:), A_center_new(:,:)
+  double precision, allocatable :: fact_a_new(:)
+
+  thr  = 1.d-10
+  d(:) = 0
+
+  maxab = maxval(power_A(1:3))
+
+  allocate(A_new(n_points,0:maxab,3), A_center_new(n_points,3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points))
+
+  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, D_center, LD_D, A_center, n_points)
+
+  rvec(:) = 0.d0
+
+  do lx = 0, iorder_a_new(1)
+    iorder_tmp(1) = lx
+
+    do ly = 0, iorder_a_new(2)
+      iorder_tmp(2) = ly
+
+      do lz = 0, iorder_a_new(3)
+        iorder_tmp(3) = lz
+
+        call overlap_gaussian_xyz_v(A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B, overlap, n_points)
+
+        do ipoint = 1, n_points
+          rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * A_new(ipoint,ly,2) * A_new(ipoint,lz,3) * overlap(ipoint)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  do ipoint = 1, n_points
+    rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint)
+  enddo
+
+  deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap)
+
+end subroutine overlap_gauss_r12_v
+
+!---
+
+subroutine overlap_gauss_xyz_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, gauss_ints)
+
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   gauss_ints(m) = \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  ! with m == 1 ==> x, m == 2 ==> y, m == 3 ==> z
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D"
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3)
+ double precision, intent(out)   :: gauss_ints(3)
+
+ double precision  :: overlap_x,overlap_y,overlap_z,overlap
+ ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+ double precision  :: A_new(0:max_dim,3)! new polynom
+ double precision  :: A_center_new(3)   ! new center
+ integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+ integer           :: power_B_new(3)
+ double precision  :: alpha_new         ! new exponent
+ double precision  :: fact_a_new        ! constant factor
+ double precision  :: coefx,coefy,coefz,coefxy,coefxyz,thr
+ integer           :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1,m
+ dim1=100
+ thr = 1.d-10
+ d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , &
+                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+ ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+ gauss_ints = 0.d0
+ do lx = 0, iorder_a_new(1)
+  coefx = A_new(lx,1)
+  if(dabs(coefx).lt.thr)cycle
+  iorder_tmp(1) = lx
+  do ly = 0, iorder_a_new(2)
+   coefy = A_new(ly,2)
+   coefxy = coefx * coefy
+   if(dabs(coefxy).lt.thr)cycle
+   iorder_tmp(2) = ly
+   do lz = 0, iorder_a_new(3)
+    coefz = A_new(lz,3)
+    coefxyz = coefxy * coefz
+    if(dabs(coefxyz).lt.thr)cycle
+    iorder_tmp(3) = lz
+    do m = 1, 3
+     ! change (x-Bx)^bx --> (x-Bx)^(bx+1) + Bx(x-Bx)^bx
+     power_B_new = power_B
+     power_B_new(m) += 1 ! (x-Bx)^(bx+1)
+     call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B_new,overlap_x,overlap_y,overlap_z,overlap,dim1)
+     gauss_ints(m) += coefxyz * overlap
+
+     power_B_new = power_B
+     call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B_new,overlap_x,overlap_y,overlap_z,overlap,dim1)
+     gauss_ints(m) += coefxyz * overlap * B_center(m) ! Bx (x-Bx)^(bx)
+    enddo
+   enddo
+  enddo
+ enddo
+ gauss_ints *= fact_a_new
+end
+
+double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,mx)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !    \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  ! with mx == 1 ==> x, mx == 2 ==> y, mx == 3 ==> z
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D"
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3),mx
+
+ double precision  :: overlap_x,overlap_y,overlap_z,overlap
+ ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+ double precision  :: A_new(0:max_dim,3)! new polynom
+ double precision  :: A_center_new(3)   ! new center
+ integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+ integer           :: power_B_new(3)
+ double precision  :: alpha_new         ! new exponent
+ double precision  :: fact_a_new        ! constant factor
+ double precision  :: coefx,coefy,coefz,coefxy,coefxyz,thr
+ integer           :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1,m
+ dim1=100
+ thr = 1.d-10
+ d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , &
+                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+ ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+ overlap_gauss_xyz_r12_specific = 0.d0
+ do lx = 0, iorder_a_new(1)
+  coefx = A_new(lx,1)
+  if(dabs(coefx).lt.thr)cycle
+  iorder_tmp(1) = lx
+  do ly = 0, iorder_a_new(2)
+   coefy = A_new(ly,2)
+   coefxy = coefx * coefy
+   if(dabs(coefxy).lt.thr)cycle
+   iorder_tmp(2) = ly
+   do lz = 0, iorder_a_new(3)
+    coefz = A_new(lz,3)
+    coefxyz = coefxy * coefz
+    if(dabs(coefxyz).lt.thr)cycle
+    iorder_tmp(3) = lz
+    m = mx
+    ! change (x-Bx)^bx --> (x-Bx)^(bx+1) + Bx(x-Bx)^bx
+    power_B_new = power_B
+    power_B_new(m) += 1 ! (x-Bx)^(bx+1)
+    call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B_new,overlap_x,overlap_y,overlap_z,overlap,dim1)
+    overlap_gauss_xyz_r12_specific += coefxyz * overlap
+
+    power_B_new = power_B
+    call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B_new,overlap_x,overlap_y,overlap_z,overlap,dim1)
+    overlap_gauss_xyz_r12_specific += coefxyz * overlap * B_center(m) ! Bx (x-Bx)^(bx)
+   enddo
+  enddo
+ enddo
+ overlap_gauss_xyz_r12_specific *= fact_a_new
+end
diff --git a/src/ao_many_one_e_ints/stg_gauss_int.irp.f b/src/ao_many_one_e_ints/stg_gauss_int.irp.f
new file mode 100644
index 00000000..384f744b
--- /dev/null
+++ b/src/ao_many_one_e_ints/stg_gauss_int.irp.f
@@ -0,0 +1,121 @@
+double precision function ovlp_stg_gauss_int_phi_ij(D_center,gam,delta,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Computes the following integral : 
+  !
+  ! .. math::
+  ! 
+  !   \int dr exp(-gam (r - D)) exp(-delta * (r -D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  END_DOC
+
+ implicit none
+ double precision, intent(in)    :: D_center(3), gam ! pure Slater "D" in r-r_D
+ double precision, intent(in)    :: delta            ! gaussian        in r-r_D
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3)
+
+ integer :: i
+ double precision :: integral,gama_gauss
+ double precision, allocatable :: expos_slat(:)
+ allocate(expos_slat(n_max_fit_slat))
+ double precision :: overlap_gauss_r12
+ ovlp_stg_gauss_int_phi_ij = 0.d0
+ call expo_fit_slater_gam(gam,expos_slat)
+ do i = 1, n_max_fit_slat
+  gama_gauss = expos_slat(i)+delta 
+  integral = overlap_gauss_r12(D_center,gama_gauss,A_center,B_center,power_A,power_B,alpha,beta)
+  ovlp_stg_gauss_int_phi_ij += coef_fit_slat_gauss(i) * integral
+ enddo
+end
+
+
+double precision function erf_mu_stg_gauss_int_phi_ij(D_center,gam,delta,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  BEGIN_DOC
+  ! Computes the following integral : 
+  !
+  ! .. math::
+  ! 
+  !   \int dr exp(-gam(r - D)-delta(r - D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), gam ! pure Slater "D" in r-r_D
+ double precision, intent(in)    :: delta            ! gaussian        in r-r_D
+ double precision, intent(in)    :: C_center(3),mu      ! coulomb center "C" and "mu" in the erf(mu*x)/x function
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3)
+
+ integer :: i
+ double precision :: NAI_pol_mult_erf_gauss_r12
+ double precision :: integral,gama_gauss
+ double precision, allocatable :: expos_slat(:)
+ allocate(expos_slat(n_max_fit_slat))
+ erf_mu_stg_gauss_int_phi_ij = 0.d0
+ call expo_fit_slater_gam(gam,expos_slat)
+ do i = 1, n_max_fit_slat
+  gama_gauss = expos_slat(i) + delta
+  integral = NAI_pol_mult_erf_gauss_r12(D_center,gama_gauss,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  erf_mu_stg_gauss_int_phi_ij += coef_fit_slat_gauss(i) * integral
+ enddo
+end
+
+double precision function overlap_stg_gauss(D_center,gam,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Computes the following integral : 
+  !
+  ! .. math::
+  ! 
+  !   \int dr exp(-gam (r - D)) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  END_DOC
+
+ implicit none
+ double precision, intent(in)    :: D_center(3), gam ! pure Slater "D" 
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3)
+ 
+ integer :: i
+ double precision :: expos_slat(n_max_fit_slat),integral,delta
+ double precision :: overlap_gauss_r12
+ overlap_stg_gauss = 0.d0
+ call expo_fit_slater_gam(gam,expos_slat)
+ do i = 1, n_max_fit_slat
+  delta = expos_slat(i) 
+  integral = overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+  overlap_stg_gauss += coef_fit_slat_gauss(i) * integral
+ enddo
+end
+
+double precision function erf_mu_stg_gauss(D_center,gam,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  BEGIN_DOC
+  ! Computes the following integral : 
+  !
+  ! .. math::
+  ! 
+  !   \int dr exp(-gam(r - D)) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), gam    ! pure Slater "D" 
+ double precision, intent(in)    :: C_center(3),mu      ! coulomb center "C" and "mu" in the erf(mu*x)/x function
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3)
+
+ 
+ integer :: i
+ double precision :: expos_slat(n_max_fit_slat),integral,delta
+ double precision :: NAI_pol_mult_erf_gauss_r12
+ erf_mu_stg_gauss = 0.d0
+ call expo_fit_slater_gam(gam,expos_slat)
+ do i = 1, n_max_fit_slat
+  delta = expos_slat(i) 
+  integral = NAI_pol_mult_erf_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  erf_mu_stg_gauss += coef_fit_slat_gauss(i) * integral
+ enddo
+end
diff --git a/src/ao_many_one_e_ints/taylor_exp.irp.f b/src/ao_many_one_e_ints/taylor_exp.irp.f
new file mode 100644
index 00000000..9857875a
--- /dev/null
+++ b/src/ao_many_one_e_ints/taylor_exp.irp.f
@@ -0,0 +1,101 @@
+double precision function exp_dl(x,n)
+ implicit none
+ double precision, intent(in) :: x
+ integer         , intent(in) :: n
+ integer :: i
+ exp_dl = 1.d0
+ do i = 1, n
+  exp_dl += fact_inv(i) * x**dble(i)
+ enddo
+end
+
+subroutine exp_dl_rout(x,n, array)
+ implicit none
+ double precision, intent(in) :: x
+ integer         , intent(in) :: n
+ double precision, intent(out)::  array(0:n)
+ integer :: i
+ double precision :: accu
+ accu = 1.d0
+ array(0) = 1.d0
+ do i = 1, n
+  accu += fact_inv(i) * x**dble(i)
+  array(i) = accu
+ enddo
+end
+
+subroutine exp_dl_ovlp_stg_phi_ij(zeta,D_center,gam,delta,A_center,B_center,power_A,power_B,alpha,beta,n_taylor,array_ints,integral_taylor,exponent_exp)
+  BEGIN_DOC
+  ! Computes the following integrals : 
+  !
+  ! .. math::
+  ! 
+  !   array(i) = \int dr EXP{exponent_exp * [exp(-gam*i (r - D)) exp(-delta*i * (r -D)^2)] (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  !
+  ! and gives back the Taylor expansion of the exponential in integral_taylor
+  END_DOC
+
+ implicit none
+ double precision, intent(in)    :: zeta             ! prefactor of the argument of the exp(-zeta*x)
+ integer, intent(in)             :: n_taylor         ! order of the Taylor expansion of the exponential
+ double precision, intent(in)    :: D_center(3), gam ! pure Slater "D" in r-r_D
+ double precision, intent(in)    :: delta            ! gaussian        in r-r_D
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ double precision, intent(in)    :: exponent_exp
+ integer, intent(in)             :: power_A(3),power_B(3)
+ double precision, intent(out)   :: array_ints(0:n_taylor),integral_taylor
+
+ integer :: i,dim1
+ double precision :: delta_exp,gam_exp,ovlp_stg_gauss_int_phi_ij
+ double precision :: overlap_x,overlap_y,overlap_z,overlap
+ dim1=100
+ call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
+ array_ints(0) = overlap
+ integral_taylor = array_ints(0)
+ do i = 1, n_taylor
+  delta_exp = dble(i) * delta
+  gam_exp   = dble(i) * gam
+  array_ints(i) = ovlp_stg_gauss_int_phi_ij(D_center,gam_exp,delta_exp,A_center,B_center,power_A,power_B,alpha,beta)
+  integral_taylor += (-zeta*exponent_exp)**dble(i) * fact_inv(i) * array_ints(i)
+ enddo
+
+end
+
+subroutine exp_dl_erf_stg_phi_ij(zeta,D_center,gam,delta,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu,n_taylor,array_ints,integral_taylor)
+  BEGIN_DOC
+  ! Computes the following integrals : 
+  !
+  ! .. math::
+  ! 
+  !   array(i) = \int dr exp(-gam*i (r - D)) exp(-delta*i * (r -D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  !
+  ! and gives back the Taylor expansion of the exponential in integral_taylor
+  END_DOC
+
+ implicit none
+ integer, intent(in)             :: n_taylor         ! order of the Taylor expansion of the exponential
+ double precision, intent(in)    :: zeta             ! prefactor of the argument of the exp(-zeta*x)
+ double precision, intent(in)    :: D_center(3), gam ! pure Slater "D" in r-r_D
+ double precision, intent(in)    :: delta            ! gaussian        in r-r_D
+ double precision, intent(in)    :: C_center(3),mu      ! coulomb center "C" and "mu" in the erf(mu*x)/x function
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3)
+ double precision, intent(out)   :: array_ints(0:n_taylor),integral_taylor
+
+ integer :: i,dim1
+ double precision :: delta_exp,gam_exp,NAI_pol_mult_erf,erf_mu_stg_gauss_int_phi_ij
+ dim1=100
+ 
+ array_ints(0) = NAI_pol_mult_erf(A_center,B_center,power_A,power_B,alpha,beta,C_center,n_pt_max_integrals,mu)
+ integral_taylor = array_ints(0)
+ do i = 1, n_taylor
+  delta_exp = dble(i) * delta
+  gam_exp   = dble(i) * gam
+  array_ints(i) = erf_mu_stg_gauss_int_phi_ij(D_center,gam_exp,delta_exp,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  integral_taylor += (-zeta)**dble(i) * fact_inv(i) * array_ints(i)
+ enddo
+
+end
diff --git a/src/ao_many_one_e_ints/xyz_grad_xyz_ao_pol.irp.f b/src/ao_many_one_e_ints/xyz_grad_xyz_ao_pol.irp.f
new file mode 100644
index 00000000..eed1c348
--- /dev/null
+++ b/src/ao_many_one_e_ints/xyz_grad_xyz_ao_pol.irp.f
@@ -0,0 +1,343 @@
+ BEGIN_PROVIDER [double precision, coef_xyz_ao, (2,3,ao_num)]
+&BEGIN_PROVIDER [integer, power_xyz_ao, (2,3,ao_num)]
+ implicit none
+ BEGIN_DOC
+! coefficient for the basis function :: (x * phi_i(r), y * phi_i(r), * z_phi(r))
+!
+! x * (x - A_x)^a_x = A_x (x - A_x)^a_x + 1 * (x - A_x)^{a_x+1}
+ END_DOC
+ integer :: i,j,k,num_ao,power_ao(1:3)
+ double precision :: center_ao(1:3)
+ do i = 1, ao_num
+  power_ao(1:3)= ao_power(i,1:3) 
+  num_ao = ao_nucl(i)
+  center_ao(1:3) = nucl_coord(num_ao,1:3)
+  do j = 1, 3
+   coef_xyz_ao(1,j,i) = center_ao(j) ! A_x (x - A_x)^a_x
+   power_xyz_ao(1,j,i)= power_ao(j)
+   coef_xyz_ao(2,j,i) = 1.d0         ! 1 * (x - A_x)^a_{x+1}
+   power_xyz_ao(2,j,i)= power_ao(j) + 1
+  enddo
+ enddo
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, ao_coef_ord_grad_transp, (2,3,ao_prim_num_max,ao_num) ]
+&BEGIN_PROVIDER [ integer, power_ord_grad_transp, (2,3,ao_num) ]
+  implicit none
+  BEGIN_DOC
+  ! grad AO in terms of polynoms and coefficients 
+  ! 
+  ! WARNING !!!! SOME polynoms might be negative !!!!! 
+  !
+  ! WHEN IT IS THE CASE, coefficients are ZERO 
+  END_DOC
+  integer                        :: i,j,power_ao(3), m,kk
+  do j=1, ao_num
+    power_ao(1:3)= ao_power(j,1:3) 
+    do m = 1, 3
+     power_ord_grad_transp(1,m,j) = power_ao(m) - 1
+     power_ord_grad_transp(2,m,j) = power_ao(m) + 1
+    enddo
+    do i=1, ao_prim_num_max
+     do m = 1, 3
+       ao_coef_ord_grad_transp(1,m,i,j) = ao_coef_normalized_ordered(j,i) * dble(power_ao(m)) ! a_x * c_i 
+       ao_coef_ord_grad_transp(2,m,i,j) = -2.d0 * ao_coef_normalized_ordered(j,i) * ao_expo_ordered_transp(i,j) ! -2 * c_i * alpha_i 
+       do kk = 1, 2
+        if(power_ord_grad_transp(kk,m,j).lt.0)then
+         ao_coef_ord_grad_transp(kk,m,i,j) = 0.d0
+        endif
+       enddo
+     enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ double precision, ao_coef_ord_xyz_grad_transp, (4,3,ao_prim_num_max,ao_num) ]
+&BEGIN_PROVIDER [ integer, power_ord_xyz_grad_transp, (4,3,ao_num) ]
+  implicit none
+  BEGIN_DOC
+  ! x * d/dx of an AO in terms of polynoms and coefficients 
+  !
+  ! WARNING !!!! SOME polynoms might be negative !!!!! 
+  !
+  ! WHEN IT IS THE CASE, coefficients are ZERO 
+  END_DOC
+  integer                        :: i,j,power_ao(3), m,num_ao,kk
+  double precision :: center_ao(1:3)
+  do j=1, ao_num
+   power_ao(1:3)= ao_power(j,1:3) 
+   num_ao = ao_nucl(j)
+   center_ao(1:3) = nucl_coord(num_ao,1:3)
+   do m = 1, 3
+     power_ord_xyz_grad_transp(1,m,j)   = power_ao(m) - 1
+     power_ord_xyz_grad_transp(2,m,j)   = power_ao(m)
+     power_ord_xyz_grad_transp(3,m,j)   = power_ao(m) + 1
+     power_ord_xyz_grad_transp(4,m,j)   = power_ao(m) + 2
+     do kk = 1, 4
+      if(power_ord_xyz_grad_transp(kk,m,j).lt.0)then
+       power_ord_xyz_grad_transp(kk,m,j) = -1
+      endif
+     enddo
+   enddo
+   do i=1, ao_prim_num_max
+    do m = 1, 3
+     ao_coef_ord_xyz_grad_transp(1,m,i,j) = dble(power_ao(m)) * ao_coef_normalized_ordered(j,i) * center_ao(m)
+     ao_coef_ord_xyz_grad_transp(2,m,i,j) = dble(power_ao(m)) * ao_coef_normalized_ordered(j,i) 
+     ao_coef_ord_xyz_grad_transp(3,m,i,j) = -2.d0 * ao_coef_normalized_ordered(j,i) * ao_expo_ordered_transp(i,j) * center_ao(m)
+     ao_coef_ord_xyz_grad_transp(4,m,i,j) = -2.d0 * ao_coef_normalized_ordered(j,i) * ao_expo_ordered_transp(i,j) 
+     do kk = 1, 4
+      if(power_ord_xyz_grad_transp(kk,m,j).lt.0)then
+       ao_coef_ord_xyz_grad_transp(kk,m,i,j) = 0.d0
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+
+END_PROVIDER
+
+subroutine xyz_grad_phi_ao(r,i_ao,xyz_grad_phi)
+ implicit none
+ integer, intent(in) :: i_ao
+ double precision, intent(in) :: r(3)
+ double precision, intent(out):: xyz_grad_phi(3) ! x * d/dx phi i, y * d/dy phi_i, z * d/dz phi_
+ double precision :: center_ao(3),beta
+ double precision :: accu(3,4),dr(3),r2,pol_usual(3)
+ integer :: m,power_ao(3),num_ao,j_prim
+ power_ao(1:3)= ao_power(i_ao,1:3) 
+ num_ao = ao_nucl(i_ao)
+ center_ao(1:3) = nucl_coord(num_ao,1:3)
+ dr(1) = (r(1) - center_ao(1))
+ dr(2) = (r(2) - center_ao(2))
+ dr(3) = (r(3) - center_ao(3))
+ r2 = 0.d0
+ do m = 1, 3
+  r2 += dr(m)*dr(m)
+ enddo
+ ! computes the gaussian part 
+ accu = 0.d0
+ do j_prim =1,ao_prim_num(i_ao)
+   beta = ao_expo_ordered_transp(j_prim,i_ao)
+   if(dabs(beta*r2).gt.50.d0)cycle
+   do m = 1, 3
+    accu(m,1) += ao_coef_ord_xyz_grad_transp(1,m,j_prim,i_ao) * dexp(-beta*r2) 
+    accu(m,2) += ao_coef_ord_xyz_grad_transp(2,m,j_prim,i_ao) * dexp(-beta*r2) 
+    accu(m,3) += ao_coef_ord_xyz_grad_transp(3,m,j_prim,i_ao) * dexp(-beta*r2) 
+    accu(m,4) += ao_coef_ord_xyz_grad_transp(4,m,j_prim,i_ao) * dexp(-beta*r2) 
+   enddo
+ enddo
+ ! computes the polynom part
+ pol_usual = 0.d0
+ pol_usual(1) = dr(2)**dble(power_ao(2)) * dr(3)**dble(power_ao(3)) 
+ pol_usual(2) = dr(1)**dble(power_ao(1)) * dr(3)**dble(power_ao(3)) 
+ pol_usual(3) = dr(1)**dble(power_ao(1)) * dr(2)**dble(power_ao(2)) 
+
+ xyz_grad_phi = 0.d0
+ do m = 1, 3
+  xyz_grad_phi(m) += accu(m,2) * pol_usual(m) * dr(m)**dble(power_ord_xyz_grad_transp(2,m,i_ao))
+  xyz_grad_phi(m) += accu(m,3) * pol_usual(m) * dr(m)**dble(power_ord_xyz_grad_transp(3,m,i_ao))
+  xyz_grad_phi(m) += accu(m,4) * pol_usual(m) * dr(m)**dble(power_ord_xyz_grad_transp(4,m,i_ao))
+  if(power_ord_xyz_grad_transp(1,m,i_ao).lt.0)cycle
+  xyz_grad_phi(m) += accu(m,1) * pol_usual(m) * dr(m)**dble(power_ord_xyz_grad_transp(1,m,i_ao))
+ enddo
+end
+
+subroutine grad_phi_ao(r,i_ao,grad_xyz_phi)
+ implicit none
+ integer, intent(in) :: i_ao
+ double precision, intent(in) :: r(3)
+ double precision, intent(out):: grad_xyz_phi(3) ! x * phi i, y * phi_i, z * phi_
+ double precision :: center_ao(3),beta
+ double precision :: accu(3,2),dr(3),r2,pol_usual(3)
+ integer :: m,power_ao(3),num_ao,j_prim
+ power_ao(1:3)= ao_power(i_ao,1:3) 
+ num_ao = ao_nucl(i_ao)
+ center_ao(1:3) = nucl_coord(num_ao,1:3)
+ dr(1) = (r(1) - center_ao(1))
+ dr(2) = (r(2) - center_ao(2))
+ dr(3) = (r(3) - center_ao(3))
+ r2 = 0.d0
+ do m = 1, 3
+  r2 += dr(m)*dr(m)
+ enddo
+ ! computes the gaussian part 
+ accu = 0.d0
+ do j_prim =1,ao_prim_num(i_ao)
+   beta = ao_expo_ordered_transp(j_prim,i_ao)
+   if(dabs(beta*r2).gt.50.d0)cycle
+   do m = 1, 3
+    accu(m,1) += ao_coef_ord_grad_transp(1,m,j_prim,i_ao) * dexp(-beta*r2) 
+    accu(m,2) += ao_coef_ord_grad_transp(2,m,j_prim,i_ao) * dexp(-beta*r2) 
+   enddo
+ enddo
+ ! computes the polynom part
+ pol_usual = 0.d0
+ pol_usual(1) = dr(2)**dble(power_ao(2)) * dr(3)**dble(power_ao(3)) 
+ pol_usual(2) = dr(1)**dble(power_ao(1)) * dr(3)**dble(power_ao(3)) 
+ pol_usual(3) = dr(1)**dble(power_ao(1)) * dr(2)**dble(power_ao(2)) 
+ do m = 1, 3
+  grad_xyz_phi(m)  = accu(m,2) * pol_usual(m) * dr(m)**dble(power_ord_grad_transp(2,m,i_ao))
+  if(power_ao(m)==0)cycle
+  grad_xyz_phi(m) += accu(m,1) * pol_usual(m) * dr(m)**dble(power_ord_grad_transp(1,m,i_ao))
+ enddo
+end
+
+subroutine xyz_phi_ao(r,i_ao,xyz_phi)
+ implicit none
+ integer, intent(in) :: i_ao
+ double precision, intent(in) :: r(3)
+ double precision, intent(out):: xyz_phi(3) ! x * phi i, y * phi_i, z * phi_i
+ double precision :: center_ao(3),beta
+ double precision :: accu,dr(3),r2,pol_usual(3)
+ integer :: m,power_ao(3),num_ao
+ power_ao(1:3)= ao_power(i_ao,1:3) 
+ num_ao = ao_nucl(i_ao)
+ center_ao(1:3) = nucl_coord(num_ao,1:3)
+ dr(1) = (r(1) - center_ao(1))
+ dr(2) = (r(2) - center_ao(2))
+ dr(3) = (r(3) - center_ao(3))
+ r2 = 0.d0
+ do m = 1, 3
+  r2 += dr(m)*dr(m)
+ enddo
+ ! computes the gaussian part 
+ accu = 0.d0
+ do m=1,ao_prim_num(i_ao)
+   beta = ao_expo_ordered_transp(m,i_ao)
+   if(dabs(beta*r2).gt.50.d0)cycle
+   accu += ao_coef_normalized_ordered_transp(m,i_ao) * dexp(-beta*r2)
+ enddo
+ ! computes the polynom part
+ pol_usual = 0.d0
+ pol_usual(1) = dr(2)**dble(power_ao(2)) * dr(3)**dble(power_ao(3)) 
+ pol_usual(2) = dr(1)**dble(power_ao(1)) * dr(3)**dble(power_ao(3)) 
+ pol_usual(3) = dr(1)**dble(power_ao(1)) * dr(2)**dble(power_ao(2)) 
+ do m = 1, 3
+  xyz_phi(m) = accu * pol_usual(m) * dr(m)**(dble(power_ao(m))) * ( coef_xyz_ao(1,m,i_ao) + coef_xyz_ao(2,m,i_ao) * dr(m) )
+ enddo
+end
+
+
+subroutine test_pol_xyz
+ implicit none
+ integer :: ipoint,i,j,m,jpoint
+ double precision :: r1(3),derf_mu_x
+ double precision :: weight1,r12,xyz_phi(3),grad_phi(3),xyz_grad_phi(3)
+ double precision, allocatable :: aos_array(:),aos_grad_array(:,:)
+ double precision :: num_xyz_phi(3),num_grad_phi(3),num_xyz_grad_phi(3)
+ double precision :: accu_xyz_phi(3),accu_grad_phi(3),accu_xyz_grad_phi(3)
+ double precision :: meta_accu_xyz_phi(3),meta_accu_grad_phi(3),meta_accu_xyz_grad_phi(3)
+ allocate(aos_array(ao_num),aos_grad_array(3,ao_num))
+ meta_accu_xyz_phi     = 0.d0
+ meta_accu_grad_phi    = 0.d0
+ meta_accu_xyz_grad_phi= 0.d0
+ do i = 1, ao_num
+  accu_xyz_phi     = 0.d0
+  accu_grad_phi    = 0.d0
+  accu_xyz_grad_phi= 0.d0
+
+  do ipoint = 1, n_points_final_grid
+   r1(:) = final_grid_points(:,ipoint)
+   weight1 = final_weight_at_r_vector(ipoint)
+   call give_all_aos_and_grad_at_r(r1,aos_array,aos_grad_array)
+   do m = 1, 3
+    num_xyz_phi(m)      = r1(m) *  aos_array(i)  
+    num_grad_phi(m)     = aos_grad_array(m,i)  
+    num_xyz_grad_phi(m) = r1(m) *  aos_grad_array(m,i) 
+   enddo
+   call xyz_phi_ao(r1,i,xyz_phi)
+   call grad_phi_ao(r1,i,grad_phi)
+   call xyz_grad_phi_ao(r1,i,xyz_grad_phi)
+   do m = 1, 3
+    accu_xyz_phi(m)      += weight1 * dabs(num_xyz_phi(m)      -  xyz_phi(m)     )
+    accu_grad_phi(m)     += weight1 * dabs(num_grad_phi(m)     -  grad_phi(m)    )
+    accu_xyz_grad_phi(m) += weight1 * dabs(num_xyz_grad_phi(m) -  xyz_grad_phi(m))
+   enddo
+  enddo
+  print*,''
+  print*,''
+  print*,'i,',i
+  print*,''
+  do m = 1, 3
+!    print*, 'm, accu_xyz_phi(m)  ' ,m, accu_xyz_phi(m)  
+!    print*, 'm, accu_grad_phi(m) ' ,m, accu_grad_phi(m)         
+    print*, 'm, accu_xyz_grad_phi' ,m, accu_xyz_grad_phi(m)
+  enddo
+  do m = 1, 3
+   meta_accu_xyz_phi(m) += dabs(accu_xyz_phi(m))
+   meta_accu_grad_phi(m) += dabs(accu_grad_phi(m))
+   meta_accu_xyz_grad_phi(m) += dabs(accu_xyz_grad_phi(m))
+  enddo
+ enddo
+  do m = 1, 3
+!    print*, 'm, meta_accu_xyz_phi(m)  ' ,m, meta_accu_xyz_phi(m)  
+!    print*, 'm, meta_accu_grad_phi(m) ' ,m, meta_accu_grad_phi(m)         
+    print*, 'm, meta_accu_xyz_grad_phi' ,m, meta_accu_xyz_grad_phi(m)
+  enddo
+
+
+
+end
+
+subroutine test_ints_semi_bis
+ implicit none
+ integer :: ipoint,i,j,m
+ double precision :: r1(3), aos_grad_array_r1(3, ao_num), aos_array_r1(ao_num)
+ double precision :: C_center(3), weight1,mu_in,r12,derf_mu_x,dxyz_ints(3),NAI_pol_mult_erf_ao
+ double precision :: ao_mat(ao_num,ao_num),ao_xmat(3,ao_num,ao_num),accu1, accu2(3)
+ mu_in = 0.5d0
+ C_center = 0.d0
+ C_center(1) = 0.25d0
+ C_center(3) = 1.12d0
+ C_center(2) = -1.d0
+ ao_mat = 0.d0
+ ao_xmat = 0.d0
+ do ipoint = 1, n_points_final_grid
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+  call give_all_aos_and_grad_at_r(r1,aos_array_r1,aos_grad_array_r1)
+  weight1 = final_weight_at_r_vector(ipoint)
+  r12 = (r1(1) - C_center(1))**2.d0 + (r1(2) - C_center(2))**2.d0 + (r1(3) - C_center(3))**2.d0 
+  r12 = dsqrt(r12)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    ao_mat(j,i)  += aos_array_r1(i) * aos_array_r1(j) * weight1 * derf_mu_x(mu_in,r12)
+    do m = 1, 3
+     ao_xmat(m,j,i) += r1(m) * aos_array_r1(j) * aos_grad_array_r1(m,i) * weight1 * derf_mu_x(mu_in,r12)
+    enddo
+   enddo
+  enddo
+ enddo
+
+ accu1 = 0.d0
+ accu2 = 0.d0
+ accu1relat = 0.d0
+ accu2relat = 0.d0
+ double precision :: accu1relat, accu2relat(3)
+ double precision :: contrib(3)
+ do i = 1, ao_num
+  do j = 1, ao_num
+   call phi_j_erf_mu_r_xyz_dxyz_phi(i,j,mu_in, C_center, dxyz_ints)
+   print*,''
+   print*,'i,j',i,j
+   print*,dxyz_ints(:)
+   print*,ao_xmat(:,j,i)
+   do m = 1, 3
+    contrib(m) = dabs(ao_xmat(m,j,i) - dxyz_ints(m))
+    accu2(m) += contrib(m)
+    if(dabs(ao_xmat(m,j,i)).gt.1.d-10)then
+     accu2relat(m) += dabs(ao_xmat(m,j,i) - dxyz_ints(m))/dabs(ao_xmat(m,j,i))
+    endif
+   enddo
+    print*,contrib
+  enddo
+   print*,''
+ enddo
+ print*,'accu2relat = '
+ print*, accu2relat /dble(ao_num * ao_num)
+
+end
+
+
diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
index 42505194..c4a573be 100644
--- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
@@ -46,142 +46,327 @@ double precision function NAI_pol_mult_erf_ao(i_ao,j_ao,mu_in,C_center)
 
 end
 
+double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
 
-
-double precision function NAI_pol_mult_erf(A_center,B_center,power_A,power_B,alpha,beta,C_center,n_pt_in,mu_in)
   BEGIN_DOC
+  !
   ! Computes the following integral :
   !
   ! .. math::
-  ! 
+  !
   !   \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
-  !   \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !   \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+  integer,          intent(in) :: n_pt_in
+  integer,          intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: C_center(3), A_center(3), B_center(3), alpha, beta, mu_in
+
+  integer                      :: i, n_pt, n_pt_out
+  double precision             :: P_center(3)
+  double precision             :: d(0:n_pt_in), coeff, dist, const, factor
+  double precision             :: const_factor, dist_integral
+  double precision             :: accu, p_inv, p, rho, p_inv_2
+  double precision             :: p_new
+
+  double precision             :: rint
+
+  p       = alpha + beta
+  p_inv   = 1.d0 / p
+  p_inv_2 = 0.5d0 * p_inv
+  rho     = alpha * beta * p_inv
+
+  dist          = 0.d0
+  dist_integral = 0.d0
+  do i = 1, 3
+    P_center(i)    = (alpha * A_center(i) + beta * B_center(i)) * p_inv
+    dist          += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i))
+    dist_integral += (P_center(i) - C_center(i)) * (P_center(i) - C_center(i))
+  enddo
+  const_factor = dist * rho
+  if(const_factor > 80.d0) then
+    NAI_pol_mult_erf = 0.d0
+    return
+  endif
+
+  p_new  = mu_in / dsqrt(p + mu_in * mu_in)
+  factor = dexp(-const_factor)
+  coeff  = dtwo_pi * factor * p_inv * p_new
+
+  n_pt  =  2 * ( (power_A(1) + power_B(1)) + (power_A(2) + power_B(2)) + (power_A(3) + power_B(3)) )
+  const = p * dist_integral * p_new * p_new
+  if(n_pt == 0) then
+    NAI_pol_mult_erf = coeff * rint(0, const)
+    return
+  endif
+
+  do i = 0, n_pt_in
+    d(i) = 0.d0
+  enddo
+  ! call give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
+  p_new = p_new * p_new
+  call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center)
+
+  if(n_pt_out < 0) then
+    NAI_pol_mult_erf = 0.d0
+    return
+  endif
+
+  ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+  accu = 0.d0
+  do i = 0, n_pt_out, 2
+    accu += d(i) * rint(i/2, const)
+  enddo
+  NAI_pol_mult_erf = accu * coeff
+
+end function NAI_pol_mult_erf
+
+! ---
+
+
+double precision function NAI_pol_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$.
   !
   END_DOC
 
   implicit none
-  integer, intent(in)            :: n_pt_in
-  double precision,intent(in)    :: C_center(3),A_center(3),B_center(3),alpha,beta,mu_in
-  integer, intent(in)            :: power_A(3),power_B(3)
-  integer                        :: i,j,k,l,n_pt
-  double precision               :: P_center(3)
+  integer,          intent(in)   :: i_ao, j_ao
+  double precision, intent(in)   :: beta, B_center(3)
+  double precision, intent(in)   :: mu_in, C_center(3)
+
+  integer                        :: i, j, power_A1(3), power_A2(3), n_pt_in
+  double precision               :: A1_center(3), A2_center(3), alpha1, alpha2, coef12, coef1, integral
+
+  double precision, external     :: NAI_pol_mult_erf_with1s, NAI_pol_mult_erf_ao
+
+  ASSERT(beta .ge. 0.d0)
+  if(beta .lt. 1d-10) then
+    NAI_pol_mult_erf_ao_with1s = NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center)
+    return
+  endif
+
+  power_A1(1:3) = ao_power(i_ao,1:3)
+  power_A2(1:3) = ao_power(j_ao,1:3)
+
+  A1_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  A2_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  NAI_pol_mult_erf_ao_with1s = 0.d0
+  do i = 1, ao_prim_num(i_ao)
+    alpha1 = ao_expo_ordered_transp           (i,i_ao)
+    coef1  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      alpha2 = ao_expo_ordered_transp(j,j_ao)
+      coef12 = coef1 * ao_coef_normalized_ordered_transp(j,j_ao)
+      if(dabs(coef12) .lt. 1d-14) cycle
+
+      integral = NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+
+      NAI_pol_mult_erf_ao_with1s += integral * coef12
+    enddo
+  enddo
+
+end function NAI_pol_mult_erf_ao_with1s
+
+subroutine NAI_pol_mult_erf_with1s_v(A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, beta, B_center, LD_B, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
+  !   \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
+  !   \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
+  !   \exp(-\beta (r - B)^2)
+  !   \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$.
+  !
+  END_DOC
 
-  double precision               :: d(0:n_pt_in),pouet,coeff,dist,const,pouet_2,factor
-  double precision               :: I_n_special_exact,integrate_bourrin,I_n_bibi
-  double precision               :: V_e_n,const_factor,dist_integral,tmp
-  double precision               :: accu,rint,p_inv,p,rho,p_inv_2
-  integer                        :: n_pt_out,lmax
   include 'utils/constants.include.F'
-  p = alpha + beta
-  p_inv = 1.d0/p
-  p_inv_2 = 0.5d0 * p_inv
-  rho = alpha * beta * p_inv
 
-  dist = 0.d0
-  dist_integral = 0.d0
-  do i = 1, 3
-    P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv
-    dist += (A_center(i) - B_center(i))*(A_center(i) - B_center(i))
-    dist_integral += (P_center(i) - C_center(i))*(P_center(i) - C_center(i))
-  enddo
-  const_factor = dist*rho
-  if(const_factor > 80.d0)then
-    NAI_pol_mult_erf = 0.d0
-    return
-  endif
-  double precision               :: p_new
-  p_new = mu_in/dsqrt(p+ mu_in * mu_in)
-  factor = dexp(-const_factor)
-  coeff = dtwo_pi * factor * p_inv * p_new
-  lmax = 20
+  implicit none
+  integer,          intent(in)  :: n_pt_in, LD_B, LD_C, LD_resv, n_points
+  integer,          intent(in)  :: power_A1(3), power_A2(3)
+  double precision, intent(in)  :: A1_center(3), A2_center(3)
+  double precision, intent(in)  :: C_center(LD_C,3), B_center(LD_B,3)
+  double precision, intent(in)  :: alpha1, alpha2, beta, mu_in
+  double precision, intent(out) :: res_v(LD_resv)
 
-  !  print*, "b"
-  do i = 0, n_pt_in
-    d(i) = 0.d0
-  enddo
-  n_pt =  2 * ( (power_A(1) + power_B(1)) +(power_A(2) + power_B(2)) +(power_A(3) + power_B(3)) )
-  const = p * dist_integral * p_new * p_new
-  if (n_pt == 0) then
-    pouet = rint(0,const)
-    NAI_pol_mult_erf = coeff * pouet
+  integer                       :: i, n_pt, n_pt_out, ipoint
+  double precision              :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12
+  double precision              :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor
+  double precision              :: dist_integral
+  double precision              :: d(0:n_pt_in), coeff, const, factor
+  double precision              :: accu
+  double precision              :: p_new, p_new2, coef_tmp, cons_tmp
+
+  double precision              :: rint
+
+
+  res_V(1:LD_resv) = 0.d0
+
+  ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2}
+  alpha12       = alpha1 + alpha2
+  alpha12_inv   = 1.d0 / alpha12
+  alpha12_inv_2 = 0.5d0 * alpha12_inv
+  rho12         = alpha1 * alpha2 * alpha12_inv
+  A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv
+  A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv
+  A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv
+  dist12        = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))&
+                + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))&
+                + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3))
+
+  const_factor12 = dist12 * rho12
+  if(const_factor12 > 80.d0) then
     return
   endif
 
-  ! call give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
-  p_new = p_new * p_new
-  call give_polynomial_mult_center_one_e_erf_opt(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in,p,p_inv,p_inv_2,p_new,P_center)
+  ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2}
+  p        = alpha12 + beta
+  p_inv    = 1.d0 / p
+  p_inv_2  = 0.5d0 * p_inv
+  rho      = alpha12 * beta * p_inv
+  p_new    = mu_in / dsqrt(p + mu_in * mu_in)
+  p_new2   = p_new * p_new
+  coef_tmp = dtwo_pi * p_inv * p_new
+  cons_tmp = p * p_new2
+  n_pt     =  2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) + power_A1(3) + power_A2(3) )
 
+  if(n_pt == 0) then
+
+    do ipoint = 1, n_points
+
+      dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))&
+           + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))&
+           + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3))
+      const_factor = const_factor12 + dist * rho
+      if(const_factor > 80.d0) cycle
+      coeff = coef_tmp * dexp(-const_factor)
+
+      P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv
+      P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv
+      P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv
+      dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))&
+                    + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))&
+                    + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3))
+      const = cons_tmp * dist_integral
+
+      res_v(ipoint) = coeff * rint(0, const)
+    enddo
+
+  else
+
+    do ipoint = 1, n_points
+  
+      dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))&
+           + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))&
+           + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) 
+      const_factor = const_factor12 + dist * rho
+      if(const_factor > 80.d0) cycle
+      coeff = coef_tmp * dexp(-const_factor)
+  
+      P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv
+      P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv
+      P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv
+      dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))&
+                    + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))&
+                    + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3))
+      const = cons_tmp * dist_integral
+  
+      do i = 0, n_pt_in
+        d(i) = 0.d0
+      enddo
+      !TODO: VECTORIZE HERE
+      call give_polynomial_mult_center_one_e_erf_opt(A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center)
+  
+      if(n_pt_out < 0) then
+        cycle
+      endif
+  
+      ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+      accu = 0.d0
+      do i = 0, n_pt_out, 2
+        accu += d(i) * rint(i/2, const)
+      enddo
+  
+      res_v(ipoint) = accu * coeff
+    enddo
 
-  if(n_pt_out<0)then
-    NAI_pol_mult_erf = 0.d0
-    return
   endif
-  accu = 0.d0
 
-  ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
-  do i =0 ,n_pt_out,2
-    accu +=  d(i) * rint(i/2,const)
-  enddo
-  NAI_pol_mult_erf = accu * coeff
+end subroutine NAI_pol_mult_erf_with1s_v
 
-end
+! ---
 
+subroutine give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center)
 
-subroutine give_polynomial_mult_center_one_e_erf_opt(A_center,B_center,alpha,beta,&
-      power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in,p,p_inv,p_inv_2,p_new,P_center)
   BEGIN_DOC
   ! Returns the explicit polynomial in terms of the $t$ variable of the
   ! following polynomial:
   !
   ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$.
   END_DOC
+
   implicit none
-  integer, intent(in)            :: n_pt_in
-  integer,intent(out)            :: n_pt_out
-  double precision, intent(in)   :: A_center(3), B_center(3),C_center(3),p,p_inv,p_inv_2,p_new,P_center(3)
-  double precision, intent(in)   :: alpha,beta,mu_in
-  integer, intent(in)            :: power_A(3), power_B(3)
-  integer                        :: a_x,b_x,a_y,b_y,a_z,b_z
-  double precision               :: d(0:n_pt_in)
-  double precision               :: d1(0:n_pt_in)
-  double precision               :: d2(0:n_pt_in)
-  double precision               :: d3(0:n_pt_in)
-  double precision               :: accu
+  integer,          intent(in)  :: n_pt_in
+  integer,          intent(in)  :: power_A(3), power_B(3)
+  double precision, intent(in)  :: A_center(3), B_center(3), C_center(3), p_inv_2, p_new, P_center(3)
+  integer,          intent(out) :: n_pt_out
+  double precision, intent(out) :: d(0:n_pt_in)
+
+  integer                       :: a_x, b_x, a_y, b_y, a_z, b_z
+  integer                       :: n_pt1, n_pt2, n_pt3, dim, i
+  integer                       :: n_pt_tmp
+  double precision              :: d1(0:n_pt_in)
+  double precision              :: d2(0:n_pt_in)
+  double precision              :: d3(0:n_pt_in)
+  double precision              :: accu
+  double precision              :: R1x(0:2), B01(0:2), R1xp(0:2), R2x(0:2)
+
   accu = 0.d0
   ASSERT (n_pt_in > 1)
 
-  double precision               :: R1x(0:2), B01(0:2), R1xp(0:2),R2x(0:2)
-  R1x(0)  = (P_center(1) - A_center(1))
-  R1x(1)  = 0.d0
-  R1x(2)  = -(P_center(1) - C_center(1))* p_new
+  R1x(0) = (P_center(1) - A_center(1))
+  R1x(1) = 0.d0
+  R1x(2) = -(P_center(1) - C_center(1))* p_new
   ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2
-  R1xp(0)  = (P_center(1) - B_center(1))
-  R1xp(1)  = 0.d0
-  R1xp(2)  =-(P_center(1) - C_center(1))* p_new
+  R1xp(0) = (P_center(1) - B_center(1))
+  R1xp(1) = 0.d0
+  R1xp(2) =-(P_center(1) - C_center(1))* p_new
   !R1xp = (P_x - B_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2
-  R2x(0)  =  p_inv_2
-  R2x(1)  = 0.d0
-  R2x(2)  = -p_inv_2* p_new
+  R2x(0) =  p_inv_2
+  R2x(1) = 0.d0
+  R2x(2) = -p_inv_2 * p_new
   !R2x  = 0.5 / p - 0.5/p ( t * mu/sqrt(p+mu^2) )^2
-  do i = 0,n_pt_in
-    d(i) = 0.d0
-  enddo
-  do i = 0,n_pt_in
+
+  do i = 0, n_pt_in
+    d (i) = 0.d0
     d1(i) = 0.d0
-  enddo
-  do i = 0,n_pt_in
     d2(i) = 0.d0
-  enddo
-  do i = 0,n_pt_in
     d3(i) = 0.d0
   enddo
-  integer                        :: n_pt1,n_pt2,n_pt3,dim,i
+
   n_pt1 = n_pt_in
   n_pt2 = n_pt_in
   n_pt3 = n_pt_in
   a_x = power_A(1)
   b_x = power_B(1)
-  call I_x1_pol_mult_one_e(a_x,b_x,R1x,R1xp,R2x,d1,n_pt1,n_pt_in)
+  call I_x1_pol_mult_one_e(a_x, b_x, R1x, R1xp, R2x, d1, n_pt1, n_pt_in)
   if(n_pt1<0)then
     n_pt_out = -1
     do i = 0,n_pt_in
@@ -190,17 +375,17 @@ subroutine give_polynomial_mult_center_one_e_erf_opt(A_center,B_center,alpha,bet
     return
   endif
 
-  R1x(0)  = (P_center(2) - A_center(2))
-  R1x(1)  = 0.d0
-  R1x(2)  = -(P_center(2) - C_center(2))* p_new
+  R1x(0) = (P_center(2) - A_center(2))
+  R1x(1) = 0.d0
+  R1x(2) = -(P_center(2) - C_center(2))* p_new
   ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2
-  R1xp(0)  = (P_center(2) - B_center(2))
-  R1xp(1)  = 0.d0
-  R1xp(2)  =-(P_center(2) - C_center(2))* p_new
+  R1xp(0) = (P_center(2) - B_center(2))
+  R1xp(1) = 0.d0
+  R1xp(2) =-(P_center(2) - C_center(2))* p_new
   !R1xp = (P_x - B_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2
   a_y = power_A(2)
   b_y = power_B(2)
-  call I_x1_pol_mult_one_e(a_y,b_y,R1x,R1xp,R2x,d2,n_pt2,n_pt_in)
+  call I_x1_pol_mult_one_e(a_y, b_y, R1x, R1xp, R2x, d2, n_pt2, n_pt_in)
   if(n_pt2<0)then
     n_pt_out = -1
     do i = 0,n_pt_in
@@ -209,51 +394,151 @@ subroutine give_polynomial_mult_center_one_e_erf_opt(A_center,B_center,alpha,bet
     return
   endif
 
-
-  R1x(0)  = (P_center(3) - A_center(3))
-  R1x(1)  = 0.d0
-  R1x(2)  = -(P_center(3) - C_center(3))* p_new
+  R1x(0) = (P_center(3) - A_center(3))
+  R1x(1) = 0.d0
+  R1x(2) = -(P_center(3) - C_center(3)) * p_new
   ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2
-  R1xp(0)  = (P_center(3) - B_center(3))
-  R1xp(1)  = 0.d0
-  R1xp(2)  =-(P_center(3) - C_center(3))* p_new
+  R1xp(0) = (P_center(3) - B_center(3))
+  R1xp(1) = 0.d0
+  R1xp(2) =-(P_center(3) - C_center(3)) * p_new
   !R2x  = 0.5 / p - 0.5/p ( t * mu/sqrt(p+mu^2) )^2
   a_z = power_A(3)
   b_z = power_B(3)
 
-  call I_x1_pol_mult_one_e(a_z,b_z,R1x,R1xp,R2x,d3,n_pt3,n_pt_in)
-  if(n_pt3<0)then
+  call I_x1_pol_mult_one_e(a_z, b_z, R1x, R1xp, R2x, d3, n_pt3, n_pt_in)
+  if(n_pt3 < 0) then
     n_pt_out = -1
     do i = 0,n_pt_in
       d(i) = 0.d0
     enddo
     return
   endif
-  integer                        :: n_pt_tmp
+
   n_pt_tmp = 0
-  call multiply_poly(d1,n_pt1,d2,n_pt2,d,n_pt_tmp)
-  do i = 0,n_pt_tmp
+  call multiply_poly(d1, n_pt1, d2, n_pt2, d, n_pt_tmp)
+  do i = 0, n_pt_tmp
     d1(i) = 0.d0
   enddo
   n_pt_out = 0
-  call multiply_poly(d ,n_pt_tmp ,d3,n_pt3,d1,n_pt_out)
+  call multiply_poly(d, n_pt_tmp, d3, n_pt3, d1, n_pt_out)
   do i = 0, n_pt_out
     d(i) = d1(i)
   enddo
 
-end
+end subroutine give_polynomial_mult_center_one_e_erf_opt
 
+! ---
+subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points)
 
-
-
-subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,&
-      power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
   BEGIN_DOC
-  ! Returns the explicit polynomial in terms of the $t$ variable of the 
+  !
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: n_pt_in, n_points, LD_C, LD_resv
+  integer,          intent(in)  :: power_A(3), power_B(3)
+  double precision, intent(in)  :: A_center(3), B_center(3), alpha, beta, mu_in
+  double precision, intent(in)  :: C_center(LD_C,3)
+  double precision, intent(out) :: res_v(LD_resv)
+
+  integer                       :: i, n_pt, n_pt_out, ipoint
+  double precision              :: P_center(3)
+  double precision              :: d(0:n_pt_in), coeff, dist, const, factor
+  double precision              :: const_factor, dist_integral
+  double precision              :: accu, p_inv, p, rho, p_inv_2
+  double precision              :: p_new, p_new2, coef_tmp
+
+  double precision              :: rint
+
+  res_V(1:LD_resv) = 0.d0
+
+  p        = alpha + beta
+  p_inv    = 1.d0 / p
+  p_inv_2  = 0.5d0 * p_inv
+  rho      = alpha * beta * p_inv
+  p_new    = mu_in / dsqrt(p + mu_in * mu_in)
+  p_new2   = p_new * p_new
+  coef_tmp = p * p_new2
+
+  dist = 0.d0
+  do i = 1, 3
+    P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv
+    dist       += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i))
+  enddo
+
+  const_factor = dist * rho
+  if(const_factor > 80.d0) then
+    return
+  endif
+  factor = dexp(-const_factor)
+  coeff  = dtwo_pi * factor * p_inv * p_new
+
+  n_pt =  2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) )
+
+  if(n_pt == 0) then
+
+    do ipoint = 1, n_points
+      dist_integral = 0.d0
+      do i = 1, 3
+        dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i))
+      enddo
+      const = coef_tmp * dist_integral
+
+      res_v(ipoint) = coeff * rint(0, const)
+    enddo
+
+  else
+
+    do ipoint = 1, n_points
+      dist_integral = 0.d0
+      do i = 1, 3
+        dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i))
+      enddo
+      const = coef_tmp * dist_integral
+
+      do i = 0, n_pt_in
+        d(i) = 0.d0
+      enddo
+      call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center)                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
+
+      if(n_pt_out < 0) then
+        res_v(ipoint) = 0.d0
+        cycle
+      endif
+
+      ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+      accu = 0.d0
+      do i = 0, n_pt_out, 2
+        accu += d(i) * rint(i/2, const)
+      enddo
+
+      res_v(ipoint) = accu * coeff
+    enddo
+
+  endif
+
+end subroutine NAI_pol_mult_erf_v
+
+
+subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
+
+  BEGIN_DOC
+  ! Returns the explicit polynomial in terms of the $t$ variable of the
   ! following polynomial:
   !
   ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$.
   END_DOC
+
   implicit none
   integer, intent(in)            :: n_pt_in
   integer,intent(out)            :: n_pt_out
@@ -374,3 +659,113 @@ subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,&
 
 end
 
+double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 &
+                                                 , beta, B_center, C_center, n_pt_in, mu_in )
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
+  !   \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
+  !   \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
+  !   \exp(-\beta (r - B)^2)
+  !   \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+  integer,          intent(in) :: n_pt_in
+  integer,          intent(in) :: power_A1(3), power_A2(3)
+  double precision, intent(in) :: C_center(3), A1_center(3), A2_center(3), B_center(3)
+  double precision, intent(in) :: alpha1, alpha2, beta, mu_in
+
+  integer                      :: i, n_pt, n_pt_out
+  double precision             :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12
+  double precision             :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor
+  double precision             :: dist_integral
+  double precision             :: d(0:n_pt_in), coeff, const, factor
+  double precision             :: accu
+  double precision             :: p_new
+
+  double precision             :: rint
+
+
+  ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2}
+  alpha12       = alpha1 + alpha2
+  alpha12_inv   = 1.d0 / alpha12
+  alpha12_inv_2 = 0.5d0 * alpha12_inv
+  rho12         = alpha1 * alpha2 * alpha12_inv
+  A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv
+  A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv
+  A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv
+  dist12        = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1)) &
+                + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2)) &
+                + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3))
+
+  const_factor12 = dist12 * rho12
+  if(const_factor12 > 80.d0) then
+    NAI_pol_mult_erf_with1s = 0.d0
+    return
+  endif
+
+  ! ---
+
+  ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2}
+  p           = alpha12 + beta
+  p_inv       = 1.d0 / p
+  p_inv_2     = 0.5d0 * p_inv
+  rho         = alpha12 * beta * p_inv
+  P_center(1) = (alpha12 * A12_center(1) + beta * B_center(1)) * p_inv
+  P_center(2) = (alpha12 * A12_center(2) + beta * B_center(2)) * p_inv
+  P_center(3) = (alpha12 * A12_center(3) + beta * B_center(3)) * p_inv
+  dist        = (A12_center(1) - B_center(1)) * (A12_center(1) - B_center(1)) &
+              + (A12_center(2) - B_center(2)) * (A12_center(2) - B_center(2)) &
+              + (A12_center(3) - B_center(3)) * (A12_center(3) - B_center(3))
+
+  const_factor = const_factor12 + dist * rho
+  if(const_factor > 80.d0) then
+    NAI_pol_mult_erf_with1s = 0.d0
+    return
+  endif
+
+  dist_integral = (P_center(1) - C_center(1)) * (P_center(1) - C_center(1)) &
+                + (P_center(2) - C_center(2)) * (P_center(2) - C_center(2)) &
+                + (P_center(3) - C_center(3)) * (P_center(3) - C_center(3))
+
+  ! ---
+
+  p_new  = mu_in / dsqrt(p + mu_in * mu_in)
+  factor = dexp(-const_factor)
+  coeff  = dtwo_pi * factor * p_inv * p_new
+
+  n_pt  =  2 * ( (power_A1(1) + power_A2(1)) + (power_A1(2) + power_A2(2)) + (power_A1(3) + power_A2(3)) )
+  const = p * dist_integral * p_new * p_new
+  if(n_pt == 0) then
+    NAI_pol_mult_erf_with1s = coeff * rint(0, const)
+    return
+  endif
+
+  do i = 0, n_pt_in
+    d(i) = 0.d0
+  enddo
+  p_new = p_new * p_new
+  call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center)
+
+  if(n_pt_out < 0) then
+    NAI_pol_mult_erf_with1s = 0.d0
+    return
+  endif
+
+  ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+  accu = 0.d0
+  do i = 0, n_pt_out, 2
+    accu += d(i) * rint(i/2, const)
+  enddo
+  NAI_pol_mult_erf_with1s = accu * coeff
+
+end function NAI_pol_mult_erf_with1s
diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f
index 4108ce71..1d92dc7d 100644
--- a/src/ao_one_e_ints/pot_ao_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_ints.irp.f
@@ -80,6 +80,10 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)]
     IF (DO_PSEUDO) THEN
        ao_integrals_n_e += ao_pseudo_integrals
     ENDIF
+    IF(point_charges) THEN
+       ao_integrals_n_e += ao_integrals_pt_chrg
+    ENDIF
+
 
   endif
 
diff --git a/src/ao_one_e_ints/pot_pt_charges.irp.f b/src/ao_one_e_ints/pot_pt_charges.irp.f
new file mode 100644
index 00000000..93f1acff
--- /dev/null
+++ b/src/ao_one_e_ints/pot_pt_charges.irp.f
@@ -0,0 +1,108 @@
+
+BEGIN_PROVIDER [ double precision, ao_integrals_pt_chrg, (ao_num,ao_num)]
+
+  BEGIN_DOC
+  !  Point charge-electron interaction, in the |AO| basis set.
+  !
+  !  :math:`\langle \chi_i | -\sum_charge charge * \frac{1}{|r-R_charge|} | \chi_j \rangle`
+  !
+  ! Notice the minus sign convention as it is supposed to be for electrons. 
+  END_DOC
+
+  implicit none
+  integer          :: num_A, num_B, power_A(3), power_B(3)
+  integer          :: i, j, k, l, n_pt_in, m
+  double precision :: alpha, beta
+  double precision :: A_center(3),B_center(3),C_center(3)
+  double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
+
+  ao_integrals_pt_chrg = 0.d0
+
+!  if (read_ao_integrals_pt_chrg) then
+!
+!    call ezfio_get_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
+!    print *,  'AO N-e integrals read from disk'
+!
+!  else
+
+!    if(use_cosgtos) then
+!      !print *, " use_cosgtos for ao_integrals_pt_chrg ?", use_cosgtos
+!
+!      do j = 1, ao_num
+!        do i = 1, ao_num
+!          ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg_cosgtos(i,j)
+!        enddo
+!      enddo
+!
+!    else
+
+      !$OMP PARALLEL                                                   &
+          !$OMP DEFAULT (NONE)                                         &
+          !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
+          !$OMP          num_A,num_B,Z,c,c1,n_pt_in)                      &
+          !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,pts_charge_coord,ao_coef_normalized_ordered_transp,nucl_coord,&
+          !$OMP         n_pt_max_integrals,ao_integrals_pt_chrg,n_pts_charge,pts_charge_z)
+
+      n_pt_in = n_pt_max_integrals
+
+      !$OMP DO SCHEDULE (dynamic)
+
+      do j = 1, ao_num
+        num_A = ao_nucl(j)
+        power_A(1:3)= ao_power(j,1:3)
+        A_center(1:3) = nucl_coord(num_A,1:3)
+
+        do i = 1, ao_num
+
+          num_B = ao_nucl(i)
+          power_B(1:3)= ao_power(i,1:3)
+          B_center(1:3) = nucl_coord(num_B,1:3)
+
+          do l=1,ao_prim_num(j)
+            alpha = ao_expo_ordered_transp(l,j)
+
+            do m=1,ao_prim_num(i)
+              beta = ao_expo_ordered_transp(m,i)
+
+              double precision               :: c, c1
+              c = 0.d0
+
+              do  k = 1, n_pts_charge
+                double precision               :: Z
+                Z = pts_charge_z(k)
+
+                C_center(1:3) = pts_charge_coord(k,1:3)
+
+                c1 = NAI_pol_mult( A_center, B_center, power_A, power_B &
+                                 , alpha, beta, C_center, n_pt_in )
+
+                c = c - Z * c1
+
+              enddo
+              ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg(i,j)  &
+                  + ao_coef_normalized_ordered_transp(l,j)             &
+                  * ao_coef_normalized_ordered_transp(m,i) * c
+            enddo
+          enddo
+        enddo
+      enddo
+
+    !$OMP END DO
+    !$OMP END PARALLEL
+
+!    endif
+
+
+!    IF(do_pseudo) THEN
+!       ao_integrals_pt_chrg += ao_pseudo_integrals
+!    ENDIF
+
+!  endif
+
+
+!  if (write_ao_integrals_pt_chrg) then
+!    call ezfio_set_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
+!    print *,  'AO N-e integrals written to disk'
+!  endif
+
+END_PROVIDER
diff --git a/src/ao_tc_eff_map/NEED b/src/ao_tc_eff_map/NEED
new file mode 100644
index 00000000..d9edb325
--- /dev/null
+++ b/src/ao_tc_eff_map/NEED
@@ -0,0 +1,5 @@
+ao_two_e_erf_ints
+mo_one_e_ints
+ao_many_one_e_ints
+dft_utils_in_r
+tc_keywords
diff --git a/src/ao_tc_eff_map/README.rst b/src/ao_tc_eff_map/README.rst
new file mode 100644
index 00000000..d45df18f
--- /dev/null
+++ b/src/ao_tc_eff_map/README.rst
@@ -0,0 +1,12 @@
+ao_tc_eff_map
+=============
+
+This is a module to obtain the integrals on the AO basis of the SCALAR HERMITIAN 
+effective potential defined in Eq. 32 of JCP 154, 084119 (2021)
+It also contains the modification by a one-body Jastrow factor.  
+
+The main routine/providers are
+
++) ao_tc_sym_two_e_pot_map : map of the SCALAR PART of total effective two-electron on the AO basis in PHYSICIST notations. It might contain the two-electron term coming from the one-e correlation factor. 
++) get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map) : routine to get the integrals from ao_tc_sym_two_e_pot_map. 
++) ao_tc_sym_two_e_pot(i,j,k,l) : FUNCTION that returns the scalar part of TC-potential EXCLUDING the erf(mu r12)/r12. See two_e_ints_gauss.irp.f for more details. 
diff --git a/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f b/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f
new file mode 100644
index 00000000..7a567979
--- /dev/null
+++ b/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f
@@ -0,0 +1,76 @@
+
+
+subroutine compute_ao_tc_sym_two_e_pot_jl(j, l, n_integrals, buffer_i, buffer_value)
+
+  use map_module
+
+  BEGIN_DOC
+  !  Parallel client for AO integrals
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)             :: j, l
+  integer,intent(out)             :: n_integrals
+  integer(key_kind),intent(out)   :: buffer_i(ao_num*ao_num)
+  real(integral_kind),intent(out) :: buffer_value(ao_num*ao_num)
+
+  integer                         :: i, k
+  integer                         :: kk, m, j1, i1
+  double precision                :: cpu_1, cpu_2, wall_1, wall_2
+  double precision                :: integral, wall_0, integral_pot, integral_erf
+  double precision                :: thr
+
+  logical, external               :: ao_two_e_integral_zero
+  double precision                :: ao_tc_sym_two_e_pot, ao_two_e_integral_erf
+  double precision                :: j1b_gauss_2e_j1, j1b_gauss_2e_j2
+
+
+  PROVIDE j1b_type
+
+  thr = ao_integrals_threshold
+
+  n_integrals = 0
+
+  j1 = j+ishft(l*l-l,-1)
+  do k = 1, ao_num           ! r1
+    i1 = ishft(k*k-k,-1)
+    if (i1 > j1) then
+      exit
+    endif
+    do i = 1, k
+      i1 += 1
+      if (i1 > j1) then
+        exit
+      endif
+
+      if (ao_two_e_integral_erf_schwartz(i,k)*ao_two_e_integral_erf_schwartz(j,l) < thr) then
+        cycle
+      endif
+
+      !DIR$ FORCEINLINE
+      integral_pot = ao_tc_sym_two_e_pot  (i, k, j, l)  ! i,k : r1    j,l : r2
+      integral_erf = ao_two_e_integral_erf(i, k, j, l)
+      integral     = integral_erf + integral_pot
+
+      if( j1b_type .eq. 1 ) then
+        !print *, ' j1b type 1 is added'
+        integral = integral + j1b_gauss_2e_j1(i, k, j, l)
+      elseif( j1b_type .eq. 2 ) then
+        !print *, ' j1b type 2 is added'
+        integral = integral + j1b_gauss_2e_j2(i, k, j, l)
+      endif
+
+      if(abs(integral) < thr) then
+        cycle
+      endif
+
+      n_integrals += 1
+      !DIR$ FORCEINLINE
+      call two_e_integrals_index(i, j, k, l, buffer_i(n_integrals))
+      buffer_value(n_integrals) = integral
+    enddo
+  enddo
+
+end subroutine compute_ao_tc_sym_two_e_pot_jl
+
diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f
new file mode 100644
index 00000000..4730d003
--- /dev/null
+++ b/src/ao_tc_eff_map/fit_j.irp.f
@@ -0,0 +1,510 @@
+ BEGIN_PROVIDER [ double precision, expo_j_xmu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_j_xmu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! Upper bound long range fit of F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) 
+ !
+ ! with a single gaussian. 
+ !
+ ! Such a function can be used to screen integrals with F(x). 
+ END_DOC
+ expo_j_xmu_1gauss  = 0.5d0
+ coef_j_xmu_1gauss  = 1.d0
+END_PROVIDER 
+! ---
+
+BEGIN_PROVIDER [ double precision, expo_erfc_gauss ]
+ implicit none 
+ expo_erfc_gauss = 1.41211d0
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, expo_erfc_mu_gauss ]
+ implicit none 
+ expo_erfc_mu_gauss = expo_erfc_gauss * mu_erf * mu_erf
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! exponent of Gaussian in order to obtain an upper bound of J(r12,mu)
+ !
+ ! Can be used to scree integrals with J(r12,mu)
+ END_DOC
+ expo_good_j_mu_1gauss = 2.D0 * mu_erf * expo_j_xmu_1gauss
+ coef_good_j_mu_1gauss = 0.5d0/mu_erf * coef_j_xmu_1gauss
+ END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, expo_j_xmu, (n_fit_1_erf_x) ]
+ implicit none
+ BEGIN_DOC
+ ! F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) is fitted with a gaussian and a Slater
+ !
+ !      \approx - 1/sqrt(pi) * exp(-alpha * x ) exp(-beta * x**2)
+ !
+ ! where alpha = expo_j_xmu(1) and beta = expo_j_xmu(2)
+ END_DOC
+ expo_j_xmu(1) = 1.7477d0
+ expo_j_xmu(2) = 0.668662d0
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x, (ng_fit_jast)]
+&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x, (ng_fit_jast)]
+
+  BEGIN_DOC
+  !
+  ! J(mu,r12) = 1/2 r12 * (1 - erf(mu*r12)) - 1/(2 sqrt(pi)*mu) exp(-(mu*r12)^2) is expressed as 
+  !
+  ! J(mu,r12) = 0.5/mu * F(r12*mu) where F(x) =  x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) 
+  !
+  ! F(x) is fitted by - 1/sqrt(pi) * exp(-alpha * x) exp(-beta * x^2) (see expo_j_xmu) 
+  ! 
+  ! The slater function exp(-alpha * x) is fitted with n_max_fit_slat gaussians 
+  !
+  ! See Appendix 2 of JCP 154, 084119 (2021)
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: tmp
+  double precision :: expos(ng_fit_jast), alpha, beta
+
+  if(ng_fit_jast .eq. 1) then
+
+    coef_gauss_j_mu_x = (/ -0.47947881d0 /)
+    expo_gauss_j_mu_x = (/ 3.4987848d0   /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 2) then
+
+    coef_gauss_j_mu_x = (/ -0.18390742d0, -0.35512656d0 /)
+    expo_gauss_j_mu_x = (/ 31.9279947d0 ,  2.11428789d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 3) then
+
+    coef_gauss_j_mu_x = (/ -0.07501725d0, -0.28499012d0, -0.1953932d0  /)
+    expo_gauss_j_mu_x = (/ 206.74058566d0, 1.72974157d0, 11.18735164d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 5) then
+
+    coef_gauss_j_mu_x = (/ -0.01832955d0 , -0.10188952d0 , -0.20710858d0 , -0.18975032d0 , -0.04641657d0  /)
+    expo_gauss_j_mu_x = (/ 4.33116687d+03, 2.61292842d+01, 1.43447161d+00, 4.92767426d+00, 2.10654699d+02 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 6) then
+
+    coef_gauss_j_mu_x = (/ -0.08783664d0 , -0.16088711d0 , -0.18464486d0 , -0.0368509d0  , -0.08130028d0 , -0.0126972d0   /)
+    expo_gauss_j_mu_x = (/ 4.09729729d+01, 7.11620618d+00, 2.03692338d+00, 4.10831731d+02, 1.12480198d+00, 1.00000000d+04 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_x = (/ -0.01756495d0 , -0.01023623d0  , -0.06548959d0  , -0.03539446d0  , -0.17150646d0  , -0.15071096d0  , -0.11326834d0   /)
+    expo_gauss_j_mu_x = (/ 9.88572565d+02,  1.21363371d+04,  3.69794870d+01,  1.67364529d+02,  3.03962934d+00,  1.27854005d+00,  9.76383343d+00 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_x = (/ -0.11489205d0 , -0.16008968d0 , -0.12892456d0 , -0.04250838d0 , -0.0718451d0  , -0.02394051d0 , -0.00913353d0 , -0.01285182d0  /)
+    expo_gauss_j_mu_x = (/ 6.97632442d+00, 2.56010878d+00, 1.22760977d+00, 7.47697124d+01, 2.16104215d+01, 2.96549728d+02, 1.40773328d+04, 1.43335159d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_x = (/ /)
+  !  expo_gauss_j_mu_x = (/ /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+  !  enddo
+
+  elseif(ng_fit_jast .eq. 20) then
+
+    ASSERT(n_max_fit_slat == 20)
+
+    alpha = expo_j_xmu(1) * mu_erf
+    call expo_fit_slater_gam(alpha, expos)
+    beta = expo_j_xmu(2) * mu_erf * mu_erf
+
+    tmp = -1.0d0 / sqrt(dacos(-1.d0))
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = expos(i) + beta
+      coef_gauss_j_mu_x(i) = tmp * coef_fit_slat_gauss(i) 
+    enddo
+
+  else
+
+    print *, ' not implemented yet'
+    stop
+  
+  endif
+
+  tmp = 0.5d0 / mu_erf
+  do i = 1, ng_fit_jast
+    coef_gauss_j_mu_x(i) = tmp * coef_gauss_j_mu_x(i) 
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x_2, (ng_fit_jast)]
+&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x_2, (ng_fit_jast)]
+
+  BEGIN_DOC
+  !
+  ! J(mu,r12)^2 = 0.25/mu^2 F(r12*mu)^2
+  !
+  ! F(x)^2 = 1/pi * exp(-2 * alpha * x) exp(-2 * beta * x^2) 
+  ! 
+  ! The slater function exp(-2 * alpha * x) is fitted with n_max_fit_slat gaussians 
+  !
+  ! See Appendix 2 of JCP 154, 084119 (2021)
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: tmp
+  double precision :: expos(ng_fit_jast), alpha, beta
+  double precision :: alpha_opt, beta_opt
+
+  if(ng_fit_jast .eq. 1) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.26699573d0  /)
+    expo_gauss_j_mu_x_2 = (/ 11.71029824d0 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 2) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.11627934d0  , 0.18708824d0 /)
+    expo_gauss_j_mu_x_2 = (/ 102.41386863d0, 6.36239771d0 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 3) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.04947216d0  , 0.14116238d0, 0.12276501d0  /)
+    expo_gauss_j_mu_x_2 = (/ 635.29701766d0, 4.87696954d0, 33.36745891d0 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 5) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.01461527d0  , 0.03257147d0  , 0.08831354d0  , 0.11411794d0  , 0.06858783d0   /)
+    expo_gauss_j_mu_x_2 = (/ 8.76554470d+03, 4.90224577d+02, 3.68267125d+00, 1.29663940d+01, 6.58240931d+01 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 6) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.01347632d0  , 0.03929124d0  , 0.06289468d0  , 0.10702493d0  , 0.06999865d0  , 0.02558191d0   /)
+    expo_gauss_j_mu_x_2 = (/ 1.00000000d+04, 1.20900717d+02, 3.20346191d+00, 8.92157196d+00, 3.28119120d+01, 6.49045808d+02 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.05202849d0  , 0.01031081d0  , 0.04699157d0  , 0.01451002d0  , 0.07442576d0  , 0.02692033d0  , 0.09311842d0   /)
+    expo_gauss_j_mu_x_2 = (/ 3.04469415d+00, 1.40682034d+04, 7.45960945d+01, 1.43067466d+03, 2.16815661d+01, 2.95750306d+02, 7.23471236d+00 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.00942115d0  , 0.07332421d0  , 0.0508308d0   , 0.08204949d0  , 0.0404099d0   , 0.03201288d0  , 0.01911313d0  , 0.01114732d0   /)
+    expo_gauss_j_mu_x_2 = (/ 1.56957321d+04, 1.52867810d+01, 4.36016903d+01, 5.96818956d+00, 2.85535269d+00, 1.36064008d+02, 4.71968910d+02, 1.92022350d+03 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_x_2 = (/  /)
+  !  expo_gauss_j_mu_x_2 = (/  /)
+  !  
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+  !  enddo
+
+  elseif(ng_fit_jast .eq. 20) then
+
+    ASSERT(n_max_fit_slat == 20)
+
+    !alpha_opt = 2.d0 * expo_j_xmu(1)
+    !beta_opt  = 2.d0 * expo_j_xmu(2)
+   
+    ! direct opt
+    alpha_opt = 3.52751759d0
+    beta_opt  = 1.26214809d0
+  
+    alpha = alpha_opt * mu_erf
+    call expo_fit_slater_gam(alpha, expos)
+    beta = beta_opt * mu_erf * mu_erf
+    
+    tmp = 1.d0 / dacos(-1.d0)
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = expos(i) + beta
+      coef_gauss_j_mu_x_2(i) = tmp * coef_fit_slat_gauss(i) 
+    enddo
+
+  else
+
+    print *, ' not implemented yet'
+    stop
+  
+  endif
+
+  tmp = 0.25d0 / (mu_erf * mu_erf)
+  do i = 1, ng_fit_jast
+    coef_gauss_j_mu_x_2(i) = tmp * coef_gauss_j_mu_x_2(i)
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_j_mu_1_erf, (ng_fit_jast)]
+&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_1_erf, (ng_fit_jast)]
+
+  BEGIN_DOC
+  !
+  ! J(mu,r12) x \frac{1 - erf(mu * r12)}{2} = 
+  !
+  ! - \frac{1}{4 \sqrt{\pi} \mu} \exp(-(alpha1 + alpha2) * mu * r12 - (beta1 + beta2) * mu^2 * r12^2)
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: tmp
+  double precision :: expos(ng_fit_jast), alpha, beta
+  double precision :: alpha_opt, beta_opt
+
+  if(ng_fit_jast .eq. 1) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.47742461d0 /)
+    expo_gauss_j_mu_1_erf = (/ 8.72255696d0  /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 2) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.19342649d0, -0.34563835d0 /)
+    expo_gauss_j_mu_1_erf = (/ 78.66099999d0,  5.04324363d0 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 3) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.0802541d0  , -0.27019258d0, -0.20546681d0 /)
+    expo_gauss_j_mu_1_erf = (/ 504.53350764d0,  4.01408169d0, 26.5758329d0  /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 5) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.02330531d0 , -0.11888176d0 , -0.16476192d0 , -0.19874713d0 , -0.05889174d0  /)
+    expo_gauss_j_mu_1_erf = (/ 1.00000000d+04, 4.66067922d+01, 3.04359857d+00, 9.54726649d+00, 3.59796835d+02 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 6) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.01865654d0 , -0.18319251d0 , -0.06543196d0 , -0.11522778d0 , -0.14825793d0 , -0.03327101d0  /)
+    expo_gauss_j_mu_1_erf = (/ 1.00000000d+04, 8.05593848d+00, 1.27986190d+02, 2.92674319d+01, 2.93583623d+00, 7.65609148d+02 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.11853067d0 , -0.01522824d0  , -0.07419098d0  , -0.022202d0    , -0.12242283d0  , -0.04177571d0  , -0.16983107d0  /)
+    expo_gauss_j_mu_1_erf = (/ 2.74057056d+00,  1.37626591d+04,  6.65578663d+01,  1.34693031d+03,  1.90547699d+01,  2.69445390d+02,  6.31845879d+00/)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.12263328d0 , -0.04965255d0 , -0.15463564d0 , -0.09675781d0 , -0.0807023d0  , -0.02923298d0 , -0.01381381d0 , -0.01675923d0  /)
+    expo_gauss_j_mu_1_erf = (/ 1.36101994d+01, 1.24908367d+02, 5.29061388d+00, 2.60692516d+00, 3.93396935d+01, 4.43071610d+02, 1.54902240d+04, 1.85170446d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_1_erf = (/  /)
+  !  expo_gauss_j_mu_1_erf = (/  /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+  !  enddo
+
+  elseif(ng_fit_jast .eq. 20) then
+
+    ASSERT(n_max_fit_slat == 20)
+
+    !alpha_opt = expo_j_xmu(1) + expo_gauss_1_erf_x(1)
+    !beta_opt  = expo_j_xmu(2) + expo_gauss_1_erf_x(2)
+   
+    ! direct opt
+    alpha_opt = 2.87875632d0
+    beta_opt  = 1.34801003d0
+    
+    alpha = alpha_opt * mu_erf
+    call expo_fit_slater_gam(alpha, expos)
+    beta = beta_opt * mu_erf * mu_erf
+    
+    tmp = -1.d0 / dsqrt(dacos(-1.d0))
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = expos(i) + beta
+      coef_gauss_j_mu_1_erf(i) = tmp * coef_fit_slat_gauss(i) 
+    enddo
+
+  else
+
+    print *, ' not implemented yet'
+    stop
+  
+  endif
+
+  tmp = 0.25d0 / mu_erf
+  do i = 1, ng_fit_jast
+    coef_gauss_j_mu_1_erf(i) = tmp * coef_gauss_j_mu_1_erf(i)
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+double precision  function F_x_j(x)
+ implicit none
+ BEGIN_DOC 
+ ! F_x_j(x) = dimension-less correlation factor = x (1 - erf(x)) - 1/sqrt(pi) exp(-x^2)
+ END_DOC
+ double precision, intent(in) :: x
+ F_x_j = x * (1.d0 - derf(x)) - 1/dsqrt(dacos(-1.d0)) * dexp(-x**2)
+
+end
+
+double precision function j_mu_F_x_j(x)
+ implicit none
+ BEGIN_DOC 
+ ! j_mu_F_x_j(x) = correlation factor = 1/2 r12 * (1 - erf(mu*r12)) - 1/(2 sqrt(pi)*mu) exp(-(mu*r12)^2)
+ !
+ !         = 1/(2*mu) * F_x_j(mu*x)
+ END_DOC
+ double precision :: F_x_j
+ double precision, intent(in) :: x
+ j_mu_F_x_j = 0.5d0/mu_erf * F_x_j(x*mu_erf)
+end
+
+double precision function j_mu(x)
+ implicit none
+ double precision, intent(in) :: x
+ BEGIN_DOC 
+ ! j_mu(x) = correlation factor = 1/2 r12 * (1 - erf(mu*r12)) - 1/(2 sqrt(pi)*mu) exp(-(mu*r12)^2)
+ END_DOC
+ j_mu = 0.5d0* x * (1.d0 - derf(mu_erf*x)) - 0.5d0/( dsqrt(dacos(-1.d0))*mu_erf) * dexp(-(mu_erf*x)*(mu_erf*x))
+ 
+end
+
+double precision function j_mu_fit_gauss(x)
+ implicit none
+ BEGIN_DOC 
+ ! j_mu_fit_gauss(x) = correlation factor = 1/2 r12 * (1 - erf(mu*r12)) - 1/(2 sqrt(pi)*mu) exp(-(mu*r12)^2)
+ !
+ ! but fitted with gaussians 
+ END_DOC
+ double precision, intent(in) :: x
+ integer :: i
+ double precision :: alpha,coef
+ j_mu_fit_gauss = 0.d0
+ do i = 1, n_max_fit_slat
+  alpha = expo_gauss_j_mu_x(i) 
+  coef  = coef_gauss_j_mu_x(i) 
+  j_mu_fit_gauss +=  coef * dexp(-alpha*x*x)
+ enddo
+ 
+end
+
+! ---
+
diff --git a/src/ao_tc_eff_map/integrals_eff_pot_in_map_slave.irp.f b/src/ao_tc_eff_map/integrals_eff_pot_in_map_slave.irp.f
new file mode 100644
index 00000000..28401cc4
--- /dev/null
+++ b/src/ao_tc_eff_map/integrals_eff_pot_in_map_slave.irp.f
@@ -0,0 +1,194 @@
+subroutine ao_tc_sym_two_e_pot_in_map_slave_tcp(i)
+  implicit none
+  integer, intent(in)            :: i
+  BEGIN_DOC
+! Computes a buffer of integrals. i is the ID of the current thread.
+  END_DOC
+  call ao_tc_sym_two_e_pot_in_map_slave(0,i)
+end
+
+
+subroutine ao_tc_sym_two_e_pot_in_map_slave_inproc(i)
+  implicit none
+  integer, intent(in)            :: i
+  BEGIN_DOC
+! Computes a buffer of integrals. i is the ID of the current thread.
+  END_DOC
+  call ao_tc_sym_two_e_pot_in_map_slave(1,i)
+end
+
+
+
+
+
+subroutine ao_tc_sym_two_e_pot_in_map_slave(thread,iproc)
+  use map_module
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Computes a buffer of integrals
+  END_DOC
+
+  integer, intent(in)            :: thread, iproc
+
+  integer                        :: j,l,n_integrals
+  integer                        :: rc
+  real(integral_kind), allocatable :: buffer_value(:)
+  integer(key_kind), allocatable :: buffer_i(:)
+
+  integer                        :: worker_id, task_id
+  character*(512)                :: task
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket
+  integer(ZMQ_PTR)               :: zmq_socket_push
+
+  character*(64)                 :: state
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  allocate ( buffer_i(ao_num*ao_num), buffer_value(ao_num*ao_num) )
+
+
+  do
+    integer, external :: get_task_from_taskserver
+    if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) == -1) then
+      exit
+    endif
+    if (task_id == 0) exit
+    read(task,*) j, l
+    integer, external :: task_done_to_taskserver
+    call compute_ao_tc_sym_two_e_pot_jl(j,l,n_integrals,buffer_i,buffer_value)
+    if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
+        stop 'Unable to send task_done'
+    endif
+    call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id)
+  enddo
+
+  integer, external :: disconnect_from_taskserver
+  if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
+    continue
+  endif
+  deallocate( buffer_i, buffer_value )
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  call end_zmq_push_socket(zmq_socket_push,thread)
+
+end
+
+
+subroutine ao_tc_sym_two_e_pot_in_map_collector(zmq_socket_pull)
+  use map_module
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Collects results from the AO integral calculation
+  END_DOC
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  integer                        :: j,l,n_integrals
+  integer                        :: rc
+
+  real(integral_kind), allocatable :: buffer_value(:)
+  integer(key_kind), allocatable :: buffer_i(:)
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_pull_socket
+
+  integer*8                      :: control, accu, sze
+  integer                        :: task_id, more
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  sze = ao_num*ao_num
+  allocate ( buffer_i(sze), buffer_value(sze) )
+
+  accu = 0_8
+  more = 1
+  do while (more == 1)
+
+    rc = f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0)
+    if (rc == -1) then
+      n_integrals = 0
+      return
+    endif
+    if (rc /= 4) then
+      print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0)'
+      stop 'error'
+    endif
+
+    if (n_integrals >= 0) then
+
+      if (n_integrals > sze) then
+        deallocate (buffer_value, buffer_i)
+        sze = n_integrals
+        allocate (buffer_value(sze), buffer_i(sze))
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0)
+      if (rc /= key_kind*n_integrals) then
+        print *,  rc, key_kind, n_integrals
+        print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0)'
+        stop 'error'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0)
+      if (rc /= integral_kind*n_integrals) then
+        print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0)'
+        stop 'error'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
+
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+      rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
+      if (rc /= 4) then
+        print *,  irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
+        stop 'error'
+      endif
+IRP_ENDIF
+
+
+      call insert_into_ao_tc_sym_two_e_pot_map(n_integrals,buffer_i,buffer_value)
+      accu += n_integrals
+      if (task_id /= 0) then
+        integer, external :: zmq_delete_task
+        if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) == -1) then
+          stop 'Unable to delete task'
+        endif
+      endif
+    endif
+
+  enddo
+
+  deallocate( buffer_i, buffer_value )
+
+  integer (map_size_kind) :: get_ao_tc_sym_two_e_pot_map_size
+  control = get_ao_tc_sym_two_e_pot_map_size(ao_tc_sym_two_e_pot_map)
+
+  if (control /= accu) then
+      print *, ''
+      print *, irp_here
+      print *, 'Control : ', control
+      print *, 'Accu    : ', accu
+      print *, 'Some integrals were lost during the parallel computation.'
+      print *, 'Try to reduce the number of threads.'
+      stop
+  endif
+
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+
+end
+
diff --git a/src/ao_tc_eff_map/map_integrals_eff_pot.irp.f b/src/ao_tc_eff_map/map_integrals_eff_pot.irp.f
new file mode 100644
index 00000000..95dc664d
--- /dev/null
+++ b/src/ao_tc_eff_map/map_integrals_eff_pot.irp.f
@@ -0,0 +1,313 @@
+use map_module
+
+!! AO Map
+!! ======
+
+BEGIN_PROVIDER [ type(map_type), ao_tc_sym_two_e_pot_map ]
+  implicit none
+  BEGIN_DOC
+  ! |AO| integrals
+  END_DOC
+  integer(key_kind)              :: key_max
+  integer(map_size_kind)         :: sze
+  call two_e_integrals_index(ao_num,ao_num,ao_num,ao_num,key_max)
+  sze = key_max
+  call map_init(ao_tc_sym_two_e_pot_map,sze)
+  print*,  'ao_tc_sym_two_e_pot_map map initialized : ', sze
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, ao_tc_sym_two_e_pot_cache_min ]
+&BEGIN_PROVIDER [ integer, ao_tc_sym_two_e_pot_cache_max ]
+ implicit none
+ BEGIN_DOC
+ ! Min and max values of the AOs for which the integrals are in the cache
+ END_DOC
+ ao_tc_sym_two_e_pot_cache_min = max(1,ao_num - 63)
+ ao_tc_sym_two_e_pot_cache_max = ao_num
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, ao_tc_sym_two_e_pot_cache, (0:64*64*64*64) ]
+
+  use map_module
+  implicit none
+
+  BEGIN_DOC
+  ! Cache of |AO| integrals for fast access
+  END_DOC
+
+  integer                        :: i,j,k,l,ii
+  integer(key_kind)              :: idx
+  real(integral_kind)            :: integral
+
+  PROVIDE ao_tc_sym_two_e_pot_in_map
+
+ !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral)
+  do l = ao_tc_sym_two_e_pot_cache_min, ao_tc_sym_two_e_pot_cache_max
+    do k = ao_tc_sym_two_e_pot_cache_min, ao_tc_sym_two_e_pot_cache_max
+      do j = ao_tc_sym_two_e_pot_cache_min, ao_tc_sym_two_e_pot_cache_max
+        do i = ao_tc_sym_two_e_pot_cache_min, ao_tc_sym_two_e_pot_cache_max
+          !DIR$ FORCEINLINE
+          call two_e_integrals_index(i, j, k, l, idx)
+          !DIR$ FORCEINLINE
+          call map_get(ao_tc_sym_two_e_pot_map, idx, integral)
+          ii = l-ao_tc_sym_two_e_pot_cache_min
+          ii = ior( ishft(ii,6), k-ao_tc_sym_two_e_pot_cache_min)
+          ii = ior( ishft(ii,6), j-ao_tc_sym_two_e_pot_cache_min)
+          ii = ior( ishft(ii,6), i-ao_tc_sym_two_e_pot_cache_min)
+          ao_tc_sym_two_e_pot_cache(ii) = integral
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+! ---
+
+subroutine insert_into_ao_tc_sym_two_e_pot_map(n_integrals, buffer_i, buffer_values)
+
+  use map_module
+  implicit none
+
+  BEGIN_DOC
+  ! Create new entry into |AO| map
+  END_DOC
+
+  integer, intent(in)                :: n_integrals
+  integer(key_kind), intent(inout)   :: buffer_i(n_integrals)
+  real(integral_kind), intent(inout) :: buffer_values(n_integrals)
+
+  call map_append(ao_tc_sym_two_e_pot_map, buffer_i, buffer_values, n_integrals)
+
+end
+
+! ---
+
+double precision function get_ao_tc_sym_two_e_pot(i, j, k, l, map) result(result)
+
+  use map_module
+
+  implicit none
+
+  BEGIN_DOC
+  ! Gets one |AO| two-electron integral from the |AO| map
+  END_DOC
+
+  integer, intent(in)            :: i,j,k,l
+  integer(key_kind)              :: idx
+  type(map_type), intent(inout)  :: map
+  integer                        :: ii
+  real(integral_kind)            :: tmp
+  logical, external              :: ao_two_e_integral_zero
+
+  PROVIDE ao_tc_sym_two_e_pot_in_map ao_tc_sym_two_e_pot_cache ao_tc_sym_two_e_pot_cache_min
+
+  !DIR$ FORCEINLINE
+!  if (ao_two_e_integral_zero(i,j,k,l)) then
+  if (.False.) then
+    tmp = 0.d0
+  !else if (ao_two_e_integral_erf_schwartz(i,k)*ao_two_e_integral_erf_schwartz(j,l) < ao_integrals_threshold) then
+  !  tmp = 0.d0
+  else
+    ii = l-ao_tc_sym_two_e_pot_cache_min
+    ii = ior(ii, k-ao_tc_sym_two_e_pot_cache_min)
+    ii = ior(ii, j-ao_tc_sym_two_e_pot_cache_min)
+    ii = ior(ii, i-ao_tc_sym_two_e_pot_cache_min)
+    if (iand(ii, -64) /= 0) then
+      !DIR$ FORCEINLINE
+      call two_e_integrals_index(i, j, k, l, idx)
+      !DIR$ FORCEINLINE
+      call map_get(map, idx, tmp)
+      tmp = tmp
+    else
+      ii = l-ao_tc_sym_two_e_pot_cache_min
+      ii = ior( ishft(ii,6), k-ao_tc_sym_two_e_pot_cache_min)
+      ii = ior( ishft(ii,6), j-ao_tc_sym_two_e_pot_cache_min)
+      ii = ior( ishft(ii,6), i-ao_tc_sym_two_e_pot_cache_min)
+      tmp = ao_tc_sym_two_e_pot_cache(ii)
+    endif
+  endif
+
+  result = tmp
+
+end
+
+! ---
+
+subroutine get_many_ao_tc_sym_two_e_pot(j,k,l,sze,out_val)
+  use map_module
+  BEGIN_DOC
+  ! Gets multiple |AO| two-electron integral from the |AO| map .
+  ! All i are retrieved for j,k,l fixed.
+  END_DOC
+  implicit none
+  integer, intent(in)            :: j,k,l, sze
+  real(integral_kind), intent(out) :: out_val(sze)
+
+  integer                        :: i
+  integer(key_kind)              :: hash
+  double precision               :: thresh
+! logical, external              :: ao_one_e_integral_zero
+  PROVIDE ao_tc_sym_two_e_pot_in_map ao_tc_sym_two_e_pot_map
+  thresh = ao_integrals_threshold
+
+! if (ao_one_e_integral_zero(j,l)) then
+  if (.False.) then
+    out_val = 0.d0
+    return
+  endif
+
+  double precision :: get_ao_tc_sym_two_e_pot
+  do i=1,sze
+    out_val(i) = get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map)
+  enddo
+
+end
+
+subroutine get_many_ao_tc_sym_two_e_pot_non_zero(j,k,l,sze,out_val,out_val_index,non_zero_int)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Gets multiple |AO| two-electron integrals from the |AO| map .
+  ! All non-zero i are retrieved for j,k,l fixed.
+  END_DOC
+  integer, intent(in)            :: j,k,l, sze
+  real(integral_kind), intent(out) :: out_val(sze)
+  integer, intent(out)           :: out_val_index(sze),non_zero_int
+
+  integer                        :: i
+  integer(key_kind)              :: hash
+  double precision               :: thresh,tmp
+! logical, external              :: ao_one_e_integral_zero
+  PROVIDE ao_tc_sym_two_e_pot_in_map
+  thresh = ao_integrals_threshold
+
+  non_zero_int = 0
+! if (ao_one_e_integral_zero(j,l)) then
+  if (.False.) then
+    out_val = 0.d0
+    return
+  endif
+
+  non_zero_int = 0
+  do i=1,sze
+    integer, external :: ao_l4
+    double precision, external :: ao_two_e_integral_eff_pot
+    !DIR$ FORCEINLINE
+    !if (ao_two_e_integral_erf_schwartz(i,k)*ao_two_e_integral_erf_schwartz(j,l) < thresh) then
+    !  cycle
+    !endif
+    call two_e_integrals_index(i,j,k,l,hash)
+    call map_get(ao_tc_sym_two_e_pot_map, hash,tmp)
+    if (dabs(tmp) < thresh ) cycle
+    non_zero_int = non_zero_int+1
+    out_val_index(non_zero_int) = i
+    out_val(non_zero_int) = tmp
+  enddo
+
+end
+
+
+function get_ao_tc_sym_two_e_pot_map_size()
+  implicit none
+  integer (map_size_kind) :: get_ao_tc_sym_two_e_pot_map_size
+  BEGIN_DOC
+  ! Returns the number of elements in the |AO| map
+  END_DOC
+  get_ao_tc_sym_two_e_pot_map_size = ao_tc_sym_two_e_pot_map % n_elements
+end
+
+subroutine clear_ao_tc_sym_two_e_pot_map
+  implicit none
+  BEGIN_DOC
+  ! Frees the memory of the |AO| map
+  END_DOC
+  call map_deinit(ao_tc_sym_two_e_pot_map)
+  FREE ao_tc_sym_two_e_pot_map
+end
+
+
+
+subroutine dump_ao_tc_sym_two_e_pot(filename)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Save to disk the |AO| eff_pot integrals
+  END_DOC
+  character*(*), intent(in)      :: filename
+  integer(cache_key_kind), pointer :: key(:)
+  real(integral_kind), pointer   :: val(:)
+  integer*8                      :: i,j, n
+  call ezfio_set_work_empty(.False.)
+  open(unit=66,file=filename,FORM='unformatted')
+  write(66) integral_kind, key_kind
+  write(66) ao_tc_sym_two_e_pot_map%sorted, ao_tc_sym_two_e_pot_map%map_size,    &
+      ao_tc_sym_two_e_pot_map%n_elements
+  do i=0_8,ao_tc_sym_two_e_pot_map%map_size
+    write(66) ao_tc_sym_two_e_pot_map%map(i)%sorted, ao_tc_sym_two_e_pot_map%map(i)%map_size,&
+        ao_tc_sym_two_e_pot_map%map(i)%n_elements
+  enddo
+  do i=0_8,ao_tc_sym_two_e_pot_map%map_size
+    key => ao_tc_sym_two_e_pot_map%map(i)%key
+    val => ao_tc_sym_two_e_pot_map%map(i)%value
+    n = ao_tc_sym_two_e_pot_map%map(i)%n_elements
+    write(66) (key(j), j=1,n), (val(j), j=1,n)
+  enddo
+  close(66)
+
+end
+
+
+
+integer function load_ao_tc_sym_two_e_pot(filename)
+  implicit none
+  BEGIN_DOC
+  ! Read from disk the |AO| eff_pot integrals
+  END_DOC
+  character*(*), intent(in)      :: filename
+  integer*8                      :: i
+  integer(cache_key_kind), pointer :: key(:)
+  real(integral_kind), pointer   :: val(:)
+  integer                        :: iknd, kknd
+  integer*8                      :: n, j
+  load_ao_tc_sym_two_e_pot = 1
+  open(unit=66,file=filename,FORM='unformatted',STATUS='UNKNOWN')
+  read(66,err=98,end=98) iknd, kknd
+  if (iknd /= integral_kind) then
+    print *,  'Wrong integrals kind in file :', iknd
+    stop 1
+  endif
+  if (kknd /= key_kind) then
+    print *,  'Wrong key kind in file :', kknd
+    stop 1
+  endif
+  read(66,err=98,end=98) ao_tc_sym_two_e_pot_map%sorted, ao_tc_sym_two_e_pot_map%map_size,&
+      ao_tc_sym_two_e_pot_map%n_elements
+  do i=0_8, ao_tc_sym_two_e_pot_map%map_size
+    read(66,err=99,end=99) ao_tc_sym_two_e_pot_map%map(i)%sorted,          &
+        ao_tc_sym_two_e_pot_map%map(i)%map_size, ao_tc_sym_two_e_pot_map%map(i)%n_elements
+    call cache_map_reallocate(ao_tc_sym_two_e_pot_map%map(i),ao_tc_sym_two_e_pot_map%map(i)%map_size)
+  enddo
+  do i=0_8, ao_tc_sym_two_e_pot_map%map_size
+    key => ao_tc_sym_two_e_pot_map%map(i)%key
+    val => ao_tc_sym_two_e_pot_map%map(i)%value
+    n = ao_tc_sym_two_e_pot_map%map(i)%n_elements
+    read(66,err=99,end=99) (key(j), j=1,n), (val(j), j=1,n)
+  enddo
+  call map_sort(ao_tc_sym_two_e_pot_map)
+  load_ao_tc_sym_two_e_pot = 0
+  return
+  99 continue
+  call map_deinit(ao_tc_sym_two_e_pot_map)
+  98 continue
+  stop 'Problem reading ao_tc_sym_two_e_pot_map file in work/'
+
+end
+
+
+
+
diff --git a/src/ao_tc_eff_map/one_e_1bgauss_grad2.irp.f b/src/ao_tc_eff_map/one_e_1bgauss_grad2.irp.f
new file mode 100644
index 00000000..50c396de
--- /dev/null
+++ b/src/ao_tc_eff_map/one_e_1bgauss_grad2.irp.f
@@ -0,0 +1,332 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, j1b_gauss_hermII, (ao_num,ao_num)]
+
+  BEGIN_DOC
+  !
+  !  :math:`\langle \chi_A | -0.5 \grad \tau_{1b} \cdot \grad \tau_{1b} | \chi_B \rangle` 
+  !
+  END_DOC
+
+  implicit none
+
+  integer          :: num_A, num_B
+  integer          :: power_A(3), power_B(3)
+  integer          :: i, j, k1, k2, l, m
+  double precision :: alpha, beta, gama1, gama2, coef1, coef2
+  double precision :: A_center(3), B_center(3), C_center1(3), C_center2(3)
+  double precision :: c1, c
+
+  integer          :: dim1
+  double precision :: overlap_y, d_a_2, overlap_z, overlap
+
+  double precision :: int_gauss_4G
+
+  PROVIDE j1b_type j1b_pen j1b_coeff
+
+  ! --------------------------------------------------------------------------------
+  ! -- Dummy call to provide everything
+  dim1        = 100
+  A_center(:) = 0.d0
+  B_center(:) = 1.d0
+  alpha       = 1.d0
+  beta        = 0.1d0
+  power_A(:)  = 1
+  power_B(:)  = 0
+  call overlap_gaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                           , overlap_y, d_a_2, overlap_z, overlap, dim1 )
+  ! --------------------------------------------------------------------------------
+  
+
+  j1b_gauss_hermII(1:ao_num,1:ao_num) = 0.d0
+
+  if(j1b_type .eq. 1) then
+  ! \tau_1b = \sum_iA -[1 - exp(-alpha_A r_iA^2)]
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k1, k2, l, m, alpha, beta, gama1, gama2,  &
+ !$OMP          A_center, B_center, C_center1, C_center2,       &
+ !$OMP          power_A, power_B, num_A, num_B, c1, c)          &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_hermII)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k1 = 1, nucl_num
+              gama1          = j1b_pen(k1)
+              C_center1(1:3) = nucl_coord(k1,1:3)
+  
+              do k2 = 1, nucl_num
+                gama2          = j1b_pen(k2)
+                C_center2(1:3) = nucl_coord(k2,1:3)
+  
+                ! < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] r_C1 \cdot r_C2 | XB >
+                c1 = int_gauss_4G( A_center, B_center, C_center1, C_center2     &
+                                 , power_A, power_B, alpha, beta, gama1, gama2  )
+  
+                c = c - 2.d0 * gama1 * gama2 * c1
+              enddo
+            enddo
+  
+            j1b_gauss_hermII(i,j) = j1b_gauss_hermII(i,j)      & 
+                      + ao_coef_normalized_ordered_transp(l,j) &
+                      * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  elseif(j1b_type .eq. 2) then
+  ! \tau_1b = \sum_iA [c_A exp(-alpha_A r_iA^2)]
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k1, k2, l, m, alpha, beta, gama1, gama2,  &
+ !$OMP          A_center, B_center, C_center1, C_center2,       &
+ !$OMP          power_A, power_B, num_A, num_B, c1, c,          &
+ !$OMP          coef1, coef2)                                   &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_hermII,             &
+ !$OMP         j1b_coeff)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k1 = 1, nucl_num
+              gama1          = j1b_pen  (k1)
+              coef1          = j1b_coeff(k1)
+              C_center1(1:3) = nucl_coord(k1,1:3)
+  
+              do k2 = 1, nucl_num
+                gama2          = j1b_pen  (k2)
+                coef2          = j1b_coeff(k2)
+                C_center2(1:3) = nucl_coord(k2,1:3)
+  
+                ! < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] r_C1 \cdot r_C2 | XB >
+                c1 = int_gauss_4G( A_center, B_center, C_center1, C_center2     &
+                                 , power_A, power_B, alpha, beta, gama1, gama2  )
+  
+                c = c - 2.d0 * gama1 * gama2 * coef1 * coef2 * c1
+              enddo
+            enddo
+  
+            j1b_gauss_hermII(i,j) = j1b_gauss_hermII(i,j)      & 
+                      + ao_coef_normalized_ordered_transp(l,j) &
+                      * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  endif
+
+END_PROVIDER
+
+
+
+
+
+!_____________________________________________________________________________________________________________
+!
+!               < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] r_C1 \cdot r_C2 | XB >
+!
+double precision function int_gauss_4G( A_center, B_center, C_center1, C_center2, power_A, power_B &
+                                      , alpha, beta, gama1, gama2 )
+
+  ! for max_dim
+  include 'constants.include.F'
+
+  implicit none
+
+  integer         , intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: A_center(3), B_center(3), C_center1(3), C_center2(3)
+  double precision, intent(in) :: alpha, beta, gama1, gama2
+
+  integer                      :: i, dim1, power_C
+  integer                      :: iorder(3)
+  double precision             :: AB_expo, fact_AB, AB_center(3), P_AB(0:max_dim,3)
+  double precision             :: gama, fact_C, C_center(3)
+  double precision             :: cx0, cy0, cz0, c_tmp1, c_tmp2, cx, cy, cz
+  double precision             :: int_tmp
+
+  double precision             :: overlap_gaussian_x
+
+  dim1 = 100
+
+  ! P_AB(0:max_dim,3) polynomial
+  ! AB_center(3)      new center
+  ! AB_expo           new exponent
+  ! fact_AB           constant factor
+  ! iorder(3)         i_order(i) = order of the polynomials
+  call give_explicit_poly_and_gaussian( P_AB, AB_center, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_B, A_center, B_center, dim1)
+
+  call gaussian_product(gama1, C_center1, gama2, C_center2, fact_C, gama, C_center)
+
+  ! <<<
+  ! to avoid multi-evaluation
+  power_C = 0
+
+  cx0 = 0.d0
+  do i = 0, iorder(1)
+    cx0 = cx0 + P_AB(i,1) * overlap_gaussian_x( AB_center(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  cy0 = 0.d0
+  do i = 0, iorder(2)
+    cy0 = cy0 + P_AB(i,2) * overlap_gaussian_x( AB_center(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  cz0 = 0.d0
+  do i = 0, iorder(3)
+    cz0 = cz0 + P_AB(i,3) * overlap_gaussian_x( AB_center(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+  ! >>>
+
+  int_tmp = 0.d0
+
+  ! -----------------------------------------------------------------------------------------------
+  !
+  ! x term:
+  !          < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] (x - x_C1) (x - x_C2) | XB > 
+  !
+
+  c_tmp1 = 2.d0 * C_center(1) - C_center1(1) - C_center2(1)
+  c_tmp2 = ( C_center(1) - C_center1(1) ) * ( C_center(1) - C_center2(1) ) 
+
+  cx = 0.d0
+  do i = 0, iorder(1)
+    
+    ! < XA | exp[-gama r_C^2] (x - x_C)^2 | XB >
+    power_C = 2
+    cx      = cx + P_AB(i,1) &
+            * overlap_gaussian_x( AB_center(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+
+    ! < XA | exp[-gama r_C^2] (x - x_C) | XB >
+    power_C = 1
+    cx      = cx + P_AB(i,1) * c_tmp1 &
+            * overlap_gaussian_x( AB_center(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+
+    ! < XA | exp[-gama r_C^2] | XB >
+    power_C = 0
+    cx      = cx + P_AB(i,1) * c_tmp2 &
+            * overlap_gaussian_x( AB_center(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+
+  enddo
+
+  int_tmp += cx * cy0 * cz0
+
+  ! -----------------------------------------------------------------------------------------------
+
+
+  ! -----------------------------------------------------------------------------------------------
+  !
+  ! y term:
+  !          < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] (y - y_C1) (y - y_C2) | XB > 
+  !
+
+  c_tmp1 = 2.d0 * C_center(2) - C_center1(2) - C_center2(2)
+  c_tmp2 = ( C_center(2) - C_center1(2) ) * ( C_center(2) - C_center2(2) ) 
+
+  cy = 0.d0
+  do i = 0, iorder(2)
+    
+    ! < XA | exp[-gama r_C^2] (y - y_C)^2 | XB >
+    power_C = 2
+    cy      = cy + P_AB(i,2) &
+            * overlap_gaussian_x( AB_center(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+
+    ! < XA | exp[-gama r_C^2] (y - y_C) | XB >
+    power_C = 1
+    cy      = cy + P_AB(i,2) * c_tmp1 &
+            * overlap_gaussian_x( AB_center(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+
+    ! < XA | exp[-gama r_C^2] | XB >
+    power_C = 0
+    cy      = cy + P_AB(i,2) * c_tmp2 &
+            * overlap_gaussian_x( AB_center(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+
+  enddo
+
+  int_tmp += cx0 * cy * cz0
+
+  ! -----------------------------------------------------------------------------------------------
+
+
+  ! -----------------------------------------------------------------------------------------------
+  !
+  ! z term:
+  !          < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] (z - z_C1) (z - z_C2) | XB > 
+  !
+
+  c_tmp1 = 2.d0 * C_center(3) - C_center1(3) - C_center2(3)
+  c_tmp2 = ( C_center(3) - C_center1(3) ) * ( C_center(3) - C_center2(3) ) 
+
+  cz = 0.d0
+  do i = 0, iorder(3)
+    
+    ! < XA | exp[-gama r_C^2] (z - z_C)^2 | XB >
+    power_C = 2
+    cz      = cz + P_AB(i,3) &
+            * overlap_gaussian_x( AB_center(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+
+    ! < XA | exp[-gama r_C^2] (z - z_C) | XB >
+    power_C = 1
+    cz      = cz + P_AB(i,3) * c_tmp1 &
+            * overlap_gaussian_x( AB_center(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+
+    ! < XA | exp[-gama r_C^2] | XB >
+    power_C = 0
+    cz      = cz + P_AB(i,3) * c_tmp2 &
+            * overlap_gaussian_x( AB_center(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+
+  enddo
+
+  int_tmp += cx0 * cy0 * cz
+
+  ! -----------------------------------------------------------------------------------------------
+
+  int_gauss_4G = fact_AB * fact_C * int_tmp
+
+  return
+end function int_gauss_4G
+!_____________________________________________________________________________________________________________
+!_____________________________________________________________________________________________________________
+
+
diff --git a/src/ao_tc_eff_map/one_e_1bgauss_lap.irp.f b/src/ao_tc_eff_map/one_e_1bgauss_lap.irp.f
new file mode 100644
index 00000000..0a0b7610
--- /dev/null
+++ b/src/ao_tc_eff_map/one_e_1bgauss_lap.irp.f
@@ -0,0 +1,303 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, j1b_gauss_hermI, (ao_num,ao_num)]
+
+  BEGIN_DOC
+  !
+  !  :math:`\langle \chi_A | -0.5 \Delta \tau_{1b} | \chi_B \rangle` 
+  !
+  END_DOC
+
+  implicit none
+
+  integer          :: num_A, num_B
+  integer          :: power_A(3), power_B(3)
+  integer          :: i, j, k, l, m
+  double precision :: alpha, beta, gama, coef
+  double precision :: A_center(3), B_center(3), C_center(3)
+  double precision :: c1, c2, c
+
+  integer          :: dim1
+  double precision :: overlap_y, d_a_2, overlap_z, overlap
+
+  double precision :: int_gauss_r0, int_gauss_r2
+
+  PROVIDE j1b_type j1b_pen j1b_coeff
+
+  ! --------------------------------------------------------------------------------
+  ! -- Dummy call to provide everything
+  dim1        = 100
+  A_center(:) = 0.d0
+  B_center(:) = 1.d0
+  alpha       = 1.d0
+  beta        = 0.1d0
+  power_A(:)  = 1
+  power_B(:)  = 0
+  call overlap_gaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                           , overlap_y, d_a_2, overlap_z, overlap, dim1 )
+  ! --------------------------------------------------------------------------------
+  
+  j1b_gauss_hermI(1:ao_num,1:ao_num) = 0.d0
+
+  if(j1b_type .eq. 1) then
+  ! \tau_1b = \sum_iA -[1 - exp(-alpha_A r_iA^2)]
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k, l, m, alpha, beta, gama,               &
+ !$OMP          A_center, B_center, C_center, power_A, power_B, &
+ !$OMP          num_A, num_B, c1, c2, c)                        &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_hermI)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k = 1, nucl_num
+              gama          = j1b_pen(k)
+              C_center(1:3) = nucl_coord(k,1:3)
+  
+              ! < XA | exp[-gama r_C^2] | XB >
+              c1 = int_gauss_r0( A_center, B_center, C_center        &
+                               , power_A, power_B, alpha, beta, gama )
+  
+              ! < XA | r_A^2 exp[-gama r_C^2] | XB >
+              c2 = int_gauss_r2( A_center, B_center, C_center        &
+                               , power_A, power_B, alpha, beta, gama )
+  
+              c = c + 3.d0 * gama * c1 - 2.d0 * gama * gama * c2
+            enddo
+  
+            j1b_gauss_hermI(i,j) = j1b_gauss_hermI(i,j)      & 
+                    + ao_coef_normalized_ordered_transp(l,j) &
+                    * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  elseif(j1b_type .eq. 2) then
+  ! \tau_1b = \sum_iA [c_A exp(-alpha_A r_iA^2)]
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k, l, m, alpha, beta, gama, coef,         &
+ !$OMP          A_center, B_center, C_center, power_A, power_B, &
+ !$OMP          num_A, num_B, c1, c2, c)                        &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_hermI,              &
+ !$OMP         j1b_coeff)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k = 1, nucl_num
+              gama          = j1b_pen  (k)
+              coef          = j1b_coeff(k)
+              C_center(1:3) = nucl_coord(k,1:3)
+  
+              ! < XA | exp[-gama r_C^2] | XB >
+              c1 = int_gauss_r0( A_center, B_center, C_center        &
+                               , power_A, power_B, alpha, beta, gama )
+  
+              ! < XA | r_A^2 exp[-gama r_C^2] | XB >
+              c2 = int_gauss_r2( A_center, B_center, C_center        &
+                               , power_A, power_B, alpha, beta, gama )
+  
+              c = c + 3.d0 * gama * coef * c1 - 2.d0 * gama * gama * coef * c2
+            enddo
+  
+            j1b_gauss_hermI(i,j) = j1b_gauss_hermI(i,j)      & 
+                    + ao_coef_normalized_ordered_transp(l,j) &
+                    * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  endif
+
+END_PROVIDER
+
+
+!_____________________________________________________________________________________________________________
+!
+!                             < XA | exp[-gama r_C^2] | XB >
+!
+double precision function int_gauss_r0(A_center, B_center, C_center, power_A, power_B, alpha, beta, gama)
+
+  ! for max_dim
+  include 'constants.include.F'
+
+  implicit none
+
+  integer         , intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: A_center(3), B_center(3), C_center(3)
+  double precision, intent(in) :: alpha, beta, gama 
+
+  integer                      :: i, power_C, dim1
+  integer                      :: iorder(3)
+  integer                      :: nmax
+  double precision             :: AB_expo, fact_AB, AB_center(3), P_AB(0:max_dim,3)
+  double precision             :: cx, cy, cz 
+
+  double precision             :: overlap_gaussian_x 
+
+  dim1 = 100
+
+  ! P_AB(0:max_dim,3) polynomial
+  ! AB_center(3)      new center
+  ! AB_expo           new exponent
+  ! fact_AB           constant factor
+  ! iorder(3)         i_order(i) = order of the polynomials
+  call give_explicit_poly_and_gaussian( P_AB, AB_center, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_B, A_center, B_center, dim1)
+
+  if( fact_AB .lt. 1d-20 ) then
+    int_gauss_r0 = 0.d0
+    return
+  endif
+
+  power_C = 0
+  cx = 0.d0
+  do i = 0, iorder(1)
+    cx = cx + P_AB(i,1) * overlap_gaussian_x(AB_center(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  cy = 0.d0
+  do i = 0, iorder(2)
+    cy = cy + P_AB(i,2) * overlap_gaussian_x(AB_center(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  cz = 0.d0
+  do i = 0, iorder(3)
+    cz = cz + P_AB(i,3) * overlap_gaussian_x(AB_center(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+
+  int_gauss_r0 = fact_AB * cx * cy * cz
+
+  return
+end function int_gauss_r0 
+!_____________________________________________________________________________________________________________
+!_____________________________________________________________________________________________________________
+
+
+
+!_____________________________________________________________________________________________________________
+!
+!                             < XA | r_C^2 exp[-gama r_C^2] | XB >
+!
+double precision function int_gauss_r2(A_center, B_center, C_center, power_A, power_B, alpha, beta, gama)
+
+  ! for max_dim
+  include 'constants.include.F'
+
+  implicit none
+
+  integer,          intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: A_center(3), B_center(3), C_center(3)
+  double precision, intent(in) :: alpha, beta, gama 
+
+  integer                      :: i, power_C, dim1
+  integer                      :: iorder(3)
+  double precision             :: AB_expo, fact_AB, AB_center(3), P_AB(0:max_dim,3)
+  double precision             :: cx0, cy0, cz0, cx, cy, cz
+  double precision             :: int_tmp
+
+  double precision             :: overlap_gaussian_x
+
+  dim1 = 100
+
+  ! P_AB(0:max_dim,3) polynomial centered on AB_center
+  ! AB_center(3)      new center
+  ! AB_expo           new exponent
+  ! fact_AB           constant factor
+  ! iorder(3)         i_order(i) = order of the polynomials
+  call give_explicit_poly_and_gaussian( P_AB, AB_center, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_B, A_center, B_center, dim1)
+
+  ! <<<
+  ! to avoid multi-evaluation
+  power_C = 0
+
+  cx0 = 0.d0
+  do i = 0, iorder(1)
+    cx0 = cx0 + P_AB(i,1) * overlap_gaussian_x(AB_center(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  cy0 = 0.d0
+  do i = 0, iorder(2)
+    cy0 = cy0 + P_AB(i,2) * overlap_gaussian_x(AB_center(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  cz0 = 0.d0
+  do i = 0, iorder(3)
+    cz0 = cz0 + P_AB(i,3) * overlap_gaussian_x(AB_center(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+  ! >>>
+
+  int_tmp = 0.d0
+
+  power_C = 2
+
+  ! ( x - XC)^2
+  cx = 0.d0
+  do i = 0, iorder(1)
+    cx = cx + P_AB(i,1) * overlap_gaussian_x(AB_center(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  int_tmp += cx * cy0 * cz0
+
+  ! ( y - YC)^2
+  cy = 0.d0
+  do i = 0, iorder(2)
+    cy = cy + P_AB(i,2) * overlap_gaussian_x(AB_center(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  int_tmp += cx0 * cy * cz0
+
+  ! ( z - ZC)^2
+  cz = 0.d0
+  do i = 0, iorder(3)
+    cz = cz + P_AB(i,3) * overlap_gaussian_x(AB_center(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+  int_tmp += cx0 * cy0 * cz
+
+  int_gauss_r2 = fact_AB * int_tmp
+
+  return
+end function int_gauss_r2
+!_____________________________________________________________________________________________________________
+!_____________________________________________________________________________________________________________
+
+
diff --git a/src/ao_tc_eff_map/one_e_1bgauss_nonherm.irp.f b/src/ao_tc_eff_map/one_e_1bgauss_nonherm.irp.f
new file mode 100644
index 00000000..bd881d32
--- /dev/null
+++ b/src/ao_tc_eff_map/one_e_1bgauss_nonherm.irp.f
@@ -0,0 +1,371 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, j1b_gauss_nonherm, (ao_num,ao_num)]
+
+  BEGIN_DOC
+  !
+  !  j1b_gauss_nonherm(i,j) = \langle \chi_j | - grad \tau_{1b} \cdot grad | \chi_i \rangle  
+  !
+  END_DOC
+
+  implicit none
+
+  integer          :: num_A, num_B
+  integer          :: power_A(3), power_B(3)
+  integer          :: i, j, k, l, m
+  double precision :: alpha, beta, gama, coef
+  double precision :: A_center(3), B_center(3), C_center(3)
+  double precision :: c1, c
+
+  integer          :: dim1
+  double precision :: overlap_y, d_a_2, overlap_z, overlap
+
+  double precision :: int_gauss_deriv
+
+  PROVIDE j1b_type j1b_pen j1b_coeff
+
+  ! --------------------------------------------------------------------------------
+  ! -- Dummy call to provide everything
+  dim1        = 100
+  A_center(:) = 0.d0
+  B_center(:) = 1.d0
+  alpha       = 1.d0
+  beta        = 0.1d0
+  power_A(:)  = 1
+  power_B(:)  = 0
+  call overlap_gaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                           , overlap_y, d_a_2, overlap_z, overlap, dim1 )
+  ! --------------------------------------------------------------------------------
+  
+
+  j1b_gauss_nonherm(1:ao_num,1:ao_num) = 0.d0
+
+   if(j1b_type .eq. 1) then
+  ! \tau_1b = \sum_iA -[1 - exp(-alpha_A r_iA^2)] 
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k, l, m, alpha, beta, gama,               &
+ !$OMP          A_center, B_center, C_center, power_A, power_B, &
+ !$OMP          num_A, num_B, c1, c)                            &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_nonherm)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k = 1, nucl_num
+              gama          = j1b_pen(k)
+              C_center(1:3) = nucl_coord(k,1:3)
+  
+              !  \langle \chi_A | exp[-gama r_C^2] r_C \cdot grad | \chi_B \rangle
+              c1 = int_gauss_deriv( A_center, B_center, C_center        &
+                                  , power_A, power_B, alpha, beta, gama )
+  
+              c = c + 2.d0 * gama * c1 
+            enddo
+  
+            j1b_gauss_nonherm(i,j) =  j1b_gauss_nonherm(i,j) & 
+                    + ao_coef_normalized_ordered_transp(l,j) &
+                    * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  elseif(j1b_type .eq. 2) then
+  ! \tau_1b = \sum_iA [c_A exp(-alpha_A r_iA^2)]
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k, l, m, alpha, beta, gama, coef,         &
+ !$OMP          A_center, B_center, C_center, power_A, power_B, &
+ !$OMP          num_A, num_B, c1, c)                            &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_nonherm,            &
+ !$OMP         j1b_coeff)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k = 1, nucl_num
+              gama          = j1b_pen  (k)
+              coef          = j1b_coeff(k)
+              C_center(1:3) = nucl_coord(k,1:3)
+  
+              !  \langle \chi_A | exp[-gama r_C^2] r_C \cdot grad | \chi_B \rangle
+              c1 = int_gauss_deriv( A_center, B_center, C_center        &
+                                  , power_A, power_B, alpha, beta, gama )
+  
+              c = c + 2.d0 * gama * coef * c1 
+            enddo
+  
+            j1b_gauss_nonherm(i,j) =  j1b_gauss_nonherm(i,j) & 
+                    + ao_coef_normalized_ordered_transp(l,j) &
+                    * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  endif
+
+END_PROVIDER
+
+
+
+
+
+!_____________________________________________________________________________________________________________
+!
+!                            < XA | exp[-gama r_C^2] r_C \cdot grad | XB >
+!
+double precision function int_gauss_deriv(A_center, B_center, C_center, power_A, power_B, alpha, beta, gama)
+
+  ! for max_dim
+  include 'constants.include.F'
+
+  implicit none
+
+  double precision, intent(in) :: A_center(3), B_center(3), C_center(3)
+  integer         , intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: alpha, beta, gama 
+
+  integer                      :: i, power_C, dim1
+  integer                      :: iorder(3), power_D(3)
+  double precision             :: AB_expo
+  double precision             :: fact_AB, center_AB(3), pol_AB(0:max_dim,3)
+  double precision             :: cx, cy, cz
+
+  double precision             :: overlap_gaussian_x
+
+  dim1 = 100
+
+  int_gauss_deriv = 0.d0
+
+  ! ===============
+  ! term I:
+  !     \partial_x
+  ! ===============
+
+  if( power_B(1) .ge. 1 ) then
+
+    power_D(1) = power_B(1) - 1
+    power_D(2) = power_B(2)
+    power_D(3) = power_B(3)
+
+    call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                        , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+    power_C = 1
+    cx = 0.d0
+    do i = 0, iorder(1)
+      cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 0
+    cy = 0.d0
+    do i = 0, iorder(2)
+      cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 0
+    cz = 0.d0
+    do i = 0, iorder(3)
+      cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+    enddo
+
+    int_gauss_deriv = int_gauss_deriv + fact_AB * dble(power_B(1)) * cx * cy * cz
+  endif
+
+  ! ===============
+
+  power_D(1) = power_B(1) + 1
+  power_D(2) = power_B(2)
+  power_D(3) = power_B(3)
+
+  call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+  power_C = 1
+  cx = 0.d0
+  do i = 0, iorder(1)
+    cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 0
+  cy = 0.d0
+  do i = 0, iorder(2)
+    cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 0
+  cz = 0.d0
+  do i = 0, iorder(3)
+    cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+
+  int_gauss_deriv = int_gauss_deriv - 2.d0 * beta * fact_AB * cx * cy * cz
+
+  ! ===============
+  ! ===============
+
+
+  ! ===============
+  ! term II:
+  !     \partial_y
+  ! ===============
+
+  if( power_B(2) .ge. 1 ) then
+
+    power_D(1) = power_B(1) 
+    power_D(2) = power_B(2) - 1
+    power_D(3) = power_B(3)
+
+    call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                        , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+    power_C = 0
+    cx = 0.d0
+    do i = 0, iorder(1)
+      cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 1
+    cy = 0.d0
+    do i = 0, iorder(2)
+      cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 0
+    cz = 0.d0
+    do i = 0, iorder(3)
+      cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+    enddo
+
+    int_gauss_deriv = int_gauss_deriv + fact_AB * dble(power_B(2)) * cx * cy * cz
+  endif
+
+  ! ===============
+
+  power_D(1) = power_B(1) 
+  power_D(2) = power_B(2) + 1
+  power_D(3) = power_B(3)
+
+  call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+  power_C = 0
+  cx = 0.d0
+  do i = 0, iorder(1)
+    cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 1
+  cy = 0.d0
+  do i = 0, iorder(2)
+    cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 0
+  cz = 0.d0
+  do i = 0, iorder(3)
+    cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+
+  int_gauss_deriv = int_gauss_deriv - 2.d0 * beta * fact_AB * cx * cy * cz
+
+  ! ===============
+  ! ===============
+
+  ! ===============
+  ! term III:
+  !     \partial_z
+  ! ===============
+
+  if( power_B(3) .ge. 1 ) then
+
+    power_D(1) = power_B(1) 
+    power_D(2) = power_B(2) 
+    power_D(3) = power_B(3) - 1
+
+    call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                        , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+    power_C = 0
+    cx = 0.d0
+    do i = 0, iorder(1)
+      cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 0
+    cy = 0.d0
+    do i = 0, iorder(2)
+      cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 1
+    cz = 0.d0
+    do i = 0, iorder(3)
+      cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+    enddo
+
+    int_gauss_deriv = int_gauss_deriv + fact_AB * dble(power_B(3)) * cx * cy * cz
+  endif
+
+  ! ===============
+
+  power_D(1) = power_B(1) 
+  power_D(2) = power_B(2)
+  power_D(3) = power_B(3) + 1
+
+  call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+  power_C = 0
+  cx = 0.d0
+  do i = 0, iorder(1)
+    cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 0
+  cy = 0.d0
+  do i = 0, iorder(2)
+    cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 1
+  cz = 0.d0
+  do i = 0, iorder(3)
+    cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+
+  int_gauss_deriv = int_gauss_deriv - 2.d0 * beta * fact_AB * cx * cy * cz
+
+  ! ===============
+  ! ===============
+
+  return
+end function int_gauss_deriv
+!_____________________________________________________________________________________________________________
+!_____________________________________________________________________________________________________________
+
+
diff --git a/src/ao_tc_eff_map/potential.irp.f b/src/ao_tc_eff_map/potential.irp.f
new file mode 100644
index 00000000..5b72b567
--- /dev/null
+++ b/src/ao_tc_eff_map/potential.irp.f
@@ -0,0 +1,335 @@
+! ---
+
+BEGIN_PROVIDER [integer, n_gauss_eff_pot]
+
+  BEGIN_DOC
+  ! number of gaussians to represent the effective potential :
+  !
+  ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
+  !
+  ! Here (1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+
+  n_gauss_eff_pot = ng_fit_jast + 1
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [integer, n_gauss_eff_pot_deriv]
+
+  BEGIN_DOC
+  ! V(r12) = -(1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+  n_gauss_eff_pot_deriv = ng_fit_jast
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_eff_pot, (n_gauss_eff_pot)]
+&BEGIN_PROVIDER [double precision, coef_gauss_eff_pot, (n_gauss_eff_pot)]
+
+  BEGIN_DOC
+  ! Coefficients and exponents of the Fit on Gaussians of V(X) = -(1 - erf(mu*X))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*X)^2)
+  !
+  ! V(X) = \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * X^2)
+  !
+  ! Relies on the fit proposed in Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+  integer :: i
+ 
+  ! fit of the -0.25 * (1 - erf(mu*x))^2 with n_max_fit_slat gaussians 
+  do i = 1, ng_fit_jast
+   expo_gauss_eff_pot(i) = expo_gauss_1_erf_x_2(i) 
+   coef_gauss_eff_pot(i) = -0.25d0 * coef_gauss_1_erf_x_2(i) ! -1/4 * (1 - erf(mu*x))^2
+  enddo
+
+  ! Analytical Gaussian part of the potential: + 1/(\sqrt(pi)mu) * exp(-(mu*x)^2) 
+  expo_gauss_eff_pot(ng_fit_jast+1) = mu_erf * mu_erf
+  coef_gauss_eff_pot(ng_fit_jast+1) =  1.d0 * mu_erf * inv_sq_pi
+
+END_PROVIDER 
+
+! ---
+
+double precision function eff_pot_gauss(x, mu)
+
+  BEGIN_DOC
+  ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
+  END_DOC
+
+  implicit none
+  double precision, intent(in) :: x, mu
+
+  eff_pot_gauss =  mu/dsqrt(dacos(-1.d0)) * dexp(-mu*mu*x*x) - 0.25d0 * (1.d0 - derf(mu*x))**2.d0
+
+end
+
+! -------------------------------------------------------------------------------------------------
+! ---
+
+double precision function eff_pot_fit_gauss(x)
+ implicit none
+ BEGIN_DOC
+ ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2) 
+ ! 
+ ! but fitted with gaussians 
+ END_DOC
+ double precision, intent(in) :: x
+ integer :: i
+ double precision :: alpha
+ eff_pot_fit_gauss = derf(mu_erf*x)/x
+ do i = 1, n_gauss_eff_pot
+  alpha = expo_gauss_eff_pot(i)
+  eff_pot_fit_gauss += coef_gauss_eff_pot(i) * dexp(-alpha*x*x)
+ enddo
+end
+
+BEGIN_PROVIDER [integer, n_fit_1_erf_x]
+ implicit none
+ BEGIN_DOC
+! 
+ END_DOC
+ n_fit_1_erf_x = 2
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [double precision, expos_slat_gauss_1_erf_x, (n_fit_1_erf_x)]
+ implicit none
+ BEGIN_DOC
+! 1 - erf(mu*x) is fitted with a Slater and gaussian as in Eq.A15 of  JCP 154, 084119 (2021)
+!
+! 1 - erf(mu*x) = e^{-expos_slat_gauss_1_erf_x(1) * mu *x} * e^{-expos_slat_gauss_1_erf_x(2) * mu^2 * x^2}
+ END_DOC
+ expos_slat_gauss_1_erf_x(1) = 1.09529d0
+ expos_slat_gauss_1_erf_x(2) = 0.756023d0
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_1_erf_x, (n_max_fit_slat)]
+&BEGIN_PROVIDER [double precision, coef_gauss_1_erf_x, (n_max_fit_slat)]
+
+  BEGIN_DOC
+  !
+  ! (1 - erf(mu*x)) = \sum_i coef_gauss_1_erf_x(i) * exp(-expo_gauss_1_erf_x(i) * x^2)
+  !
+  ! This is based on a fit of (1 - erf(mu*x)) by exp(-alpha * x) exp(-beta*mu^2x^2) 
+  !
+  ! and the slater function exp(-alpha * x) is fitted with n_max_fit_slat gaussians 
+  !
+  ! See Appendix 2 of JCP 154, 084119 (2021)
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: expos(n_max_fit_slat), alpha, beta
+
+  alpha = expos_slat_gauss_1_erf_x(1) * mu_erf
+  call expo_fit_slater_gam(alpha, expos)
+  beta = expos_slat_gauss_1_erf_x(2) * mu_erf * mu_erf
+ 
+  do i = 1, n_max_fit_slat
+    expo_gauss_1_erf_x(i) = expos(i) + beta
+    coef_gauss_1_erf_x(i) = coef_fit_slat_gauss(i)
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+double precision function fit_1_erf_x(x)
+
+  BEGIN_DOC
+  ! fit_1_erf_x(x) = \sum_i c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x))
+  END_DOC
+
+  implicit none
+  integer :: i
+  double precision, intent(in) :: x
+
+  fit_1_erf_x = 0.d0
+  do i = 1, n_max_fit_slat
+    fit_1_erf_x += dexp(-expo_gauss_1_erf_x(i) *x*x) * coef_gauss_1_erf_x(i)
+  enddo
+
+end
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_1_erf_x_2, (ng_fit_jast)]
+&BEGIN_PROVIDER [double precision, coef_gauss_1_erf_x_2, (ng_fit_jast)]
+
+  BEGIN_DOC
+  ! (1 - erf(mu*x))^2 = \sum_i coef_gauss_1_erf_x_2(i) * exp(-expo_gauss_1_erf_x_2(i) * x^2)
+  !
+  ! This is based on a fit of (1 - erf(mu*x)) by exp(-alpha * x) exp(-beta*mu^2x^2)
+  !
+  ! and the slater function exp(-alpha * x) is fitted with n_max_fit_slat gaussians 
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: expos(ng_fit_jast), alpha, beta, tmp
+
+  if(ng_fit_jast .eq. 1) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.85345277d0 /)
+    expo_gauss_1_erf_x_2 = (/ 6.23519457d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 2) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.31030624d0 , 0.64364964d0 /)
+    expo_gauss_1_erf_x_2 = (/ 55.39184787d0, 3.92151407d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 3) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.33206082d0 , 0.52347449d0, 0.12605012d0   /)
+    expo_gauss_1_erf_x_2 = (/ 19.90272209d0, 3.2671671d0 , 336.47320445d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 5) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.02956716d0, 0.17025555d0, 0.32774114d0, 0.39034764d0, 0.07822781d0 /)
+    expo_gauss_1_erf_x_2 = (/ 6467.28126d0, 46.9071990d0, 9.09617721d0, 2.76883328d0, 360.367093d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 6) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.18331042d0  , 0.10971118d0  , 0.29949169d0  , 0.34853132d0  , 0.0394275d0   , 0.01874444d0   /)
+    expo_gauss_1_erf_x_2 = (/ 2.54293498d+01, 1.40317872d+02, 7.14630801d+00, 2.65517675d+00, 1.45142619d+03, 1.00000000d+04 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.0213619d0   , 0.03221511d0  , 0.29966689d0  , 0.19178934d0  , 0.06154732d0  , 0.28214555d0  , 0.11125985d0   /)
+    expo_gauss_1_erf_x_2 = (/ 1.34727067d+04, 1.27166613d+03, 5.52584567d+00, 1.67753218d+01, 2.46145691d+02, 2.47971820d+00, 5.95141293d+01 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.28189124d0  , 0.19518669d0  , 0.12161735d0  , 0.24257438d0  , 0.07309656d0  , 0.042435d0    , 0.01926109d0  , 0.02393415d0   /)
+    expo_gauss_1_erf_x_2 = (/ 4.69795903d+00, 1.21379451d+01, 3.55527053d+01, 2.39227172d+00, 1.14827721d+02, 4.16320213d+02, 1.52813587d+04, 1.78516557d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_1_erf_x_2 = (/  /)
+  !  expo_gauss_1_erf_x_2 = (/  /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+  !  enddo
+
+  elseif(ng_fit_jast .eq. 20) then
+
+    ASSERT(n_max_fit_slat == 20)
+
+    alpha = 2.d0 * expos_slat_gauss_1_erf_x(1) * mu_erf
+    call expo_fit_slater_gam(alpha, expos)
+    beta = 2.d0 * expos_slat_gauss_1_erf_x(2) * mu_erf * mu_erf
+    do i = 1, n_max_fit_slat
+      expo_gauss_1_erf_x_2(i) = expos(i) + beta
+      coef_gauss_1_erf_x_2(i) = coef_fit_slat_gauss(i)
+    enddo
+
+  else
+
+    print *, ' not implemented yet'
+    stop
+
+  endif
+
+END_PROVIDER 
+
+! ---
+
+double precision function fit_1_erf_x_2(x)
+ implicit none
+ double precision, intent(in) :: x
+ BEGIN_DOC
+! fit_1_erf_x_2(x) = \sum_i c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x))^2
+ END_DOC
+ integer :: i
+ fit_1_erf_x_2 = 0.d0
+ do i = 1, n_max_fit_slat
+  fit_1_erf_x_2 += dexp(-expo_gauss_1_erf_x_2(i) *x*x) * coef_gauss_1_erf_x_2(i)
+ enddo
+
+end
+
+subroutine inv_r_times_poly(r, dist_r, dist_vec, poly)
+ implicit none
+ BEGIN_DOC
+! returns 
+!
+! poly(1) = x / sqrt(x^2+y^2+z^2), poly(2) = y / sqrt(x^2+y^2+z^2), poly(3) = z / sqrt(x^2+y^2+z^2)
+!
+! with the arguments  
+!
+! r(1)  = x, r(2) = y, r(3) = z, dist_r = sqrt(x^2+y^2+z^2)
+!
+! dist_vec(1) = sqrt(y^2+z^2), dist_vec(2) = sqrt(x^2+z^2), dist_vec(3) = sqrt(x^2+y^2)
+ END_DOC
+ double precision, intent(in) :: r(3), dist_r, dist_vec(3)
+ double precision, intent(out):: poly(3)
+ double precision :: inv_dist
+ integer :: i
+ if (dist_r.gt. 1.d-8)then
+  inv_dist = 1.d0/dist_r
+  do i = 1, 3
+   poly(i) = r(i) * inv_dist 
+  enddo
+ else
+  do i = 1, 3
+   if(dabs(r(i)).lt.dist_vec(i))then
+    inv_dist = 1.d0/dist_r
+    poly(i) = r(i) * inv_dist 
+   else !if(dabs(r(i)))then
+    poly(i) = 1.d0 
+!    poly(i) = 0.d0 
+   endif
+  enddo
+ endif
+end                      
diff --git a/src/ao_tc_eff_map/providers_ao_eff_pot.irp.f b/src/ao_tc_eff_map/providers_ao_eff_pot.irp.f
new file mode 100644
index 00000000..055bf323
--- /dev/null
+++ b/src/ao_tc_eff_map/providers_ao_eff_pot.irp.f
@@ -0,0 +1,86 @@
+
+BEGIN_PROVIDER [ logical, ao_tc_sym_two_e_pot_in_map ]
+  implicit none
+  use f77_zmq
+  use map_module
+  BEGIN_DOC
+  !  Map of Atomic integrals
+  !     i(r1) j(r2) 1/r12 k(r1) l(r2)
+  END_DOC
+
+  integer                        :: i,j,k,l
+  double precision               :: ao_tc_sym_two_e_pot,cpu_1,cpu_2, wall_1, wall_2
+  double precision               :: integral, wall_0
+  include 'utils/constants.include.F'
+
+  ! For integrals file
+  integer(key_kind),allocatable  :: buffer_i(:)
+  integer,parameter              :: size_buffer = 1024*64
+  real(integral_kind),allocatable :: buffer_value(:)
+
+  integer                        :: n_integrals, rc
+  integer                        :: kk, m, j1, i1, lmax
+  character*(64)                 :: fmt
+
+  !double precision               :: j1b_gauss_coul_debug
+  !integral = j1b_gauss_coul_debug(1,1,1,1)
+
+  integral = ao_tc_sym_two_e_pot(1,1,1,1)
+
+  double precision               :: map_mb
+
+  print*, 'Providing the ao_tc_sym_two_e_pot_map integrals'
+  call wall_time(wall_0)
+  call wall_time(wall_1)
+  call cpu_time(cpu_1)
+
+  integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
+  call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'ao_tc_sym_two_e_pot')
+
+  character(len=:), allocatable :: task
+  allocate(character(len=ao_num*12) :: task)
+  write(fmt,*) '(', ao_num, '(I5,X,I5,''|''))'
+  do l=1,ao_num
+    write(task,fmt) (i,l, i=1,l)
+    integer, external :: add_task_to_taskserver
+    if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) == -1) then
+      stop 'Unable to add task to server'
+    endif
+  enddo
+  deallocate(task)
+
+  integer, external :: zmq_set_running
+  if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+    print *,  irp_here, ': Failed in zmq_set_running'
+  endif
+
+  PROVIDE nproc
+  !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+1)
+      i = omp_get_thread_num()
+      if (i==0) then
+        call ao_tc_sym_two_e_pot_in_map_collector(zmq_socket_pull)
+      else
+        call ao_tc_sym_two_e_pot_in_map_slave_inproc(i)
+      endif
+  !$OMP END PARALLEL
+
+  call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'ao_tc_sym_two_e_pot')
+
+
+  print*, 'Sorting the map'
+  call map_sort(ao_tc_sym_two_e_pot_map)
+  call cpu_time(cpu_2)
+  call wall_time(wall_2)
+  integer(map_size_kind)         :: get_ao_tc_sym_two_e_pot_map_size, ao_eff_pot_map_size
+  ao_eff_pot_map_size = get_ao_tc_sym_two_e_pot_map_size()
+
+  print*, 'AO eff_pot integrals provided:'
+  print*, ' Size of AO eff_pot map :         ', map_mb(ao_tc_sym_two_e_pot_map) ,'MB'
+  print*, ' Number of AO eff_pot integrals :', ao_eff_pot_map_size
+  print*, ' cpu  time :',cpu_2 - cpu_1, 's'
+  print*, ' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )'
+
+  ao_tc_sym_two_e_pot_in_map = .True.
+
+
+END_PROVIDER
diff --git a/src/ao_tc_eff_map/two_e_1bgauss_j1.irp.f b/src/ao_tc_eff_map/two_e_1bgauss_j1.irp.f
new file mode 100644
index 00000000..c36ee9b4
--- /dev/null
+++ b/src/ao_tc_eff_map/two_e_1bgauss_j1.irp.f
@@ -0,0 +1,728 @@
+! ---
+
+double precision function j1b_gauss_2e_j1(i, j, k, l)
+
+  BEGIN_DOC
+  ! 
+  !  integral in the AO basis:
+  !     i(r1) j(r1) f(r12) k(r2) l(r2)
+  !
+  !  with:
+  !     f(r12) = - [ (0.5 - 0.5 erf(mu r12)) / r12 ] (r1-r2) \cdot \sum_A (-2 a_A) [ r1A exp(-aA r1A^2) - r2A exp(-aA r2A^2) ]
+  !            = [ (1 - erf(mu r12) / r12 ] \sum_A a_A [ (r1-RA)^2 exp(-aA r1A^2)
+  !                                                    + (r2-RA)^2 exp(-aA r2A^2) 
+  !                                                    - (r1-RA) \cdot (r2-RA) exp(-aA r1A^2)
+  !                                                    - (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer, intent(in) :: i, j, k, l
+
+  integer             :: p, q, r, s
+  integer             :: num_i, num_j, num_k, num_l, num_ii 
+  integer             :: I_power(3), J_power(3), K_power(3), L_power(3)
+  integer             :: iorder_p(3), iorder_q(3)
+  integer             :: shift_P(3), shift_Q(3)
+  integer             :: dim1
+
+  double precision    :: coef1, coef2, coef3, coef4
+  double precision    :: expo1, expo2, expo3, expo4
+  double precision    :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision    :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision    :: I_center(3), J_center(3), K_center(3), L_center(3)
+  double precision    :: ff, gg, cx, cy, cz
+
+  double precision    :: j1b_gauss_2e_j1_schwartz
+  
+  if( ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then
+    j1b_gauss_2e_j1 = j1b_gauss_2e_j1_schwartz(i, j, k, l)
+    return
+  endif
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+
+  do p = 1, 3
+    I_power(p)  = ao_power(i,p)
+    J_power(p)  = ao_power(j,p)
+    K_power(p)  = ao_power(k,p)
+    L_power(p)  = ao_power(l,p)
+    I_center(p) = nucl_coord(num_i,p)
+    J_center(p) = nucl_coord(num_j,p)
+    K_center(p) = nucl_coord(num_k,p)
+    L_center(p) = nucl_coord(num_l,p)
+  enddo
+
+  j1b_gauss_2e_j1 = 0.d0
+
+  do p = 1, ao_prim_num(i)
+    coef1 = ao_coef_normalized_ordered_transp(p, i)
+    expo1 = ao_expo_ordered_transp(p, i)
+
+    do q = 1, ao_prim_num(j)
+      coef2 = coef1 * ao_coef_normalized_ordered_transp(q, j)
+      expo2 = ao_expo_ordered_transp(q, j)
+
+      call give_explicit_poly_and_gaussian( P1_new, P1_center, pp1, fact_p1, iorder_p, expo1, expo2 &
+                                          , I_power, J_power, I_center, J_center, dim1 )
+      p1_inv = 1.d0 / pp1
+
+      do r = 1, ao_prim_num(k)
+        coef3 = coef2 * ao_coef_normalized_ordered_transp(r, k)
+        expo3 = ao_expo_ordered_transp(r, k)
+
+        do s = 1, ao_prim_num(l)
+          coef4 = coef3 * ao_coef_normalized_ordered_transp(s, l)
+          expo4 = ao_expo_ordered_transp(s, l)
+ 
+          call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                              , K_power, L_power, K_center, L_center, dim1 )
+          q1_inv = 1.d0 / qq1
+
+          call get_cxcycz_j1( dim1, cx, cy, cz                                  &
+                            , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                            , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+          j1b_gauss_2e_j1 = j1b_gauss_2e_j1 + coef4 * ( cx + cy + cz )
+        enddo ! s
+      enddo  ! r
+    enddo   ! q
+  enddo    ! p
+
+  return
+end function j1b_gauss_2e_j1
+
+! ---
+
+double precision function j1b_gauss_2e_j1_schwartz(i, j, k, l)
+
+  BEGIN_DOC
+  ! 
+  !  integral in the AO basis:
+  !     i(r1) j(r1) f(r12) k(r2) l(r2)
+  !
+  !  with:
+  !     f(r12) = - [ (0.5 - 0.5 erf(mu r12)) / r12 ] (r1-r2) \cdot \sum_A (-2 a_A) [ r1A exp(-aA r1A^2) - r2A exp(-aA r2A^2) ]
+  !            = [ (1 - erf(mu r12) / r12 ] \sum_A a_A [ (r1-RA)^2 exp(-aA r1A^2)
+  !                                                    + (r2-RA)^2 exp(-aA r2A^2) 
+  !                                                    - (r1-RA) \cdot (r2-RA) exp(-aA r1A^2)
+  !                                                    - (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer, intent(in)           :: i, j, k, l
+
+  integer                       :: p, q, r, s
+  integer                       :: num_i, num_j, num_k, num_l, num_ii 
+  integer                       :: I_power(3), J_power(3), K_power(3), L_power(3)
+  integer                       :: iorder_p(3), iorder_q(3)
+  integer                       :: dim1
+
+  double precision              :: coef1, coef2, coef3, coef4
+  double precision              :: expo1, expo2, expo3, expo4
+  double precision              :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision              :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision              :: I_center(3), J_center(3), K_center(3), L_center(3)
+  double precision              :: cx, cy, cz
+  double precision              :: schwartz_ij, thr
+  double precision, allocatable :: schwartz_kl(:,:) 
+
+  PROVIDE j1b_pen
+
+  dim1 = n_pt_max_integrals
+  thr  = ao_integrals_threshold * ao_integrals_threshold
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+
+  do p = 1, 3
+    I_power(p)  = ao_power(i,p)
+    J_power(p)  = ao_power(j,p)
+    K_power(p)  = ao_power(k,p)
+    L_power(p)  = ao_power(l,p)
+    I_center(p) = nucl_coord(num_i,p)
+    J_center(p) = nucl_coord(num_j,p)
+    K_center(p) = nucl_coord(num_k,p)
+    L_center(p) = nucl_coord(num_l,p)
+  enddo
+
+
+  allocate( schwartz_kl(0:ao_prim_num(l) , 0:ao_prim_num(k)) )
+
+  schwartz_kl(0,0) = 0.d0
+  do r = 1, ao_prim_num(k)
+    expo3 = ao_expo_ordered_transp(r,k)
+    coef3 = ao_coef_normalized_ordered_transp(r,k) * ao_coef_normalized_ordered_transp(r,k)
+
+    schwartz_kl(0,r) = 0.d0
+    do s = 1, ao_prim_num(l)
+      expo4 = ao_expo_ordered_transp(s,l)
+      coef4 = coef3 * ao_coef_normalized_ordered_transp(s,l) * ao_coef_normalized_ordered_transp(s,l)
+
+      call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                          , K_power, L_power, K_center, L_center, dim1 )
+      q1_inv = 1.d0 / qq1
+
+      call get_cxcycz_j1( dim1, cx, cy, cz                                  &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+      schwartz_kl(s,r) = coef4 * dabs( cx + cy + cz )
+      schwartz_kl(0,r) = max( schwartz_kl(0,r) , schwartz_kl(s,r) )
+    enddo
+
+    schwartz_kl(0,0) = max( schwartz_kl(0,r) , schwartz_kl(0,0) )
+  enddo
+
+
+  j1b_gauss_2e_j1_schwartz = 0.d0
+
+  do p = 1, ao_prim_num(i)
+    expo1 = ao_expo_ordered_transp(p, i)
+    coef1 = ao_coef_normalized_ordered_transp(p, i)
+
+    do q = 1, ao_prim_num(j)
+      expo2 = ao_expo_ordered_transp(q, j)
+      coef2 = coef1 * ao_coef_normalized_ordered_transp(q, j)
+
+      call give_explicit_poly_and_gaussian( P1_new, P1_center, pp1, fact_p1, iorder_p, expo1, expo2 &
+                                          , I_power, J_power, I_center, J_center, dim1 )
+      p1_inv = 1.d0 / pp1
+
+      call get_cxcycz_j1( dim1, cx, cy, cz                                  &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p )
+
+      schwartz_ij = coef2 * coef2 * dabs( cx + cy + cz )
+      if( schwartz_kl(0,0) * schwartz_ij < thr ) cycle
+
+      do r = 1, ao_prim_num(k)
+        if( schwartz_kl(0,r) * schwartz_ij < thr ) cycle
+        coef3 = coef2 * ao_coef_normalized_ordered_transp(r, k)
+        expo3 = ao_expo_ordered_transp(r, k)
+
+        do s = 1, ao_prim_num(l)
+          if( schwartz_kl(s,r) * schwartz_ij < thr ) cycle
+          coef4 = coef3 * ao_coef_normalized_ordered_transp(s, l)
+          expo4 = ao_expo_ordered_transp(s, l)
+ 
+          call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                              , K_power, L_power, K_center, L_center, dim1 )
+          q1_inv = 1.d0 / qq1
+
+          call get_cxcycz_j1( dim1, cx, cy, cz                                  &
+                            , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                            , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+          j1b_gauss_2e_j1_schwartz = j1b_gauss_2e_j1_schwartz + coef4 * ( cx + cy + cz )
+        enddo ! s
+      enddo  ! r
+    enddo   ! q
+  enddo    ! p
+
+  deallocate( schwartz_kl )
+
+  return
+end function j1b_gauss_2e_j1_schwartz
+
+! ---
+
+subroutine get_cxcycz_j1( dim1, cx, cy, cz                                  &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: dim1
+  integer,          intent(in)  :: iorder_p(3), iorder_q(3)
+  double precision, intent(in)  :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision, intent(in)  :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision, intent(out) :: cx, cy, cz
+
+  integer                       :: ii
+  integer                       :: shift_P(3), shift_Q(3)
+  double precision              :: expoii, factii, Centerii(3)
+  double precision              :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv
+  double precision              :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv
+  double precision              :: ff, gg
+
+  double precision              :: general_primitive_integral_erf_shifted
+  double precision              :: general_primitive_integral_coul_shifted
+
+  PROVIDE j1b_pen
+
+  cx = 0.d0
+  cy = 0.d0
+  cz = 0.d0
+  do ii = 1, nucl_num
+
+    expoii        = j1b_pen(ii)
+    Centerii(1:3) = nucl_coord(ii, 1:3)
+
+    call gaussian_product(pp1, P1_center, expoii, Centerii, factii, pp2, P2_center)
+    fact_p2 = fact_p1 * factii
+    p2_inv  = 1.d0 / pp2
+    call pol_modif_center( P1_center, P2_center, iorder_p, P1_new, P2_new )
+
+    call gaussian_product(qq1, Q1_center, expoii, Centerii, factii, qq2, Q2_center)
+    fact_q2 = fact_q1 * factii
+    q2_inv  = 1.d0 / qq2
+    call pol_modif_center( Q1_center, Q2_center, iorder_q, Q1_new, Q2_new )
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !                     [ (1-erf(mu r12)) / r12 ] \sum_A a_A [ (r1-RA)^2 exp(-aA r1A^2)
+    ! ----------------------------------------------------------------------------------------------------
+
+    shift_Q = (/ 0, 0, 0 /)
+
+    ! x term:
+    ff = P2_center(1) - Centerii(1) 
+
+    shift_P = (/ 2, 0, 0 /)
+    cx = cx + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 1, 0, 0 /)
+    cx = cx + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cx = cx - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P       &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q       )
+
+    shift_P = (/ 0, 0, 0 /)
+    cx = cx + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cx = cx - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P2_center(2) - Centerii(2) 
+
+    shift_P = (/ 0, 2, 0 /)
+    cy = cy + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 0, 1, 0 /)
+    cy = cy + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cy = cy - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P       &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q       )
+
+    shift_P = (/ 0, 0, 0 /)
+    cy = cy + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cy = cy - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P2_center(3) - Centerii(3) 
+
+    shift_P = (/ 0, 0, 2 /)
+    cz = cz + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 0, 0, 1 /)
+    cz = cz + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cz = cz - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P       &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q       )
+
+    shift_P = (/ 0, 0, 0 /)
+    cz = cz + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cz = cz - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !                     [ (1-erf(mu r12)) / r12 ] \sum_A a_A [ (r2-RA)^2 exp(-aA r2A^2)
+    ! ----------------------------------------------------------------------------------------------------
+
+    shift_P = (/ 0, 0, 0 /)
+
+    ! x term:
+    ff = Q2_center(1) - Centerii(1) 
+
+    shift_Q = (/ 2, 0, 0 /)
+    cx = cx + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cx = cx - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cx = cx - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = Q2_center(2) - Centerii(2) 
+
+    shift_Q = (/ 0, 2, 0 /)
+    cy = cy + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cy = cy - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cy = cy - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = Q2_center(3) - Centerii(3) 
+
+    shift_Q = (/ 0, 0, 2 /)
+    cz = cz + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cz = cz - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cz = cz - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !              - [ (1-erf(mu r12)) / r12 ] \sum_A a_A [ (r1-RA) \cdot (r2-RA) exp(-aA r1A^2) ]
+    ! ----------------------------------------------------------------------------------------------------
+
+    ! x term:
+    ff = P2_center(1) - Centerii(1) 
+    gg = Q1_center(1) - Centerii(1) 
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cx = cx + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P2_center(2) - Centerii(2) 
+    gg = Q1_center(2) - Centerii(2) 
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cy = cy + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P2_center(3) - Centerii(3) 
+    gg = Q1_center(3) - Centerii(3) 
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cz = cz + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !              - [ (1-erf(mu r12)) / r12 ] \sum_A a_A [ (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+    ! ----------------------------------------------------------------------------------------------------
+
+    ! x term:
+    ff = P1_center(1) - Centerii(1) 
+    gg = Q2_center(1) - Centerii(1) 
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cx = cx + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P1_center(2) - Centerii(2) 
+    gg = Q2_center(2) - Centerii(2) 
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cy = cy + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P1_center(3) - Centerii(3) 
+    gg = Q2_center(3) - Centerii(3) 
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cz = cz + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+  enddo
+
+  return
+end subroutine get_cxcycz_j1
+
+! ---
+
diff --git a/src/ao_tc_eff_map/two_e_1bgauss_j2.irp.f b/src/ao_tc_eff_map/two_e_1bgauss_j2.irp.f
new file mode 100644
index 00000000..a61b5336
--- /dev/null
+++ b/src/ao_tc_eff_map/two_e_1bgauss_j2.irp.f
@@ -0,0 +1,729 @@
+! ---
+
+double precision function j1b_gauss_2e_j2(i, j, k, l)
+
+  BEGIN_DOC
+  ! 
+  !  integral in the AO basis:
+  !     i(r1) j(r1) f(r12) k(r2) l(r2)
+  !
+  !  with:
+  !     f(r12) = - [ (0.5 - 0.5 erf(mu r12)) / r12 ] (r1-r2) \cdot \sum_A (-2 a_A c_A) [ r1A exp(-aA r1A^2) - r2A exp(-aA r2A^2) ]
+  !            = [ (1 - erf(mu r12) / r12 ] \sum_A a_A c_A [ (r1-RA)^2 exp(-aA r1A^2)
+  !                                                        + (r2-RA)^2 exp(-aA r2A^2) 
+  !                                                        - (r1-RA) \cdot (r2-RA) exp(-aA r1A^2)
+  !                                                        - (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer, intent(in) :: i, j, k, l
+
+  integer             :: p, q, r, s
+  integer             :: num_i, num_j, num_k, num_l, num_ii 
+  integer             :: I_power(3), J_power(3), K_power(3), L_power(3)
+  integer             :: iorder_p(3), iorder_q(3)
+  integer             :: shift_P(3), shift_Q(3)
+  integer             :: dim1
+
+  double precision    :: coef1, coef2, coef3, coef4
+  double precision    :: expo1, expo2, expo3, expo4
+  double precision    :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision    :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision    :: I_center(3), J_center(3), K_center(3), L_center(3)
+  double precision    :: ff, gg, cx, cy, cz
+
+  double precision    :: j1b_gauss_2e_j2_schwartz
+  
+  dim1 = n_pt_max_integrals
+
+  if( ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then
+    j1b_gauss_2e_j2 = j1b_gauss_2e_j2_schwartz(i, j, k, l)
+    return
+  endif
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+
+  do p = 1, 3
+    I_power(p)  = ao_power(i,p)
+    J_power(p)  = ao_power(j,p)
+    K_power(p)  = ao_power(k,p)
+    L_power(p)  = ao_power(l,p)
+    I_center(p) = nucl_coord(num_i,p)
+    J_center(p) = nucl_coord(num_j,p)
+    K_center(p) = nucl_coord(num_k,p)
+    L_center(p) = nucl_coord(num_l,p)
+  enddo
+
+  j1b_gauss_2e_j2 = 0.d0
+
+  do p = 1, ao_prim_num(i)
+    coef1 = ao_coef_normalized_ordered_transp(p, i)
+    expo1 = ao_expo_ordered_transp(p, i)
+
+    do q = 1, ao_prim_num(j)
+      coef2 = coef1 * ao_coef_normalized_ordered_transp(q, j)
+      expo2 = ao_expo_ordered_transp(q, j)
+
+      call give_explicit_poly_and_gaussian( P1_new, P1_center, pp1, fact_p1, iorder_p, expo1, expo2 &
+                                          , I_power, J_power, I_center, J_center, dim1 )
+      p1_inv = 1.d0 / pp1
+
+      do r = 1, ao_prim_num(k)
+        coef3 = coef2 * ao_coef_normalized_ordered_transp(r, k)
+        expo3 = ao_expo_ordered_transp(r, k)
+
+        do s = 1, ao_prim_num(l)
+          coef4 = coef3 * ao_coef_normalized_ordered_transp(s, l)
+          expo4 = ao_expo_ordered_transp(s, l)
+ 
+          call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                              , K_power, L_power, K_center, L_center, dim1 )
+          q1_inv = 1.d0 / qq1
+
+          call get_cxcycz_j2( dim1, cx, cy, cz                                  &
+                            , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                            , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+          j1b_gauss_2e_j2 = j1b_gauss_2e_j2 + coef4 * ( cx + cy + cz )
+        enddo ! s
+      enddo  ! r
+    enddo   ! q
+  enddo    ! p
+
+  return
+end function j1b_gauss_2e_j2
+
+! ---
+
+double precision function j1b_gauss_2e_j2_schwartz(i, j, k, l)
+
+  BEGIN_DOC
+  ! 
+  !  integral in the AO basis:
+  !     i(r1) j(r1) f(r12) k(r2) l(r2)
+  !
+  !  with:
+  !     f(r12) = - [ (0.5 - 0.5 erf(mu r12)) / r12 ] (r1-r2) \cdot \sum_A (-2 a_A c_A) [ r1A exp(-aA r1A^2) - r2A exp(-aA r2A^2) ]
+  !            = [ (1 - erf(mu r12) / r12 ] \sum_A a_A c_A [ (r1-RA)^2 exp(-aA r1A^2)
+  !                                                        + (r2-RA)^2 exp(-aA r2A^2) 
+  !                                                        - (r1-RA) \cdot (r2-RA) exp(-aA r1A^2)
+  !                                                        - (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer, intent(in)           :: i, j, k, l
+
+  integer                       :: p, q, r, s
+  integer                       :: num_i, num_j, num_k, num_l, num_ii 
+  integer                       :: I_power(3), J_power(3), K_power(3), L_power(3)
+  integer                       :: iorder_p(3), iorder_q(3)
+  integer                       :: dim1
+
+  double precision              :: coef1, coef2, coef3, coef4
+  double precision              :: expo1, expo2, expo3, expo4
+  double precision              :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision              :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision              :: I_center(3), J_center(3), K_center(3), L_center(3)
+  double precision              :: cx, cy, cz
+  double precision              :: schwartz_ij, thr
+  double precision, allocatable :: schwartz_kl(:,:) 
+
+  dim1 = n_pt_max_integrals
+  thr  = ao_integrals_threshold * ao_integrals_threshold
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+
+  do p = 1, 3
+    I_power(p)  = ao_power(i,p)
+    J_power(p)  = ao_power(j,p)
+    K_power(p)  = ao_power(k,p)
+    L_power(p)  = ao_power(l,p)
+    I_center(p) = nucl_coord(num_i,p)
+    J_center(p) = nucl_coord(num_j,p)
+    K_center(p) = nucl_coord(num_k,p)
+    L_center(p) = nucl_coord(num_l,p)
+  enddo
+
+
+  allocate( schwartz_kl(0:ao_prim_num(l) , 0:ao_prim_num(k)) )
+
+  schwartz_kl(0,0) = 0.d0
+  do r = 1, ao_prim_num(k)
+    expo3 = ao_expo_ordered_transp(r,k)
+    coef3 = ao_coef_normalized_ordered_transp(r,k) * ao_coef_normalized_ordered_transp(r,k)
+
+    schwartz_kl(0,r) = 0.d0
+    do s = 1, ao_prim_num(l)
+      expo4 = ao_expo_ordered_transp(s,l)
+      coef4 = coef3 * ao_coef_normalized_ordered_transp(s,l) * ao_coef_normalized_ordered_transp(s,l)
+
+      call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                          , K_power, L_power, K_center, L_center, dim1 )
+      q1_inv = 1.d0 / qq1
+
+      call get_cxcycz_j2( dim1, cx, cy, cz                                  &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+      schwartz_kl(s,r) = coef4 * dabs( cx + cy + cz )
+      schwartz_kl(0,r) = max( schwartz_kl(0,r) , schwartz_kl(s,r) )
+    enddo
+
+    schwartz_kl(0,0) = max( schwartz_kl(0,r) , schwartz_kl(0,0) )
+  enddo
+
+
+  j1b_gauss_2e_j2_schwartz = 0.d0
+
+  do p = 1, ao_prim_num(i)
+    expo1 = ao_expo_ordered_transp(p, i)
+    coef1 = ao_coef_normalized_ordered_transp(p, i)
+
+    do q = 1, ao_prim_num(j)
+      expo2 = ao_expo_ordered_transp(q, j)
+      coef2 = coef1 * ao_coef_normalized_ordered_transp(q, j)
+
+      call give_explicit_poly_and_gaussian( P1_new, P1_center, pp1, fact_p1, iorder_p, expo1, expo2 &
+                                          , I_power, J_power, I_center, J_center, dim1 )
+      p1_inv = 1.d0 / pp1
+
+      call get_cxcycz_j2( dim1, cx, cy, cz                                  &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p )
+
+      schwartz_ij = coef2 * coef2 * dabs( cx + cy + cz )
+      if( schwartz_kl(0,0) * schwartz_ij < thr ) cycle
+
+      do r = 1, ao_prim_num(k)
+        if( schwartz_kl(0,r) * schwartz_ij < thr ) cycle
+        coef3 = coef2 * ao_coef_normalized_ordered_transp(r, k)
+        expo3 = ao_expo_ordered_transp(r, k)
+
+        do s = 1, ao_prim_num(l)
+          if( schwartz_kl(s,r) * schwartz_ij < thr ) cycle
+          coef4 = coef3 * ao_coef_normalized_ordered_transp(s, l)
+          expo4 = ao_expo_ordered_transp(s, l)
+ 
+          call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                              , K_power, L_power, K_center, L_center, dim1 )
+          q1_inv = 1.d0 / qq1
+
+          call get_cxcycz_j2( dim1, cx, cy, cz                                  &
+                            , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                            , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+          j1b_gauss_2e_j2_schwartz = j1b_gauss_2e_j2_schwartz + coef4 * ( cx + cy + cz )
+        enddo ! s
+      enddo  ! r
+    enddo   ! q
+  enddo    ! p
+
+  deallocate( schwartz_kl )
+
+  return
+end function j1b_gauss_2e_j2_schwartz
+
+! ---
+
+subroutine get_cxcycz_j2( dim1, cx, cy, cz                                  &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: dim1
+  integer,          intent(in)  :: iorder_p(3), iorder_q(3)
+  double precision, intent(in)  :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision, intent(in)  :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision, intent(out) :: cx, cy, cz
+
+  integer                       :: ii
+  integer                       :: shift_P(3), shift_Q(3)
+  double precision              :: coefii, expoii, factii, Centerii(3)
+  double precision              :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv
+  double precision              :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv
+  double precision              :: ff, gg
+
+  double precision              :: general_primitive_integral_erf_shifted
+  double precision              :: general_primitive_integral_coul_shifted
+
+  PROVIDE j1b_pen j1b_coeff
+
+  cx = 0.d0
+  cy = 0.d0
+  cz = 0.d0
+  do ii = 1, nucl_num
+
+    expoii        = j1b_pen  (ii)
+    coefii        = j1b_coeff(ii)
+    Centerii(1:3) = nucl_coord(ii, 1:3)
+
+    call gaussian_product(pp1, P1_center, expoii, Centerii, factii, pp2, P2_center)
+    fact_p2 = fact_p1 * factii
+    p2_inv  = 1.d0 / pp2
+    call pol_modif_center( P1_center, P2_center, iorder_p, P1_new, P2_new )
+
+    call gaussian_product(qq1, Q1_center, expoii, Centerii, factii, qq2, Q2_center)
+    fact_q2 = fact_q1 * factii
+    q2_inv  = 1.d0 / qq2
+    call pol_modif_center( Q1_center, Q2_center, iorder_q, Q1_new, Q2_new )
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !                     [ (1-erf(mu r12)) / r12 ] \sum_A a_A c_A [ (r1-RA)^2 exp(-aA r1A^2)
+    ! ----------------------------------------------------------------------------------------------------
+
+    shift_Q = (/ 0, 0, 0 /)
+
+    ! x term:
+    ff = P2_center(1) - Centerii(1) 
+
+    shift_P = (/ 2, 0, 0 /)
+    cx = cx + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 1, 0, 0 /)
+    cx = cx + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cx = cx - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+
+    shift_P = (/ 0, 0, 0 /)
+    cx = cx + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cx = cx - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P2_center(2) - Centerii(2) 
+
+    shift_P = (/ 0, 2, 0 /)
+    cy = cy + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 0, 1, 0 /)
+    cy = cy + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cy = cy - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P       &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q       )
+
+    shift_P = (/ 0, 0, 0 /)
+    cy = cy + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cy = cy - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P2_center(3) - Centerii(3) 
+
+    shift_P = (/ 0, 0, 2 /)
+    cz = cz + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 0, 0, 1 /)
+    cz = cz + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cz = cz - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P       &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q       )
+
+    shift_P = (/ 0, 0, 0 /)
+    cz = cz + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cz = cz - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !                     [ (1-erf(mu r12)) / r12 ] \sum_A a_A c_A [ (r2-RA)^2 exp(-aA r2A^2)
+    ! ----------------------------------------------------------------------------------------------------
+
+    shift_P = (/ 0, 0, 0 /)
+
+    ! x term:
+    ff = Q2_center(1) - Centerii(1) 
+
+    shift_Q = (/ 2, 0, 0 /)
+    cx = cx + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cx = cx - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cx = cx - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = Q2_center(2) - Centerii(2) 
+
+    shift_Q = (/ 0, 2, 0 /)
+    cy = cy + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cy = cy - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cy = cy - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = Q2_center(3) - Centerii(3) 
+
+    shift_Q = (/ 0, 0, 2 /)
+    cz = cz + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cz = cz - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cz = cz - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !              - [ (1-erf(mu r12)) / r12 ] \sum_A a_A c_A [ (r1-RA) \cdot (r2-RA) exp(-aA r1A^2) ]
+    ! ----------------------------------------------------------------------------------------------------
+
+    ! x term:
+    ff = P2_center(1) - Centerii(1) 
+    gg = Q1_center(1) - Centerii(1) 
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cx = cx + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P2_center(2) - Centerii(2) 
+    gg = Q1_center(2) - Centerii(2) 
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cy = cy + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P2_center(3) - Centerii(3) 
+    gg = Q1_center(3) - Centerii(3) 
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cz = cz + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !              - [ (1-erf(mu r12)) / r12 ] \sum_A a_A c_A [ (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+    ! ----------------------------------------------------------------------------------------------------
+
+    ! x term:
+    ff = P1_center(1) - Centerii(1) 
+    gg = Q2_center(1) - Centerii(1) 
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cx = cx + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P1_center(2) - Centerii(2) 
+    gg = Q2_center(2) - Centerii(2) 
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cy = cy + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P1_center(3) - Centerii(3) 
+    gg = Q2_center(3) - Centerii(3) 
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cz = cz + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+  enddo
+
+  return
+end subroutine get_cxcycz_j2
+
+! ---
+
diff --git a/src/ao_tc_eff_map/two_e_ints_gauss.irp.f b/src/ao_tc_eff_map/two_e_ints_gauss.irp.f
new file mode 100644
index 00000000..51ef73a0
--- /dev/null
+++ b/src/ao_tc_eff_map/two_e_ints_gauss.irp.f
@@ -0,0 +1,327 @@
+double precision function ao_tc_sym_two_e_pot(i,j,k,l)
+  implicit none
+  BEGIN_DOC
+  !  integral of the AO basis <ik|jl> or (ij|kl)
+  !     i(r1) j(r1) (tc_pot(r12,mu)) k(r2) l(r2)
+  !
+  ! where (tc_pot(r12,mu)) is the scalar part of the potential EXCLUDING the term erf(mu r12)/r12. 
+  !
+  ! See Eq. (32) of JCP 154, 084119 (2021). 
+  END_DOC
+  integer,intent(in)             :: i,j,k,l
+  integer                        :: p,q,r,s
+  double precision               :: I_center(3),J_center(3),K_center(3),L_center(3)
+  integer                        :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3)
+  double precision               :: integral
+  include 'utils/constants.include.F'
+  double precision               :: P_new(0:max_dim,3),P_center(3),fact_p,pp
+  double precision               :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq
+  integer                        :: iorder_p(3), iorder_q(3)
+  double precision, allocatable  :: schwartz_kl(:,:)
+  double precision               :: schwartz_ij
+  double precision :: scw_gauss_int,general_primitive_integral_gauss
+
+  dim1 = n_pt_max_integrals
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+  ao_tc_sym_two_e_pot = 0.d0
+  double precision               :: thr
+  thr = ao_integrals_threshold*ao_integrals_threshold
+
+  allocate(schwartz_kl(0:ao_prim_num(l),0:ao_prim_num(k)))
+
+      double precision               :: coef3
+      double precision               :: coef2
+      double precision               :: p_inv,q_inv
+      double precision               :: coef1
+      double precision               :: coef4
+
+    do p = 1, 3
+      I_power(p) = ao_power(i,p)
+      J_power(p) = ao_power(j,p)
+      K_power(p) = ao_power(k,p)
+      L_power(p) = ao_power(l,p)
+      I_center(p) = nucl_coord(num_i,p)
+      J_center(p) = nucl_coord(num_j,p)
+      K_center(p) = nucl_coord(num_k,p)
+      L_center(p) = nucl_coord(num_l,p)
+    enddo
+
+    schwartz_kl(0,0) = 0.d0
+    do r = 1, ao_prim_num(k)
+      coef1 = ao_coef_normalized_ordered_transp(r,k)*ao_coef_normalized_ordered_transp(r,k)
+      schwartz_kl(0,r) = 0.d0
+      do s = 1, ao_prim_num(l)
+        coef2 = coef1 * ao_coef_normalized_ordered_transp(s,l) * ao_coef_normalized_ordered_transp(s,l)
+        call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,&
+            ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l),                 &
+            K_power,L_power,K_center,L_center,dim1)
+        q_inv = 1.d0/qq
+        scw_gauss_int = general_primitive_integral_gauss(dim1,              &
+                Q_new,Q_center,fact_q,qq,q_inv,iorder_q,             &
+                Q_new,Q_center,fact_q,qq,q_inv,iorder_q)
+
+        schwartz_kl(s,r) = dabs(scw_gauss_int * coef2)
+        schwartz_kl(0,r) = max(schwartz_kl(0,r),schwartz_kl(s,r))
+      enddo
+      schwartz_kl(0,0) = max(schwartz_kl(0,r),schwartz_kl(0,0))
+    enddo
+    do p = 1, ao_prim_num(i)
+      coef1 = ao_coef_normalized_ordered_transp(p,i)
+      do q = 1, ao_prim_num(j)
+        coef2 = coef1*ao_coef_normalized_ordered_transp(q,j)
+        call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,&
+            ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j),                 &
+            I_power,J_power,I_center,J_center,dim1)
+        p_inv = 1.d0/pp
+        scw_gauss_int = general_primitive_integral_gauss(dim1,              &
+                P_new,P_center,fact_p,pp,p_inv,iorder_p,             &
+                P_new,P_center,fact_p,pp,p_inv,iorder_p)
+        schwartz_ij = dabs(scw_gauss_int * coef2*coef2)
+        if (schwartz_kl(0,0)*schwartz_ij < thr) then
+           cycle
+        endif
+        do r = 1, ao_prim_num(k)
+          if (schwartz_kl(0,r)*schwartz_ij < thr) then
+             cycle
+          endif
+          coef3 = coef2*ao_coef_normalized_ordered_transp(r,k)
+          do s = 1, ao_prim_num(l)
+            if (schwartz_kl(s,r)*schwartz_ij < thr) then
+               cycle
+            endif
+            coef4 = coef3*ao_coef_normalized_ordered_transp(s,l)
+            call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q, &
+                ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l),            &
+                K_power,L_power,K_center,L_center,dim1)
+            q_inv = 1.d0/qq
+            integral = general_primitive_integral_gauss(dim1,              &
+                P_new,P_center,fact_p,pp,p_inv,iorder_p,             &
+                Q_new,Q_center,fact_q,qq,q_inv,iorder_q)
+            ao_tc_sym_two_e_pot = ao_tc_sym_two_e_pot + coef4 * integral
+          enddo ! s
+        enddo  ! r
+      enddo   ! q
+    enddo    ! p
+
+  deallocate (schwartz_kl)
+
+end
+
+
+double precision function general_primitive_integral_gauss(dim,      &
+      P_new,P_center,fact_p,p,p_inv,iorder_p,                        &
+      Q_new,Q_center,fact_q,q,q_inv,iorder_q)
+  implicit none
+  BEGIN_DOC
+  ! Computes the integral <pq|rs> where p,q,r,s are Gaussian primitives
+  END_DOC
+  integer,intent(in)             :: dim
+  include 'utils/constants.include.F'
+  double precision, intent(in)   :: P_new(0:max_dim,3),P_center(3),fact_p,p,p_inv
+  double precision, intent(in)   :: Q_new(0:max_dim,3),Q_center(3),fact_q,q,q_inv
+  integer, intent(in)            :: iorder_p(3)
+  integer, intent(in)            :: iorder_q(3)
+
+  double precision               :: r_cut,gama_r_cut,rho,dist
+  double precision               :: dx(0:max_dim),Ix_pol(0:max_dim),dy(0:max_dim),Iy_pol(0:max_dim),dz(0:max_dim),Iz_pol(0:max_dim)
+  integer                        :: n_Ix,n_Iy,n_Iz,nx,ny,nz
+  double precision               :: bla
+  integer                        :: ix,iy,iz,jx,jy,jz,i
+  double precision               :: a,b,c,d,e,f,accu,pq,const
+  double precision               :: pq_inv, p10_1, p10_2, p01_1, p01_2,pq_inv_2
+  integer                        :: n_pt_tmp,n_pt_out, iorder
+  double precision               :: d1(0:max_dim),d_poly(0:max_dim),rint,d1_screened(0:max_dim)
+  double precision :: thr
+
+  thr = ao_integrals_threshold
+
+  general_primitive_integral_gauss = 0.d0
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx,Ix_pol,dy,Iy_pol,dz,Iz_pol
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly
+
+  ! Gaussian Product
+  ! ----------------
+
+  pq = p_inv*0.5d0*q_inv
+  pq_inv = 0.5d0/(p+q)
+  p10_1 = q*pq  ! 1/(2p)
+  p01_1 = p*pq  ! 1/(2q)
+  pq_inv_2 = pq_inv+pq_inv
+  p10_2 = pq_inv_2 * p10_1*q !0.5d0*q/(pq + p*p)
+  p01_2 = pq_inv_2 * p01_1*p !0.5d0*p/(q*q + pq)
+
+
+  accu = 0.d0
+  iorder = iorder_p(1)+iorder_q(1)+iorder_p(1)+iorder_q(1)
+  do ix=0,iorder
+    Ix_pol(ix) = 0.d0
+  enddo
+  n_Ix = 0
+  do ix = 0, iorder_p(1)
+    if (abs(P_new(ix,1)) < thr) cycle
+    a = P_new(ix,1)
+    do jx = 0, iorder_q(1)
+      d = a*Q_new(jx,1)
+      if (abs(d) < thr) cycle
+      !DIR$ FORCEINLINE
+      call give_polynom_mult_center_x(P_center(1),Q_center(1),ix,jx,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dx,nx)
+      !DIR$ FORCEINLINE
+      call add_poly_multiply(dx,nx,d,Ix_pol,n_Ix)
+    enddo
+  enddo
+  if (n_Ix == -1) then
+    return
+  endif
+  iorder = iorder_p(2)+iorder_q(2)+iorder_p(2)+iorder_q(2)
+  do ix=0, iorder
+    Iy_pol(ix) = 0.d0
+  enddo
+  n_Iy = 0
+  do iy = 0, iorder_p(2)
+    if (abs(P_new(iy,2)) > thr) then
+      b = P_new(iy,2)
+      do jy = 0, iorder_q(2)
+        e = b*Q_new(jy,2)
+        if (abs(e) < thr) cycle
+        !DIR$ FORCEINLINE
+        call   give_polynom_mult_center_x(P_center(2),Q_center(2),iy,jy,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dy,ny)
+        !DIR$ FORCEINLINE
+        call add_poly_multiply(dy,ny,e,Iy_pol,n_Iy)
+      enddo
+    endif
+  enddo
+  if (n_Iy == -1) then
+    return
+  endif
+
+  iorder = iorder_p(3)+iorder_q(3)+iorder_p(3)+iorder_q(3)
+  do ix=0,iorder
+    Iz_pol(ix) = 0.d0
+  enddo
+  n_Iz = 0
+  do iz = 0, iorder_p(3)
+    if (abs(P_new(iz,3)) > thr) then
+      c = P_new(iz,3)
+      do jz = 0, iorder_q(3)
+        f = c*Q_new(jz,3)
+        if (abs(f) < thr) cycle
+        !DIR$ FORCEINLINE
+        call   give_polynom_mult_center_x(P_center(3),Q_center(3),iz,jz,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dz,nz)
+        !DIR$ FORCEINLINE
+        call add_poly_multiply(dz,nz,f,Iz_pol,n_Iz)
+      enddo
+    endif
+  enddo
+  if (n_Iz == -1) then
+    return
+  endif
+
+  rho = p*q *pq_inv_2
+  dist =  (P_center(1) - Q_center(1))*(P_center(1) - Q_center(1)) +  &
+      (P_center(2) - Q_center(2))*(P_center(2) - Q_center(2)) +      &
+      (P_center(3) - Q_center(3))*(P_center(3) - Q_center(3))
+  const = dist*rho
+
+  n_pt_tmp = n_Ix+n_Iy
+  do i=0,n_pt_tmp
+    d_poly(i)=0.d0
+  enddo
+
+  !DIR$ FORCEINLINE
+  call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp)
+  if (n_pt_tmp == -1) then
+    return
+  endif
+  n_pt_out = n_pt_tmp+n_Iz
+  do i=0,n_pt_out
+    d1(i)=0.d0
+  enddo
+
+  !DIR$ FORCEINLINE
+  call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out)
+
+  double precision :: aa,c_a,t_a,rho_old,w_a,pi_3,prefactor,inv_pq_3_2
+  double precision :: gauss_int
+  integer :: m
+  gauss_int = 0.d0
+  pi_3 = pi*pi*pi
+  inv_pq_3_2 = (p_inv * q_inv)**(1.5d0)
+  rho_old = (p*q)/(p+q)
+  prefactor = pi_3 * inv_pq_3_2 * fact_p * fact_q 
+  do i = 1, n_gauss_eff_pot ! browse the gaussians with different expo/coef
+  !do i = 1, n_gauss_eff_pot-1
+   aa = expo_gauss_eff_pot(i) 
+   c_a = coef_gauss_eff_pot(i)
+   t_a = dsqrt( aa /(rho_old + aa) ) 
+   w_a = dexp(-t_a*t_a*rho_old*dist)
+   accu = 0.d0
+   ! evaluation of the polynom Ix(t_a) * Iy(t_a) * Iz(t_a)
+   do m = 0, n_pt_out,2
+    accu += d1(m) * (t_a)**(dble(m)) 
+   enddo
+   ! equation A8 of PRA-70-062505 (2004) of Toul. Col. Sav. 
+   gauss_int = gauss_int + c_a * prefactor * (1.d0 - t_a*t_a)**(1.5d0) * w_a * accu
+  enddo
+
+  general_primitive_integral_gauss = gauss_int
+end
+
+subroutine compute_ao_integrals_gauss_jl(j,l,n_integrals,buffer_i,buffer_value)
+  implicit none
+  use map_module
+  BEGIN_DOC
+  !  Parallel client for AO integrals
+  END_DOC
+
+  integer, intent(in)            :: j,l
+  integer,intent(out)            :: n_integrals
+  integer(key_kind),intent(out)  :: buffer_i(ao_num*ao_num)
+  real(integral_kind),intent(out) :: buffer_value(ao_num*ao_num)
+
+  integer                        :: i,k
+  double precision               :: cpu_1,cpu_2, wall_1, wall_2
+  double precision               :: integral, wall_0
+  double precision               :: thr,ao_tc_sym_two_e_pot
+  integer                        :: kk, m, j1, i1
+  logical, external              :: ao_two_e_integral_zero
+
+  thr = ao_integrals_threshold
+
+  n_integrals = 0
+
+  j1 = j+ishft(l*l-l,-1)
+  do k = 1, ao_num           ! r1
+    i1 = ishft(k*k-k,-1)
+    if (i1 > j1) then
+      exit
+    endif
+    do i = 1, k
+      i1 += 1
+      if (i1 > j1) then
+        exit
+      endif
+!      if (ao_two_e_integral_zero(i,j,k,l)) then
+      if (.False.) then
+        cycle
+      endif
+      if (ao_two_e_integral_erf_schwartz(i,k)*ao_two_e_integral_erf_schwartz(j,l) < thr ) then
+        cycle
+      endif
+      !DIR$ FORCEINLINE
+      integral = ao_tc_sym_two_e_pot(i,k,j,l)  ! i,k : r1    j,l : r2
+      if (abs(integral) < thr) then
+        cycle
+      endif
+      n_integrals += 1
+      !DIR$ FORCEINLINE
+      call two_e_integrals_index(i,j,k,l,buffer_i(n_integrals))
+      buffer_value(n_integrals) = integral
+    enddo
+  enddo
+
+end
diff --git a/src/ao_tc_eff_map/useful_sub.irp.f b/src/ao_tc_eff_map/useful_sub.irp.f
new file mode 100644
index 00000000..4cfdcad2
--- /dev/null
+++ b/src/ao_tc_eff_map/useful_sub.irp.f
@@ -0,0 +1,364 @@
+! ---
+
+!______________________________________________________________________________________________________________________
+!______________________________________________________________________________________________________________________
+
+double precision function general_primitive_integral_coul_shifted( dim                                                  &
+                                                                 , P_new, P_center, fact_p, p, p_inv, iorder_p, shift_P &
+                                                                 , Q_new, Q_center, fact_q, q, q_inv, iorder_q, shift_Q )
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in) :: dim
+  integer,          intent(in) :: iorder_p(3), shift_P(3)
+  integer,          intent(in) :: iorder_q(3), shift_Q(3)
+  double precision, intent(in) :: P_new(0:max_dim,3), P_center(3), fact_p, p, p_inv
+  double precision, intent(in) :: Q_new(0:max_dim,3), Q_center(3), fact_q, q, q_inv
+
+  integer                      :: n_Ix, n_Iy, n_Iz, nx, ny, nz
+  integer                      :: ix, iy, iz, jx, jy, jz, i
+  integer                      :: n_pt_tmp, n_pt_out, iorder
+  integer                      :: ii, jj
+  double precision             :: rho, dist
+  double precision             :: dx(0:max_dim), Ix_pol(0:max_dim)
+  double precision             :: dy(0:max_dim), Iy_pol(0:max_dim)
+  double precision             :: dz(0:max_dim), Iz_pol(0:max_dim)
+  double precision             :: a, b, c, d, e, f, accu, pq, const
+  double precision             :: pq_inv, p10_1, p10_2, p01_1, p01_2, pq_inv_2
+  double precision             :: d1(0:max_dim), d_poly(0:max_dim)
+  double precision             :: p_plus_q
+
+  double precision             :: rint_sum
+
+  general_primitive_integral_coul_shifted = 0.d0
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx, Ix_pol, dy, Iy_pol, dz, Iz_pol
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly
+
+  ! Gaussian Product
+  ! ----------------
+  p_plus_q = (p+q) 
+  pq       = p_inv * 0.5d0 * q_inv
+  pq_inv   = 0.5d0 / p_plus_q
+  p10_1    = q * pq             ! 1/(2p)
+  p01_1    = p * pq             ! 1/(2q)
+  pq_inv_2 = pq_inv + pq_inv
+  p10_2    = pq_inv_2 * p10_1 * q ! 0.5d0 * q / (pq + p*p)
+  p01_2    = pq_inv_2 * p01_1 * p ! 0.5d0 * p / (q*q + pq)
+
+  accu = 0.d0
+
+  iorder = iorder_p(1) + iorder_q(1) + iorder_p(1) + iorder_q(1)
+  iorder = iorder + shift_P(1) + shift_Q(1)
+  iorder = iorder + shift_P(1) + shift_Q(1)
+  !DIR$ VECTOR ALIGNED
+  do ix = 0, iorder
+    Ix_pol(ix) = 0.d0
+  enddo
+  n_Ix = 0
+  do ix = 0, iorder_p(1)
+
+    ii = ix + shift_P(1)
+    a  = P_new(ix,1)
+    if(abs(a) < thresh) cycle
+
+    do jx = 0, iorder_q(1)
+
+      jj = jx + shift_Q(1)
+      d  = a * Q_new(jx,1)
+      if(abs(d) < thresh) cycle
+
+      !DEC$ FORCEINLINE
+      call give_polynom_mult_center_x( P_center(1), Q_center(1), ii, jj &
+                                     , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dx, nx )
+      !DEC$ FORCEINLINE
+      call add_poly_multiply(dx, nx, d, Ix_pol, n_Ix)
+    enddo
+  enddo
+  if(n_Ix == -1) then
+    return
+  endif
+
+  iorder = iorder_p(2) + iorder_q(2) + iorder_p(2) + iorder_q(2)
+  iorder = iorder + shift_P(2) + shift_Q(2)
+  iorder = iorder + shift_P(2) + shift_Q(2)
+  !DIR$ VECTOR ALIGNED
+  do ix = 0, iorder
+    Iy_pol(ix) = 0.d0
+  enddo
+  n_Iy = 0
+  do iy = 0, iorder_p(2)
+
+    if(abs(P_new(iy,2)) > thresh) then
+
+      ii = iy + shift_P(2)
+      b  = P_new(iy,2)
+
+      do jy = 0, iorder_q(2)
+
+        jj = jy + shift_Q(2)
+        e  = b * Q_new(jy,2)
+        if(abs(e) < thresh) cycle
+
+        !DEC$ FORCEINLINE
+        call give_polynom_mult_center_x( P_center(2), Q_center(2), ii, jj &
+                                       , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dy, ny )
+        !DEC$ FORCEINLINE
+        call add_poly_multiply(dy, ny, e, Iy_pol, n_Iy)
+      enddo
+    endif
+  enddo
+  if(n_Iy == -1) then
+    return
+  endif
+
+  iorder = iorder_p(3) + iorder_q(3) + iorder_p(3) + iorder_q(3)
+  iorder = iorder + shift_P(3) + shift_Q(3)
+  iorder = iorder + shift_P(3) + shift_Q(3)
+  do ix = 0, iorder
+    Iz_pol(ix) = 0.d0
+  enddo
+  n_Iz = 0
+  do iz = 0, iorder_p(3)
+
+    if( abs(P_new(iz,3)) > thresh ) then
+
+      ii = iz + shift_P(3)
+      c  = P_new(iz,3)
+
+      do jz = 0, iorder_q(3)
+
+        jj = jz + shift_Q(3)
+        f  = c * Q_new(jz,3)
+        if(abs(f) < thresh) cycle
+
+        !DEC$ FORCEINLINE
+        call give_polynom_mult_center_x( P_center(3), Q_center(3), ii, jj &
+                                       , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dz, nz )
+        !DEC$ FORCEINLINE
+        call add_poly_multiply(dz, nz, f, Iz_pol, n_Iz)
+      enddo
+    endif
+  enddo
+  if(n_Iz == -1) then
+    return
+  endif
+
+  rho = p * q * pq_inv_2
+  dist = (P_center(1) - Q_center(1)) * (P_center(1) - Q_center(1)) &
+       + (P_center(2) - Q_center(2)) * (P_center(2) - Q_center(2)) &
+       + (P_center(3) - Q_center(3)) * (P_center(3) - Q_center(3))
+  const = dist*rho
+
+  n_pt_tmp = n_Ix + n_Iy
+  do i = 0, n_pt_tmp
+    d_poly(i) = 0.d0
+  enddo
+
+  !DEC$ FORCEINLINE
+  call multiply_poly(Ix_pol, n_Ix, Iy_pol, n_Iy, d_poly, n_pt_tmp)
+  if(n_pt_tmp == -1) then
+    return
+  endif
+  n_pt_out = n_pt_tmp + n_Iz
+  do i = 0, n_pt_out
+    d1(i) = 0.d0
+  enddo
+
+  !DEC$ FORCEINLINE
+  call multiply_poly(d_poly, n_pt_tmp, Iz_pol, n_Iz, d1, n_pt_out)
+  accu = accu + rint_sum(n_pt_out, const, d1)
+
+  general_primitive_integral_coul_shifted = fact_p * fact_q * accu * pi_5_2 * p_inv * q_inv / dsqrt(p_plus_q)
+
+  return
+end function general_primitive_integral_coul_shifted
+!______________________________________________________________________________________________________________________
+!______________________________________________________________________________________________________________________
+
+
+
+!______________________________________________________________________________________________________________________
+!______________________________________________________________________________________________________________________
+
+double precision function general_primitive_integral_erf_shifted( dim                                                  &
+                                                                , P_new, P_center, fact_p, p, p_inv, iorder_p, shift_P &
+                                                                , Q_new, Q_center, fact_q, q, q_inv, iorder_q, shift_Q )
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in) :: dim
+  integer,          intent(in) :: iorder_p(3), shift_P(3)
+  integer,          intent(in) :: iorder_q(3), shift_Q(3)
+  double precision, intent(in) :: P_new(0:max_dim,3), P_center(3), fact_p, p, p_inv
+  double precision, intent(in) :: Q_new(0:max_dim,3), Q_center(3), fact_q, q, q_inv
+
+  integer                      :: n_Ix, n_Iy, n_Iz, nx, ny, nz
+  integer                      :: ix, iy, iz, jx, jy, jz, i
+  integer                      :: n_pt_tmp, n_pt_out, iorder
+  integer                      :: ii, jj
+  double precision             :: rho, dist
+  double precision             :: dx(0:max_dim), Ix_pol(0:max_dim)
+  double precision             :: dy(0:max_dim), Iy_pol(0:max_dim)
+  double precision             :: dz(0:max_dim), Iz_pol(0:max_dim)
+  double precision             :: a, b, c, d, e, f, accu, pq, const
+  double precision             :: pq_inv, p10_1, p10_2, p01_1, p01_2, pq_inv_2
+  double precision             :: d1(0:max_dim), d_poly(0:max_dim)
+  double precision             :: p_plus_q
+
+  double precision             :: rint_sum
+
+  general_primitive_integral_erf_shifted = 0.d0
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx, Ix_pol, dy, Iy_pol, dz, Iz_pol
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly
+
+  ! Gaussian Product
+  ! ----------------
+  p_plus_q = (p+q) * ( (p*q)/(p+q) + mu_erf*mu_erf ) / (mu_erf*mu_erf)
+  pq       = p_inv * 0.5d0 * q_inv
+  pq_inv   = 0.5d0 / p_plus_q
+  p10_1    = q * pq             ! 1/(2p)
+  p01_1    = p * pq             ! 1/(2q)
+  pq_inv_2 = pq_inv + pq_inv
+  p10_2    = pq_inv_2 * p10_1 * q ! 0.5d0 * q / (pq + p*p)
+  p01_2    = pq_inv_2 * p01_1 * p ! 0.5d0 * p / (q*q + pq)
+
+  accu = 0.d0
+
+  iorder = iorder_p(1) + iorder_q(1) + iorder_p(1) + iorder_q(1)
+  iorder = iorder + shift_P(1) + shift_Q(1)
+  iorder = iorder + shift_P(1) + shift_Q(1)
+  !DIR$ VECTOR ALIGNED
+  do ix = 0, iorder
+    Ix_pol(ix) = 0.d0
+  enddo
+  n_Ix = 0
+  do ix = 0, iorder_p(1)
+
+    ii = ix + shift_P(1)
+    a  = P_new(ix,1)
+    if(abs(a) < thresh) cycle
+
+    do jx = 0, iorder_q(1)
+
+      jj = jx + shift_Q(1)
+      d  = a * Q_new(jx,1)
+      if(abs(d) < thresh) cycle
+
+      !DEC$ FORCEINLINE
+      call give_polynom_mult_center_x( P_center(1), Q_center(1), ii, jj &
+                                     , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dx, nx )
+      !DEC$ FORCEINLINE
+      call add_poly_multiply(dx, nx, d, Ix_pol, n_Ix)
+    enddo
+  enddo
+  if(n_Ix == -1) then
+    return
+  endif
+
+  iorder = iorder_p(2) + iorder_q(2) + iorder_p(2) + iorder_q(2)
+  iorder = iorder + shift_P(2) + shift_Q(2)
+  iorder = iorder + shift_P(2) + shift_Q(2)
+  !DIR$ VECTOR ALIGNED
+  do ix = 0, iorder
+    Iy_pol(ix) = 0.d0
+  enddo
+  n_Iy = 0
+  do iy = 0, iorder_p(2)
+
+    if(abs(P_new(iy,2)) > thresh) then
+
+      ii = iy + shift_P(2)
+      b  = P_new(iy,2)
+
+      do jy = 0, iorder_q(2)
+
+        jj = jy + shift_Q(2)
+        e  = b * Q_new(jy,2)
+        if(abs(e) < thresh) cycle
+
+        !DEC$ FORCEINLINE
+        call give_polynom_mult_center_x( P_center(2), Q_center(2), ii, jj &
+                                       , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dy, ny )
+        !DEC$ FORCEINLINE
+        call add_poly_multiply(dy, ny, e, Iy_pol, n_Iy)
+      enddo
+    endif
+  enddo
+  if(n_Iy == -1) then
+    return
+  endif
+
+  iorder = iorder_p(3) + iorder_q(3) + iorder_p(3) + iorder_q(3)
+  iorder = iorder + shift_P(3) + shift_Q(3)
+  iorder = iorder + shift_P(3) + shift_Q(3)
+  do ix = 0, iorder
+    Iz_pol(ix) = 0.d0
+  enddo
+  n_Iz = 0
+  do iz = 0, iorder_p(3)
+
+    if( abs(P_new(iz,3)) > thresh ) then
+
+      ii = iz + shift_P(3)
+      c  = P_new(iz,3)
+
+      do jz = 0, iorder_q(3)
+
+        jj = jz + shift_Q(3)
+        f  = c * Q_new(jz,3)
+        if(abs(f) < thresh) cycle
+
+        !DEC$ FORCEINLINE
+        call give_polynom_mult_center_x( P_center(3), Q_center(3), ii, jj &
+                                       , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dz, nz )
+        !DEC$ FORCEINLINE
+        call add_poly_multiply(dz, nz, f, Iz_pol, n_Iz)
+      enddo
+    endif
+  enddo
+  if(n_Iz == -1) then
+    return
+  endif
+
+  rho = p * q * pq_inv_2
+  dist = (P_center(1) - Q_center(1)) * (P_center(1) - Q_center(1)) &
+       + (P_center(2) - Q_center(2)) * (P_center(2) - Q_center(2)) &
+       + (P_center(3) - Q_center(3)) * (P_center(3) - Q_center(3))
+  const = dist*rho
+
+  n_pt_tmp = n_Ix + n_Iy
+  do i = 0, n_pt_tmp
+    d_poly(i) = 0.d0
+  enddo
+
+  !DEC$ FORCEINLINE
+  call multiply_poly(Ix_pol, n_Ix, Iy_pol, n_Iy, d_poly, n_pt_tmp)
+  if(n_pt_tmp == -1) then
+    return
+  endif
+  n_pt_out = n_pt_tmp + n_Iz
+  do i = 0, n_pt_out
+    d1(i) = 0.d0
+  enddo
+
+  !DEC$ FORCEINLINE
+  call multiply_poly(d_poly, n_pt_tmp, Iz_pol, n_Iz, d1, n_pt_out)
+  accu = accu + rint_sum(n_pt_out, const, d1)
+
+  general_primitive_integral_erf_shifted = fact_p * fact_q * accu * pi_5_2 * p_inv * q_inv / dsqrt(p_plus_q)
+
+  return
+end function general_primitive_integral_erf_shifted
+!______________________________________________________________________________________________________________________
+!______________________________________________________________________________________________________________________
+
+
+
+
+
diff --git a/src/ao_two_e_ints/map_integrals.irp.f b/src/ao_two_e_ints/map_integrals.irp.f
index c3b206e1..fa7c29cc 100644
--- a/src/ao_two_e_ints/map_integrals.irp.f
+++ b/src/ao_two_e_ints/map_integrals.irp.f
@@ -142,7 +142,7 @@ subroutine ao_idx2_sq(i,j,ij)
     ij=i*i
   endif
 end
-  
+
 subroutine idx2_tri_int(i,j,ij)
   implicit none
   integer, intent(in)  :: i,j
@@ -152,7 +152,7 @@ subroutine idx2_tri_int(i,j,ij)
   q = min(i,j)
   ij = q+ishft(p*p-p,-1)
 end
-  
+
 subroutine ao_idx2_tri_key(i,j,ij)
   use map_module
   implicit none
@@ -163,8 +163,8 @@ subroutine ao_idx2_tri_key(i,j,ij)
   q = min(i,j)
   ij = q+ishft(p*p-p,-1)
 end
-  
-subroutine two_e_integrals_index_2fold(i,j,k,l,i1)                                           
+
+subroutine two_e_integrals_index_2fold(i,j,k,l,i1)
   use map_module
   implicit none
   integer, intent(in)            :: i,j,k,l
@@ -176,7 +176,7 @@ subroutine two_e_integrals_index_2fold(i,j,k,l,i1)
   call ao_idx2_tri_key(ik,jl,i1)
 end
 
-subroutine ao_idx2_sq_rev(i,k,ik)                                                                
+subroutine ao_idx2_sq_rev(i,k,ik)
   BEGIN_DOC
   ! reverse square compound index
   END_DOC
@@ -321,14 +321,15 @@ BEGIN_PROVIDER [ double precision, ao_integrals_cache, (0:64*64*64*64) ]
   !$OMP END PARALLEL DO
 END_PROVIDER
 
+! ---
 
-double precision function get_ao_two_e_integral(i,j,k,l,map) result(result)
+double precision function get_ao_two_e_integral(i, j, k, l, map) result(result)
   use map_module
   implicit none
   BEGIN_DOC
-  ! Gets one AO bi-electronic integral from the AO map
+  ! Gets one AO bi-electronic integral from the AO map in PHYSICIST NOTATION
   !
-  ! i,j,k,l in physicist notation <ij|kl>
+  ! <1:k, 2:l |1:i, 2:j> 
   END_DOC
   integer, intent(in)            :: i,j,k,l
   integer(key_kind)              :: idx
@@ -398,7 +399,7 @@ BEGIN_PROVIDER [ complex*16, ao_integrals_cache_periodic, (0:64*64*64*64) ]
            tmp_im = 0.d0
            integral = dcmplx(tmp_re,tmp_im)
          endif
-         
+
          ii = l-ao_integrals_cache_min
          ii = ior( shiftl(ii,6), k-ao_integrals_cache_min)
          ii = ior( shiftl(ii,6), j-ao_integrals_cache_min)
@@ -473,7 +474,7 @@ subroutine get_ao_two_e_integrals(j,k,l,sze,out_val)
   BEGIN_DOC
   ! Gets multiple AO bi-electronic integral from the AO map .
   ! All i are retrieved for j,k,l fixed.
-  ! physicist convention : <ij|kl> 
+  ! physicist convention : <ij|kl>
   END_DOC
   implicit none
   integer, intent(in)            :: j,k,l, sze
@@ -502,7 +503,7 @@ subroutine get_ao_two_e_integrals_periodic(j,k,l,sze,out_val)
   BEGIN_DOC
   ! Gets multiple AO bi-electronic integral from the AO map .
   ! All i are retrieved for j,k,l fixed.
-  ! physicist convention : <ij|kl> 
+  ! physicist convention : <ij|kl>
   END_DOC
   implicit none
   integer, intent(in)            :: j,k,l, sze
diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f
index 8032bd92..83fbadfd 100644
--- a/src/ao_two_e_ints/two_e_integrals.irp.f
+++ b/src/ao_two_e_ints/two_e_integrals.irp.f
@@ -101,6 +101,7 @@ double precision function ao_two_e_integral(i,j,k,l)
     endif
 
   endif
+
 end
 
 double precision function ao_two_e_integral_schwartz_accel(i,j,k,l)
diff --git a/src/basis_correction/TODO b/src/basis_correction/TODO
index e28d593a..64a6ddeb 100644
--- a/src/basis_correction/TODO
+++ b/src/basis_correction/TODO
@@ -1 +1,4 @@
 change all correlation functionals with the pbe_on_top general
+factor 2 in two-rdm involved in: 
+  on-top, mu(r), pbe-on-top, sc_basis_corr and so on 
+  casscf : state_av_act_2_rdm_spin_trace_mo
diff --git a/src/basis_correction/pbe_on_top.irp.f b/src/basis_correction/pbe_on_top.irp.f
index cc9cdec9..9167f459 100644
--- a/src/basis_correction/pbe_on_top.irp.f
+++ b/src/basis_correction/pbe_on_top.irp.f
@@ -41,13 +41,13 @@
    
    if(mu_of_r_potential == "cas_ful")then
     ! You take the on-top of the CAS wave function which is computed with mu(r) 
-    on_top = on_top_cas_mu_r(ipoint,istate)
+    on_top =  on_top_cas_mu_r(ipoint,istate)
    else
     ! You take the on-top of the CAS wave function computed separately
     on_top = total_cas_on_top_density(ipoint,istate)
    endif
-!  We take the extrapolated on-top pair density * 2 because of normalization
-   on_top_extrap = 2.d0 * mu_correction_of_on_top(mu,on_top)
+!  We take the extrapolated on-top pair density 
+   on_top_extrap = mu_correction_of_on_top(mu,on_top)
 
    call ec_md_pbe_on_top_general(mu,rho_a,rho_b,grad_rho_a,grad_rho_b,on_top_extrap,eps_c_md_on_top_PBE)
  
@@ -108,8 +108,8 @@
     ! You take the on-top of the CAS wave function computed separately
     on_top = total_cas_on_top_density(ipoint,istate)
    endif
-!  We take the extrapolated on-top pair density * 2 because of normalization
-   on_top_extrap = 2.d0 * mu_correction_of_on_top(mu,on_top)
+!  We take the extrapolated on-top pair density 
+   on_top_extrap = mu_correction_of_on_top(mu,on_top)
 
    call ec_md_pbe_on_top_general(mu,rho_a,rho_b,grad_rho_a,grad_rho_b,on_top_extrap,eps_c_md_on_top_PBE)
  
@@ -170,8 +170,8 @@
     ! You take the on-top of the CAS wave function computed separately
     on_top = total_cas_on_top_density(ipoint,istate)
    endif
-!  We DO NOT take the extrapolated on-top pair density, but there is  * 2 because of normalization
-   on_top_extrap = 2.d0 * on_top
+!  We DO NOT take the extrapolated on-top pair density
+   on_top_extrap =  on_top
 
    call ec_md_pbe_on_top_general(mu,rho_a,rho_b,grad_rho_a,grad_rho_b,on_top_extrap,eps_c_md_on_top_PBE)
  
diff --git a/src/basis_correction/print_routine.irp.f b/src/basis_correction/print_routine.irp.f
index 67c5c6c2..c2558d22 100644
--- a/src/basis_correction/print_routine.irp.f
+++ b/src/basis_correction/print_routine.irp.f
@@ -18,7 +18,7 @@ subroutine print_basis_correction
  print*, ''
  print*, 'For more details look at Journal of Chemical Physics 149, 194301 1-15 (2018)        '
  print*, '                         Journal of Physical Chemistry Letters 10, 2931-2937 (2019) '
- print*, '                         ???REF SC?'
+ print*, '                         Journal of Chemical Physics 152, 174104 (2020)             '
  print*, '****************************************'
  print*, '****************************************'
  print*, 'mu_of_r_potential = ',mu_of_r_potential
@@ -56,14 +56,14 @@ subroutine print_basis_correction
    print*,''
    print*,'********************************************'
    print*,'********************************************'
-   print*,'+) PBE-on-top Ecmd functional : (??????? REF-SCF ??????????)' 
+   print*,'+) PBE-on-top Ecmd functional : JCP, 152, 174104 (2020) ' 
    print*,'PBE at mu=0, extrapolated ontop pair density at large mu, usual spin-polarization'
    do istate = 1, N_states
     write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD PBE-OT        , state ',istate,' = ',ecmd_pbe_on_top_mu_of_r(istate)
    enddo
    print*,''
    print*,'********************************************'
-   print*,'+) PBE-on-top no spin polarization Ecmd functional : (??????? REF-SCF ??????????)' 
+   print*,'+) PBE-on-top no spin polarization Ecmd functional : JCP, 152, 174104 (2020)' 
    print*,'PBE at mu=0, extrapolated ontop pair density at large mu, and ZERO SPIN POLARIZATION'
    do istate = 1, N_states
     write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD SU-PBE-OT     , state ',istate,' = ',ecmd_pbe_on_top_su_mu_of_r(istate)
diff --git a/src/bi_ort_ints/NEED b/src/bi_ort_ints/NEED
new file mode 100644
index 00000000..3ca12d93
--- /dev/null
+++ b/src/bi_ort_ints/NEED
@@ -0,0 +1,4 @@
+non_h_ints_mu
+ao_tc_eff_map
+bi_ortho_mos
+tc_keywords
diff --git a/src/bi_ort_ints/README.rst b/src/bi_ort_ints/README.rst
new file mode 100644
index 00000000..d496c4f7
--- /dev/null
+++ b/src/bi_ort_ints/README.rst
@@ -0,0 +1,25 @@
+===========
+bi_ort_ints
+===========
+
+This module contains all necessary integrals for the TC Hamiltonian in a bi-orthonormal (BO) MO Basis.
+See in bi_ortho_basis for more information. 
+The main providers are : 
+
+One-electron integrals 
+----------------------
++) ao_one_e_integrals_tc_tot : total one-electron Hamiltonian which might include non hermitian part coming from one-e correlation factor. 
++) mo_bi_ortho_tc_one_e : one-electron Hamiltonian (h_core+one-J terms) on the BO-MO basis. 
++) mo_bi_orth_bipole_x  : x-component of the dipole operator on the BO-MO basis. (Same for y,z) 
+
+Two-electron integrals 
+----------------------
++) ao_two_e_tc_tot : Total two-electron operator (including the non-hermitian term of the TC Hamiltonian) on the AO basis
++) mo_bi_ortho_tc_two_e : Total two-electron operator on the BO-MO basis
+
+Three-electron integrals 
+------------------------
++) three_body_ints_bi_ort : 6-indices three-electron tensor (-L) on the BO-MO basis. WARNING :: N^6 storage !
++) three_e_3_idx_direct_bi_ort : DIRECT term with 3 different indices of the -L operator. These terms appear in the DIAGONAL matrix element of the -L operator. The 5 other permutations needed to compute matrix elements can be found in three_body_ijm.irp.f 
++) three_e_4_idx_direct_bi_ort : DIRECT term with 4 different indices of the -L operator. These terms appear in the OFF-DIAGONAL matrix element of the -L operator including SINGLE EXCITATIONS. The 5 other permutations needed to compute matrix elements can be found in three_body_ijmk.irp.f 
++) three_e_5_idx_direct_bi_ort : DIRECT term with 5 different indices of the -L operator. These terms appear in the OFF-DIAGONAL matrix element of the -L operator including DOUBLE EXCITATIONS. The 5 other permutations needed to compute matrix elements can be found in three_body_ijmkl.irp.f 
diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f
new file mode 100644
index 00000000..ca50dd56
--- /dev/null
+++ b/src/bi_ort_ints/bi_ort_ints.irp.f
@@ -0,0 +1,44 @@
+program bi_ort_ints
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 10
+  my_n_pt_a_grid = 14
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+ call test_3e
+end
+
+subroutine test_3e
+ implicit none
+ integer :: i,k,j,l,m,n,ipoint
+ double precision :: accu, contrib,new,ref
+ i = 1
+ k = 1
+ accu = 0.d0
+ do i = 1, mo_num
+  do k = 1, mo_num 
+   do j = 1, mo_num
+    do l = 1, mo_num 
+     do m = 1, mo_num
+      do n = 1, mo_num
+        call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new)
+        call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref)
+        contrib = dabs(new - ref)
+        accu += contrib
+        if(contrib .gt. 1.d-10)then
+         print*,'pb !!'
+         print*,i,k,j,l,m,n
+         print*,ref,new,contrib
+        endif
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ print*,'accu = ',accu/dble(mo_num)**6
+
+
+end
diff --git a/src/bi_ort_ints/biorthog_mo_for_h.irp.f b/src/bi_ort_ints/biorthog_mo_for_h.irp.f
new file mode 100644
index 00000000..452c13f1
--- /dev/null
+++ b/src/bi_ort_ints/biorthog_mo_for_h.irp.f
@@ -0,0 +1,153 @@
+
+! ---
+
+double precision function bi_ortho_mo_coul_ints(l, k, j, i)
+
+  BEGIN_DOC
+  !
+  ! < mo^L_k mo^L_l | 1/r12 | mo^R_i mo^R_j >
+  !
+  END_DOC
+
+  implicit none
+  integer, intent(in) :: i, j, k, l
+  integer             :: m, n, p, q
+
+  bi_ortho_mo_coul_ints = 0.d0
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do q = 1, ao_num
+          !                                   p1h1p2h2   l1                  l2              r1               r2
+          bi_ortho_mo_coul_ints += ao_two_e_coul(n,q,m,p) * mo_l_coef(m,l) * mo_l_coef(n,k) * mo_r_coef(p,j) * mo_r_coef(q,i)
+        enddo
+      enddo
+    enddo
+  enddo
+
+end function bi_ortho_mo_coul_ints
+
+! ---
+
+! TODO :: transform into DEGEMM
+
+BEGIN_PROVIDER [double precision, mo_bi_ortho_coul_e_chemist, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! mo_bi_ortho_coul_e_chemist(k,i,l,j) = < k l | 1/r12 | i j > where i,j are right MOs and k,l are left MOs
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l, m, n, p, q
+  double precision, allocatable :: mo_tmp_1(:,:,:,:), mo_tmp_2(:,:,:,:)
+
+  allocate(mo_tmp_1(mo_num,ao_num,ao_num,ao_num))
+  mo_tmp_1 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do q = 1, ao_num
+          do k = 1, mo_num
+            !       (k n|p m)    = sum_q c_qk * (q n|p m)
+            mo_tmp_1(k,n,p,m) += mo_l_coef_transp(k,q) * ao_two_e_coul(q,n,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  allocate(mo_tmp_2(mo_num,mo_num,ao_num,ao_num))
+  mo_tmp_2 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            !       (k i|p m) = sum_n c_ni * (k n|p m)
+            mo_tmp_2(k,i,p,m) += mo_r_coef_transp(i,n) * mo_tmp_1(k,n,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_1)
+
+  allocate(mo_tmp_1(mo_num,mo_num,mo_num,ao_num))
+  mo_tmp_1 = 0.d0
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_tmp_1(k,i,l,m) += mo_l_coef_transp(l,p) * mo_tmp_2(k,i,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_2)
+
+  mo_bi_ortho_coul_e_chemist = 0.d0 
+  do m = 1, ao_num
+    do j = 1, mo_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_bi_ortho_coul_e_chemist(k,i,l,j) += mo_r_coef_transp(j,m) * mo_tmp_1(k,i,l,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_1)
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, mo_bi_ortho_coul_e, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! mo_bi_ortho_coul_e(k,l,i,j) = < k l | 1/r12 | i j > where i,j are right MOs and k,l are left MOs
+  !
+  END_DOC
+
+  implicit none
+  integer :: i, j, k, l
+
+  do j = 1, mo_num
+    do i = 1, mo_num
+      do l = 1, mo_num
+        do k = 1, mo_num
+           !    < k l | V12 | i j >                  (k i|l j)
+           mo_bi_ortho_coul_e(k,l,i,j) = mo_bi_ortho_coul_e_chemist(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_bi_ortho_one_e, (mo_num, mo_num)]
+
+  BEGIN_DOC 
+  !
+  ! mo_bi_ortho_one_e(k,i) = < MO^L_k | h_c | MO^R_i >
+  !
+  END_DOC
+
+  implicit none
+
+  call ao_to_mo_bi_ortho(ao_one_e_integrals, ao_num, mo_bi_ortho_one_e , mo_num)
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f
new file mode 100644
index 00000000..7f89899b
--- /dev/null
+++ b/src/bi_ort_ints/one_e_bi_ort.irp.f
@@ -0,0 +1,75 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_one_e_integrals_tc_tot, (ao_num,ao_num)]
+
+  implicit none
+  integer :: i, j
+
+  ao_one_e_integrals_tc_tot = ao_one_e_integrals      
+
+  provide j1b_type
+
+  if( (j1b_type .eq. 1) .or. (j1b_type .eq. 2) ) then
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+        ao_one_e_integrals_tc_tot(j,i) += ( j1b_gauss_hermI  (j,i) &
+                                          + j1b_gauss_hermII (j,i) &
+                                          + j1b_gauss_nonherm(j,i) )
+      enddo
+    enddo
+
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! mo_bi_ortho_tc_one_e(k,i) = <MO^L_k | h_c | MO^R_i>
+  !
+  END_DOC
+
+  implicit none
+ 
+  call ao_to_mo_bi_ortho(ao_one_e_integrals_tc_tot, ao_num, mo_bi_ortho_tc_one_e, mo_num)
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, mo_bi_orth_bipole_x , (mo_num,mo_num)]
+&BEGIN_PROVIDER [double precision, mo_bi_orth_bipole_y , (mo_num,mo_num)]
+&BEGIN_PROVIDER [double precision, mo_bi_orth_bipole_z , (mo_num,mo_num)]
+ BEGIN_DOC
+ ! array of the integrals of Left MO_i * x Right MO_j
+ ! array of the integrals of Left MO_i * y Right MO_j
+ ! array of the integrals of Left MO_i * z Right MO_j
+ END_DOC
+ implicit none
+
+  call ao_to_mo_bi_ortho(                                                     &
+      ao_dipole_x,                                                   &
+      size(ao_dipole_x,1),                                           &
+      mo_bi_orth_bipole_x,                                                   &
+      size(mo_bi_orth_bipole_x,1)                                            &
+      )
+  call ao_to_mo_bi_ortho(                                                     &
+      ao_dipole_y,                                                   &
+      size(ao_dipole_y,1),                                           &
+      mo_bi_orth_bipole_y,                                                   &
+      size(mo_bi_orth_bipole_y,1)                                            &
+      )
+  call ao_to_mo_bi_ortho(                                                     &
+      ao_dipole_z,                                                   &
+      size(ao_dipole_z,1),                                           &
+      mo_bi_orth_bipole_z,                                                   &
+      size(mo_bi_orth_bipole_z,1)                                            &
+      )
+
+END_PROVIDER
+
diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
new file mode 100644
index 00000000..4694a998
--- /dev/null
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -0,0 +1,318 @@
+
+! ---
+
+! TODO :: optimization : transform into a DGEMM
+
+BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu, (mo_num, mo_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! mo_v_ki_bi_ortho_erf_rk_cst_mu(k,i,ip) = int dr chi_k(r) phi_i(r) (erf(mu |r - R_ip|) - 1 )/(2|r - R_ip|) on the BI-ORTHO MO basis 
+  ! 
+  ! where phi_k(r) is a LEFT MOs and phi_i(r) is a RIGHT MO
+  !
+  ! R_ip = the "ip"-th point of the DFT Grid
+  !
+  END_DOC
+
+  implicit none
+  integer :: ipoint
+ !$OMP PARALLEL         &
+ !$OMP DEFAULT (NONE)   &
+ !$OMP PRIVATE (ipoint) & 
+ !$OMP SHARED (n_points_final_grid,v_ij_erf_rk_cst_mu,mo_v_ki_bi_ortho_erf_rk_cst_mu)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    call ao_to_mo_bi_ortho( v_ij_erf_rk_cst_mu            (1,1,ipoint), size(v_ij_erf_rk_cst_mu,             1) &
+                          , mo_v_ki_bi_ortho_erf_rk_cst_mu(1,1,ipoint), size(mo_v_ki_bi_ortho_erf_rk_cst_mu, 1) )
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  mo_v_ki_bi_ortho_erf_rk_cst_mu = mo_v_ki_bi_ortho_erf_rk_cst_mu * 0.5d0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_points_final_grid, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/(2|r - R|) on the BI-ORTHO MO basis
+  !
+  END_DOC
+
+  implicit none
+  integer :: ipoint, i, j
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do ipoint = 1, n_points_final_grid
+        mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,j,i) = mo_v_ki_bi_ortho_erf_rk_cst_mu(j,i,ipoint)
+      enddo
+    enddo
+  enddo
+
+! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu
+
+END_PROVIDER 
+
+! ---
+
+! TODO :: optimization : transform into a DGEMM
+
+BEGIN_PROVIDER [ double precision, mo_x_v_ki_bi_ortho_erf_rk_cst_mu, (mo_num, mo_num, 3, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! mo_x_v_ki_bi_ortho_erf_rk_cst_mu(k,i,m,ip) = int dr x(m) * chi_k(r) phi_i(r) (erf(mu |r - R_ip|) - 1)/2|r - R_ip| on the BI-ORTHO MO basis 
+  !
+  ! where chi_k(r)/phi_i(r) are left/right MOs, m=1 => x(m) = x, m=2 => x(m) = y, m=3 => x(m) = z,
+  !
+  ! R_ip = the "ip"-th point of the DFT Grid
+  !
+  END_DOC
+
+  implicit none
+  integer :: ipoint
+
+ !$OMP PARALLEL         &
+ !$OMP DEFAULT (NONE)   &
+ !$OMP PRIVATE (ipoint) & 
+ !$OMP SHARED (n_points_final_grid,x_v_ij_erf_rk_cst_mu_transp,mo_x_v_ki_bi_ortho_erf_rk_cst_mu)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+
+    call ao_to_mo_bi_ortho( x_v_ij_erf_rk_cst_mu_transp     (1,1,1,ipoint), size(x_v_ij_erf_rk_cst_mu_transp,      1) &
+                          , mo_x_v_ki_bi_ortho_erf_rk_cst_mu(1,1,1,ipoint), size(mo_x_v_ki_bi_ortho_erf_rk_cst_mu, 1) )
+    call ao_to_mo_bi_ortho( x_v_ij_erf_rk_cst_mu_transp     (1,1,2,ipoint), size(x_v_ij_erf_rk_cst_mu_transp,      1) &
+                          , mo_x_v_ki_bi_ortho_erf_rk_cst_mu(1,1,2,ipoint), size(mo_x_v_ki_bi_ortho_erf_rk_cst_mu, 1) )
+    call ao_to_mo_bi_ortho( x_v_ij_erf_rk_cst_mu_transp     (1,1,3,ipoint), size(x_v_ij_erf_rk_cst_mu_transp,      1) &
+                          , mo_x_v_ki_bi_ortho_erf_rk_cst_mu(1,1,3,ipoint), size(mo_x_v_ki_bi_ortho_erf_rk_cst_mu, 1) )
+
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  mo_x_v_ki_bi_ortho_erf_rk_cst_mu = 0.5d0 * mo_x_v_ki_bi_ortho_erf_rk_cst_mu
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, n_points_final_grid)]
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: wall0, wall1
+
+  print *, ' providing int2_grad1_u12_ao_transp ...'
+  call wall_time(wall0)
+
+  if(test_cycle_tc)then
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3)
+       enddo
+     enddo
+   enddo
+  else
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3)
+       enddo
+     enddo
+   enddo
+  endif
+  call wall_time(wall1)
+  print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)]
+
+  implicit none
+  integer :: ipoint
+  double precision :: wall0, wall1
+
+  !print *, ' providing int2_grad1_u12_bimo_transp'
+
+  call wall_time(wall0)
+  !$OMP PARALLEL         &
+  !$OMP DEFAULT (NONE)   &
+  !$OMP PRIVATE (ipoint) & 
+  !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
+  !$OMP DO SCHEDULE (dynamic)
+   do ipoint = 1, n_points_final_grid
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3, mo_num, mo_num )]
+ implicit none
+ integer          :: i, j, ipoint
+ do ipoint = 1, n_points_final_grid
+   do i = 1, mo_num
+     do j = 1, mo_num
+      int2_grad1_u12_bimo_t(ipoint,1,j,i) = int2_grad1_u12_bimo_transp(j,i,1,ipoint)
+      int2_grad1_u12_bimo_t(ipoint,2,j,i) = int2_grad1_u12_bimo_transp(j,i,2,ipoint)
+      int2_grad1_u12_bimo_t(ipoint,3,j,i) = int2_grad1_u12_bimo_transp(j,i,3,ipoint)
+     enddo                                  
+   enddo
+ enddo
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)]
+
+  implicit none
+  integer :: i, j, ipoint
+
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, ao_num
+        int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(j,i,ipoint,1)
+        int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(j,i,ipoint,2)
+        int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(j,i,ipoint,3)
+      enddo                                  
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_points_final_grid, 3, mo_num, mo_num)]
+
+  implicit none
+  integer :: i, j, ipoint
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do ipoint = 1, n_points_final_grid
+        mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,1,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,1,ipoint)
+        mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,2,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,2,ipoint)
+        mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,3,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,3,ipoint)
+      enddo
+    enddo
+  enddo
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk, (n_points_final_grid, 3, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! x_W_ki_bi_ortho_erf_rk(ip,m,k,i) = \int dr chi_k(r) \frac{(1 - erf(mu |r-R_ip|))}{2|r-R_ip|} (x(m)-R_ip(m)) phi_i(r) ON THE BI-ORTHO MO BASIS 
+  !
+  ! where chi_k(r)/phi_i(r) are left/right MOs, m=1 => X(m) = x, m=2 => X(m) = y, m=3 => X(m) = z,
+  !
+  ! R_ip = the "ip"-th point of the DFT Grid
+  END_DOC
+ 
+  implicit none
+  include 'constants.include.F'
+ 
+  integer          :: ipoint, m, i, k
+  double precision :: xyz
+  double precision :: wall0, wall1
+ 
+  print*, ' providing x_W_ki_bi_ortho_erf_rk ...'
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (ipoint,m,i,k,xyz) & 
+ !$OMP SHARED (x_W_ki_bi_ortho_erf_rk,n_points_final_grid,mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_num,final_grid_points) 
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do m = 1, 3
+        do ipoint = 1, n_points_final_grid
+          xyz = final_grid_points(m,ipoint)
+          x_W_ki_bi_ortho_erf_rk(ipoint,m,k,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,m,k,i) - xyz * mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ ! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu_transp 
+ ! FREE mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp
+
+  call wall_time(wall1)
+  print *, ' time to provide x_W_ki_bi_ortho_erf_rk = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk_diag, (n_points_final_grid, 3, mo_num)]
+  BEGIN_DOC
+  ! x_W_ki_bi_ortho_erf_rk_diag(ip,m,i) = \int dr chi_i(r) (1 - erf(mu |r-R_ip|)) (x(m)-X(m)_ip) phi_i(r) ON THE BI-ORTHO MO BASIS 
+!
+! where chi_k(r)/phi_i(r) are left/right MOs, m=1 => X(m) = x, m=2 => X(m) = y, m=3 => X(m) = z,
+!
+! R_ip = the "ip"-th point of the DFT Grid
+  END_DOC
+
+  implicit none
+  include 'constants.include.F'
+ 
+  integer          :: ipoint, m, i
+  double precision :: xyz
+  double precision :: wall0, wall1
+ 
+  print*,'providing x_W_ki_bi_ortho_erf_rk_diag ...'
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (ipoint,m,i,xyz) & 
+ !$OMP SHARED (x_W_ki_bi_ortho_erf_rk_diag,n_points_final_grid,mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_num,final_grid_points) 
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do m = 1, 3
+      do ipoint = 1, n_points_final_grid
+        xyz = final_grid_points(m,ipoint)
+        x_W_ki_bi_ortho_erf_rk_diag(ipoint,m,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,m,i,i) - xyz * mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,i,i)
+      enddo
+    enddo
+  enddo
+
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print*,'time to provide x_W_ki_bi_ortho_erf_rk_diag = ',wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f
new file mode 100644
index 00000000..4d21cb93
--- /dev/null
+++ b/src/bi_ort_ints/three_body_ijm.irp.f
@@ -0,0 +1,366 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator ON A BI ORTHONORMAL BASIS for the direct terms 
+  !
+  ! three_e_3_idx_direct_bi_ort(m,j,i) = <mji|-L|mji>
+  ! 
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_direct_bi_ort = 0.d0
+  print *, ' Providing the three_e_3_idx_direct_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, m, j, i, integral)
+        three_e_3_idx_direct_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_direct_bi_ort(m,j,i) = three_e_3_idx_direct_bi_ort(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_direct_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator ON A BI ORTHONORMAL BASIS for the first cyclic permutation 
+  !
+  ! three_e_3_idx_cycle_1_bi_ort(m,j,i) = <mji|-L|jim>
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_cycle_1_bi_ort = 0.d0
+  print *, ' Providing the three_e_3_idx_cycle_1_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_cycle_1_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, j, i, m, integral)
+        three_e_3_idx_cycle_1_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_cycle_1_bi_ort(m,j,i) = three_e_3_idx_cycle_1_bi_ort(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_cycle_1_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator ON A BI ORTHONORMAL BASIS for the second cyclic permutation 
+  !
+  ! three_e_3_idx_direct_bi_ort(m,j,i) = <mji|-L|imj>
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_cycle_2_bi_ort = 0.d0
+  print *, ' Providing the three_e_3_idx_cycle_2_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_cycle_2_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, i, m, j, integral)
+        three_e_3_idx_cycle_2_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_cycle_2_bi_ort(m,j,i) = three_e_3_idx_cycle_2_bi_ort(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_cycle_2_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_exch23_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator ON A BI ORTHONORMAL BASIS for the permutations of particle 2 and 3
+  !
+  ! three_e_3_idx_exch23_bi_ort(m,j,i) = <mji|-L|jmi>
+  ! 
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_exch23_bi_ort = 0.d0
+  print*,'Providing the three_e_3_idx_exch23_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_exch23_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, j, m, i, integral)
+        three_e_3_idx_exch23_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_exch23_bi_ort(m,j,i) = three_e_3_idx_exch23_bi_ort(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_exch23_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_exch13_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator ON A BI ORTHONORMAL BASIS for the permutations of particle 1 and 3
+  !
+  ! three_e_3_idx_exch13_bi_ort(m,j,i) = <mji|-L|ijm>
+  ! 
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer :: i,j,m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_exch13_bi_ort = 0.d0
+  print *, ' Providing the three_e_3_idx_exch13_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_exch13_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, i, j, m,integral)
+        three_e_3_idx_exch13_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_exch13_bi_ort(m,j,i) = three_e_3_idx_exch13_bi_ort(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_exch13_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator ON A BI ORTHONORMAL BASIS for the permutations of particle 1 and 2
+  !
+  ! three_e_3_idx_exch12_bi_ort(m,j,i) = <mji|-L|mij>
+  ! 
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_exch12_bi_ort = 0.d0
+  print *, ' Providing the three_e_3_idx_exch12_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_exch12_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, m, i, j, integral)
+        three_e_3_idx_exch12_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_exch12_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort_new, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator ON A BI ORTHONORMAL BASIS for the permutations of particle 1 and 2
+  !
+  ! three_e_3_idx_exch12_bi_ort_new(m,j,i) = <mji|-L|mij>
+  ! 
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_exch12_bi_ort_new = 0.d0
+  print *, ' Providing the three_e_3_idx_exch12_bi_ort_new ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_exch12_bi_ort_new)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, m, i, j, integral)
+        three_e_3_idx_exch12_bi_ort_new(m,j,i) = -1.d0 * integral 
+    enddo
+   enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_exch12_bi_ort_new(m,j,i) = three_e_3_idx_exch12_bi_ort_new(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_exch12_bi_ort_new', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f
new file mode 100644
index 00000000..853972f7
--- /dev/null
+++ b/src/bi_ort_ints/three_body_ijmk.irp.f
@@ -0,0 +1,284 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_direct_bi_ort(m,j,k,i) = <mjk|-L|mji> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+ implicit none
+ integer          :: i, j, k, m
+ double precision :: integral, wall1, wall0
+
+  three_e_4_idx_direct_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_direct_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral)
+          three_e_4_idx_direct_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_direct_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = <mjk|-L|jim> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0
+
+  three_e_4_idx_cycle_1_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_cycle_1_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral)
+          three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_cycle_1_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! --
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = <mjk|-L|imj> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0
+
+  three_e_4_idx_cycle_2_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_cycle_2_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral)
+          three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_cycle_2_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_exch23_bi_ort(m,j,k,i) = <mjk|-L|jmi> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0
+
+  three_e_4_idx_exch23_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_exch23_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral)
+          three_e_4_idx_exch23_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_exch13_bi_ort(m,j,k,i) = <mjk|-L|ijm> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0
+
+  three_e_4_idx_exch13_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_exch13_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral)
+          three_e_4_idx_exch13_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_exch13_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! 
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_exch12_bi_ort(m,j,k,i) = <mjk|-L|mij> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  ! 
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0
+
+  three_e_4_idx_exch12_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_exch12_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral)
+          three_e_4_idx_exch12_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_exch12_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f
new file mode 100644
index 00000000..bd5c4977
--- /dev/null
+++ b/src/bi_ort_ints/three_body_ijmkl.irp.f
@@ -0,0 +1,296 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = <mlk|-L|mji> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_direct_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_direct_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral)
+            three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = <mlk|-L|jim> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_cycle_1_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral)
+            three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = <mlk|-L|imj> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_cycle_2_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          do l = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral)
+            three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = <mlk|-L|jmi> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_exch23_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_exch23_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral)
+            three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = <mlk|-L|ijm> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_exch13_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_exch13_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral)
+            three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = <mlk|-L|mij> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_exch12_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_exch12_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral)
+            three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
new file mode 100644
index 00000000..48fa84f7
--- /dev/null
+++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
@@ -0,0 +1,207 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+ BEGIN_DOC
+! matrix element of the -L  three-body operator 
+!
+! notice the -1 sign: in this way three_body_ints_bi_ort can be directly used to compute Slater rules :)
+ END_DOC
+
+ implicit none
+ integer          :: i, j, k, l, m, n
+ double precision :: integral, wall1, wall0
+ character*(128)  :: name_file 
+
+  three_body_ints_bi_ort = 0.d0
+  print *, ' Providing the three_body_ints_bi_ort ...'
+  call wall_time(wall0)
+  name_file = 'six_index_tensor'
+
+! if(read_three_body_ints_bi_ort)then
+!  call read_fcidump_3_tc(three_body_ints_bi_ort)
+! else
+!  if(read_three_body_ints_bi_ort)then
+!   print*,'Reading three_body_ints_bi_ort from disk ...'
+!   call read_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file)
+!  else
+
+  !provide x_W_ki_bi_ortho_erf_rk 
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                       &
+ !$OMP DEFAULT (NONE)                 &
+ !$OMP PRIVATE (i,j,k,l,m,n,integral) & 
+ !$OMP SHARED (mo_num,three_body_ints_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, mo_num
+        do k = 1, mo_num
+          do l = 1, mo_num
+            do n = 1, mo_num
+              call give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
+
+              three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral 
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+!  endif
+! endif
+
+  call wall_time(wall1)
+  print *, ' wall time for three_body_ints_bi_ort', wall1 - wall0
+! if(write_three_body_ints_bi_ort)then
+!  print*,'Writing three_body_ints_bi_ort on disk ...'
+!  call write_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file)
+!  call ezfio_set_three_body_ints_bi_ort_io_three_body_ints_bi_ort("Read")
+! endif
+
+END_PROVIDER 
+
+! ---
+
+subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
+
+  BEGIN_DOC
+  !
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n, l, k, m, j, i
+  double precision, intent(out) :: integral
+  integer                       :: ipoint
+  double precision              :: weight
+
+  integral = 0.d0
+  do ipoint = 1, n_points_final_grid
+    weight = final_weight_at_r_vector(ipoint)                                                                          
+
+    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i)        & 
+              * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) &
+                + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) &
+                + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) )
+    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j)        & 
+              * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) &
+                + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) &
+                + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) )
+    integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m)        &
+              * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) &
+                + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) &
+                + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) )
+
+  enddo
+
+end subroutine give_integrals_3_body_bi_ort
+
+! ---
+
+subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral)
+
+  BEGIN_DOC
+  !
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n, l, k, m, j, i
+  double precision, intent(out) :: integral
+  integer                       :: ipoint
+  double precision              :: weight
+
+  integral = 0.d0
+  do ipoint = 1, n_points_final_grid
+    weight = final_weight_at_r_vector(ipoint)                                                                          
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & 
+!              * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j)  )
+!    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & 
+!              * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i)  )
+!    integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) &
+!              * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i)  )
+
+!    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & 
+!              * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,l,j,ipoint)        &
+!                + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,l,j,ipoint)        &
+!                + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,l,j,ipoint)        )
+!    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & 
+!              * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint)        &
+!                + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint)        &
+!                + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint)        )
+!    integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) &
+!              * ( int2_grad1_u12_bimo(1,l,j,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint)        &
+!                + int2_grad1_u12_bimo(2,l,j,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint)        &
+!                + int2_grad1_u12_bimo(3,l,j,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint)        )
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+
+    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i)        & 
+              * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(l,j,1,ipoint) &
+                + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(l,j,2,ipoint) &
+                + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(l,j,3,ipoint) )
+    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j)        & 
+              * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(k,i,1,ipoint) &
+                + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(k,i,2,ipoint) &
+                + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(k,i,3,ipoint) )
+    integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m)        &
+              * ( int2_grad1_u12_bimo_transp(l,j,1,ipoint) * int2_grad1_u12_bimo_transp(k,i,1,ipoint) &
+                + int2_grad1_u12_bimo_transp(l,j,2,ipoint) * int2_grad1_u12_bimo_transp(k,i,2,ipoint) &
+                + int2_grad1_u12_bimo_transp(l,j,3,ipoint) * int2_grad1_u12_bimo_transp(k,i,3,ipoint) )
+
+  enddo
+
+end subroutine give_integrals_3_body_bi_ort_old
+
+! ---
+
+subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral)
+
+  BEGIN_DOC
+  !
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n, l, k, m, j, i
+  double precision, intent(out) :: integral
+  integer                       :: ipoint
+  double precision              :: weight
+
+  integral = 0.d0
+  do ipoint = 1, n_points_final_grid
+    weight = final_weight_at_r_vector(ipoint)                                                                          
+
+    integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) )
+    integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+    integral += weight * aos_in_r_array_transp(ipoint,n) * aos_in_r_array_transp(ipoint,m) &
+              * ( int2_grad1_u12_ao_t(ipoint,1,l,j) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,l,j) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,l,j) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+
+  enddo
+
+end subroutine give_integrals_3_body_bi_ort_ao
+
+! ---
diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f
new file mode 100644
index 00000000..78047d1b
--- /dev/null
+++ b/src/bi_ort_ints/total_twoe_pot.irp.f
@@ -0,0 +1,271 @@
+
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_two_e_vartc_tot, (ao_num, ao_num, ao_num, ao_num) ]
+
+  integer :: i, j, k, l
+
+  provide j1b_type
+  provide mo_r_coef mo_l_coef
+
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          ao_two_e_vartc_tot(k,i,l,j) = ao_vartc_int_chemist(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+! ---
+
+BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! ao_two_e_tc_tot(k,i,l,j) = (ki|V^TC(r_12)|lj) = <lk| V^TC(r_12) |ji> where V^TC(r_12) is the total TC operator 
+  !
+  ! including both hermitian and non hermitian parts. THIS IS IN CHEMIST NOTATION. 
+  !
+  ! WARNING :: non hermitian ! acts on "the right functions" (i,j)
+  !
+  END_DOC
+
+  integer                    :: i, j, k, l
+  double precision           :: integral_sym, integral_nsym
+  double precision, external :: get_ao_tc_sym_two_e_pot
+
+  provide j1b_type
+
+  if(j1b_type .eq. 3) then
+
+    do j = 1, ao_num
+      do l = 1, ao_num
+        do i = 1, ao_num
+          do k = 1, ao_num
+            ao_two_e_tc_tot(k,i,l,j) = ao_tc_int_chemist(k,i,l,j)
+            !write(222,*) ao_two_e_tc_tot(k,i,l,j) 
+          enddo
+        enddo
+      enddo
+    enddo
+
+  else
+
+    PROVIDE ao_tc_sym_two_e_pot_in_map
+
+    do j = 1, ao_num
+      do l = 1, ao_num
+        do i = 1, ao_num
+          do k = 1, ao_num
+
+            integral_sym  = get_ao_tc_sym_two_e_pot(i, j, k, l, ao_tc_sym_two_e_pot_map)
+            ! ao_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the AO basis
+            integral_nsym = ao_non_hermit_term_chemist(k,i,l,j)
+
+            !print *, ' sym     integ = ', integral_sym
+            !print *, ' non-sym integ = ', integral_nsym
+
+            ao_two_e_tc_tot(k,i,l,j) = integral_sym + integral_nsym 
+            !write(111,*) ao_two_e_tc_tot(k,i,l,j) 
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+END_PROVIDER 
+
+! ---
+
+double precision function bi_ortho_mo_ints(l, k, j, i)
+
+  BEGIN_DOC
+  !
+  ! <mo^L_k mo^L_l | V^TC(r_12) | mo^R_i mo^R_j>
+  !
+  ! WARNING :: very naive, super slow, only used to DEBUG.
+  !
+  END_DOC
+
+  implicit none
+  integer, intent(in) :: i, j, k, l
+  integer             :: m, n, p, q
+
+  bi_ortho_mo_ints = 0.d0
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do q = 1, ao_num
+          !                                   p1h1p2h2   l1                  l2              r1               r2
+          bi_ortho_mo_ints += ao_two_e_tc_tot(n,q,m,p) * mo_l_coef(m,l) * mo_l_coef(n,k) * mo_r_coef(p,j) * mo_r_coef(q,i)
+        enddo
+      enddo
+    enddo
+  enddo
+
+end function bi_ortho_mo_ints
+
+! ---
+
+! TODO :: transform into DEGEMM
+
+BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! mo_bi_ortho_tc_two_e_chemist(k,i,l,j) = <k l|V(r_12)|i j> where i,j are right MOs and k,l are left MOs
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l, m, n, p, q
+  double precision, allocatable :: mo_tmp_1(:,:,:,:), mo_tmp_2(:,:,:,:)
+
+  allocate(mo_tmp_1(mo_num,ao_num,ao_num,ao_num))
+  mo_tmp_1 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do q = 1, ao_num
+          do k = 1, mo_num
+            !       (k n|p m)    = sum_q c_qk * (q n|p m)
+            mo_tmp_1(k,n,p,m) += mo_l_coef_transp(k,q) * ao_two_e_tc_tot(q,n,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  allocate(mo_tmp_2(mo_num,mo_num,ao_num,ao_num))
+  mo_tmp_2 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            !       (k i|p m) = sum_n c_ni * (k n|p m)
+            mo_tmp_2(k,i,p,m) += mo_r_coef_transp(i,n) * mo_tmp_1(k,n,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_1)
+
+  allocate(mo_tmp_1(mo_num,mo_num,mo_num,ao_num))
+  mo_tmp_1 = 0.d0
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_tmp_1(k,i,l,m) += mo_l_coef_transp(l,p) * mo_tmp_2(k,i,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_2)
+
+  mo_bi_ortho_tc_two_e_chemist = 0.d0 
+  do m = 1, ao_num
+    do j = 1, mo_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_bi_ortho_tc_two_e_chemist(k,i,l,j) += mo_r_coef_transp(j,m) * mo_tmp_1(k,i,l,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_1)
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! mo_bi_ortho_tc_two_e(k,l,i,j) = <k l| V(r_12) |i j> where i,j are right MOs and k,l are left MOs
+  !
+  ! the potential V(r_12) contains ALL TWO-E CONTRIBUTION OF THE TC-HAMILTONIAN
+  !
+  END_DOC
+
+  implicit none
+  integer :: i, j, k, l
+
+  do j = 1, mo_num
+    do i = 1, mo_num
+      do l = 1, mo_num
+        do k = 1, mo_num
+           !              < k l | V12 | i j >                          (k i|l j)
+           mo_bi_ortho_tc_two_e(k,l,i,j) = mo_bi_ortho_tc_two_e_chemist(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+
+ BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_exchange, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_anti, (mo_num,mo_num) ]
+  implicit none
+  BEGIN_DOC
+  ! mo_bi_ortho_tc_two_e_jj(i,j) = J_ij = <ji|W-K|ji>
+  ! mo_bi_ortho_tc_two_e_jj_exchange(i,j) = K_ij = <ij|W-K|ji>
+  ! mo_bi_ortho_tc_two_e_jj_anti(i,j) = J_ij - K_ij
+  END_DOC
+
+  integer                        :: i,j
+  double precision               :: get_two_e_integral
+
+  mo_bi_ortho_tc_two_e_jj = 0.d0
+  mo_bi_ortho_tc_two_e_jj_exchange = 0.d0
+
+  do i=1,mo_num
+    do j=1,mo_num
+      mo_bi_ortho_tc_two_e_jj(i,j) = mo_bi_ortho_tc_two_e(j,i,j,i)
+      mo_bi_ortho_tc_two_e_jj_exchange(i,j) = mo_bi_ortho_tc_two_e(i,j,j,i)
+      mo_bi_ortho_tc_two_e_jj_anti(i,j) = mo_bi_ortho_tc_two_e_jj(i,j) - mo_bi_ortho_tc_two_e_jj_exchange(i,j)
+    enddo
+  enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals, (mo_num,mo_num, mo_num)]
+&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals,(mo_num,mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! tc_2e_3idx_coulomb_integrals(j,k,i)  = <jk|ji> 
+ !
+ ! tc_2e_3idx_exchange_integrals(j,k,i) = <kj|ji> 
+ END_DOC
+ integer :: i,j,k,l
+ double precision :: get_two_e_integral
+ double precision :: integral
+
+ do i = 1, mo_num
+  do k = 1, mo_num
+   do j = 1, mo_num
+     tc_2e_3idx_coulomb_integrals(j, k,i) = mo_bi_ortho_tc_two_e(j ,k ,j ,i ) 
+     tc_2e_3idx_exchange_integrals(j,k,i) = mo_bi_ortho_tc_two_e(k ,j ,j ,i ) 
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER
diff --git a/src/bi_ortho_mos/EZFIO.cfg b/src/bi_ortho_mos/EZFIO.cfg
new file mode 100644
index 00000000..9b06a655
--- /dev/null
+++ b/src/bi_ortho_mos/EZFIO.cfg
@@ -0,0 +1,11 @@
+[mo_r_coef]
+type: double precision
+doc: right-coefficient of the i-th |AO| on the j-th |MO|
+interface: ezfio
+size: (ao_basis.ao_num,mo_basis.mo_num)
+
+[mo_l_coef]
+type: double precision
+doc: right-coefficient of the i-th |AO| on the j-th |MO|
+interface: ezfio
+size: (ao_basis.ao_num,mo_basis.mo_num)
diff --git a/src/bi_ortho_mos/NEED b/src/bi_ortho_mos/NEED
new file mode 100644
index 00000000..2a2196e5
--- /dev/null
+++ b/src/bi_ortho_mos/NEED
@@ -0,0 +1,3 @@
+mo_basis
+becke_numerical_grid
+dft_utils_in_r
diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f
new file mode 100644
index 00000000..2dad9485
--- /dev/null
+++ b/src/bi_ortho_mos/bi_density.irp.f
@@ -0,0 +1,70 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, TCSCF_bi_ort_dm_ao_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao_alpha(i,j) = <Chi_0| a^dagger_i,alpha a_j,alpha |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the alpha density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
+  implicit none
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0               &
+            , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
+            !, mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, TCSCF_bi_ort_dm_ao_alpha, size(TCSCF_bi_ort_dm_ao_alpha, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao_beta(i,j) = <Chi_0| a^dagger_i,beta a_j,beta |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
+  implicit none
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
+            , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
+            !, mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao(i,j) = <Chi_0| a^dagger_i,beta+alpha a_j,beta+alpha |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the total electronic density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
+  implicit none
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_alpha, 1))
+
+  if(elec_alpha_num==elec_beta_num) then
+    TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_alpha
+  else
+    ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_beta, 1))
+    TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_beta
+  endif
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f
new file mode 100644
index 00000000..42130575
--- /dev/null
+++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f
@@ -0,0 +1,137 @@
+
+! TODO: left & right MO without duplicate AO calculation
+
+! ---
+
+BEGIN_PROVIDER[double precision, mos_r_in_r_array, (mo_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! mos_in_r_array(i,j) = value of the ith RIGHT mo on the jth grid point
+  END_DOC
+
+  implicit none
+  integer          :: i, j
+  double precision :: mos_array(mo_num), r(3)
+
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i, j, r, mos_array) & 
+ !$OMP SHARED (mos_r_in_r_array, n_points_final_grid, mo_num, final_grid_points)
+  do i = 1, n_points_final_grid
+    r(1) = final_grid_points(1,i)
+    r(2) = final_grid_points(2,i)
+    r(3) = final_grid_points(3,i)
+    call give_all_mos_r_at_r(r, mos_array)
+    do j = 1, mo_num
+      mos_r_in_r_array(j,i) = mos_array(j)
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+ 
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, mo_num)]
+
+  BEGIN_DOC
+  ! mos_r_in_r_array_transp(i,j) = value of the jth mo on the ith grid point
+  END_DOC
+
+  implicit none
+  integer :: i,j
+
+  do i = 1, n_points_final_grid
+    do j = 1, mo_num
+      mos_r_in_r_array_transp(i,j) = mos_r_in_r_array(j,i) 
+    enddo
+  enddo
+ 
+END_PROVIDER
+
+! ---
+
+subroutine give_all_mos_r_at_r(r, mos_r_array)
+
+  BEGIN_DOC
+  ! mos_r_array(i) = ith RIGHT MO function evaluated at "r"
+  END_DOC
+
+  implicit none
+  double precision, intent(in)  :: r(3)
+  double precision, intent(out) :: mos_r_array(mo_num)
+  double precision              :: aos_array(ao_num)
+
+  call give_all_aos_at_r(r, aos_array)
+  call dgemv('N', mo_num, ao_num, 1.d0, mo_r_coef_transp, mo_num, aos_array, 1, 0.d0, mos_r_array, 1)
+
+end subroutine give_all_mos_r_at_r
+
+! ---
+
+BEGIN_PROVIDER[double precision, mos_l_in_r_array, (mo_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! mos_in_r_array(i,j) = value of the ith LEFT mo on the jth grid point
+  END_DOC
+
+  implicit none
+  integer          :: i, j
+  double precision :: mos_array(mo_num), r(3)
+
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i,r,mos_array,j) & 
+ !$OMP SHARED(mos_l_in_r_array,n_points_final_grid,mo_num,final_grid_points)
+  do i = 1, n_points_final_grid
+    r(1) = final_grid_points(1,i)
+    r(2) = final_grid_points(2,i)
+    r(3) = final_grid_points(3,i)
+    call give_all_mos_l_at_r(r, mos_array)
+    do j = 1, mo_num
+      mos_l_in_r_array(j,i) = mos_array(j)
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+ 
+END_PROVIDER
+
+! ---
+
+subroutine give_all_mos_l_at_r(r, mos_l_array)
+
+  BEGIN_DOC
+  ! mos_l_array(i) = ith LEFT MO function evaluated at "r"
+  END_DOC
+
+  implicit none
+  double precision, intent(in)  :: r(3)
+  double precision, intent(out) :: mos_l_array(mo_num)
+  double precision              :: aos_array(ao_num)
+
+  call give_all_aos_at_r(r, aos_array)
+  call dgemv('N', mo_num, ao_num, 1.d0, mo_l_coef_transp, mo_num, aos_array, 1, 0.d0, mos_l_array, 1)
+
+end subroutine give_all_mos_l_at_r
+
+! ---
+
+BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo_num)]
+
+  BEGIN_DOC
+  ! mos_l_in_r_array_transp(i,j) = value of the jth mo on the ith grid point
+  END_DOC
+
+  implicit none
+  integer :: i, j
+
+  do i = 1, n_points_final_grid
+    do j = 1, mo_num
+      mos_l_in_r_array_transp(i,j) = mos_l_in_r_array(j,i) 
+    enddo
+  enddo
+ 
+END_PROVIDER
+
+! ---
+
diff --git a/src/bi_ortho_mos/grad_bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/grad_bi_ort_mos_in_r.irp.f
new file mode 100644
index 00000000..5478fa5c
--- /dev/null
+++ b/src/bi_ortho_mos/grad_bi_ort_mos_in_r.irp.f
@@ -0,0 +1,100 @@
+ BEGIN_PROVIDER[double precision, mos_r_grad_in_r_array,(mo_num,n_points_final_grid,3)]
+ implicit none
+ BEGIN_DOC
+ ! mos_r_grad_in_r_array(i,j,k)          = value of the kth component of the gradient of ith RIGHT mo on the jth grid point
+ !
+ ! k = 1 : x, k= 2, y, k  3, z
+ END_DOC
+ integer :: m
+ mos_r_grad_in_r_array = 0.d0
+ do m=1,3
+  call dgemm('N','N',mo_num,n_points_final_grid,ao_num,1.d0,mo_r_coef_transp,mo_num,aos_grad_in_r_array(1,1,m),ao_num,0.d0,mos_r_grad_in_r_array(1,1,m),mo_num)
+ enddo
+ END_PROVIDER
+
+ BEGIN_PROVIDER[double precision, mos_r_grad_in_r_array_transp,(3,mo_num,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! mos_r_grad_in_r_array_transp(i,j,k)   = value of the kth component of the gradient of jth RIGHT mo on the ith grid point
+ !
+ ! k = 1 : x, k= 2, y, k  3, z
+ END_DOC
+ integer :: m
+ integer  :: i,j
+ mos_r_grad_in_r_array_transp = 0.d0
+ do i = 1, n_points_final_grid
+  do j = 1, mo_num
+   do m = 1, 3
+     mos_r_grad_in_r_array_transp(m,j,i) = mos_r_grad_in_r_array(j,i,m)
+   enddo
+  enddo
+ enddo                                                                                                                                                                                 
+ END_PROVIDER
+
+ BEGIN_PROVIDER[double precision, mos_r_grad_in_r_array_transp_bis,(3,n_points_final_grid,mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! mos_r_grad_in_r_array_transp(i,j,k)   = value of the ith component of the gradient on the jth grid point of jth RIGHT MO 
+ END_DOC
+ integer :: m
+ integer  :: i,j
+ mos_r_grad_in_r_array_transp_bis = 0.d0
+ do j = 1, mo_num
+  do i = 1, n_points_final_grid
+   do m = 1, 3
+     mos_r_grad_in_r_array_transp_bis(m,i,j) = mos_r_grad_in_r_array(j,i,m)
+   enddo
+  enddo
+ enddo                                                                                                                                                                                 
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER[double precision, mos_l_grad_in_r_array,(mo_num,n_points_final_grid,3)]
+ implicit none
+ BEGIN_DOC
+ ! mos_l_grad_in_r_array(i,j,k)          = value of the kth component of the gradient of ith RIGHT mo on the jth grid point
+ !
+ ! k = 1 : x, k= 2, y, k  3, z
+ END_DOC
+ integer :: m
+ mos_l_grad_in_r_array = 0.d0
+ do m=1,3
+  call dgemm('N','N',mo_num,n_points_final_grid,ao_num,1.d0,mo_r_coef_transp,mo_num,aos_grad_in_r_array(1,1,m),ao_num,0.d0,mos_l_grad_in_r_array(1,1,m),mo_num)
+ enddo
+ END_PROVIDER
+
+ BEGIN_PROVIDER[double precision, mos_l_grad_in_r_array_transp,(3,mo_num,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! mos_l_grad_in_r_array_transp(i,j,k)   = value of the kth component of the gradient of jth RIGHT mo on the ith grid point
+ !
+ ! k = 1 : x, k= 2, y, k  3, z
+ END_DOC
+ integer :: m
+ integer  :: i,j
+ mos_l_grad_in_r_array_transp = 0.d0
+ do i = 1, n_points_final_grid
+  do j = 1, mo_num
+   do m = 1, 3
+     mos_l_grad_in_r_array_transp(m,j,i) = mos_l_grad_in_r_array(j,i,m)
+   enddo
+  enddo
+ enddo                                                                                                                                                                                 
+ END_PROVIDER
+
+ BEGIN_PROVIDER[double precision, mos_l_grad_in_r_array_transp_bis,(3,n_points_final_grid,mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! mos_l_grad_in_r_array_transp(i,j,k)   = value of the ith component of the gradient on the jth grid point of jth RIGHT MO 
+ END_DOC
+ integer :: m
+ integer  :: i,j
+ mos_l_grad_in_r_array_transp_bis = 0.d0
+ do j = 1, mo_num
+  do i = 1, n_points_final_grid
+   do m = 1, 3
+     mos_l_grad_in_r_array_transp_bis(m,i,j) = mos_l_grad_in_r_array(j,i,m)
+   enddo
+  enddo
+ enddo                                                                                                                                                                                 
+ END_PROVIDER
diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f
new file mode 100644
index 00000000..d51999fc
--- /dev/null
+++ b/src/bi_ortho_mos/mos_rl.irp.f
@@ -0,0 +1,224 @@
+
+! ---
+
+subroutine ao_to_mo_bi_ortho(A_ao, LDA_ao, A_mo, LDA_mo)
+
+  BEGIN_DOC
+  !
+  ! Transform A from the |AO| basis to the BI ORTHONORMAL MOS 
+  !
+  ! $C_L^\dagger.A_{ao}.C_R$ where C_L and C_R are the LEFT and RIGHT MO coefs
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: LDA_ao, LDA_mo
+  double precision, intent(in)  :: A_ao(LDA_ao,ao_num)
+  double precision, intent(out) :: A_mo(LDA_mo,mo_num)
+  double precision, allocatable :: T(:,:)
+
+  allocate ( T(ao_num,mo_num) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+
+  ! T = A_ao x mo_r_coef
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0      &
+            , A_ao, LDA_ao, mo_r_coef, size(mo_r_coef, 1) &
+            , 0.d0, T, size(T, 1) )
+
+  ! A_mo = mo_l_coef.T x T
+  call dgemm( 'T', 'N', mo_num, mo_num, ao_num, 1.d0       &
+            , mo_l_coef, size(mo_l_coef, 1), T, size(T, 1) &
+            , 0.d0, A_mo, LDA_mo )
+
+!  call restore_symmetry(mo_num,mo_num,A_mo,size(A_mo,1),1.d-12)
+  deallocate(T)
+
+end subroutine ao_to_mo_bi_ortho
+
+! ---
+
+subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao)
+
+  BEGIN_DOC
+  !
+  ! mo_l_coef.T x     A_ao   x mo_r_coef = A_mo
+  ! mo_l_coef.T x ao_overlap x mo_r_coef =  I
+  !
+  ! ==> A_ao = (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: LDA_ao, LDA_mo
+  double precision, intent(in)  :: A_mo(LDA_mo,mo_num)
+  double precision, intent(out) :: A_ao(LDA_ao,ao_num)
+  double precision, allocatable :: tmp_1(:,:), tmp_2(:,:)
+
+  ! ao_overlap x mo_r_coef
+  allocate( tmp_1(ao_num,mo_num) )
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0                         &
+            , ao_overlap, size(ao_overlap, 1), mo_r_coef, size(mo_r_coef, 1) &
+            , 0.d0, tmp_1, size(tmp_1, 1) )
+
+  ! (ao_overlap x mo_r_coef) x A_mo
+  allocate( tmp_2(ao_num,mo_num) )
+  call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0 &
+            , tmp_1, size(tmp_1, 1), A_mo, LDA_mo    &
+            , 0.d0, tmp_2, size(tmp_2, 1) )
+  
+  ! ao_overlap x mo_l_coef
+  tmp_1 = 0.d0
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0                         &
+            , ao_overlap, size(ao_overlap, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, tmp_1, size(tmp_1, 1) )
+
+  ! (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T
+  call dgemm( 'N', 'T', ao_num, ao_num, mo_num, 1.d0       &
+            , tmp_2, size(tmp_2, 1), tmp_1, size(tmp_1, 1) &
+            , 0.d0, A_ao, LDA_ao )
+  
+  deallocate(tmp_1, tmp_2)
+
+end subroutine mo_to_ao_bi_ortho
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_r_coef, (ao_num, mo_num) ]
+
+  BEGIN_DOC
+  !
+  ! Molecular right-orbital coefficients on |AO| basis set
+  !
+  END_DOC
+
+  implicit none
+  integer :: i, j
+  logical :: exists
+
+  PROVIDE ezfio_filename
+
+  if (mpi_master) then
+    call ezfio_has_bi_ortho_mos_mo_r_coef(exists)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read mo_r_coef with MPI'
+    endif
+  IRP_ENDIF
+
+  if (exists) then
+    if (mpi_master) then
+      call ezfio_get_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      write(*,*) 'Read mo_r_coef'
+    endif
+    IRP_IF MPI
+      call MPI_BCAST(mo_r_coef, mo_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read mo_r_coef with MPI'
+      endif
+    IRP_ENDIF
+  else
+
+    print*, 'mo_r_coef are mo_coef'
+    do i = 1, mo_num
+      do j = 1, ao_num
+        mo_r_coef(j,i) = mo_coef(j,i)
+      enddo
+    enddo
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_l_coef, (ao_num, mo_num) ]
+
+  BEGIN_DOC
+  !
+  ! Molecular left-orbital coefficients on |AO| basis set
+  !
+  END_DOC
+
+  implicit none
+  integer :: i, j
+  logical :: exists
+
+  PROVIDE ezfio_filename
+
+  if (mpi_master) then
+    call ezfio_has_bi_ortho_mos_mo_l_coef(exists)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read mo_l_coef with MPI'
+    endif
+  IRP_ENDIF
+
+  if (exists) then
+    if (mpi_master) then
+      call ezfio_get_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      write(*,*) 'Read mo_l_coef'
+    endif
+    IRP_IF MPI
+      call MPI_BCAST(mo_l_coef, mo_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read mo_l_coef with MPI'
+      endif
+    IRP_ENDIF
+  else
+
+    print*, 'mo_l_coef are mo_coef'
+    do i = 1, mo_num
+      do j = 1, ao_num
+        mo_l_coef(j,i) = mo_coef(j,i)
+      enddo
+    enddo
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_r_coef_transp, (mo_num, ao_num)]
+
+  implicit none
+  integer :: j, m
+  do j = 1, mo_num
+    do m = 1, ao_num
+      mo_r_coef_transp(j,m) = mo_r_coef(m,j)
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_l_coef_transp, (mo_num, ao_num)]
+
+  implicit none
+  integer :: j, m
+  do j = 1, mo_num
+    do m = 1, ao_num
+      mo_l_coef_transp(j,m) = mo_l_coef(m,j)
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+
diff --git a/src/bi_ortho_mos/overlap.irp.f b/src/bi_ortho_mos/overlap.irp.f
new file mode 100644
index 00000000..d7f45c94
--- /dev/null
+++ b/src/bi_ortho_mos/overlap.irp.f
@@ -0,0 +1,160 @@
+
+
+ BEGIN_PROVIDER [ double precision, overlap_bi_ortho, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_diag_bi_ortho, (mo_num)]
+
+  BEGIN_DOC
+  ! Overlap matrix between the RIGHT and LEFT MOs. Should be the identity matrix 
+  END_DOC
+
+  implicit none
+  integer                       :: i, k, m, n
+  double precision              :: accu_d, accu_nd 
+  double precision, allocatable :: tmp(:,:)
+  
+ ! TODO : re do the DEGEMM
+
+  overlap_bi_ortho = 0.d0
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do m = 1, ao_num
+        do n = 1, ao_num
+          overlap_bi_ortho(k,i) += ao_overlap(n,m) * mo_l_coef(n,k) * mo_r_coef(m,i)
+        enddo
+      enddo
+    enddo
+  enddo
+
+!  allocate( tmp(mo_num,ao_num) )
+!
+!  ! tmp <-- L.T x S_ao
+!  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                         & 
+!            , mo_l_coef, size(mo_l_coef, 1), ao_overlap, size(ao_overlap, 1) &
+!            , 0.d0, tmp, size(tmp, 1) )
+!
+!  ! S <-- tmp x R
+!  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0           & 
+!            , tmp, size(tmp, 1), mo_r_coef, size(mo_r_coef, 1) &
+!            , 0.d0, overlap_bi_ortho, size(overlap_bi_ortho, 1) )
+!
+!  deallocate( tmp )
+
+  do i = 1, mo_num
+    overlap_diag_bi_ortho(i) = overlap_bi_ortho(i,i)
+  enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, mo_num
+    do k = 1, mo_num
+      if(i==k) then
+        accu_d += dabs(overlap_bi_ortho(k,i))
+      else
+        accu_nd += dabs(overlap_bi_ortho(k,i))
+      endif
+    enddo 
+  enddo
+  accu_d = accu_d/dble(mo_num)
+  accu_nd = accu_nd/dble(mo_num**2-mo_num)
+  if(dabs(accu_d-1.d0).gt.1.d-10.or.dabs(accu_nd).gt.1.d-10)then
+    print*,'Warning !!!'
+    print*,'Average trace of overlap_bi_ortho is different from 1 by ', dabs(accu_d-1.d0)
+    print*,'And bi orthogonality is off by an average of ',accu_nd
+    print*,'****************'
+    print*,'Overlap matrix betwee mo_l_coef and mo_r_coef  '
+    do i = 1, mo_num
+      write(*,'(100(F16.10,X))')overlap_bi_ortho(i,:)
+    enddo
+  endif
+  print*,'Average trace of overlap_bi_ortho (should be 1.)'
+  print*,'accu_d  = ',accu_d
+  print*,'Sum of off diagonal terms of overlap_bi_ortho (should be zero)'
+  print*,'accu_nd = ',accu_nd
+  print*,'****************'
+ 
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, overlap_mo_r, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_mo_l, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! overlap_mo_r_mo(j,i) = <MO_i|MO_R_j>
+  END_DOC
+
+  implicit none
+  integer :: i, j, p, q
+
+  overlap_mo_r = 0.d0
+  overlap_mo_l = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do p = 1, ao_num
+        do q = 1, ao_num
+          overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) 
+          overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, overlap_mo_r_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_mo_l_mo, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! overlap_mo_r_mo(j,i) = <MO_j|MO_R_i>
+  END_DOC
+
+  implicit none
+  integer :: i, j, p, q
+
+  overlap_mo_r_mo = 0.d0
+  overlap_mo_l_mo = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do p = 1, ao_num
+        do q = 1, ao_num
+          overlap_mo_r_mo(j,i) += mo_coef(p,j) * mo_r_coef(q,i) * ao_overlap(q,p)
+          overlap_mo_l_mo(j,i) += mo_coef(p,j) * mo_l_coef(q,i) * ao_overlap(q,p)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, angle_left_right, (mo_num)]
+&BEGIN_PROVIDER [ double precision, max_angle_left_right]
+
+  BEGIN_DOC
+  ! angle_left_right(i) = angle between the left-eigenvector chi_i and the right-eigenvector phi_i
+  END_DOC
+
+  implicit none
+  integer          :: i, j
+  double precision :: left, right, arg
+  double precision :: angle(mo_num)
+
+  do i = 1, mo_num
+    left  = overlap_mo_l(i,i)
+    right = overlap_mo_r(i,i)
+    arg = min(overlap_bi_ortho(i,i)/(left*right),1.d0)
+    arg = max(arg, -1.d0)
+    angle_left_right(i) = dacos(arg) * 180.d0/dacos(-1.d0)
+  enddo
+
+  angle(1:mo_num) = dabs(angle_left_right(1:mo_num))
+  max_angle_left_right = maxval(angle)
+
+END_PROVIDER 
+
+! ---
+
+
diff --git a/src/ccsd/80.ccsd_spin.bats b/src/ccsd/80.ccsd_spin.bats
new file mode 100644
index 00000000..0b616871
--- /dev/null
+++ b/src/ccsd/80.ccsd_spin.bats
@@ -0,0 +1,225 @@
+#!/usr/bin/env bats
+
+source $QP_ROOT/tests/bats/common.bats.sh
+source $QP_ROOT/quantum_package.rc
+
+
+function run() {
+  thresh1=1e-6
+  thresh2=1e-6
+  test_exe scf || skip
+  qp set_file $1
+  qp edit --check
+  #qp run scf
+  qp set_frozen_core
+  qp set utils_cc cc_par_t true
+  qp set utils_cc cc_thresh_conv 1e-12
+  file="$(echo $1 | sed 's/.ezfio//g')"
+  qp run ccsd_spin_orb | tee $file.ccsd.out
+  energy1="$(grep 'E(CCSD)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')"
+  energy2="$(grep 'E(T)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')"
+  #rm $file.ccsd.out
+  eq $energy1 $2 $thresh1
+  eq $energy2 $3 $thresh2
+}
+
+@test "b2_stretched" {
+run  b2_stretched.ezfio -49.136487344382 -0.003497589175
+}
+
+@test "be" {
+run  be.ezfio -14.623559003577 -0.000230982022
+}
+
+@test "c2h2" {
+run  c2h2.ezfio -12.394008897618 -0.010790491561
+}
+
+@test "ch4" {
+run  ch4.ezfio -40.390721785799 -0.004476100282
+}
+
+@test "clf" {
+run  clf.ezfio -559.186562904081 -0.006577143392
+}
+
+@test "clo" {
+run  clo.ezfio -534.564874409332 -0.007584571424
+}
+
+@test "co2" {
+run  co2.ezfio -188.129602527766 -0.018040668885
+}
+
+@test "dhno" {
+run  dhno.ezfio -130.816650109473 -0.012197331453
+}
+
+@test "f2" {
+run  f2.ezfio -199.287826338097 -0.017592872692
+}
+
+@test "f" {
+run  f.ezfio -99.616644511121 -0.003624525307
+}
+
+@test "h2o2" {
+run  h2o2.ezfio -151.182552729963 -0.009511682086
+}
+
+@test "h2o" {
+run  h2o.ezfio -76.237710276526 -0.003001800577
+}
+
+@test "h2s" {
+run  h2s.ezfio -398.861214015390 -0.003300559757
+}
+
+@test "h3coh" {
+run  h3coh.ezfio -115.221296424969 -0.003566171432
+}
+
+@test "hbo" {
+run  hbo.ezfio -100.213539770415 -0.006851489212
+}
+
+@test "hcn" {
+run  hcn.ezfio -93.190247992657 -0.013418135043
+}
+
+@test "hco" {
+run  hco.ezfio -113.405413962350 -0.007973455337
+}
+
+@test "lif" {
+run  lif.ezfio -107.270402903250 -0.007742969005
+}
+
+@test "n2" {
+run  n2.ezfio -109.355358930472 -0.018477744342
+}
+
+@test "n2h4" {
+run  n2h4.ezfio -111.556885923139 -0.009048077008
+}
+
+@test "nh3" {
+run  nh3.ezfio -56.465503060954 -0.007638273755
+}
+
+@test "oh" {
+run  oh.ezfio -75.614606132774 -0.004126661739
+}
+
+@test "sih2_3b1" {
+run  sih2_3b1.ezfio -290.016780973072 -0.000497825874
+}
+
+@test "sih3" {
+run  sih3.ezfio -5.575343504534 -0.002094123268
+}
+
+@test "so" {
+run  so.ezfio -26.035945178665 -0.010594351274
+}
+
+#@test "b2_stretched" {
+#run  b2_stretched.ezfio -49.136487344382 -49.139984933557
+#}
+#
+#@test "be" {
+#run  be.ezfio -14.623559003577 -14.623789985599
+#}
+#
+#@test "c2h2" {
+#run  c2h2.ezfio -12.394008897618 -12.404799389179
+#}
+#
+#@test "ch4" {
+#run  ch4.ezfio -40.390721784961 -40.395197884406
+#}
+#
+#@test "clf" {
+#run  clf.ezfio -559.186562906072 -559.193140046904
+#}
+#
+#@test "clo" {
+#run  clo.ezfio -534.564874409333 -534.572458980757
+#}
+#
+#@test "co2" {
+#run  co2.ezfio -188.129602511724 -188.147643198675
+#}
+#
+#@test "dhno" {
+#run  dhno.ezfio -130.816650109473 -130.828847440925
+#}
+#
+#@test "f2" {
+#run  f2.ezfio -199.287826338097 -199.305419210789
+#}
+#
+#@test "f" {
+#run  f.ezfio -99.616644511120 -99.620269036428
+#}
+#
+#@test "h2o2" {
+#run  h2o2.ezfio -151.182552729963 -151.192064412049
+#}
+#
+#@test "h2o" {
+#run  h2o.ezfio -76.237710276526 -76.240712077103
+#}
+#
+#@test "h2s" {
+#run  h2s.ezfio -398.861214015416 -398.864514575146
+#}
+#
+#@test "h3coh" {
+#run  h3coh.ezfio -115.221296424969 -115.224862596401
+#}
+#
+#@test "hbo" {
+#run  hbo.ezfio -100.213539770415 -100.220391259627
+#}
+#
+#@test "hcn" {
+#run  hcn.ezfio -93.190247983000 -93.203666131216
+#}
+#
+#@test "hco" {
+#run  hco.ezfio -113.405413962350 -113.413387417687
+#}
+#
+#@test "lif" {
+#run  lif.ezfio -107.270402903211 -107.278145872216
+#}
+#
+#@test "n2" {
+#run  n2.ezfio -109.355358930472 -109.373836674814
+#}
+#
+#@test "n2h4" {
+#run  n2h4.ezfio -111.556885922642 -111.565934000556
+#}
+#
+#@test "nh3" {
+#run  nh3.ezfio -56.465503060954 -56.473141334709
+#}
+#
+#@test "oh" {
+#run  oh.ezfio -75.614606131897 -75.618732794235
+#}
+#
+#@test "sih2_3b1" {
+#run  sih2_3b1.ezfio -290.016780973071 -290.017278798946
+#}
+#
+#@test "sih3" {
+#run  sih3.ezfio -5.575343504534 -5.577437627802
+#}
+#
+#@test "so" {
+#run  so.ezfio -26.035945181998 -26.046539528491
+#}
+
diff --git a/src/ccsd/81.ccsd_space.bats b/src/ccsd/81.ccsd_space.bats
new file mode 100644
index 00000000..02e8e987
--- /dev/null
+++ b/src/ccsd/81.ccsd_space.bats
@@ -0,0 +1,225 @@
+#!/usr/bin/env bats
+
+source $QP_ROOT/tests/bats/common.bats.sh
+source $QP_ROOT/quantum_package.rc
+
+
+function run() {
+  thresh1=1e-6
+  thresh2=1e-6
+  test_exe scf || skip
+  qp set_file $1
+  qp edit --check
+  #qp run scf
+  qp set_frozen_core
+  qp set utils_cc cc_par_t true
+  qp set utils_cc cc_thresh_conv 1e-12
+  file="$(echo $1 | sed 's/.ezfio//g')"
+  qp run ccsd_space_orb | tee $file.ccsd.out
+  energy1="$(grep 'E(CCSD)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')"
+  energy2="$(grep 'E(T)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')"
+  #rm $file.ccsd.out
+  eq $energy1 $2 $thresh1
+  eq $energy2 $3 $thresh2
+}
+
+@test "b2_stretched" {
+run  b2_stretched.ezfio -49.136487344382 -0.003497589175
+}
+
+@test "be" {
+run  be.ezfio -14.623559003577 -0.000230982022
+}
+
+@test "c2h2" {
+run  c2h2.ezfio -12.394008897618 -0.010790491561
+}
+
+@test "ch4" {
+run  ch4.ezfio -40.390721785799 -0.004476100282
+}
+
+@test "clf" {
+run  clf.ezfio -559.186562904081 -0.006577143392
+}
+
+#@test "clo" {
+#run  clo.ezfio -534.564874409332 -0.007584571424
+#}
+
+@test "co2" {
+run  co2.ezfio -188.129602527766 -0.018040668885
+}
+
+#@test "dhno" {
+#run  dhno.ezfio -130.816650109473 -0.012197331453
+#}
+
+@test "f2" {
+run  f2.ezfio -199.287826338097 -0.017592872692
+}
+
+#@test "f" {
+#run  f.ezfio -99.616644511121 -0.003624525307
+#}
+
+@test "h2o2" {
+run  h2o2.ezfio -151.182552729963 -0.009511682086
+}
+
+@test "h2o" {
+run  h2o.ezfio -76.237710276526 -0.003001800577
+}
+
+@test "h2s" {
+run  h2s.ezfio -398.861214015390 -0.003300559757
+}
+
+@test "h3coh" {
+run  h3coh.ezfio -115.221296424969 -0.003566171432
+}
+
+@test "hbo" {
+run  hbo.ezfio -100.213539770415 -0.006851489212
+}
+
+@test "hcn" {
+run  hcn.ezfio -93.190247992657 -0.013418135043
+}
+
+#@test "hco" {
+#run  hco.ezfio -113.405413962350 -0.007973455337
+#}
+
+@test "lif" {
+run  lif.ezfio -107.270402903250 -0.007742969005
+}
+
+@test "n2" {
+run  n2.ezfio -109.355358930472 -0.018477744342
+}
+
+@test "n2h4" {
+run  n2h4.ezfio -111.556885923139 -0.009048077008
+}
+
+@test "nh3" {
+run  nh3.ezfio -56.465503060954 -0.007638273755
+}
+
+#@test "oh" {
+#run  oh.ezfio -75.614606132774 -0.004126661739
+#}
+
+#@test "sih2_3b1" {
+#run  sih2_3b1.ezfio -290.016780973072 -0.000497825874
+#}
+
+#@test "sih3" {
+#run  sih3.ezfio -5.575343504534 -0.002094123268
+#}
+
+#@test "so" {
+#run  so.ezfio -26.035945178665 -0.010594351274
+#}
+
+#@test "b2_stretched" {
+#run  b2_stretched.ezfio -49.136487344382 -49.139984933557
+#}
+#
+#@test "be" {
+#run  be.ezfio -14.623559003577 -14.623789985599
+#}
+#
+#@test "c2h2" {
+#run  c2h2.ezfio -12.394008897618 -12.404799389179
+#}
+#
+#@test "ch4" {
+#run  ch4.ezfio -40.390721784961 -40.395197884406
+#}
+#
+#@test "clf" {
+#run  clf.ezfio -559.186562906072 -559.193140046904
+#}
+#
+##@test "clo" {
+##run  clo.ezfio -534.564874409333 -534.572458980757
+##}
+#
+#@test "co2" {
+#run  co2.ezfio -188.129602511724 -188.147643198675
+#}
+#
+##@test "dhno" {
+##run  dhno.ezfio -130.816650109473 -130.828847440925
+##}
+#
+#@test "f2" {
+#run  f2.ezfio -199.287826338097 -199.305419210789
+#}
+#
+##@test "f" {
+##run  f.ezfio -99.616644511120 -99.620269036428
+##}
+#
+#@test "h2o2" {
+#run  h2o2.ezfio -151.182552729963 -151.192064412049
+#}
+#
+#@test "h2o" {
+#run  h2o.ezfio -76.237710276526 -76.240712077103
+#}
+#
+#@test "h2s" {
+#run  h2s.ezfio -398.861214015416 -398.864514575146
+#}
+#
+#@test "h3coh" {
+#run  h3coh.ezfio -115.221296424969 -115.224862596401
+#}
+#
+#@test "hbo" {
+#run  hbo.ezfio -100.213539770415 -100.220391259627
+#}
+#
+#@test "hcn" {
+#run  hcn.ezfio -93.190247983000 -93.203666131216
+#}
+#
+##@test "hco" {
+##run  hco.ezfio -113.405413962350 -113.413387417687
+##}
+#
+#@test "lif" {
+#run  lif.ezfio -107.270402903211 -107.278145872216
+#}
+#
+#@test "n2" {
+#run  n2.ezfio -109.355358930472 -109.373836674814
+#}
+#
+#@test "n2h4" {
+#run  n2h4.ezfio -111.556885922642 -111.565934000556
+#}
+#
+#@test "nh3" {
+#run  nh3.ezfio -56.465503060954 -56.473141334709
+#}
+#
+##@test "oh" {
+##run  oh.ezfio -75.614606131897 -75.618732794235
+##}
+#
+##@test "sih2_3b1" {
+##run  sih2_3b1.ezfio -290.016780973071 -290.017278798946
+##}
+#
+##@test "sih3" {
+##run  sih3.ezfio -5.575343504534 -5.577437627802
+##}
+#
+##@test "so" {
+##run  so.ezfio -26.035945181998 -26.046539528491
+##}
+
diff --git a/src/ccsd/NEED b/src/ccsd/NEED
new file mode 100644
index 00000000..e6e6bc59
--- /dev/null
+++ b/src/ccsd/NEED
@@ -0,0 +1,2 @@
+hartree_fock
+utils_cc
diff --git a/src/ccsd/README.md b/src/ccsd/README.md
new file mode 100644
index 00000000..fa59e8a6
--- /dev/null
+++ b/src/ccsd/README.md
@@ -0,0 +1,31 @@
+# CCSD in spin orbitals and spatial orbitals
+
+CCSD and CCSD(T) in spin orbitals for open and closed shell systems.
+CCSD and CCSD(T) in spatial orbitals for closed shell systems.
+
+## Calculations
+The program will automatically choose the version in spin or spatial orbitals
+To run the general program:  
+```
+qp run ccsd
+```
+Nevertheless, you can enforce the run in spin orbitals with
+```
+qp run ccsd_spin_orb
+```
+
+## Settings
+The settings can be changed with:
+```
+qp set utils_cc cc_#param #val
+```
+For more informations on the settings, look at the module utils_cc and its documentation. 
+
+## Org files
+The org files are stored in the directory org in order to avoid overwriting on user changes.
+The org files can be modified, to export the change to the source code, run
+```
+./TANGLE_org_mode.sh and
+mv *.irp.f ../.
+```
+
diff --git a/src/ccsd/ccsd.irp.f b/src/ccsd/ccsd.irp.f
new file mode 100644
index 00000000..035f50b8
--- /dev/null
+++ b/src/ccsd/ccsd.irp.f
@@ -0,0 +1,18 @@
+program ccsd
+
+  implicit none
+
+  BEGIN_DOC
+  ! CCSD program
+  END_DOC
+
+  read_wf = .True.
+  touch read_wf
+
+  if (.not. cc_ref_is_open_shell) then
+    call run_ccsd_space_orb
+  else 
+    call run_ccsd_spin_orb
+  endif
+
+end
diff --git a/src/ccsd/ccsd_space_orb.irp.f b/src/ccsd/ccsd_space_orb.irp.f
new file mode 100644
index 00000000..53028ec0
--- /dev/null
+++ b/src/ccsd/ccsd_space_orb.irp.f
@@ -0,0 +1,12 @@
+! Code
+
+program ccsd
+
+  implicit none
+
+  read_wf = .True.
+  touch read_wf
+
+  call run_ccsd_space_orb
+  
+end
diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f
new file mode 100644
index 00000000..b63375cf
--- /dev/null
+++ b/src/ccsd/ccsd_space_orb_sub.irp.f
@@ -0,0 +1,2078 @@
+subroutine run_ccsd_space_orb
+  
+  implicit none
+
+  integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d
+  integer :: u,v,gam,beta,tmp_gam,tmp_beta
+  integer :: nb_iter
+  double precision :: get_two_e_integral
+  double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb
+  logical :: not_converged
+
+  double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:)
+  double precision, allocatable :: t1(:,:), r1(:,:)
+  double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:)
+  
+  double precision, allocatable :: all_err(:,:), all_t(:,:)
+  integer, allocatable          :: list_occ(:), list_vir(:)
+  integer(bit_kind)             :: det(N_int,2)
+  integer                       :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4)
+  
+  PROVIDE mo_two_e_integrals_in_map
+
+  det = psi_det(:,:,cc_ref)
+  print*,'Reference determinant:'
+  call print_det(det,N_int)
+
+  ! Extract number of occ/vir alpha/beta spin orbitals
+  !call extract_n_spin(det,n_spin)
+  nOa = cc_nOa !n_spin(1)
+  nOb = cc_nOb !n_spin(2)
+  nVa = cc_nVa !n_spin(3)
+  nVb = cc_nVb !n_spin(4)
+
+  ! Check that the reference is a closed shell determinant
+  if (cc_ref_is_open_shell) then
+    call abort
+  endif
+  
+  ! Number of occ/vir spatial orb
+  nO = nOa
+  nV = nVa
+  
+  allocate(list_occ(nO),list_vir(nV))
+  list_occ = cc_list_occ
+  list_vir = cc_list_vir
+  ! Debug
+  !call extract_list_orb_space(det,nO,nV,list_occ,list_vir)
+  !print*,'occ',list_occ
+  !print*,'vir',list_vir
+  
+  allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV))
+  allocate(tau(nO,nO,nV,nV))
+  allocate(t1(nO,nV), r1(nO,nV))
+  allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO))
+
+  if (cc_update_method == 'diis') then
+    allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth))
+    all_err = 0d0
+    all_t   = 0d0
+  endif
+
+  if (elec_alpha_num /= elec_beta_num) then
+    print*, 'Only for closed shell systems'
+    print*, 'elec_alpha_num=',elec_alpha_num
+    print*, 'elec_beta_num =',elec_beta_num
+    print*, 'abort'
+    call abort
+  endif
+
+  ! Init
+  call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1)
+  call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2)
+  call update_tau_space(nO,nV,t1,t2,tau)
+  !print*,'hf_energy', hf_energy
+  call det_energy(det,uncorr_energy)
+  print*,'Det energy', uncorr_energy
+  call ccsd_energy_space(nO,nV,tau,t1,energy)
+  print*,'Guess energy', uncorr_energy+energy, energy
+  
+  nb_iter = 0
+  not_converged = .True.
+  max_r1 = 0d0
+  max_r2 = 0d0
+
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+  write(*,'(A77)') ' |   It.  |       E(CCSD) (Ha) | Correlation (Ha) |  Conv. T1  |  Conv. T2  |'
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+  call wall_time(ta)
+  
+  do while (not_converged)
+    
+    call compute_H_oo(nO,nV,t1,t2,tau,H_oo)
+    call compute_H_vv(nO,nV,t1,t2,tau,H_vv)
+    call compute_H_vo(nO,nV,t1,t2,H_vo)
+
+    ! Residue
+    call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
+    call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
+    max_r = max(max_r1,max_r2)
+    
+    ! Update
+    if (cc_update_method == 'diis') then
+      !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+      !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+      call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t)
+
+    ! Standard update as T = T - Delta
+    elseif (cc_update_method == 'none') then
+      call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1)
+      call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2)
+    else
+      print*,'Unkonw cc_method_method: '//cc_update_method
+    endif
+
+    call update_tau_space(nO,nV,t1,t2,tau)
+
+    ! Energy
+    call ccsd_energy_space(nO,nV,tau,t1,energy)
+    write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
+
+    nb_iter = nb_iter + 1
+    if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then
+      not_converged = .False.
+    endif
+    
+  enddo
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+  call wall_time(tb)
+  print*,'Time: ',tb-ta, ' s'
+  print*,''
+  if (max_r < cc_thresh_conv) then
+    write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations'
+  else
+    write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations'
+  endif
+  print*,''
+  write(*,'(A15,F18.12,A3)') ' E(CCSD)     = ', uncorr_energy+energy, ' Ha'
+  write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha'
+  write(*,'(A15,1pE10.2,A3)')' Conv        = ', max_r
+  print*,''
+
+  call write_t1(nO,nV,t1)
+  call write_t2(nO,nV,t2)
+  
+  ! Deallocation
+  if (cc_update_method == 'diis') then
+    deallocate(all_err,all_t)
+  endif
+
+  deallocate(H_vv,H_oo,H_vo,r1,r2,tau)
+  
+  ! CCSD(T)
+  double precision :: e_t
+
+  if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then 
+
+    ! Dumb way
+    !call wall_time(ta)
+    !call ccsd_par_t_space(nO,nV,t1,t2,e_t)
+    !call wall_time(tb)
+    !print*,'Time: ',tb-ta, ' s'
+
+    !print*,''
+    !write(*,'(A15,F18.12,A3)') ' E(CCSD(T))  = ', uncorr_energy + energy + e_t, ' Ha'
+    !write(*,'(A15,F18.12,A3)') ' E(T)        = ', e_t, ' Ha'
+    !write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
+    !print*,''
+
+    ! New
+    print*,'Computing (T) correction...'
+    call wall_time(ta)
+    call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
+         ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
+    call wall_time(tb)
+    print*,'Time: ',tb-ta, ' s'
+
+    print*,''
+    write(*,'(A15,F18.12,A3)') ' E(CCSD(T))  = ', uncorr_energy + energy + e_t, ' Ha'
+    write(*,'(A15,F18.12,A3)') ' E(T)        = ', e_t, ' Ha'
+    write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
+    print*,''
+  endif
+  
+  print*,'Reference determinant:'
+  call print_det(det,N_int)
+
+  deallocate(t1,t2)
+
+end
+
+! Energy
+
+subroutine ccsd_energy_space(nO,nV,tau,t1,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: tau(nO,nO,nV,nV)
+  double precision, intent(in)  :: t1(nO,nV)
+  double precision, intent(out) :: energy
+
+  ! internal
+  integer :: i,j,a,b
+  double precision :: e
+
+  energy = 0d0
+  !$omp parallel &
+  !$omp shared(nO,nV,energy,tau,t1,&
+  !$omp cc_space_f_vo,cc_space_w_oovv) &
+  !$omp private(i,j,a,b,e) &
+  !$omp default(none)
+  e = 0d0
+  !$omp do
+  do i = 1, nO
+    do a = 1, nV
+      e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
+    enddo
+  enddo
+  !$omp end do nowait
+  !$omp do
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b)
+       enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  !$omp critical
+  energy = energy + e
+  !$omp end critical
+  !$omp end parallel
+  
+end
+
+! Tau
+
+subroutine update_tau_space(nO,nV,t1,t2,tau)
+
+  implicit none
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV)
+
+  ! out
+  double precision, intent(out) :: tau(nO,nO,nV,nV)
+
+  ! internal
+  integer                       :: i,j,a,b
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,tau,t2,t1) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+    
+end
+
+! R1
+
+subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
+
+  implicit none
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
+  double precision, intent(in)  :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)
+
+  ! out
+  double precision, intent(out) :: r1(nO,nV), max_r1
+  
+  ! internal
+  integer                       :: u,i,j,beta,a,b
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r1,cc_space_f_ov) &
+  !$omp private(u,beta) &
+  !$omp default(none)
+  !$omp do
+  do beta = 1, nV
+    do u = 1, nO
+      r1(u,beta) = cc_space_f_ov(u,beta)
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a)
+  ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO)
+  ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV)
+  ! cc_space_f_vo(a,i) * t1(u,a)    -> X1(nO,nO), O(nO*nO*nV)
+  ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV)  
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      do a = 1, nV
+  !        r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a)
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_oo(:,:)
+  allocate(X_oo(nO,nO))
+  call dgemm('N','N', nO, nO, nV, &
+             -2d0, t1    , size(t1,1), &
+                   cc_space_f_vo, size(cc_space_f_vo,1), &
+              0d0, X_oo  , size(X_oo,1))
+
+  call dgemm('T','N', nO, nV, nO, &
+             1d0, X_oo, size(X_oo,2), &
+                  t1  , size(t1,1), &
+             1d0, r1  , size(r1,1)) 
+  deallocate(X_oo)
+
+  ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a)
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do a = 1, nV
+  !      r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a)
+  !    enddo
+  !  enddo
+  !enddo
+  call dgemm('N','N', nO, nV, nV, &
+             1d0, t1  , size(t1,1), &
+                  H_vv, size(H_vv,1), &
+             1d0, r1  , size(r1,1))
+
+  ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta)
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta)
+  !    enddo
+  !  enddo
+  !enddo
+  call dgemm('N','N', nO, nV, nO, &
+             -1d0, H_oo, size(H_oo,1), &
+                   t1  , size(t1,1), &
+              1d0, r1, size(r1,1))
+
+  !r1(u,beta) = r1(u,beta) + H_vo(a,i) * (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta))
+  ! <=>
+  ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * X(a,i,u,beta)
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      do a = 1, nV
+  !        r1(u,beta) = r1(u,beta) + H_vo(a,i) * &
+  !        (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta))
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_voov(:,:,:,:)
+  allocate(X_voov(nV, nO, nO, nV))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,X_voov,t2,t1) &
+  !$omp private(u,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do u = 1, nO
+      do i = 1, nO
+        do a = 1, nV
+          X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemv('T', nV*nO, nO*nV, &
+             1d0, X_voov, size(X_voov,1) * size(X_voov,2), &
+                  H_vo  , 1, &
+             1d0, r1    , 1)
+  
+  deallocate(X_voov)
+
+  ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a)
+  ! <=> 
+  ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta)
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      do a = 1, nV
+  !        r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a)
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_ovov(:,:,:,:)
+  allocate(X_ovov(nO, nV, nO, nV))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) &
+  !$omp private(u,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do u = 1, nO
+      do a = 1, nv
+        do i = 1, nO
+          X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemv('T', nO*nV, nO*nV, &
+             1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
+                  t1     , 1, &
+             1d0, r1     , 1)
+  
+  deallocate(X_ovov)
+
+  ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b)  
+  ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) 
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      do a = 1, nV
+  !        do b = 1, nV
+  !          r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b)  
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:)
+  allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) &
+  !$omp private(b,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do i = 1, nO
+      do b = 1, nV
+        do a = 1, nV
+          W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)  
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do i = 1, nO
+    do b = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          T_vvoo(a,b,i,u) = tau(i,u,a,b)  
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  !$omp end parallel
+
+  call dgemm('T','N',nO,nV,nO*nV*nV, &
+             1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), &
+                  W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), &
+             1d0, r1    , size(r1,1))
+  
+  deallocate(W_vvov,T_vvoo)
+
+  ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) 
+  ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) 
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      do j = 1, nO
+  !        do a = 1, nV
+  !          r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) 
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: W_oovo(:,:,:,:)
+  allocate(W_oovo(nO,nO,nV,nO))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) &
+  !$omp private(u,a,i,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do u = 1, nO
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('T','N', nO, nV, nO*nO*nV, &
+             -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), &
+                   tau   , size(tau,1) * size(tau,2) * size(tau,3), &
+              1d0, r1    , size(r1,1))
+  
+  deallocate(W_oovo)
+
+  max_r1 = 0d0
+  do a = 1, nV
+    do i = 1, nO
+      if (dabs(r1(i,a)) > max_r1) then
+        max_r1 = dabs(r1(i,a))
+      endif
+    enddo
+  enddo
+
+  ! Change the sign for consistency with the code in spin orbitals
+  !$omp parallel &
+  !$omp shared(nO,nV,r1) &
+  !$omp private(a,i) &
+  !$omp default(none)
+  !$omp do 
+  do a = 1, nV
+    do i = 1, nO
+      r1(i,a) = -r1(i,a)
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+end
+
+! H_oo
+
+subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(in)  :: tau(nO, nO, nV, nV)
+  double precision, intent(out) :: H_oo(nO, nO)
+
+  integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u
+
+  !H_oo = 0d0
+
+  !do i = 1, nO
+  !  do u = 1, nO
+  !    H_oo(u,i) = cc_space_f_oo(u,i)
+
+  !    do j = 1, nO
+  !      do a = 1, nV
+  !        do b = 1, nV
+  !          !H_oo(u,i) = H_oo(u,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * tau(u,j,a,b)
+  !          !H_oo(u,i) = H_oo(u,i) + cc_space_w_vvoo(a,b,i,j) * tau(u,j,a,b)
+  !          H_oo(u,i) = H_oo(u,i) + cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b)
+  !        enddo
+  !      enddo
+  !    enddo
+  !    
+  !  enddo
+  !enddo
+
+  ! H_oo(u,i) = cc_space_f_oo(u,i)
+  !$omp parallel &
+  !$omp shared(nO,H_oo,cc_space_f_oo) &
+  !$omp private(i,u) &
+  !$omp default(none)
+  !$omp do
+  do i = 1, nO
+    do u = 1, nO
+      H_oo(u,i) = cc_space_f_oo(u,i)
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b)
+  ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b)
+  call dgemm('N','T', nO, nO, nO*nV*nV,       &
+             1d0, tau     , size(tau,1),      &
+                  cc_space_w_oovv, size(cc_space_w_oovv,1), &
+             1d0, H_oo    , size(H_oo,1))     
+  
+end
+
+! H_vv
+
+subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(in)  :: tau(nO, nO, nV, nV)
+  double precision, intent(out) :: H_vv(nV, nV)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta
+
+  !H_vv = 0d0
+
+  !do beta = 1, nV
+  !  do a = 1, nV
+  !    H_vv(a,beta) = cc_space_f_vv(a,beta)
+
+  !    do j = 1, nO
+  !      do i = 1, nO
+  !        do b = 1, nV
+  !          !H_vv(a,beta) = H_vv(a,beta) - (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(a,b,j,i)) * tau(i,j,beta,b)
+  !          H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b)
+  !        enddo
+  !      enddo
+  !    enddo
+  !    
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: tmp_tau(:,:,:,:)
+
+  allocate(tmp_tau(nV,nO,nO,nV))
+
+  ! H_vv(a,beta) = cc_space_f_vv(a,beta)
+  !$omp parallel &
+  !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) &
+  !$omp private(a,beta,i,j,b) &
+  !$omp default(none)
+  !$omp do
+  do beta = 1, nV
+    do a = 1, nV
+      H_vv(a,beta) = cc_space_f_vv(a,beta)
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b)
+  ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta)
+  
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do j = 1, nO
+      do i = 1, nO
+        do b = 1, nV
+          tmp_tau(b,i,j,beta) = tau(i,j,beta,b) 
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nV,nV,nO*nO*nV,           &
+             -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), &
+                   tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), &
+              1d0, H_vv    , size(H_vv,1))
+
+  deallocate(tmp_tau)
+  
+end
+
+! H_vo
+
+subroutine compute_H_vo(nO,nV,t1,t2,H_vo)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: H_vo(nV, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta
+
+  !H_vo = 0d0
+
+  !do i = 1, nO
+  !  do a = 1, nV
+  !    H_vo(a,i) = cc_space_f_vo(a,i)
+
+  !    do j = 1, nO
+  !      do b = 1, nV
+  !        !H_vo(a,i) = H_vo(a,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t1(j,b)
+  !        H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b)
+  !      enddo
+  !    enddo
+  !    
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: w(:,:,:,:)
+
+  allocate(w(nV,nO,nO,nV))
+
+  !$omp parallel &
+  !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) &
+  !$omp private(a,beta,i,j,b) &
+  !$omp default(none)
+  !$omp do
+  do i = 1, nO
+    do a = 1, nV
+      H_vo(a,i) = cc_space_f_vo(a,i)
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b)
+  ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b)
+
+  !$omp do collapse(3)
+  do b = 1, nV
+    do j = 1, nO
+      do i = 1, nO
+        do a = 1, nV
+          w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemv('N',nV*nO, nO*nV, &
+             1d0, w   , size(w,1) * size(w,2), &
+                  t1  , 1, &
+             1d0, H_vo, 1)
+
+  deallocate(w)
+  
+end
+
+! R2
+
+subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
+
+  implicit none
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
+  double precision, intent(in)  :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)
+
+  ! out
+  double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2
+
+  ! internal
+  double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:)
+  double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:)
+  integer                       :: u,v,i,j,beta,gam,a,b
+
+  allocate(g_occ(nO,nO), g_vir(nV,nV))
+  allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV))
+  allocate(A1(nO,nO,nO,nO))
+  
+  call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ)
+  call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir)
+  call compute_A1(nO,nV,t1,t2,tau,A1)
+  call compute_J1(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, &
+       cc_space_v_vvvo,cc_space_v_vvoo,J1)
+  call compute_K1(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, &
+       cc_space_v_ovov,cc_space_v_vvov,K1)
+
+  ! Residual
+  !r2 = 0d0
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,cc_space_v_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+         r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do j = 1, nO
+  !         do i = 1, nO
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           + A1(u,v,i,j) * tau(i,j,beta,gam)
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
+             1d0, A1, size(A1,1) * size(A1,2), &
+                  tau, size(tau,1) * size(tau,2), &
+             1d0, r2, size(r2,1) * size(r2,2))
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do a = 1, nV
+  !         do b = 1, nv
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           + B1(a,b,beta,gam) * tau(u,v,a,b)
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  allocate(B1(nV,nV,nV,nV))
+  call compute_B1(nO,nV,t1,t2,B1)
+  call dgemm('N','N',nO*nO,nV*nV,nV*nV, &
+             1d0, tau, size(tau,1) * size(tau,2), &
+                  B1 , size(B1,1) * size(B1,2), &
+             1d0, r2, size(r2,1) * size(r2,2))
+  deallocate(B1)
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do a = 1, nV
+  !         r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !         + g_vir(a,beta) * t2(u,v,a,gam) &
+  !         + g_vir(a,gam)  * t2(v,u,a,beta) ! P
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:)
+  allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,t2,X_oovv) &
+  !$omp private(u,v,gam,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do a = 1, nV
+    do gam = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          X_oovv(u,v,gam,a) = t2(u,v,gam,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemm('N','N',nO*nO*nV,nV,nV, &
+             1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), &
+                  g_vir, size(g_vir,1), &
+             0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Y_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do i = 1, nO
+  !         r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !         - g_occ(u,i) * t2(i,v,beta,gam) &
+  !         - g_occ(v,i) * t2(i,u,gam,beta) ! P
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  call dgemm('N','N',nO,nO*nV*nV,nO, &
+             1d0, g_occ , size(g_occ,1), &
+                  t2    , size(t2,1), &
+             0d0, X_oovv, size(X_oovv,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_oovv)
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !        do a = 1, nV
+  !          r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !          + cc_space_v_ovvv(u,a,beta,gam) * t1(v,a) &
+  !          + cc_space_v_ovvv(v,a,gam,beta) * t1(u,a) ! P
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: X_vovv(:,:,:,:)
+  allocate(X_vovv(nV,nO,nV,nV))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) &
+  !$omp private(u,a,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do u = 1, nO
+        do a = 1, nV
+          X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nO,nO*nV*nV,nV, &
+             1d0, t1    , size(t1,1), &
+                  X_vovv, size(X_vovv,1), &
+             0d0, Y_oovv, size(Y_oovv,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Y_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !        do a = 1, nV
+  !          do i = 1, nO
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           - cc_space_v_ovov(u,a,i,gam)  * t1(i,beta) * t1(v,a) &
+  !           - cc_space_v_ovov(v,a,i,beta) * t1(i,gam)  * t1(u,a) ! P
+  !          enddo
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:)
+  allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) &
+  !$omp private(u,v,gam,i) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do gam = 1, nV
+      do u = 1, nO
+        do a = 1, nV
+          X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nV*nO*nV,nV,nO, &
+              1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), &
+                   t1    , size(t1,1), &
+              0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3))
+
+  call dgemm('N','N',nO,nO*nV*nV,nV, &
+             1d0, t1, size(t1,1), &
+                  Y_vovv, size(Y_vovv,1), &
+             0d0, X_oovv, size(X_oovv,1))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_vovo,Y_vovv)
+  
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do i = 1, nO
+  !         r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !         - cc_space_v_oovo(u,v,beta,i) * t1(i,gam) &
+  !         - cc_space_v_oovo(v,u,gam,i)  * t1(i,beta) ! P
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  call dgemm('N','N',nO*nO*nV,nV,nO, &
+             1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), &
+                  t1 , size(t1,1), &
+             0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+           r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do i = 1, nO
+  !         do a = 1, nV
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           - cc_space_v_ovvo(u,a,beta,i) * t1(v,a) * t1(i,gam) &
+  !           - cc_space_v_ovvo(v,a,gam,i)  * t1(u,a) * t1(i,beta) ! P
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: Y_oovo(:,:,:,:)
+  allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) &
+  !$omp private(a,v,gam,i) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do gam = 1, nV
+      do v = 1, nO
+        do a = 1, nV
+          X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nO,nO*nV*nO,nV, &
+             1d0, t1, size(t1,1), &
+                  X_vovo, size(X_vovo,1), &
+             0d0, Y_oovo, size(Y_oovo,1))
+
+  call dgemm('N','N',nO*nO*nV, nV, nO, &
+             1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), &
+                  t1    , size(t1,1), &
+             0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+           r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  deallocate(X_vovo,Y_oovo)
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do a = 1, nV
+  !         do i = 1, nO
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           + 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) * &
+  !             (2d0 * t2(i,v,a,gam) - t2(i,v,gam,a)) &
+  !           + 0.5d0 * (2d0 * J1(v,a,gam,i)  - K1(v,a,i,gam)) * &
+  !             (2d0 * t2(i,u,a,beta) - t2(i,u,beta,a)) ! P
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:)
+  allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV))
+  !$omp parallel &
+  !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) &
+  !$omp private(u,v,gam,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do a = 1, nV
+      do beta = 1, nV
+        do u = 1, nO
+          X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta))
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do v = 1, nO
+      do i = 1, nO
+        do a = 1, nV
+          Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemm('N','N', nO*nV,nO*nV,nV*nO, &
+             1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), &
+                  Y_voov, size(Y_voov,1) * size(Y_voov,2), &
+             0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Z_ovov) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+           r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  deallocate(X_ovvo,Y_voov)
+  
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do a = 1, nV
+  !         do i = 1, nO
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           - 0.5d0 * K1(u,a,i,beta) * t2(i,v,gam,a) &
+  !           - 0.5d0 * K1(v,a,i,gam)  * t2(i,u,beta,a) !P
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:)
+  allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV))
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) &
+  !$omp private(u,a,i,beta,gam) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do u = 1, nO
+      do a = 1, nV
+        do i = 1, nO
+          X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do v = 1, nO
+      do a = 1, nV
+        do i = 1, nO
+          Y_ovov(i,a,v,gam) = t2(i,v,gam,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('T','N',nO*nV,nO*nV,nO*nV, &
+             1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
+                  Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
+             0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Z_ovov) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+           r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do a = 1, nV
+  !         do i = 1, nO
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           - K1(u,a,i,gam)  * t2(i,v,beta,a) &
+  !           - K1(v,a,i,beta) * t2(i,u,gam,a) ! P
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  !$omp parallel &
+  !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) &
+  !$omp private(u,v,gam,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do a = 1, nV
+    do i = 1, nO
+      do gam = 1, nV
+        do u = 1, nO
+          X_ovov(u,gam,i,a) = K1(u,a,i,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do v = 1, nO
+      do a = 1, nV
+        do i = 1, nO
+          Z_ovov(i,a,v,beta) = t2(i,v,beta,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemm('N','N',nO*nV,nO*nV,nO*nV, &
+             1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
+                  Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
+             0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Z_ovov) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+           r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_ovov,Y_ovov,Z_ovov)
+  
+  ! Change the sign for consistency with the code in spin orbitals
+  !$omp parallel &
+  !$omp shared(nO,nV,r2) &
+  !$omp private(i,j,a,b) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          r2(i,j,a,b) = -r2(i,j,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  max_r2 = 0d0
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          if (dabs(r2(i,j,a,b)) > max_r2) then
+            max_r2 = dabs(r2(i,j,a,b))
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(g_occ,g_vir,J1,K1,A1)
+  
+end
+
+! A1
+
+subroutine compute_A1(nO,nV,t1,t2,tau,A1)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(in)  :: tau(nO, nO, nV, nV)
+  double precision, intent(out) :: A1(nO, nO, nO, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta
+
+  !A1 = 0d0
+
+  !do j = 1, nO
+  !  do i = 1, nO
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !        A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
+
+  !        do a = 1, nV
+  !          A1(u,v,i,j) = A1(u,v,i,j) &
+  !          + cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
+  !          + cc_space_v_vooo(a,v,i,j) * t1(u,a)
+  !          
+  !          do b = 1, nV
+  !            A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
+  !          enddo  
+  !        enddo
+  !        
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:)
+  allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO))
+  
+  ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
+  !$omp parallel &
+  !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) &
+  !$omp private(u,v,i,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do v = 1, nO
+        do u = 1, nO
+          A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
+
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do u = 1, nO
+        do a = 1, nV
+          X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N', nO, nO*nO*nO, nV, &
+             1d0, t1    , size(t1,1), &
+                  X_vooo, size(X_vooo,1), &
+             0d0, Y_oooo, size(Y_oooo,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,A1,Y_oooo) &
+  !$omp private(u,v,i,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do v = 1, nO
+        do u = 1, nO
+          A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  deallocate(X_vooo,Y_oooo)
+
+  ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a)
+  call dgemm('N','N', nO, nO*nO*nO, nV, &
+             1d0, t1      , size(t1,1), &
+                  cc_space_v_vooo, size(cc_space_v_vooo,1), &
+             1d0, A1      , size(A1,1))
+
+  ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
+  call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
+             1d0, tau     , size(tau,1) * size(tau,2), &
+                  cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
+             1d0, A1      , size(A1,1) * size(A1,2))
+   
+end
+
+! B1
+
+subroutine compute_B1(nO,nV,t1,t2,B1)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: B1(nV, nV, nV, nV)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  !B1 = 0d0
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do b = 1, nV
+  !      do a = 1, nV
+  !        B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) 
+
+  !        do i = 1, nO
+  !          B1(a,b,beta,gam) = B1(a,b,beta,gam) &
+  !          - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) &
+  !          - cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
+  !        enddo
+  !        
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  
+  double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:)
+  allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV))
+
+  ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) 
+  !$omp parallel &
+  !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) &
+  !$omp private(a,b,beta,gam) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  !$omp do collapse(3)
+  do i = 1, nO
+    do gam = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          X_vvvo(a,b,gam,i) = cc_space_v_vvov(a,b,i,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) &
+  call dgemm('N','N', nV*nV*nV, nV, nO, &
+             -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), &
+                   t1      , size(t1,1), &
+              1d0, B1      , size(B1,1) * size(B1,2) * size(B1,3))
+
+  
+  ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
+  call dgemm('N','N', nV*nV*nV, nV, nO, &
+             -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), &
+                   t1    , size(t1,1), &
+              0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2) * size(Y_vvvv,3))
+
+  !$omp parallel &
+  !$omp shared(nV,B1,Y_vvvv) &
+  !$omp private(a,b,beta,gam) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          B1(a,b,beta,gam) = B1(a,b,beta,gam) + Y_vvvv(a,b,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  deallocate(X_vvvo,Y_vvvv)
+  
+end
+
+! g_occ
+
+subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV), H_oo(nO, nO)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: g_occ(nO, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  !g_occ = 0d0
+
+  !do i = 1, nO
+  !  do u = 1, nO
+  !    g_occ(u,i) = H_oo(u,i)
+  !    
+  !    do a = 1, nV
+  !      g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a)
+  !      
+  !      do j = 1, nO
+  !        g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a)
+  !      enddo
+  !         
+  !    enddo
+  !  enddo
+  !enddo
+
+  call dgemm('N','N',nO,nO,nV, &
+             1d0, t1, size(t1,1), &
+                  cc_space_f_vo, size(cc_space_f_vo,1), &
+             0d0, g_occ, size(g_occ,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) &
+  !$omp private(i,j,a,u) &
+  !$omp default(none)
+  !$omp do
+  do i = 1, nO
+    do u = 1, nO
+      g_occ(u,i) = g_occ(u,i) + H_oo(u,i)
+    enddo
+  enddo
+  !$omp end do
+  
+  !$omp do collapse(1)
+  do i = 1, nO
+    do j = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+end
+
+! g_vir
+
+subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV), H_vv(nV, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: g_vir(nV, nV)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  !g_vir = 0d0
+
+  !do beta = 1, nV
+  !  do a = 1, nV
+  !    g_vir(a,beta) = H_vv(a,beta)
+  !    
+  !    do i = 1, nO
+  !      g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta)
+  !      
+  !      do b = 1, nV
+  !        g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b)
+  !      enddo
+  !         
+  !    enddo
+  !  enddo
+  !enddo
+  
+  call dgemm('N','N',nV,nV,nO, &
+             -1d0, cc_space_f_vo , size(cc_space_f_vo,1), &
+                   t1   , size(t1,1), &
+              0d0, g_vir, size(g_vir,1))
+      
+  !$omp parallel &
+  !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) &
+  !$omp private(i,b,a,beta) &
+  !$omp default(none)
+  !$omp do
+  do beta = 1, nV
+    do a = 1, nV
+      g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta)
+    enddo
+  enddo
+  !$omp end do
+
+  !$omp do collapse(1)
+  do beta = 1, nV
+    do i = 1, nO
+      do b = 1, nV
+        do a = 1, nV
+          g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+end
+
+! J1
+
+subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(in)  :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO)
+  double precision, intent(in)  :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO)
+  double precision, intent(out) :: J1(nO, nV, nV, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  !J1 = 0d0
+
+  !do i = 1, nO
+  !  do beta = 1, nV
+  !    do a = 1, nV
+  !      do u = 1, nO
+  !       J1(u,a,beta,i) = cc_space_v_ovvo(u,a,beta,i)
+
+  !        do j = 1, nO
+  !          J1(u,a,beta,i) = J1(u,a,beta,i) &
+  !          - cc_space_v_ovoo(u,a,j,i) * t1(j,beta)
+  !        enddo
+
+  !        do b = 1, nV
+  !          J1(u,a,beta,i) = J1(u,a,beta,i) &
+  !          + cc_space_v_vvvo(b,a,beta,i) * t1(u,b)    
+  !        enddo
+
+  !        do j = 1, nO
+  !          do b = 1, nV
+  !           J1(u,a,beta,i) = J1(u,a,beta,i) &
+  !           - cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) &
+  !           + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b)
+  !          enddo
+  !        enddo
+  !        
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:)
+  allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) &
+  !$omp private(i,j,a,u,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = v_ovvo(u,a,beta,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemm('N','N',nO*nV*nO,nV,nO, &
+            -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), &
+                  t1    , size(t1,1), &
+             0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,Y_ovov) &
+  !$omp private(i,beta,a,u) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  deallocate(X_ovoo)
+
+  ! v_vvvo(b,a,beta,i) * t1(u,b)
+  call dgemm('N','N',nO,nV*nV*nO,nV, &
+             1d0, t1    , size(t1,1), &
+                  v_vvvo, size(v_vvvo,1), &
+             1d0, J1    , size(J1,1))
+
+  !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) &
+  double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:)
+  allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO))
+  !$omp parallel &
+  !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) &
+  !$omp private(i,beta,a,u,b,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do b = 1, nV
+    do j = 1, nO
+      do beta = 1, nV
+        do u = 1, nO
+          Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do b = 1, nV
+    do j = 1, nO
+      do i = 1, nO
+        do a = 1, nV
+          X_voov(a,i,j,b) = v_vvoo(a,b,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','T',nO*nV,nV*nO,nO*nV, &
+             -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
+                   X_voov, size(X_voov,1) * size(X_voov,2), &
+              0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2))
+  deallocate(X_voov)
+
+  double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:)
+  allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO))
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) &
+  !$omp private(i,beta,a,u,j,b) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  
+  !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b)
+  !$omp do collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do i = 1, nO
+        do a = 1, nV
+          Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j))
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  
+  !$omp do collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do beta = 1, nV
+        do u = 1, nO
+          X_ovvo(u,beta,b,j) = t2(u,j,beta,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemm('N','T',nO*nV,nV*nO,nV*nO, &
+             1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), &
+                  Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), &
+             0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,Z_ovvo) &
+  !$omp private(i,beta,a,u) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_ovvo,Z_ovvo,Y_ovov)  
+  
+end
+
+! K1
+
+subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(in)  :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV)
+  double precision, intent(in)  :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO)
+  double precision, intent(out) :: K1(nO, nV, nO, nV)
+
+  double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  !K1 = 0d0
+
+  !do beta = 1, nV
+  !  do i = 1, nO
+  !    do a = 1, nV
+  !      do u = 1, nO
+  !        K1(u,a,i,beta) = cc_space_v_ovov(u,a,i,beta)
+
+  !        do j = 1, nO
+  !          K1(u,a,i,beta) = K1(u,a,i,beta) &
+  !          - cc_space_v_ovoo(u,a,i,j) * t1(j,beta)
+  !        enddo
+
+  !        do b = 1, nV
+  !          K1(u,a,i,beta) = K1(u,a,i,beta) &
+  !          + cc_space_v_vvov(b,a,i,beta) * t1(u,b)    
+  !        enddo
+
+  !        do j = 1, nO
+  !          do b = 1, nV
+  !           K1(u,a,i,beta) = K1(u,a,i,beta) &
+  !           - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta))
+  !          enddo
+  !        enddo
+  !        
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) &
+  !$omp private(i,beta,a,u,j,b) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do i = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          K1(u,a,i,beta) = v_ovov(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do i = 1, nO
+    do a = 1, nV
+      do j = 1, nO
+        do b = 1, nV
+          X(b,j,a,i) = - v_vvoo(b,a,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do beta = 1, nV
+        do u = 1, nO
+          Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nO*nV*nO,nV,nO, &
+            -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), &
+                  t1    , size(t1,1), &
+            1d0, K1    , size(K1,1) * size(K1,2) * size(K1,3))
+
+  call dgemm('N','N',nO,nV*nO*nV,nV, &
+             1d0, t1    , size(t1,1), &
+                  v_vvov, size(v_vvov,1), &
+             1d0, K1    , size(K1,1))
+
+  ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i)
+  call dgemm('N','N',nV*nO,nO*nV,nV*nO, &
+             1d0, Y, size(Y,1) * size(Y,2), &
+                  X, size(X,1) * size(X,2), &
+             0d0, Z, size(Z,1) * size(Z,2))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,K1,Z) &
+  !$omp private(i,beta,a,u) &
+  !$omp default(none)
+  !$omp do collapse(3)
+   do beta = 1, nV
+    do i = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X,Y,Z)
+  
+end
diff --git a/src/ccsd/ccsd_spin_orb.irp.f b/src/ccsd/ccsd_spin_orb.irp.f
new file mode 100644
index 00000000..6f2de11c
--- /dev/null
+++ b/src/ccsd/ccsd_spin_orb.irp.f
@@ -0,0 +1,16 @@
+! Prog
+
+program ccsd
+
+  implicit none
+
+  BEGIN_DOC
+  ! CCSD in spin orbitals
+  END_DOC
+
+  read_wf = .True.
+  touch read_wf
+
+  call run_ccsd_spin_orb
+  
+end
diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f
new file mode 100644
index 00000000..23e2cef1
--- /dev/null
+++ b/src/ccsd/ccsd_spin_orb_sub.irp.f
@@ -0,0 +1,2301 @@
+! Code
+
+subroutine run_ccsd_spin_orb
+
+  implicit none
+
+  BEGIN_DOC
+  ! CCSD in spin orbitals
+  END_DOC
+
+  double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:)
+  double precision, allocatable :: r1(:,:), r2(:,:,:,:)
+  double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:)
+  double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:)
+  
+  double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:)
+  double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:)
+  double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:)
+  double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:)
+  double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:)
+  double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:)
+  double precision, allocatable :: v_vvvv(:,:,:,:)
+
+  double precision, allocatable :: all_err(:,:), all_t(:,:)
+
+  logical                       :: not_converged
+  integer, allocatable          :: list_occ(:,:), list_vir(:,:)
+  integer                       :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4)
+  integer                       :: nb_iter, i,j,a,b
+  double precision              :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi
+  integer(bit_kind)             :: det(N_int,2)
+
+  det = psi_det(:,:,cc_ref)
+  print*,'Reference determinant:'
+  call print_det(det,N_int)
+ 
+  ! Extract number of occ/vir alpha/beta spin orbitals
+  !call extract_n_spin(det,n_spin)
+  nOa = cc_nOa !n_spin(1)
+  nOb = cc_nOb !n_spin(2)
+  nVa = cc_nVa !n_spin(3)
+  nVb = cc_nVb !n_spin(4)
+
+  ! Total number of occ/vir spin orb
+  nO = cc_nOab !nOa + nOb
+  nV = cc_nVab !nVa + nVb
+  ! Debug
+  !print*,nO,nV
+
+  ! Number of occ/vir spin orb per spin
+  nO_S = cc_nO_S !(/nOa,nOb/)
+  nV_S = cc_nV_S !(/nVa,nVb/)
+  ! Debug
+  !print*,nO_S,nV_S
+
+  ! Maximal number of occ/vir 
+  nO_m = cc_nO_m !max(nOa, nOb)
+  nV_m = cc_nV_m !max(nVa, nVb)
+  ! Debug
+  !print*,nO_m,nV_m
+  
+  allocate(list_occ(nO_m,2), list_vir(nV_m,2))
+  list_occ = cc_list_occ_spin
+  list_vir = cc_list_vir_spin
+  ! Debug
+  !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir)
+  !print*,list_occ(:,1)
+  !print*,list_occ(:,2)
+  !print*,list_vir(:,1)
+  !print*,list_vir(:,2)
+
+  ! Allocation
+  allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV))
+  allocate(r1(nO,nV), r2(nO,nO,nV,nV))
+  allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV))
+  allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV))
+  allocate(v_oooo(nO,nO,nO,nO))
+  !allocate(v_vooo(nV,nO,nO,nO))
+  allocate(v_ovoo(nO,nV,nO,nO))
+  allocate(v_oovo(nO,nO,nV,nO))
+  allocate(v_ooov(nO,nO,nO,nV))
+  allocate(v_vvoo(nV,nV,nO,nO))
+  !allocate(v_vovo(nV,nO,nV,nO))
+  !allocate(v_voov(nV,nO,nO,nV))
+  allocate(v_ovvo(nO,nV,nV,nO))
+  allocate(v_ovov(nO,nV,nO,nV))
+  allocate(v_oovv(nO,nO,nV,nV))
+  !allocate(v_vvvo(nV,nV,nV,nO))
+  !allocate(v_vvov(nV,nV,nO,nV))
+  !allocate(v_vovv(nV,nO,nV,nV))
+  !allocate(v_ovvv(nO,nV,nV,nV))
+  !allocate(v_vvvv(nV,nV,nV,nV))
+  allocate(f_o(nO), f_v(nV))
+  allocate(f_oo(nO, nO))
+  allocate(f_ov(nO, nV))
+  allocate(f_vv(nV, nV))
+  
+  ! Allocation for the diis
+  if (cc_update_method == 'diis') then
+    allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth))
+    all_err = 0d0
+    all_t   = 0d0
+  endif
+
+  ! Fock elements
+  call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo)
+  call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov)
+  call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv)
+
+  ! Diag elements
+  do i = 1, nO
+    f_o(i) = f_oo(i,i)
+  enddo
+  do i = 1, nV
+    f_v(i) = f_vv(i,i)
+  enddo
+
+  ! Bi electronic integrals from list
+  call wall_time(ti)
+  ! OOOO
+  call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo)
+
+  ! OOO V
+  !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo)
+  call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo)
+  call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo)
+  call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov)
+
+  ! OO VV
+  call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo)
+  !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo)
+  !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov)
+  call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo)
+  call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov)
+  call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv)
+
+  ! O VVV
+  !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo)
+  !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov)
+  !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv)
+  !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv)
+
+  ! VVVV
+  !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv)
+  call wall_time(tf)
+  if (cc_dev) then
+    print*,'Load bi elec int:',tf-ti,'s'
+  endif
+
+  ! Init of T
+  t1 = 0d0
+  call guess_t1(nO,nV,f_o,f_v,f_ov,t1)
+  call guess_t2(nO,nV,f_o,f_v,v_oovv,t2)
+  call compute_tau_spin(nO,nV,t1,t2,tau)
+  call compute_tau_t_spin(nO,nV,t1,t2,tau_t)
+  
+  ! Loop init
+  nb_iter = 0
+  not_converged = .True.
+  r1 = 0d0
+  r2 = 0d0
+  max_r1 = 0d0
+  max_r2 = 0d0
+
+  call det_energy(det,uncorr_energy)
+  print*,'Det energy', uncorr_energy
+  call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy)
+  print*,'guess energy', uncorr_energy+energy, energy
+  
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+  write(*,'(A77)') ' |   It.  |       E(CCSD) (Ha) | Correlation (Ha) |  Conv. T1  |  Conv. T2  |'
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+
+  call wall_time(ta)
+
+  ! Loop
+  do while (not_converged)
+
+    ! Intermediates
+    call wall_time(tbi)
+    call wall_time(ti)
+    call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo)
+    call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov)
+    call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv)
+    call wall_time(tf)
+    if (cc_dev) then
+      print*,'Compute cFs:',tf-ti,'s'
+    endif
+    
+    call wall_time(ti)
+    call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
+    call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
+    !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv)
+    call wall_time(tf)
+    if (cc_dev) then
+      print*,'Compute cFs:',tf-ti,'s'
+    endif
+
+    ! Residuals
+    call wall_time(ti)
+    call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1)
+    call wall_time(tf)
+    if (cc_dev) then
+      print*,'Compute r1:',tf-ti,'s'
+    endif
+    call wall_time(ti)
+    call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2)
+    call wall_time(tf)
+    if (cc_dev) then
+      print*,'Compute r2:',tf-ti,'s'
+    endif
+
+    ! Max elements in the residuals
+    max_r1 = maxval(abs(r1(:,:)))
+    max_r2 = maxval(abs(r2(:,:,:,:)))
+    max_r  = max(max_r1,max_r2)
+
+    call wall_time(ti)
+    ! Update
+    if (cc_update_method == 'diis') then
+      !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+      !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+      call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t)
+
+    ! Standard update as T = T - Delta
+    elseif (cc_update_method == 'none') then
+      call update_t1(nO,nV,f_o,f_v,r1,t1)
+      call update_t2(nO,nV,f_o,f_v,r2,t2)
+    else
+      print*,'Unkonw cc_method_method: '//cc_update_method
+    endif
+
+    call compute_tau_spin(nO,nV,t1,t2,tau)
+    call compute_tau_t_spin(nO,nV,t1,t2,tau_t)
+    call wall_time(tf)
+    if (cc_dev) then
+      print*,'Update:',tf-ti,'s'
+    endif
+
+    ! Print
+    call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy)
+    call wall_time(tfi)
+    
+    write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', &
+         uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
+    if (cc_dev) then
+      print*,'Total:',tfi-tbi,'s'
+    endif
+
+    ! Convergence
+    nb_iter = nb_iter + 1
+    if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then
+      not_converged = .False.
+    endif
+
+  enddo
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+  call wall_time(tb)
+  print*,'Time: ',tb-ta, ' s'
+  print*,''
+  if (max_r < cc_thresh_conv) then
+    write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations'
+  else
+    write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations'
+  endif
+  print*,''
+  write(*,'(A15,F18.12,A3)') ' E(CCSD)     = ', uncorr_energy+energy, ' Ha'
+  write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha'
+  write(*,'(A15,1pE10.2,A3)')' Conv        = ', max_r
+  print*,''
+
+  call write_t1(nO,nV,t1)
+  call write_t2(nO,nV,t2)
+
+  ! Deallocate
+  if (cc_update_method == 'diis') then
+     deallocate(all_err,all_t)
+  endif
+  deallocate(tau,tau_t)
+  deallocate(r1,r2)
+  deallocate(cF_oo,cF_ov,cF_vv)
+  deallocate(cW_oooo,cW_ovvo)!,cW_vvvv)
+  deallocate(v_oooo)
+  deallocate(v_ovoo,v_oovo)
+  deallocate(v_ovvo,v_ovov,v_oovv)
+  
+  if (cc_par_t .and. elec_alpha_num  +elec_beta_num > 2) then
+    double precision :: t_corr
+    print*,'CCSD(T) calculation...'
+    call wall_time(ta)
+    !allocate(v_vvvo(nV,nV,nV,nO))
+    !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, &
+    !   cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, &
+    !   cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
+    !   nV,nV,nV,nO, v_vvvo)
+
+    !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr)
+    call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr)
+    !print*,'Working on it...'
+    !call abort
+    call wall_time(tb)
+    print*,'Done'
+    print*,'Time: ',tb-ta, ' s'
+    print*,''
+    write(*,'(A15,F18.12,A3)') ' E(CCSD(T))  = ', uncorr_energy + energy + t_corr, ' Ha'
+    write(*,'(A15,F18.12,A3)') ' E(T)        = ', t_corr, ' Ha'
+    write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha'
+    print*,''
+  endif
+  print*,'Reference determinant:'
+  call print_det(det,N_int)
+  
+  deallocate(f_oo,f_ov,f_vv,f_o,f_v)
+  deallocate(v_ooov,v_vvoo,t1,t2)
+  !deallocate(v_ovvv,v_vvvo,v_vovv)
+  !deallocate(v_vvvv)
+  
+end
+
+! Energy
+
+subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy)
+
+  implicit none
+
+  BEGIN_DOC
+  ! CCSD energy in spin orbitals
+  END_DOC
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: Fov(nO,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: energy
+
+  integer                       :: i,j,a,b
+
+
+  energy = 0d0
+
+  do i=1,nO
+      do a=1,nV
+      energy = energy + Fov(i,a) * t1(i,a)
+    end do
+  end do
+
+  do i=1,nO
+    do j=1,nO
+      do a=1,nV
+        do b=1,nV
+          energy = energy                                & 
+                 + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) &
+                 + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b)
+        end do
+      end do
+    end do
+  end do
+
+end
+
+! Tau
+
+subroutine compute_tau_spin(nO,nV,t1,t2,tau)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: tau(nO,nO,nV,nV)
+  
+  integer                       :: i,j,k,l
+  integer                       :: a,b,c,d
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(tau,t1,t2,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do i=1,nO
+    do j=1,nO
+      do a=1,nV
+        do b=1,nV
+          tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+end
+
+! Tau_t
+
+subroutine compute_tau_t_spin(nO,nV,t1,t2,tau_t)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: tau_t(nO,nO,nV,nV)
+
+  integer                       :: i,j,k,l
+  integer                       :: a,b,c,d
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(tau_t,t1,t2,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do i=1,nO
+    do j=1,nO
+      do a=1,nV
+        do b=1,nV
+          tau_t(i,j,a,b) = t2(i,j,a,b) + 0.5d0*(t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a))
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+end
+
+! R1
+
+subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: f_o(nO), f_v(nV)
+  double precision,intent(in)   :: Fov(nO,nV)
+  double precision,intent(in)   :: cF_oo(nO,nO)
+  double precision,intent(in)   :: cF_ov(nO,nV)
+  double precision,intent(in)   :: cF_vv(nV,nV)
+  double precision,intent(in)   :: v_oovo(nO,nO,nV,nO)
+  double precision,intent(in)   :: v_ovov(nO,nV,nO,nV)
+  !double precision,intent(in)   :: v_ovvv(nO,nV,nV,nV)
+
+  double precision,intent(out)  :: r1(nO,nV)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+  !double precision, allocatable :: X_vovv(:,:,:,:)
+  double precision, allocatable :: X_oovv(:,:,:,:)
+  double precision              :: accu
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r1,t1,t2,Fov,cF_vv,cF_ov, &
+  !$OMP v_ovov,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(1)
+  do a=1,nV
+    do i=1,nO
+      r1(i,a) = Fov(i,a)
+      do e=1,nV
+        do m=1,nO
+          r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e)
+        end do
+      end do
+      do f=1,nV
+        do n=1,nO
+          r1(i,a) = r1(i,a) - t1(n,f)*v_ovov(n,a,i,f)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  !do a=1,nV
+  !  do i=1,nO
+  !    do e=1,nV
+  !      r1(i,a) = r1(i,a) + t1(i,e)*cF_vv(a,e)
+  !    end do
+  !  end do
+  !end do
+  call dgemm('N','T', nO, nV, nV, &
+             1d0, t1   , size(t1,1), &
+                  cF_vv, size(cF_vv,1), &
+             1d0, r1   , size(r1,1))
+  
+  !do a=1,nV
+  !  do i=1,nO
+  !    do m=1,nO
+  !      r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i)
+  !    end do
+  !  end do
+  !end do
+  call dgemm('T','N', nO, nV, nO, &
+             -1d0, cF_oo, size(cF_oo,1), &
+                   t1   , size(t1,1), &
+              1d0, r1   , size(r1,1))
+
+  !do a=1,nV
+  !  do i=1,nO
+  !    do f=1,nV
+  !      do e=1,nV
+  !        do m=1,nO
+  !          r1(i,a) = r1(i,a) - 0.5d0*t2(i,m,e,f)*v_ovvv(m,a,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+
+  !allocate(X_vovv(nV,nO,nV,nV))
+  double precision, allocatable :: v_ovvf(:,:,:), X_vovf(:,:,:)
+  allocate(v_ovvf(nO,nV,nV),X_vovf(nV,nO,nV))
+
+  do f = 1, nV
+    call gen_v_spin_3idx(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, &
+           cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, &
+           nO,nV,nV, v_ovvf)
+    !$OMP PARALLEL &
+    !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) &
+    !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
+    !$OMP DEFAULT(NONE)
+    
+
+    !$OMP DO collapse(3)
+    !do f = 1, nV
+      do e = 1, nV
+         do m = 1, nO
+           do a = 1, nV
+             !X_vovv(a,m,e,f) = v_ovvv(m,a,e,f)
+             X_vovf(a,m,e) = v_ovvf(m,a,e)
+          enddo
+        enddo
+      enddo
+    !enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+      
+    call dgemm('N','T', nO, nV, nO*nV, &
+             -0.5d0, t2(1,1,1,f), size(t2,1), &
+                     X_vovf, size(X_vovf,1), &
+              1d0  , r1    , size(r1,1))
+  enddo
+  
+  !call dgemm('N','T', nO, nV, nO*nV*nV, &
+  !           -0.5d0, t2    , size(t2,1), &
+  !                   X_vovv, size(X_vovv,1), &
+  !            1d0  , r1    , size(r1,1))
+  
+  deallocate(X_vovf)
+  !deallocate(X_vovv)
+  allocate(X_oovv(nO,nO,nV,nV))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r1,t1,t2,X_oovv, &
+  !$OMP f_o,f_v,v_oovo,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
+  !$OMP DEFAULT(NONE)
+  
+  !do a=1,nV
+  !  do i=1,nO
+  !    do e=1,nV
+  !      do m=1,nO
+  !        do n=1,nO
+  !          r1(i,a) = r1(i,a) - 0.5d0*t2(m,n,a,e)*v_oovo(n,m,e,i)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  
+  !$OMP DO collapse(3)
+  do a = 1, nV
+    do e = 1, nV
+      do m = 1, nO
+        do n = 1, nO
+          X_oovv(n,m,e,a) = t2(m,n,a,e)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('T','N', nO, nV, nO*nO*nV, &
+             -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), &
+                     X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), &
+             1d0   , r1    , size(r1,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do a = 1, nV
+    do i = 1, nO
+      r1(i,a) = (f_o(i)-f_v(a)) * t1(i,a) - r1(i,a)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(X_oovv)
+
+end
+
+! R2
+
+subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: cF_oo(nO,nO)
+  double precision,intent(in)   :: cF_ov(nO,nV)
+  double precision,intent(in)   :: cF_vv(nV,nV)
+  double precision,intent(in)   :: f_o(nO), f_v(nV)
+  double precision,intent(in)   :: cW_oooo(nO,nO,nO,nO)
+  !double precision,intent(in)   :: cW_vvvv(nV,nV,nV,nV)
+  double precision,intent(in)   :: cW_ovvo(nO,nV,nV,nO)
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: tau(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_ovoo(nO,nV,nO,nO)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_ovvo(nO,nV,nV,nO)
+  !double precision,intent(in)   :: v_vvvo(nV,nV,nV,nO)!, v_vovv(nV,nO,nV,nV)
+
+  double precision,intent(out)  :: r2(nO,nO,nV,nV)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+  double precision, allocatable :: X_vvoo(:,:,:,:)
+  !double precision, allocatable :: A_vvov(:,:,:,:)
+  double precision, allocatable :: X_oovv(:,:,:,:), Y_oovv(:,:,:,:)
+  double precision, allocatable :: A_vvoo(:,:,:,:), B_ovoo(:,:,:,:), C_ovov(:,:,:,:)
+  double precision, allocatable :: A_ovov(:,:,:,:), B_ovvo(:,:,:,:), X_ovvo(:,:,:,:)
+  double precision, allocatable :: A_vv(:,:)
+  double precision, allocatable :: A_oo(:,:), B_oovv(:,:,:,:)
+  double precision, allocatable :: A_vbov(:,:,:), X_vboo(:,:,:), v_vbvo(:,:,:)
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+  !        r2(i,j,a,b) = v_oovv(i,j,a,b)
+  !      end do
+  !    end do
+  !  end do
+  !end do
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          r2(i,j,a,b) = r2(i,j,a,b) + t2(i,j,a,e)*cF_vv(b,e)
+  !          r2(i,j,a,b) = r2(i,j,a,b) - t2(i,j,b,e)*cF_vv(a,e)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(X_oovv(nO,nO,nV,nV))
+  call dgemm('N','T',nO*nO*nV, nV, nV, &
+             1d0, t2    , size(t2,1) * size(t2,2) * size(t2,3), &
+                  cF_VV , size(cF_vv,1), &
+             0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = v_oovv(i,j,a,b) + X_oovv(i,j,a,b) - X_oovv(i,j,b,a)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  !deallocate(X_oovv)
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e)
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_vv(nV,nV))!, X_oovv(nO,nO,nV,nV))
+  call dgemm('T','N', nV, nV, nO, &
+             1d0, t1   , size(t1,1), &
+                  cF_ov, size(cF_ov,1), &
+             0d0, A_vv , size(A_vv,1))
+
+  call dgemm('N','T', nO*nO*nV, nV, nV, &
+             0.5d0, t2    , size(t2,1) * size(t2,2) * size(t2,3), &
+                    A_vv  , size(A_vv,1), &
+             0d0  , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) 
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+             
+  deallocate(A_vv)!,X_oovv)
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do m=1,nO
+  !          r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j)
+  !          r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(Y_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV))
+  !$OMP PARALLEL &
+  !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,m,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do i=1,nO
+        do m=1,nO
+          X_oovv(m,i,a,b) = t2(i,m,a,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dgemm('T','N', nO, nO*nV*nV, nO, &
+             1d0, cF_oo , size(cF_oo,1), &
+                  X_oovv, size(X_oovv,1), &
+             0d0, Y_oovv, size(Y_oovv,1))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) 
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  deallocate(Y_oovv)!,X_oovv)
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e)
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV))
+  
+  call dgemm('N','T', nO, nO, nV, &
+        1d0, t1   , size(t1,1), &
+             cF_ov, size(cF_ov,1), &
+        0d0, A_oo , size(A_oo,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(t2,B_oovv,nO,nV) &
+  !$OMP PRIVATE(i,m,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do i = 1, nO
+        do m = 1, nO
+          B_oovv(m,i,a,b) = t2(i,m,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('N','N', nO, nO*nV*nV, nO, &
+             0.5d0, A_oo, size(A_oo,1), &
+                    B_oovv, size(B_oovv,1), &
+             0d0  , X_oovv, size(X_oovv,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  deallocate(A_oo,B_oovv,X_oovv)
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do n=1,nO
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(m,n,a,b)*cW_oooo(m,n,i,j)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  call dgemm('T','N', nO*nO, nV*nV, nO*nO, &
+             0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), &
+                    tau    , size(tau,1) * size(tau,2), &
+             1d0  , r2     , size(r2,1) * size(r2,2))
+  
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do f=1,nV
+  !          do e=1,nV
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  !call dgemm('N','T', nO*nO, nV*nV, nV*nV, &
+  !           0.5d0, tau    , size(tau,1) * size(tau,2), &
+  !                  cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), &
+  !           1d0  , r2     , size(r2,1) * size(r2,2))
+  double precision :: ti,tf
+  call wall_time(ti)
+  call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2)
+  call wall_time(tf)
+  if (cc_dev) then
+    print*,'cW_vvvv:',tf-ti,'s'
+  endif
+  
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b)                                                 & 
+  !                        + t2(i,m,a,e)*cW_ovvo(m,b,e,j) &
+  !                        - t2(j,m,a,e)*cW_ovvo(m,b,e,i) &
+  !                        - t2(i,m,b,e)*cW_ovvo(m,a,e,j) &
+  !                        + t2(j,m,b,e)*cW_ovvo(m,a,e,i) &
+  !                        - t1(i,e)*t1(m,a)*v_ovvo(m,b,e,j) &
+  !                        + t1(j,e)*t1(m,a)*v_ovvo(m,b,e,i) &
+  !                        + t1(i,e)*t1(m,b)*v_ovvo(m,a,e,j) &
+  !                        - t1(j,e)*t1(m,b)*v_ovvo(m,a,e,i)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_ovov(nO,nV,nO,nV), B_ovvo(nO,nV,nV,nO), X_ovvo(nO,nV,nV,nO))
+  !$OMP PARALLEL &
+  !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do a = 1, nV
+    do i = 1, nO
+      do e = 1, nV
+        do m = 1, nO
+          A_ovov(m,e,i,a) = t2(i,m,a,e)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO NOWAIT
+  !$OMP DO collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do e = 1, nV
+        do m = 1, nO
+          B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) 
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('T','N', nO*nV, nV*nO, nO*nV, &
+             1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), &
+                  B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), &
+             0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,X_ovvo,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          r2(i,j,a,b) = r2(i,j,a,b) + X_ovvo(i,a,b,j) - X_ovvo(j,a,b,i) &
+                                    - X_ovvo(i,b,a,j) + X_ovvo(j,b,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(A_ovov,B_ovvo,X_ovvo)
+  allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do m = 1, nO
+    do j = 1, nO
+      do b = 1, nV
+        do e = 1, nV
+          A_vvoo(e,b,j,m) = v_ovvo(m,b,e,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('N','N', nO, nV*nO*nO, nV, &
+             1d0, t1    , size(t1,1), &
+                  A_vvoo, size(A_vvoo,1), &
+             0d0, B_ovoo, size(B_ovoo,1))
+  
+  call dgemm('N','N', nO*nV*nO, nV, nO, &
+             1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), &
+                  t1    , size(t1,1), &
+             0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2) * size(C_ovov,3))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,C_ovov,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - C_ovov(i,b,j,a) + C_ovov(j,b,i,a) &
+                                    + C_ovov(i,a,j,b) - C_ovov(j,a,i,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(A_vvoo, B_ovoo, C_ovov)
+                  
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          r2(i,j,a,b) = r2(i,j,a,b) + t1(i,e)*v_vvvo(a,b,e,j) - t1(j,e)*v_vvvo(a,b,e,i)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  !allocate(A_vvov(nV,nV,nO,nV), X_vvoo(nV,nV,nO,nO))
+  allocate(A_vbov(nV,nO,nV), X_vboo(nV,nO,nO), v_vbvo(nV,nV,nO))
+  do b = 1, nV
+
+    call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, &
+         cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
+         nV,nV,nO, v_vbvo)
+    
+    !$OMP PARALLEL &
+    !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) &
+    !$OMP PRIVATE(i,j,a,e,m) &
+    !$OMP DEFAULT(NONE)
+    
+    !$OMP DO collapse(2)
+    do e = 1, nV
+      do j = 1, nO
+        !do b = 1, nV
+          do a = 1, nV
+            !A_vvov(a,b,j,e) = v_vvvo(a,b,e,j)
+            A_vbov(a,j,e) = v_vbvo(a,e,j)
+          enddo
+        !enddo
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+
+    call dgemm('N','T', nV*nO, nO, nV, &
+               1d0, A_vbov, size(A_vbov,1) * size(A_vbov,2), &
+                    t1    , size(t1,1), &
+               0d0, X_vboo, size(X_vboo,1) * size(X_vboo,2))
+    !call dgemm('N','T', nV*nV*nO, nO, nV, &
+    !           1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), &
+    !                t1    , size(t1,1), &
+    !           0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3))
+    
+    !$OMP PARALLEL &
+    !$OMP SHARED(b,r2,X_vboo,nO,nV) &
+    !$OMP PRIVATE(i,j,a,e,m) &
+    !$OMP DEFAULT(NONE)
+    
+    !$OMP DO collapse(2)
+    !do b = 1, nV
+      do a = 1, nV
+        do j = 1, nO
+          do i = 1, NO
+             !r2(i,j,a,b ) = r2(i,j,a,b) + X_vvoo(a,b,j,i) - X_vvoo(a,b,i,j)
+             r2(i,j,a,b) = r2(i,j,a,b) + X_vboo(a,j,i) - X_vboo(a,i,j)
+          enddo
+        enddo
+      enddo
+    !enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+  enddo
+  
+  !deallocate(A_vvov)!,X_vvoo)
+  deallocate(A_vbov, X_vboo, v_vbvo)
+  allocate(X_vvoo(nV,nV,nO,nO))
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do m=1,nO
+  !          r2(i,j,a,b) = r2(i,j,a,b) - t1(m,a)*v_ovoo(m,b,i,j) + t1(m,b)*v_ovoo(m,a,i,j)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  !allocate(X_vvoo(nV,nV,nO,nO))
+  
+  call dgemm('T','N', nV, nV*nO*nO, nO, &
+             1d0, t1    , size(t1,1), &
+                  v_ovoo, size(v_ovoo,1), &
+             0d0, X_vvoo, size(X_vvoo,1))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - X_vvoo(a,b,i,j) + X_vvoo(b,a,i,j)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = (f_o(i)+f_o(j)-f_v(a)-f_v(b)) * t2(i,j,a,b) - r2(i,j,a,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(X_vvoo)
+
+end
+
+! Use cF_oo
+
+subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2)
+
+  implicit none
+
+  integer,intent(in)              :: nO,nV
+  double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)
+  double precision, intent(in)    :: F_oo(nO,nV), F_ov(nO,nV)
+  double precision, intent(in)    :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)
+  
+  double precision, allocatable   :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:)
+  integer                         :: i,j,m,a,b
+
+  allocate(cF_oo(nO,nO))
+  
+  call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo)
+  
+  !do a=1,nV
+  !  do i=1,nO
+  !    do m=1,nO
+  !      r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i)
+  !    end do
+  !  end do
+  !end do
+  call dgemm('T','N', nO, nV, nO, &
+             -1d0, cF_oo, size(cF_oo,1), &
+                   t1   , size(t1,1), &
+              1d0, r1   , size(r1,1))
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do m=1,nO
+  !          r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j)
+  !          r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  
+  allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV))
+  !$OMP PARALLEL &
+  !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,m,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do i=1,nO
+        do m=1,nO
+          X_oovv(m,i,a,b) = t2(i,m,a,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dgemm('T','N', nO, nO*nV*nV, nO, &
+             1d0, cF_oo , size(cF_oo,1), &
+                  X_oovv, size(X_oovv,1), &
+             0d0, Y_oovv, size(Y_oovv,1))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) 
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(cF_oo,X_oovv,Y_oovv)
+
+end
+
+! Use cF_ov
+
+subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV
+  double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV)
+  double precision, intent(in)    :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)
+
+  double precision, allocatable   :: cF_ov(:,:), A_oo(:,:), A_vv(:,:)
+  double precision, allocatable   :: X_oovv(:,:,:,:), B_oovv(:,:,:,:)
+  integer                         :: i,j,a,b,e,m
+
+  allocate(cF_ov(nO,nV))
+  
+  call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r1,t2,cF_ov,nO,nV) &
+  !$OMP PRIVATE(i,a,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(1)
+  do a=1,nV
+    do i=1,nO
+      do e=1,nV
+        do m=1,nO
+          r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e)
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_vv(nV,nV), X_oovv(nO,nO,nV,nV))
+  call dgemm('T','N', nV, nV, nO, &
+             1d0, t1   , size(t1,1), &
+                  cF_ov, size(cF_ov,1), &
+             0d0, A_vv , size(A_vv,1))
+
+  call dgemm('N','T', nO*nO*nV, nV, nV, &
+             0.5d0, t2    , size(t2,1) * size(t2,2) * size(t2,3), &
+                    A_vv  , size(A_vv,1), &
+             0d0  , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,r2,X_oovv) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) 
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+             
+  deallocate(A_vv)
+  
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e)
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV))
+  
+  call dgemm('N','T', nO, nO, nV, &
+        1d0, t1   , size(t1,1), &
+             cF_ov, size(cF_ov,1), &
+        0d0, A_oo , size(A_oo,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(t2,B_oovv,nO,nV) &
+  !$OMP PRIVATE(i,m,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do i = 1, nO
+        do m = 1, nO
+          B_oovv(m,i,a,b) = t2(i,m,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('N','N', nO, nO*nV*nV, nO, &
+             0.5d0, A_oo, size(A_oo,1), &
+                    B_oovv, size(B_oovv,1), &
+             0d0  , X_oovv, size(X_oovv,1))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(cF_ov,A_oo,B_oovv,X_oovv)
+  
+end
+
+! Use cF_vv
+
+subroutine use_cF_vv(nO,nV,t1,t2,r1,r2)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV
+  double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)
+
+  double precision, allocatable   :: cF_vv(:,:)
+  integer                         :: i,j,a,b,e,m
+
+  allocate(cF_vv(nV,nV))
+  
+  !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv)
+
+  deallocate(cF_vv)
+  
+end
+
+! Use cW_vvvd
+
+subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV
+  double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
+  double precision, intent(in)    :: v_oovv(nO,nO,nV,nV)
+  !double precision, intent(in)    :: v_vovv(nV,nO,nV,nV)
+  
+  double precision, intent(inout) :: r2(nO,nO,nV,nV)
+
+  double precision, allocatable   :: cW_vvvf(:,:,:), v_vvvf(:,:,:), tau_f(:,:,:), v_vovf(:,:,:)
+  integer                         :: i,j,e,f
+  double precision                :: ti,tf
+
+  allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV))
+
+  !PROVIDE cc_nVab
+  
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do f=1,nV
+  !          do e=1,nV
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  
+  do f = 1, nV
+    call wall_time(ti)
+    !$OMP PARALLEL &
+    !$OMP SHARED(tau,tau_f,f,nO,nV) &
+    !$OMP PRIVATE(i,j,e) &
+    !$OMP DEFAULT(NONE)
+    
+    !$OMP DO collapse(2)
+    do e = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          tau_f(i,j,e) = tau(i,j,e,f)
+        enddo
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+    call wall_time(tf)
+    if (cc_dev .and. f == 1) then
+      print*,'1st transpo', tf-ti
+    endif
+
+    call wall_time(ti)
+    call gen_v_spin_3idx(cc_nV_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nV_S,cc_nV_S,cc_nV_S, &
+         cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, &
+         nV,nV,nV, v_vvvf)
+    call wall_time(tf)
+    if (cc_dev .and. f == 1) then
+      print*,'vvvf', tf-ti
+    endif
+    call wall_time(ti)
+    call gen_v_spin_3idx(cc_nV_m,cc_nO_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nO_S,cc_nV_S,cc_nV_S, &
+         cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, &
+         nV,nO,nV, v_vovf)
+    call wall_time(tf)
+    if (cc_dev .and. f == 1) then
+      print*,'vovf', tf-ti
+    endif
+    
+    call wall_time(ti)
+    call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf)
+    call wall_time(tf)
+    if (cc_dev .and. f == 1) then
+      print*,'cW_vvvf', tf-ti
+    endif
+
+    call wall_time(ti)
+    call dgemm('N','T', nO*nO, nV*nV, nV, &
+               0.5d0, tau_f    , size(tau_f,1) * size(tau_f,2), &
+                      cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2), &
+               1d0  , r2     , size(r2,1) * size(r2,2))
+    call wall_time(tf)
+    if (cc_dev .and. f == 1) then
+      print*,'last dgemm', tf-ti
+    endif
+  enddo
+
+  deallocate(cW_vvvf,v_vvvf,v_vovf)
+  
+end
+
+! cF_oo
+
+subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: tau_t(nO,nO,nV,nV)
+  double precision,intent(in)   :: Foo(nO,nO)
+  double precision,intent(in)   :: Fov(nO,nV)
+  double precision,intent(in)   :: v_ooov(nO,nO,nO,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: cF_oo(nO,nO)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+  double precision,external     :: Kronecker_Delta
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) &
+  !$OMP PRIVATE(i,m,n,e) &
+  !$OMP DEFAULT(NONE)
+  
+  !do i=1,nO
+  !  do m=1,nO
+  !    cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i)
+  !  end do
+  !end do
+  !$OMP DO collapse(1)
+  do i=1,nO
+    do m=1,nO
+        cF_oo(m,i) = Foo(m,i)
+    end do
+  end do
+  !$OMP END DO
+  !$OMP DO
+  do i = 1, nO
+    cF_oo(i,i) = 0d0
+  end do
+  !$OMP END DO
+  
+  do e=1,nV
+    do n=1,nO
+      !$OMP DO collapse(1)
+      do i=1,nO
+        do m=1,nO
+          cF_oo(m,i) = cF_oo(m,i) + t1(n,e)*v_ooov(m,n,i,e)
+        end do
+      end do
+      !$OMP END DO
+    end do
+  end do
+  !$OMP END PARALLEL
+
+  !do i=1,nO
+  !  do m=1,nO
+  !    do e=1,nV
+  !      cF_oo(m,i) = cF_oo(m,i) + 0.5d0*t1(i,e)*Fov(m,e)
+  !    end do
+  !  end do
+  !end do
+  call dgemm('N','T', nO, nO, nV,&
+             0.5d0, Fov  , size(Fov,1), &
+                    t1   , size(t1,1), &
+             1d0  , cF_oo, size(cF_oo,1))
+
+  !do i=1,nO
+  !  do m=1,nO
+  !    do f=1,nV
+  !      do e=1,nV
+  !        do n=1,nO
+  !          cF_oo(m,i) = cF_oo(m,i) + 0.5d0*tau_t(i,n,e,f)*v_oovv(m,n,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  call dgemm('N','T', nO, nO, nO*nV*nV, &
+             0.5d0, v_oovv, size(v_oovv,1), &
+                    tau_t , size(tau_t,1), &
+             1d0  , cF_oo , size(cF_oo,1)) 
+  
+end
+
+! cF_ov
+
+subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: Fov(nO,nV),v_oovv(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: cF_ov(nO,nV)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) &
+  !$OMP PRIVATE(i,a,m,n,e,f) &
+  !$OMP DEFAULT(NONE)
+  
+  !cF_ov = Fov
+
+  !$OMP DO collapse(1)
+  do e=1,nV
+    do m=1,nO
+      cF_ov(m,e) = Fov(m,e)
+      do f=1,nV
+        do n=1,nO
+          cF_ov(m,e) = cF_ov(m,e) + t1(n,f)*v_oovv(m,n,e,f)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+end
+
+! cF_vv
+
+subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: tau_t(nO,nO,nV,nV)
+  double precision,intent(in)   :: Fov(nO,nV)
+  double precision,intent(in)   :: Fvv(nV,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+  !double precision,intent(in)   :: v_ovvv(nO,nV,nV,nV)
+
+  double precision,intent(out)  :: cF_vv(nV,nV)
+  
+  double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:)
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cF_vv,Fvv,nO,nV) &
+  !$OMP PRIVATE(e,a) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do e=1,nV
+    do a=1,nV
+      cF_vv(a,e) = Fvv(a,e)
+    end do
+  end do
+  !$OMP END DO
+  !$OMP DO
+  do e = 1, nV
+    cF_vv(e,e) = 0d0
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ 
+  !do e=1,nV
+  !  do a=1,nV
+  !    do m=1,nO
+  !      cF_vv(a,e) = cF_vv(a,e) - 0.5d0*t1(m,a)*Fov(m,e)
+  !    end do
+  !  end do
+  !end do
+  call dgemm('T','N', nV, nV, nO, &
+             -0.5d0, t1   , size(t1,1), &
+                     Fov  , size(Fov,1), &
+              1d0  , cF_vv, size(cF_vv,1))
+  
+  !do e=1,nV
+  !  do a=1,nV
+  !    do m=1,nO
+  !      do f=1,nV
+  !        cF_vv(a,e) = cF_vv(a,e) + t1(m,f)*v_ovvv(m,a,f,e)
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(v_ovfv(nO,nV,nV),X_ovfv(nO,nV,nV))
+  do f = 1, nV
+
+     call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, &
+                              cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, &
+                              nO,nV,nV, v_ovfv)
+
+    !$OMP PARALLEL &
+    !$OMP SHARED(nO,nV,v_ovfv,X_ovfv,f) &
+    !$OMP PRIVATE(m,a,e) &
+    !$OMP DEFAULT(NONE)
+    !$OMP DO collapse(2)
+    do e = 1, nV
+      do a = 1, nV
+        do m = 1, nO
+          !X_ovfv(m,a,e) = v_ovvv(m,a,f,e)
+          X_ovfv(m,a,e) = v_ovfv(m,a,e)
+        enddo
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+    call dgemv('T', nO, nV*nV, &
+               !1d0, v_ovvv(:,:,f,:), size(v_ovvv,1), &
+               1d0, X_ovfv, size(X_ovfv,1), &
+                    t1(1,f), 1, &
+               1d0, cF_vv, 1)
+  enddo
+  deallocate(v_ovfv,X_ovfv)
+
+  !do e=1,nV
+  !  do a=1,nV
+  !    do f=1,nV
+  !      do n=1,nO
+  !        do m=1,nO
+  !          cF_vv(a,e) = cF_vv(a,e) - 0.5d0*tau_t(m,n,a,f)*v_oovv(m,n,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  do f = 1, nV
+     call dgemm('T','N', nV, nV, nO*nO,&
+                -0.5d0, tau_t(1,1,1,f) , size(tau_t,1) * size(tau_t,2), &
+                        v_oovv(1,1,1,f), size(v_oovv,1) * size(v_oovv,2), &
+                1d0   , cF_vv, size(cF_vv,1))
+  enddo
+
+end
+
+! cW_oooo
+
+subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: tau(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_oooo(nO,nO,nO,nO)
+  double precision,intent(in)   :: v_ooov(nO,nO,nO,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: cW_oooo(nO,nO,nO,nO)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+  double precision, allocatable :: X_oooo(:,:,:,:)
+
+  ! oooo block  
+
+  !cW_oooo = v_oooo
+
+  !do j=1,nO
+  !  do i=1,nO
+  !    do n=1,nO
+  !      do m=1,nO
+
+  !        do e=1,nV
+  !          cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + t1(j,e)*v_ooov(m,n,i,e) - t1(i,e)*v_ooov(m,n,j,e)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(X_oooo(nO,nO,nO,nO))
+  
+  call dgemm('N','T', nO*nO*nO, nO, nV, &
+             1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), &
+                  t1    , size(t1,1), &
+             0d0, X_oooo, size(X_oooo,1) * size(X_oooo,1) * size(X_oooo,3))
+  !$OMP PARALLEL &
+  !$OMP SHARED(cW_oooo,v_oooo,X_oooo,nO,nV) &
+  !$OMP PRIVATE(i,j,m,n) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do j=1,nO
+    do i=1,nO
+      do n=1,nO
+        do m=1,nO
+          cW_oooo(m,n,i,j) = v_oooo(m,n,i,j) + X_oooo(m,n,i,j) - X_oooo(m,n,j,i)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(X_oooo)
+  
+  !do m=1,nO
+  !  do n=1,nO
+  !    do i=1,nO
+  !      do j=1,nO
+  !         
+  !        do e=1,nV
+  !          do f=1,nV
+  !            cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+
+  call dgemm('N','T', nO*nO, nO*nO, nV*nV, &
+             0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), &
+                     tau    , size(tau,1) * size(tau,2), &
+             1.d0  , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2))
+  
+end
+
+! cW_ovvo
+
+subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: tau(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_oovo(nO,nO,nV,nO)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_ovvo(nO,nV,nV,nO)
+  !double precision,intent(in)   :: v_ovvv(nO,nV,nV,nV)
+
+  double precision,intent(out)  :: cW_ovvo(nO,nV,nV,nO)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+  double precision, allocatable :: A_oovo(:,:,:,:), B_vovo(:,:,:,:)
+  double precision, allocatable :: A_voov(:,:,:,:), B_voov(:,:,:,:), C_ovov(:,:,:,:)
+  double precision, allocatable :: v_ovev(:,:,:), cW_oveo(:,:,:)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cW_ovvo,v_ovvo,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do a = 1, nV
+        do i = 1, nO
+          cW_ovvo(i,a,b,j) = v_ovvo(i,a,b,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  !do m=1,nO
+  !  do b=1,nV
+  !    do e=1,nV
+  !      do j=1,nO
+  !        do f=1,nV
+  !          cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + t1(j,f)*v_ovvv(m,b,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(v_ovev(nO,nV,nV),cW_oveo(nO,nV,nO))
+  do e = 1, nV
+
+    call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, &
+                              cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, &
+                              nO,nV,nV, v_ovev)
+     
+    call dgemm('N','T', nO*nV, nO, nV, &
+               1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), &
+                     t1     , size(t1,1), &
+               0.d0, cW_oveo, size(cW_oveo,1) * size(cW_oveo,2))
+    !$OMP PARALLEL &
+    !$OMP SHARED(e,cW_ovvo,cW_oveo,nO,nV) &
+    !$OMP PRIVATE(m,b,j) &
+    !$OMP DEFAULT(NONE)
+    !$OMP DO collapse(2)
+    do j = 1, nO
+      do b = 1, nV
+        do m = 1, nO
+          cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + cW_oveo(m,b,j)
+        enddo
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+  enddo
+  deallocate(v_ovev,cW_oveo)
+  !call dgemm('N','T', nO*nV*nV, nO, nV, &
+  !           1.d0, v_ovvv , size(v_ovvv,1) * size(v_ovvv,2) * size(v_ovvv,3), &
+  !                 t1     , size(t1,1), &
+  !           1.d0, cW_ovvo, size(cW_ovvo,1) * size(cW_ovvo,2) * size(cW_ovvo,3))
+
+  !do j=1,nO
+  !  do e=1,nV
+  !    do b=1,nV
+  !      do m=1,nO
+  !        do n=1,nO
+  !          cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - t1(n,b)*v_oovo(m,n,e,j)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  
+  allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(A_oovo,v_oovo,nO,nV) &
+  !$OMP PRIVATE(j,e,m,n) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do j=1,nO
+    do e=1,nV
+      do m=1,nO
+        do n=1,nO
+          A_oovo(n,m,e,j) = v_oovo(m,n,e,j)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('T','N', nV, nO*nV*nO, nO, &
+             1d0, t1    , size(t1,1), &
+                  A_oovo, size(A_oovo,1), &
+             0d0, B_vovo, size(B_vovo,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) &
+  !$OMP PRIVATE(j,e,m,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do j=1,nO
+    do e=1,nV
+      do b=1,nV
+        do m=1,nO
+          cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - B_vovo(b,m,e,j)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  deallocate(A_oovo,B_vovo)
+
+  !do j=1,nO
+  !  do e=1,nV
+  !    do b=1,nV
+  !      do m=1,nO
+  !        do f=1,nV
+  !          do n=1,nO
+  !            cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) &
+  !                            - ( 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) )*v_oovv(m,n,e,f)
+  !          end do
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_voov(nV,nO,nO,nV), B_voov(nV,nO,nO,nV), C_ovov(nO,nV,nO,nV))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) &
+  !$OMP PRIVATE(f,n,m,e,j,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do j = 1, nO
+      do n = 1, nO
+        do f = 1, nV
+          A_voov(f,n,j,b) = 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP DO collapse(3)
+  do e = 1, nV
+    do m = 1, nO
+      do n = 1, nO
+        do f = 1, nV
+          B_voov(f,n,m,e) = v_oovv(m,n,e,f)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('T','N', nO*nV, nV*nO, nV*nO, &
+             1d0, A_voov, size(A_voov,1) * size(A_voov,2), &
+                  B_voov, size(B_voov,1) * size(B_voov,2), &
+             0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2))
+  
+  deallocate(A_voov,B_voov)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) &
+  !$OMP PRIVATE(j,e,m,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do j = 1, nO
+    do e = 1, nV
+      do b = 1, nV
+        do m = 1, nO
+          cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - C_ovov(j,b,m,e)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(C_ovov)
+
+end
+
+! cW_vvvv
+
+subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv)
+ 
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: tau(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_vovv(nV,nO,nV,nV)
+  double precision,intent(in)   :: v_vvvv(nV,nV,nV,nV)
+
+  double precision,intent(out)  :: cW_vvvv(nV,nV,nV,nV)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,c,d,e,f
+  double precision, allocatable :: A_ovvv(:,:,:,:), B_vvvv(:,:,:,:)
+
+  allocate(A_ovvv(nO,nV,nV,nV), B_vvvv(nV,nV,nV,nV))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,cW_vvvv,A_ovvv,v_vovv,v_vvvv) &
+  !$OMP PRIVATE(a,b,c,d,e,f,m) &
+  !$OMP DEFAULT(NONE)
+
+  !$OMP DO collapse(3)
+  do d = 1, nV
+    do c = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO NOWAIT
+
+  !do f=1,nV
+  !  do e=1,nV
+  !    do b=1,nV
+  !      do a=1,nV
+  !        do m=1,nO
+  !          cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  !$OMP DO collapse(3)
+  do f=1,nV
+    do e=1,nV
+      do a=1,nV
+        do m=1,nO
+          A_ovvv(m,a,e,f) = v_vovv(a,m,e,f)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dgemm('T','N', nV, nV*nV*nV, nO, &
+             1d0, t1    , size(t1,1), &
+                  A_ovvv, size(A_ovvv,1), &
+             0d0, B_vvvv, size(B_vvvv,1))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,cW_vvvv,B_vvvv) &
+  !$OMP PRIVATE(a,b,c,d,e,f,m) &
+  !$OMP DEFAULT(NONE)
+
+  !$OMP DO collapse(3)
+  do f=1,nV
+    do e=1,nV
+      do b=1,nV
+        do a=1,nV
+          cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(A_ovvv,B_vvvv)
+
+  !do a=1,nV
+  !  do b=1,nV
+  !    do e=1,nV
+  !      do f=1,nV
+  !         
+  !        do m=1,nO
+  !          do n=1,nO
+  !            cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  call dgemm('T','N', nV*nV, nV*nV, nO*nO, &
+             0.25d0, tau    , size(tau,1) * size(tau,2), &
+                     v_oovv , size(v_oovv,1) * size(v_oovv,2), &
+             1.d0  , cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2))
+
+end
+
+! cW_vvvf
+
+subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf)
+ 
+  implicit none
+
+  integer,intent(in)            :: nO,nV,f
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: tau(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_vovf(nV,nO,nV)
+  double precision,intent(in)   :: v_vvvf(nV,nV,nV)
+
+  double precision,intent(out)  :: cW_vvvf(nV,nV,nV)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,c,d,e
+  double precision, allocatable :: A_ovvf(:,:,:), B_vvvf(:,:,:), v_oovf(:,:,:)
+  double precision :: ti,tf
+
+  allocate(A_ovvf(nO,nV,nV), B_vvvf(nV,nV,nV))
+  allocate(v_oovf(nO,nO,nV))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) &
+  !$OMP PRIVATE(a,b,c,d,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(2)
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        !cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d)
+        cW_vvvf(a,b,c) = v_vvvf(a,b,c)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO NOWAIT
+
+  !do f=1,nV
+  !  do e=1,nV
+  !    do b=1,nV
+  !      do a=1,nV
+  !        do m=1,nO
+  !          cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+
+  !$OMP DO collapse(2)
+  do e=1,nV
+    do a=1,nV
+      do m=1,nO
+        !A_ovvv(m,a,e,f) = v_vovv(a,m,e,f)
+        !A_ovvf(m,a,e) = v_vovv(a,m,e,f)
+        A_ovvf(m,a,e) = v_vovf(a,m,e)
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dgemm('T','N', nV, nV*nV, nO, &
+             1d0, t1    , size(t1,1), &
+                  A_ovvf, size(A_ovvf,1), &
+             0d0, B_vvvf, size(B_vvvf,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) &
+  !$OMP PRIVATE(a,b,c,d,e,m,n) &
+  !$OMP DEFAULT(NONE)
+
+  !$OMP DO collapse(3)
+  do e=1,nV
+    do b=1,nV
+      do a=1,nV
+        !cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f)
+        cW_vvvf(a,b,e) = cW_vvvf(a,b,e) - B_vvvf(b,a,e) + B_vvvf(a,b,e)
+      end do
+    end do
+  end do
+  !$OMP END DO NOWAIT
+  
+  !deallocate(A_ovvf,B_vvvf)
+
+  !do a=1,nV
+  !  do b=1,nV
+  !    do e=1,nV
+  !      do f=1,nV
+  !         
+  !        do m=1,nO
+  !          do n=1,nO
+  !            cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+
+  !$OMP DO collapse(2)
+  do e = 1, nV
+    do n = 1, nO
+      do m = 1, nO
+        v_oovf(m,n,e) = v_oovv(m,n,e,f)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL  
+  
+  call dgemm('T','N', nV*nV, nV, nO*nO, &
+             0.25d0, tau    , size(tau,1) * size(tau,2), &
+                     v_oovf , size(v_oovf,1) * size(v_oovf,2), &
+             1.d0  , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2))
+  
+  deallocate(v_oovf)
+  deallocate(A_ovvf,B_vvvf)
+
+end
diff --git a/src/ccsd/ccsd_t_space_orb.irp.f b/src/ccsd/ccsd_t_space_orb.irp.f
new file mode 100644
index 00000000..1f1db87e
--- /dev/null
+++ b/src/ccsd/ccsd_t_space_orb.irp.f
@@ -0,0 +1,412 @@
+! Dumb way
+
+subroutine ccsd_par_t_space(nO,nV,t1,t2,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: energy
+  
+  double precision, allocatable :: W(:,:,:,:,:,:)
+  double precision, allocatable :: V(:,:,:,:,:,:)
+  integer :: i,j,k,a,b,c
+  
+  allocate(W(nO,nO,nO,nV,nV,nV))
+  allocate(V(nO,nO,nO,nV,nV,nV))
+
+  call form_w(nO,nV,t2,W)   
+  call form_v(nO,nV,t1,W,V)
+
+  energy = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c))  !delta_ooovvv(i,j,k,a,b,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  
+  energy = energy / 3d0
+  
+  deallocate(V,W)
+end
+
+subroutine form_w(nO,nV,t2,W)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV)
+  
+  integer :: i,j,k,l,a,b,c,d
+
+  W = 0d0
+  do c = 1, nV
+    print*,'W:',c,'/',nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+
+              do d = 1, nV
+                W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+                ! chem (bd|ai)
+                ! phys <ba|di>
+                + cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) &
+                + cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj
+                + cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik
+                + cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij
+                + cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj
+                + cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik
+              enddo
+
+              do l = 1, nO
+                W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+                ! chem (ck|jl)
+                ! phys <cj|kl>
+                - cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) &
+                - cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj
+                - cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik
+                - cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij
+                - cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj
+                - cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik
+              enddo
+
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+end
+
+subroutine form_v(nO,nV,t1,w,v)
+
+implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: W(nO, nO, nO, nV, nV, nV)
+  double precision, intent(out) :: V(nO, nO, nO, nV, nV, nV)
+
+  integer :: i,j,k,a,b,c
+
+  V = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) &
+              + cc_space_v_vvoo(b,c,j,k) * t1(i,a) &
+              + cc_space_v_vvoo(a,c,i,k) * t1(j,b) &
+              + cc_space_v_vvoo(a,b,i,j) * t1(k,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+end
+
+! Main
+
+subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO,nV), f_o(nO), f_v(nV)
+  double precision, intent(in)  :: t2(nO,nO,nV,nV)
+  double precision, intent(in)  :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO)
+  double precision, intent(out) :: energy
+  
+  double precision, allocatable :: W(:,:,:,:,:,:)
+  double precision, allocatable :: V(:,:,:,:,:,:)
+  double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:)
+  double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:)
+  double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:)
+  integer                       :: i,j,k,l,a,b,c,d
+  double precision              :: e,ta,tb, delta, delta_ijk
+ 
+  !allocate(W(nV,nV,nV,nO,nO,nO))
+  !allocate(V(nV,nV,nV,nO,nO,nO))
+  allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV))
+  allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO))
+  allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO))
+
+  ! Temporary arrays
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, &
+  !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) &
+  !$OMP PRIVATE(a,b,c,d,i,j,k,l) &
+  !$OMP DEFAULT(NONE)
+  
+  !v_vvvo(b,a,d,i) * t2(k,j,c,d) &
+  !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j)
+  
+  !$OMP DO collapse(3)
+  do i = 1, nO
+    do a = 1, nV
+      do b = 1, nV
+        do d = 1, nV
+          X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP DO collapse(3)
+  do j = 1, nO
+    do k = 1, nO
+      do c = 1, nV
+        do d = 1, nV
+          T_vvoo(d,c,k,j) = t2(k,j,c,d)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+ 
+  !v_vooo(c,j,k,l) * t2(i,l,a,b) &
+  !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) &
+
+  !$OMP DO collapse(3)
+  do k = 1, nO
+    do j = 1, nO
+      do c = 1, nV
+        do l = 1, nO
+           X_ovoo(l,c,j,k) = v_vooo(c,j,k,l)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP DO collapse(3)
+  do i = 1, nO
+    do b = 1, nV
+      do a = 1, nV
+        do l = 1, nO
+          T_ovvo(l,a,b,i) = t2(i,l,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+                     
+  !v_vvoo(b,c,j,k) * t1(i,a) &
+  !X_vvoo(b,c,k,j) * T1_vo(a,i) &
+  
+  !$OMP DO collapse(3)
+  do j = 1, nO
+    do k = 1, nO
+      do c = 1, nV
+        do b = 1, nV
+          X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP DO collapse(1)
+  do i = 1, nO
+    do a = 1, nV
+      T_vo(a,i) = t1(i,a)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call wall_time(ta)
+  energy = 0d0
+  do i = 1, nO
+    do j = 1, nO
+      do k = 1, nO
+        delta_ijk = f_o(i) + f_o(j) + f_o(k)
+        call form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_ijk)
+        call form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,W_ijk,V_ijk)
+        !$OMP PARALLEL &
+        !$OMP SHARED(energy,nV,i,j,k,W_ijk,V_ijk,f_o,f_v,delta_ijk) &
+        !$OMP PRIVATE(a,b,c,e,delta) &
+        !$OMP DEFAULT(NONE)
+        e = 0d0
+        !$OMP DO
+        do c = 1, nV
+          do b = 1, nV
+            do a = 1, nV
+              delta = 1d0 / (delta_ijk - f_v(a) - f_v(b) - f_v(c))
+              !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c))  !delta_ooovvv(i,j,k,a,b,c)
+              e = e + (4d0 * W_ijk(a,b,c) + W_ijk(b,c,a) + W_ijk(c,a,b)) &
+                       * (V_ijk(a,b,c) - V_ijk(c,b,a)) * delta
+            enddo
+          enddo
+        enddo
+        !$OMP END DO
+        !$OMP CRITICAL
+        energy = energy + e
+        !$OMP END CRITICAL
+        !$OMP END PARALLEL
+      enddo
+    enddo
+    call wall_time(tb)
+    write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s'
+  enddo
+  
+  energy = energy / 3d0
+
+  deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo)
+  !deallocate(V,W)
+end
+
+! W_ijk
+
+subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV,i,j,k
+  !double precision, intent(in) :: t2(nO,nO,nV,nV)
+  double precision, intent(in)  :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO)
+  double precision, intent(in)  :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO)
+  double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO)
+  
+  integer :: l,a,b,c,d
+
+  !W = 0d0
+  !do i = 1, nO
+  !  do j = 1, nO
+  !    do k = 1, nO
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) &
+  !$OMP PRIVATE(a,b,c,d,l) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(2)
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        W(a,b,c) = 0d0
+
+        do d = 1, nV
+          !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+          W(a,b,c) = W(a,b,c) &
+          ! chem (bd|ai)
+          ! phys <ba|di>
+          !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) &
+          !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj
+          !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik
+          !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij
+          !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj
+          !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik
+          + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) &
+          + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj
+          + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik
+          + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij
+          + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj
+          + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik
+        enddo
+        
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP DO collapse(2)
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+         
+        do l = 1, nO
+          !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+          W(a,b,c) = W(a,b,c) &
+          ! chem (ck|jl)
+          ! phys <cj|kl>
+          !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) &
+          !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj
+          !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik
+          !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij
+          !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj
+          !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik
+          - X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) &
+          - X_ovoo(l,b,k,j) * T_ovvo(l,a,c,i) & ! bc kj
+          - X_ovoo(l,b,i,j) * T_ovvo(l,c,a,k) & ! prev ac ik
+          - X_ovoo(l,a,j,i) * T_ovvo(l,c,b,k) & ! prev ab ij
+          - X_ovoo(l,a,k,i) * T_ovvo(l,b,c,j) & ! prev bc kj
+          - X_ovoo(l,c,i,k) * T_ovvo(l,b,a,j) ! prev ac ik
+        enddo
+
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  !    enddo
+  !  enddo
+  !enddo
+
+end
+
+! V_ijk
+
+subroutine form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,w,v)
+
+implicit none
+
+  integer, intent(in)           :: nO,nV,i,j,k
+  !double precision, intent(in)  :: t1(nO,nV)
+  double precision, intent(in)  :: T_vo(nV,nO)
+  double precision, intent(in)  :: X_vvoo(nV,nV,nO,nO)
+  double precision, intent(in)  :: W(nV,nV,nV)!,nO,nO,nO)
+  double precision, intent(out) :: V(nV,nV,nV)!,nO,nO,nO)
+
+  integer :: a,b,c
+
+  !V = 0d0
+  !do i = 1, nO
+  !  do j = 1, nO
+  !    do k = 1, nO
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) &
+  !$OMP PRIVATE(a,b,c) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(2)
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) &
+        V(a,b,c) = W(a,b,c) &
+        !+ cc_space_v_vvoo(b,c,j,k) * t1(i,a) &
+        !+ cc_space_v_vvoo(a,c,i,k) * t1(j,b) &
+        !+ cc_space_v_vvoo(a,b,i,j) * t1(k,c)
+        + X_vvoo(b,c,k,j) * T_vo(a,i) &
+        + X_vvoo(a,c,k,i) * T_vo(b,j) &
+        + X_vvoo(a,b,j,i) * T_vo(c,k)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  !    enddo
+  !  enddo
+  !enddo
+
+end
diff --git a/src/ccsd/ccsd_t_spin_orb.irp.f b/src/ccsd/ccsd_t_spin_orb.irp.f
new file mode 100644
index 00000000..3f79e4a0
--- /dev/null
+++ b/src/ccsd/ccsd_t_spin_orb.irp.f
@@ -0,0 +1,376 @@
+! v1
+
+subroutine ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV)
+  double precision, intent(in)  :: f_o(nO), f_v(nV), f_ov(nO,nV)
+  double precision, intent(in)  :: v_ooov(nO,nO,nO,nV)
+  double precision, intent(in)  :: v_vvoo(nV,nV,nO,nO), v_vvvo(nV,nV,nV,nO)
+  double precision, intent(out) :: energy
+
+  double precision, allocatable :: t3(:,:,:,:,:,:), s(:,:)
+  double precision              :: e_t, e_st, e_dt, delta_abc, delta
+  integer                       :: i,j,k,l,m,a,b,c,d,e
+
+  allocate(t3(nO,nO,nO,nV,nV,nV), s(nO,nV))
+
+  t3 = 0d0
+  
+  ! T3
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        delta_abc = f_v(a) + f_v(b) + f_v(c)
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              delta = f_o(i) + f_o(j) + f_o(k) - delta_abc
+              do e = 1, nV
+                t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) &
+                  + t2(j,k,a,e) * v_vvvo(b,c,e,i) &
+                  - t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij)
+                  - t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik)
+                  - t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab)
+                  - t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac)
+                  + t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab)
+                  + t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac)
+                  + t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab)
+                  + t2(j,i,c,e) * v_vvvo(b,a,e,k)   ! + P(ik) P(ac)
+              enddo
+              do m = 1, nO
+                t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) &
+                  + t2(m,i,b,c) * v_ooov(j,k,m,a) &
+                  - t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij)
+                  - t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik)
+                  - t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab)
+                  - t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac)
+                  + t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab)
+                  + t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac)
+                  + t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab)
+                  + t2(m,k,b,a) * v_ooov(j,i,m,c)   ! + P(ik) P(ac)
+              enddo
+                t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) * (1d0 / delta)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  
+
+  ! E_T
+  e_t = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        delta_abc = f_v(a) + f_v(b) + f_v(c)
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              delta = f_o(i) + f_o(j) + f_o(k) - delta_abc
+              e_t = e_t + t3(i,j,k,a,b,c) * delta * t3(i,j,k,a,b,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  e_t = e_t / 36d0
+
+  ! E_ST
+  s = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3(i,j,k,a,b,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  e_st = 0d0
+  do a = 1, nV
+    do i = 1, nO
+      e_st = e_st + s(i,a) * t1(i,a)
+    enddo
+  enddo
+  e_st = e_st * 0.25d0
+
+  ! E_DT
+  e_dt = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              e_dt = e_dt + t2(i,j,a,b) * f_ov(k,c) * t3(i,j,k,a,b,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  e_dt = e_dt * 0.25d0
+
+  ! (T)
+  !print*,e_t,e_st,e_dt
+  energy = e_t + e_st + e_dt
+
+  deallocate(t3,s)
+  
+end
+
+! v2
+
+subroutine ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV)
+  double precision, intent(in)  :: f_o(nO), f_v(nV), f_ov(nO,nV)
+  double precision, intent(in)  :: v_ooov(nO,nO,nO,nV)
+  double precision, intent(in)  :: v_vvoo(nV,nV,nO,nO)
+  double precision, intent(out) :: energy
+
+  double precision, allocatable :: t3_bc(:,:,:,:), s(:,:), e_t(:), e_dt(:)
+  double precision, allocatable :: A_vovv(:,:,:,:), v_vvvo(:,:,:,:)
+  double precision, allocatable :: T_voov(:,:,:,:), B_ooov(:,:,:,:)
+  double precision              :: e_st, delta_abc, delta, ta, tb
+  integer                       :: i,j,k,l,m,a,b,c,d,e
+
+  allocate(t3_bc(nO,nO,nO,nV), s(nO,nV), e_t(nV), e_dt(nV))
+  allocate(A_vovv(nV,nO,nV,nV),v_vvvo(nV,nV,nV,nO),T_voov(nV,nO,nO,nV),B_ooov(nO,nO,nO,nV))
+
+  call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, &
+       cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, &
+       cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
+       nV,nV,nV,nO, v_vvvo)
+  
+  ! Init 
+  s = 0d0
+  e_t = 0d0
+  e_st = 0d0
+  e_dt = 0d0
+
+  call wall_time(ta)
+  !$OMP PARALLEL &
+  !$OMP PRIVATE(i,j,k,m,a,b,c,e) &
+  !$OMP SHARED(A_vovv,ta,tb,t3_bc,s,e_t,e_st,e_dt,t2,v_vvvo,v_ooov, &
+  !$OMP v_vvoo,f_o,f_v,f_ov,delta,delta_abc,nO,nV,T_voov,B_ooov) &
+  !$OMP DEFAULT(NONE)
+
+  !$OMP DO collapse(3)
+  do c = 1, nV
+    do b = 1, nV
+      do i = 1, nO
+        do e = 1, nV
+          A_vovv(e,i,b,c) = v_vvvo(b,c,e,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$omp do collapse(3)
+  do a = 1, nV
+    do k = 1, nO
+      do j = 1, nO
+        do e = 1, nV
+          T_voov(e,j,k,a) = t2(j,k,a,e)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do a = 1, nV
+    do k = 1, nO
+      do j = 1, nO
+        do m = 1, nO
+          B_ooov(m,j,k,a) = v_ooov(j,k,m,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+
+  do c = 1, nV
+    do b = 1, nV
+
+      ! T3(:,:,:,:,b,c)
+      ! Init
+      !$OMP DO collapse(3)
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              t3_bc(i,j,k,a) = 0d0
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+      
+      !$OMP DO collapse(3)
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              do e = 1, nV
+                t3_bc(i,j,k,a) = t3_bc(i,j,k,a) &
+                   !+ t2(j,k,a,e) * v_vvvo(b,c,e,i) &
+                   !- t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij)
+                   !- t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik)
+                   !- t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab)
+                   !- t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac)
+                   !+ t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab)
+                   !+ t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac)
+                   !+ t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab)
+                   !+ t2(j,i,c,e) * v_vvvo(b,a,e,k)   ! + P(ik) P(ac)
+                   + T_voov(e,j,k,a) * A_vovv(e,i,b,c) &
+                   - T_voov(e,i,k,a) * A_vovv(e,j,b,c) & ! - P(ij)
+                   - T_voov(e,j,i,a) * A_vovv(e,k,b,c) & ! - P(ik)
+                   - T_voov(e,j,k,b) * A_vovv(e,i,a,c) & ! - P(ab)
+                   - T_voov(e,j,k,c) * A_vovv(e,i,b,a) & ! - P(ac)
+                   + T_voov(e,i,k,b) * A_vovv(e,j,a,c) & ! + P(ij) P(ab)
+                   + T_voov(e,i,k,c) * A_vovv(e,j,b,a) & ! + P(ij) P(ac)
+                   + T_voov(e,j,i,b) * A_vovv(e,k,a,c) & ! + P(ik) P(ab)
+                   + T_voov(e,j,i,c) * A_vovv(e,k,b,a)   ! + P(ik) P(ac)
+              enddo
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+      
+      !$OMP DO collapse(3)
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              do m = 1, nO
+                t3_bc(i,j,k,a) = t3_bc(i,j,k,a) &
+                   !+ t2(m,i,b,c) * v_ooov(j,k,m,a) &
+                   !- t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij)
+                   !- t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik)
+                   !- t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab)
+                   !- t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac)
+                   !+ t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab)
+                   !+ t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac)
+                   !+ t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab)
+                   !+ t2(m,k,b,a) * v_ooov(j,i,m,c)   ! + P(ik) P(ac)
+                   + t2(m,i,b,c) * B_ooov(m,j,k,a) &
+                   - t2(m,j,b,c) * B_ooov(m,i,k,a) & ! - P(ij)
+                   - t2(m,k,b,c) * B_ooov(m,j,i,a) & ! - P(ik)
+                   - t2(m,i,a,c) * B_ooov(m,j,k,b) & ! - P(ab)
+                   - t2(m,i,b,a) * B_ooov(m,j,k,c) & ! - P(ac)
+                   + t2(m,j,a,c) * B_ooov(m,i,k,b) & ! + P(ij) P(ab)
+                   + t2(m,j,b,a) * B_ooov(m,i,k,c) & ! + P(ij) P(ac)
+                   + t2(m,k,a,c) * B_ooov(m,j,i,b) & ! + P(ik) P(ab)
+                   + t2(m,k,b,a) * B_ooov(m,j,i,c)   ! + P(ik) P(ac)
+              enddo
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+
+      !$OMP DO
+      do a = 1, nV
+        delta_abc = f_v(a) + f_v(b) + f_v(c)
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+               delta = f_o(i) + f_o(j) + f_o(k) - delta_abc 
+               t3_bc(i,j,k,a) = t3_bc(i,j,k,a) * (1d0 / delta)
+             enddo
+           enddo
+         enddo
+       enddo
+      !$OMP END DO
+
+      ! E_T
+      !$OMP DO
+      do a = 1, nV
+        delta_abc = f_v(a) + f_v(b) + f_v(c)
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              delta = f_o(i) + f_o(j) + f_o(k) - delta_abc
+              e_t(a) = e_t(a) + t3_bc(i,j,k,a) * delta * t3_bc(i,j,k,a)
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO nowait
+
+      ! E_ST
+      !$OMP DO
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3_bc(i,j,k,a)
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO nowait
+
+      ! E_DT
+      !$OMP DO
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              e_dt(a) = e_dt(a) + t2(i,j,a,b) * f_ov(k,c) * t3_bc(i,j,k,a)
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+    enddo
+    !$OMP MASTER
+    call wall_time(tb)
+    write(*,'(A1,F6.2,A5,F10.2,A2)') ' ', dble(c)/dble(nV)*100d0, '% in ', tb-ta, ' s'
+    !$OMP END MASTER
+  enddo
+  !$OMP END PARALLEL
+
+  do a = 2, nV
+    e_t(1) = e_t(1) + e_t(a)
+  enddo
+  
+  do a = 2, nV
+    e_dt(1) = e_dt(1) + e_dt(a)
+  enddo
+
+  e_t = e_t / 36d0
+  
+  do a = 1, nV
+    do i = 1, nO
+      e_st = e_st + s(i,a) * t1(i,a)
+    enddo
+  enddo
+  e_st = e_st * 0.25d0
+
+  e_dt = e_dt * 0.25d0
+
+  ! (T)
+  !print*,e_t(1),e_st,e_dt(1)
+  energy = e_t(1) + e_st + e_dt(1)
+
+  deallocate(t3_bc,s)
+  
+end
diff --git a/src/ccsd/org/ccsd_space_orb.org b/src/ccsd/org/ccsd_space_orb.org
new file mode 100644
index 00000000..a848fd26
--- /dev/null
+++ b/src/ccsd/org/ccsd_space_orb.org
@@ -0,0 +1,2121 @@
+* ccsd with spatial orbitals
+
+Scuseria, Gustavo E.; Janssen, Curtis L.; Schaefer, Henry
+F. (1988). An efficient reformulation of the closed-shell coupled
+cluster single and double excitation (CCSD) equations. The Journal of
+Chemical Physics, 89(12), 7382–. doi:10.1063/1.455269
+
+* Code
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb.irp.f
+program ccsd
+
+  implicit none
+
+  read_wf = .True.
+  touch read_wf
+
+  call run_ccsd_space_orb
+  
+end
+#+end_src
+
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine run_ccsd_space_orb
+  
+  implicit none
+
+  integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d
+  integer :: u,v,gam,beta,tmp_gam,tmp_beta
+  integer :: nb_iter
+  double precision :: get_two_e_integral
+  double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb
+  logical :: not_converged
+
+  double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:)
+  double precision, allocatable :: t1(:,:), r1(:,:)
+  double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:)
+  
+  double precision, allocatable :: all_err(:,:), all_t(:,:)
+  integer, allocatable          :: list_occ(:), list_vir(:)
+  integer(bit_kind)             :: det(N_int,2)
+  integer                       :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4)
+  
+  PROVIDE mo_two_e_integrals_in_map
+
+  det = psi_det(:,:,cc_ref)
+  print*,'Reference determinant:'
+  call print_det(det,N_int)
+
+  ! Extract number of occ/vir alpha/beta spin orbitals
+  !call extract_n_spin(det,n_spin)
+  nOa = cc_nOa !n_spin(1)
+  nOb = cc_nOb !n_spin(2)
+  nVa = cc_nVa !n_spin(3)
+  nVb = cc_nVb !n_spin(4)
+
+  ! Check that the reference is a closed shell determinant
+  if (cc_ref_is_open_shell) then
+    call abort
+  endif
+  
+  ! Number of occ/vir spatial orb
+  nO = nOa
+  nV = nVa
+  
+  allocate(list_occ(nO),list_vir(nV))
+  list_occ = cc_list_occ
+  list_vir = cc_list_vir
+  ! Debug
+  !call extract_list_orb_space(det,nO,nV,list_occ,list_vir)
+  !print*,'occ',list_occ
+  !print*,'vir',list_vir
+  
+  allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV))
+  allocate(tau(nO,nO,nV,nV))
+  allocate(t1(nO,nV), r1(nO,nV))
+  allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO))
+
+  if (cc_update_method == 'diis') then
+    allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth))
+    all_err = 0d0
+    all_t   = 0d0
+  endif
+
+  if (elec_alpha_num /= elec_beta_num) then
+    print*, 'Only for closed shell systems'
+    print*, 'elec_alpha_num=',elec_alpha_num
+    print*, 'elec_beta_num =',elec_beta_num
+    print*, 'abort'
+    call abort
+  endif
+
+  ! Init
+  call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1)
+  call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2)
+  call update_tau_space(nO,nV,t1,t2,tau)
+  !print*,'hf_energy', hf_energy
+  call det_energy(det,uncorr_energy)
+  print*,'Det energy', uncorr_energy
+  call ccsd_energy_space(nO,nV,tau,t1,energy)
+  print*,'Guess energy', uncorr_energy+energy, energy
+  
+  nb_iter = 0
+  not_converged = .True.
+  max_r1 = 0d0
+  max_r2 = 0d0
+
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+  write(*,'(A77)') ' |   It.  |       E(CCSD) (Ha) | Correlation (Ha) |  Conv. T1  |  Conv. T2  |'
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+  call wall_time(ta)
+  
+  do while (not_converged)
+    
+    call compute_H_oo(nO,nV,t1,t2,tau,H_oo)
+    call compute_H_vv(nO,nV,t1,t2,tau,H_vv)
+    call compute_H_vo(nO,nV,t1,t2,H_vo)
+
+    ! Residue
+    call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
+    call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
+    max_r = max(max_r1,max_r2)
+    
+    ! Update
+    if (cc_update_method == 'diis') then
+      !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+      !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+      call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t)
+
+    ! Standard update as T = T - Delta
+    elseif (cc_update_method == 'none') then
+      call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1)
+      call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2)
+    else
+      print*,'Unkonw cc_method_method: '//cc_update_method
+    endif
+
+    call update_tau_space(nO,nV,t1,t2,tau)
+
+    ! Energy
+    call ccsd_energy_space(nO,nV,tau,t1,energy)
+    write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
+
+    nb_iter = nb_iter + 1
+    if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then
+      not_converged = .False.
+    endif
+    
+  enddo
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+  call wall_time(tb)
+  print*,'Time: ',tb-ta, ' s'
+  print*,''
+  if (max_r < cc_thresh_conv) then
+    write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations'
+  else
+    write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations'
+  endif
+  print*,''
+  write(*,'(A15,F18.12,A3)') ' E(CCSD)     = ', uncorr_energy+energy, ' Ha'
+  write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha'
+  write(*,'(A15,1pE10.2,A3)')' Conv        = ', max_r
+  print*,''
+
+  call write_t1(nO,nV,t1)
+  call write_t2(nO,nV,t2)
+  
+  ! Deallocation
+  if (cc_update_method == 'diis') then
+    deallocate(all_err,all_t)
+  endif
+
+  deallocate(H_vv,H_oo,H_vo,r1,r2,tau)
+  
+  ! CCSD(T)
+  double precision :: e_t
+
+  if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then 
+
+    ! Dumb way
+    !call wall_time(ta)
+    !call ccsd_par_t_space(nO,nV,t1,t2,e_t)
+    !call wall_time(tb)
+    !print*,'Time: ',tb-ta, ' s'
+
+    !print*,''
+    !write(*,'(A15,F18.12,A3)') ' E(CCSD(T))  = ', uncorr_energy + energy + e_t, ' Ha'
+    !write(*,'(A15,F18.12,A3)') ' E(T)        = ', e_t, ' Ha'
+    !write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
+    !print*,''
+
+    ! New
+    print*,'Computing (T) correction...'
+    call wall_time(ta)
+    call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
+         ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
+    call wall_time(tb)
+    print*,'Time: ',tb-ta, ' s'
+
+    print*,''
+    write(*,'(A15,F18.12,A3)') ' E(CCSD(T))  = ', uncorr_energy + energy + e_t, ' Ha'
+    write(*,'(A15,F18.12,A3)') ' E(T)        = ', e_t, ' Ha'
+    write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
+    print*,''
+  endif
+  
+  print*,'Reference determinant:'
+  call print_det(det,N_int)
+
+  deallocate(t1,t2)
+
+end
+#+END_SRC
+
+* Energy
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine ccsd_energy_space(nO,nV,tau,t1,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: tau(nO,nO,nV,nV)
+  double precision, intent(in)  :: t1(nO,nV)
+  double precision, intent(out) :: energy
+
+  ! internal
+  integer :: i,j,a,b
+  double precision :: e
+
+  energy = 0d0
+  !$omp parallel &
+  !$omp shared(nO,nV,energy,tau,t1,&
+  !$omp cc_space_f_vo,cc_space_w_oovv) &
+  !$omp private(i,j,a,b,e) &
+  !$omp default(none)
+  e = 0d0
+  !$omp do
+  do i = 1, nO
+    do a = 1, nV
+      e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
+    enddo
+  enddo
+  !$omp end do nowait
+  !$omp do
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b)
+       enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  !$omp critical
+  energy = energy + e
+  !$omp end critical
+  !$omp end parallel
+  
+end
+#+END_SRC
+
+* T
+** Tau
+#+begin_src f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine update_tau_space(nO,nV,t1,t2,tau)
+
+  implicit none
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV)
+
+  ! out
+  double precision, intent(out) :: tau(nO,nO,nV,nV)
+
+  ! internal
+  integer                       :: i,j,a,b
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,tau,t2,t1) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+    
+end
+#+end_src
+
+* Residual equations
+** R1
+*** R1
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
+
+  implicit none
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
+  double precision, intent(in)  :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)
+
+  ! out
+  double precision, intent(out) :: r1(nO,nV), max_r1
+  
+  ! internal
+  integer                       :: u,i,j,beta,a,b
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r1,cc_space_f_ov) &
+  !$omp private(u,beta) &
+  !$omp default(none)
+  !$omp do
+  do beta = 1, nV
+    do u = 1, nO
+      r1(u,beta) = cc_space_f_ov(u,beta)
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a)
+  ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO)
+  ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV)
+  ! cc_space_f_vo(a,i) * t1(u,a)    -> X1(nO,nO), O(nO*nO*nV)
+  ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV)  
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      do a = 1, nV
+  !        r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a)
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_oo(:,:)
+  allocate(X_oo(nO,nO))
+  call dgemm('N','N', nO, nO, nV, &
+             -2d0, t1    , size(t1,1), &
+                   cc_space_f_vo, size(cc_space_f_vo,1), &
+              0d0, X_oo  , size(X_oo,1))
+
+  call dgemm('T','N', nO, nV, nO, &
+             1d0, X_oo, size(X_oo,2), &
+                  t1  , size(t1,1), &
+             1d0, r1  , size(r1,1)) 
+  deallocate(X_oo)
+
+  ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a)
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do a = 1, nV
+  !      r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a)
+  !    enddo
+  !  enddo
+  !enddo
+  call dgemm('N','N', nO, nV, nV, &
+             1d0, t1  , size(t1,1), &
+                  H_vv, size(H_vv,1), &
+             1d0, r1  , size(r1,1))
+
+  ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta)
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta)
+  !    enddo
+  !  enddo
+  !enddo
+  call dgemm('N','N', nO, nV, nO, &
+             -1d0, H_oo, size(H_oo,1), &
+                   t1  , size(t1,1), &
+              1d0, r1, size(r1,1))
+
+  !r1(u,beta) = r1(u,beta) + H_vo(a,i) * (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta))
+  ! <=>
+  ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * X(a,i,u,beta)
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      do a = 1, nV
+  !        r1(u,beta) = r1(u,beta) + H_vo(a,i) * &
+  !        (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta))
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_voov(:,:,:,:)
+  allocate(X_voov(nV, nO, nO, nV))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,X_voov,t2,t1) &
+  !$omp private(u,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do u = 1, nO
+      do i = 1, nO
+        do a = 1, nV
+          X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemv('T', nV*nO, nO*nV, &
+             1d0, X_voov, size(X_voov,1) * size(X_voov,2), &
+                  H_vo  , 1, &
+             1d0, r1    , 1)
+  
+  deallocate(X_voov)
+
+  ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a)
+  ! <=> 
+  ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta)
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      do a = 1, nV
+  !        r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a)
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_ovov(:,:,:,:)
+  allocate(X_ovov(nO, nV, nO, nV))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) &
+  !$omp private(u,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do u = 1, nO
+      do a = 1, nv
+        do i = 1, nO
+          X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemv('T', nO*nV, nO*nV, &
+             1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
+                  t1     , 1, &
+             1d0, r1     , 1)
+  
+  deallocate(X_ovov)
+
+  ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b)  
+  ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) 
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      do a = 1, nV
+  !        do b = 1, nV
+  !          r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b)  
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:)
+  allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) &
+  !$omp private(b,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do i = 1, nO
+      do b = 1, nV
+        do a = 1, nV
+          W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)  
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do i = 1, nO
+    do b = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          T_vvoo(a,b,i,u) = tau(i,u,a,b)  
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  !$omp end parallel
+
+  call dgemm('T','N',nO,nV,nO*nV*nV, &
+             1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), &
+                  W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), &
+             1d0, r1    , size(r1,1))
+  
+  deallocate(W_vvov,T_vvoo)
+
+  ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) 
+  ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) 
+  !do beta = 1, nV
+  !  do u = 1, nO
+  !    do i = 1, nO
+  !      do j = 1, nO
+  !        do a = 1, nV
+  !          r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) 
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: W_oovo(:,:,:,:)
+  allocate(W_oovo(nO,nO,nV,nO))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) &
+  !$omp private(u,a,i,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do u = 1, nO
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('T','N', nO, nV, nO*nO*nV, &
+             -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), &
+                   tau   , size(tau,1) * size(tau,2) * size(tau,3), &
+              1d0, r1    , size(r1,1))
+  
+  deallocate(W_oovo)
+
+  max_r1 = 0d0
+  do a = 1, nV
+    do i = 1, nO
+      if (dabs(r1(i,a)) > max_r1) then
+        max_r1 = dabs(r1(i,a))
+      endif
+    enddo
+  enddo
+
+  ! Change the sign for consistency with the code in spin orbitals
+  !$omp parallel &
+  !$omp shared(nO,nV,r1) &
+  !$omp private(a,i) &
+  !$omp default(none)
+  !$omp do 
+  do a = 1, nV
+    do i = 1, nO
+      r1(i,a) = -r1(i,a)
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+end
+#+end_src
+
+*** Intermediates
+**** H_oo
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(in)  :: tau(nO, nO, nV, nV)
+  double precision, intent(out) :: H_oo(nO, nO)
+
+  integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u
+
+  !H_oo = 0d0
+
+  !do i = 1, nO
+  !  do u = 1, nO
+  !    H_oo(u,i) = cc_space_f_oo(u,i)
+
+  !    do j = 1, nO
+  !      do a = 1, nV
+  !        do b = 1, nV
+  !          !H_oo(u,i) = H_oo(u,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * tau(u,j,a,b)
+  !          !H_oo(u,i) = H_oo(u,i) + cc_space_w_vvoo(a,b,i,j) * tau(u,j,a,b)
+  !          H_oo(u,i) = H_oo(u,i) + cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b)
+  !        enddo
+  !      enddo
+  !    enddo
+  !    
+  !  enddo
+  !enddo
+
+  ! H_oo(u,i) = cc_space_f_oo(u,i)
+  !$omp parallel &
+  !$omp shared(nO,H_oo,cc_space_f_oo) &
+  !$omp private(i,u) &
+  !$omp default(none)
+  !$omp do
+  do i = 1, nO
+    do u = 1, nO
+      H_oo(u,i) = cc_space_f_oo(u,i)
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b)
+  ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b)
+  call dgemm('N','T', nO, nO, nO*nV*nV,       &
+             1d0, tau     , size(tau,1),      &
+                  cc_space_w_oovv, size(cc_space_w_oovv,1), &
+             1d0, H_oo    , size(H_oo,1))     
+  
+end
+#+END_SRC
+
+**** H_vv
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(in)  :: tau(nO, nO, nV, nV)
+  double precision, intent(out) :: H_vv(nV, nV)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta
+
+  !H_vv = 0d0
+
+  !do beta = 1, nV
+  !  do a = 1, nV
+  !    H_vv(a,beta) = cc_space_f_vv(a,beta)
+
+  !    do j = 1, nO
+  !      do i = 1, nO
+  !        do b = 1, nV
+  !          !H_vv(a,beta) = H_vv(a,beta) - (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(a,b,j,i)) * tau(i,j,beta,b)
+  !          H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b)
+  !        enddo
+  !      enddo
+  !    enddo
+  !    
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: tmp_tau(:,:,:,:)
+
+  allocate(tmp_tau(nV,nO,nO,nV))
+
+  ! H_vv(a,beta) = cc_space_f_vv(a,beta)
+  !$omp parallel &
+  !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) &
+  !$omp private(a,beta,i,j,b) &
+  !$omp default(none)
+  !$omp do
+  do beta = 1, nV
+    do a = 1, nV
+      H_vv(a,beta) = cc_space_f_vv(a,beta)
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b)
+  ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta)
+  
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do j = 1, nO
+      do i = 1, nO
+        do b = 1, nV
+          tmp_tau(b,i,j,beta) = tau(i,j,beta,b) 
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nV,nV,nO*nO*nV,           &
+             -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), &
+                   tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), &
+              1d0, H_vv    , size(H_vv,1))
+
+  deallocate(tmp_tau)
+  
+end
+#+END_SRC
+
+**** H_vo
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_H_vo(nO,nV,t1,t2,H_vo)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: H_vo(nV, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta
+
+  !H_vo = 0d0
+
+  !do i = 1, nO
+  !  do a = 1, nV
+  !    H_vo(a,i) = cc_space_f_vo(a,i)
+
+  !    do j = 1, nO
+  !      do b = 1, nV
+  !        !H_vo(a,i) = H_vo(a,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t1(j,b)
+  !        H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b)
+  !      enddo
+  !    enddo
+  !    
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: w(:,:,:,:)
+
+  allocate(w(nV,nO,nO,nV))
+
+  !$omp parallel &
+  !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) &
+  !$omp private(a,beta,i,j,b) &
+  !$omp default(none)
+  !$omp do
+  do i = 1, nO
+    do a = 1, nV
+      H_vo(a,i) = cc_space_f_vo(a,i)
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b)
+  ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b)
+
+  !$omp do collapse(3)
+  do b = 1, nV
+    do j = 1, nO
+      do i = 1, nO
+        do a = 1, nV
+          w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemv('N',nV*nO, nO*nV, &
+             1d0, w   , size(w,1) * size(w,2), &
+                  t1  , 1, &
+             1d0, H_vo, 1)
+
+  deallocate(w)
+  
+end
+#+END_SRC
+
+** R2
+*** R2
+#+begin_src f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
+
+  implicit none
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
+  double precision, intent(in)  :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)
+
+  ! out
+  double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2
+
+  ! internal
+  double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:)
+  double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:)
+  integer                       :: u,v,i,j,beta,gam,a,b
+
+  allocate(g_occ(nO,nO), g_vir(nV,nV))
+  allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV))
+  allocate(A1(nO,nO,nO,nO))
+  
+  call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ)
+  call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir)
+  call compute_A1(nO,nV,t1,t2,tau,A1)
+  call compute_J1(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, &
+       cc_space_v_vvvo,cc_space_v_vvoo,J1)
+  call compute_K1(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, &
+       cc_space_v_ovov,cc_space_v_vvov,K1)
+
+  ! Residual
+  !r2 = 0d0
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,cc_space_v_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+         r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do j = 1, nO
+  !         do i = 1, nO
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           + A1(u,v,i,j) * tau(i,j,beta,gam)
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
+             1d0, A1, size(A1,1) * size(A1,2), &
+                  tau, size(tau,1) * size(tau,2), &
+             1d0, r2, size(r2,1) * size(r2,2))
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do a = 1, nV
+  !         do b = 1, nv
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           + B1(a,b,beta,gam) * tau(u,v,a,b)
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  allocate(B1(nV,nV,nV,nV))
+  call compute_B1(nO,nV,t1,t2,B1)
+  call dgemm('N','N',nO*nO,nV*nV,nV*nV, &
+             1d0, tau, size(tau,1) * size(tau,2), &
+                  B1 , size(B1,1) * size(B1,2), &
+             1d0, r2, size(r2,1) * size(r2,2))
+  deallocate(B1)
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do a = 1, nV
+  !         r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !         + g_vir(a,beta) * t2(u,v,a,gam) &
+  !         + g_vir(a,gam)  * t2(v,u,a,beta) ! P
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:)
+  allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,t2,X_oovv) &
+  !$omp private(u,v,gam,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do a = 1, nV
+    do gam = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          X_oovv(u,v,gam,a) = t2(u,v,gam,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemm('N','N',nO*nO*nV,nV,nV, &
+             1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), &
+                  g_vir, size(g_vir,1), &
+             0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Y_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do i = 1, nO
+  !         r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !         - g_occ(u,i) * t2(i,v,beta,gam) &
+  !         - g_occ(v,i) * t2(i,u,gam,beta) ! P
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  call dgemm('N','N',nO,nO*nV*nV,nO, &
+             1d0, g_occ , size(g_occ,1), &
+                  t2    , size(t2,1), &
+             0d0, X_oovv, size(X_oovv,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_oovv)
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !        do a = 1, nV
+  !          r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !          + cc_space_v_ovvv(u,a,beta,gam) * t1(v,a) &
+  !          + cc_space_v_ovvv(v,a,gam,beta) * t1(u,a) ! P
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: X_vovv(:,:,:,:)
+  allocate(X_vovv(nV,nO,nV,nV))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) &
+  !$omp private(u,a,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do u = 1, nO
+        do a = 1, nV
+          X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nO,nO*nV*nV,nV, &
+             1d0, t1    , size(t1,1), &
+                  X_vovv, size(X_vovv,1), &
+             0d0, Y_oovv, size(Y_oovv,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Y_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !        do a = 1, nV
+  !          do i = 1, nO
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           - cc_space_v_ovov(u,a,i,gam)  * t1(i,beta) * t1(v,a) &
+  !           - cc_space_v_ovov(v,a,i,beta) * t1(i,gam)  * t1(u,a) ! P
+  !          enddo
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:)
+  allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) &
+  !$omp private(u,v,gam,i) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do gam = 1, nV
+      do u = 1, nO
+        do a = 1, nV
+          X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nV*nO*nV,nV,nO, &
+              1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), &
+                   t1    , size(t1,1), &
+              0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3))
+
+  call dgemm('N','N',nO,nO*nV*nV,nV, &
+             1d0, t1, size(t1,1), &
+                  Y_vovv, size(Y_vovv,1), &
+             0d0, X_oovv, size(X_oovv,1))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+          r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_vovo,Y_vovv)
+  
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do i = 1, nO
+  !         r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !         - cc_space_v_oovo(u,v,beta,i) * t1(i,gam) &
+  !         - cc_space_v_oovo(v,u,gam,i)  * t1(i,beta) ! P
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  call dgemm('N','N',nO*nO*nV,nV,nO, &
+             1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), &
+                  t1 , size(t1,1), &
+             0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+           r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do i = 1, nO
+  !         do a = 1, nV
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           - cc_space_v_ovvo(u,a,beta,i) * t1(v,a) * t1(i,gam) &
+  !           - cc_space_v_ovvo(v,a,gam,i)  * t1(u,a) * t1(i,beta) ! P
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: Y_oovo(:,:,:,:)
+  allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) &
+  !$omp private(a,v,gam,i) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do gam = 1, nV
+      do v = 1, nO
+        do a = 1, nV
+          X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nO,nO*nV*nO,nV, &
+             1d0, t1, size(t1,1), &
+                  X_vovo, size(X_vovo,1), &
+             0d0, Y_oovo, size(Y_oovo,1))
+
+  call dgemm('N','N',nO*nO*nV, nV, nO, &
+             1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), &
+                  t1    , size(t1,1), &
+             0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,X_oovv) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+           r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  deallocate(X_vovo,Y_oovo)
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do a = 1, nV
+  !         do i = 1, nO
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           + 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) * &
+  !             (2d0 * t2(i,v,a,gam) - t2(i,v,gam,a)) &
+  !           + 0.5d0 * (2d0 * J1(v,a,gam,i)  - K1(v,a,i,gam)) * &
+  !             (2d0 * t2(i,u,a,beta) - t2(i,u,beta,a)) ! P
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:)
+  allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV))
+  !$omp parallel &
+  !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) &
+  !$omp private(u,v,gam,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do a = 1, nV
+      do beta = 1, nV
+        do u = 1, nO
+          X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta))
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do v = 1, nO
+      do i = 1, nO
+        do a = 1, nV
+          Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemm('N','N', nO*nV,nO*nV,nV*nO, &
+             1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), &
+                  Y_voov, size(Y_voov,1) * size(Y_voov,2), &
+             0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Z_ovov) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+           r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  deallocate(X_ovvo,Y_voov)
+  
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do a = 1, nV
+  !         do i = 1, nO
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           - 0.5d0 * K1(u,a,i,beta) * t2(i,v,gam,a) &
+  !           - 0.5d0 * K1(v,a,i,gam)  * t2(i,u,beta,a) !P
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:)
+  allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV))
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) &
+  !$omp private(u,a,i,beta,gam) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do u = 1, nO
+      do a = 1, nV
+        do i = 1, nO
+          X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do v = 1, nO
+      do a = 1, nV
+        do i = 1, nO
+          Y_ovov(i,a,v,gam) = t2(i,v,gam,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('T','N',nO*nV,nO*nV,nO*nV, &
+             1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
+                  Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
+             0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Z_ovov) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+           r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !       do a = 1, nV
+  !         do i = 1, nO
+  !           r2(u,v,beta,gam) = r2(u,v,beta,gam) &
+  !           - K1(u,a,i,gam)  * t2(i,v,beta,a) &
+  !           - K1(v,a,i,beta) * t2(i,u,gam,a) ! P
+  !         enddo
+  !       enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  !$omp parallel &
+  !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) &
+  !$omp private(u,v,gam,beta,i,a) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do a = 1, nV
+    do i = 1, nO
+      do gam = 1, nV
+        do u = 1, nO
+          X_ovov(u,gam,i,a) = K1(u,a,i,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do v = 1, nO
+      do a = 1, nV
+        do i = 1, nO
+          Z_ovov(i,a,v,beta) = t2(i,v,beta,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemm('N','N',nO*nV,nO*nV,nO*nV, &
+             1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
+                  Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
+             0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,r2,Z_ovov) &
+  !$omp private(u,v,gam,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do v = 1, nO
+        do u = 1, nO
+           r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_ovov,Y_ovov,Z_ovov)
+  
+  ! Change the sign for consistency with the code in spin orbitals
+  !$omp parallel &
+  !$omp shared(nO,nV,r2) &
+  !$omp private(i,j,a,b) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          r2(i,j,a,b) = -r2(i,j,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  max_r2 = 0d0
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          if (dabs(r2(i,j,a,b)) > max_r2) then
+            max_r2 = dabs(r2(i,j,a,b))
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(g_occ,g_vir,J1,K1,A1)
+  
+end
+#+end_src
+
+*** Intermediates
+**** A1
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_A1(nO,nV,t1,t2,tau,A1)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(in)  :: tau(nO, nO, nV, nV)
+  double precision, intent(out) :: A1(nO, nO, nO, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta
+
+  !A1 = 0d0
+
+  !do j = 1, nO
+  !  do i = 1, nO
+  !    do v = 1, nO
+  !      do u = 1, nO
+  !        A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
+
+  !        do a = 1, nV
+  !          A1(u,v,i,j) = A1(u,v,i,j) &
+  !          + cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
+  !          + cc_space_v_vooo(a,v,i,j) * t1(u,a)
+  !          
+  !          do b = 1, nV
+  !            A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
+  !          enddo  
+  !        enddo
+  !        
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:)
+  allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO))
+  
+  ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
+  !$omp parallel &
+  !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) &
+  !$omp private(u,v,i,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do v = 1, nO
+        do u = 1, nO
+          A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
+
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do u = 1, nO
+        do a = 1, nV
+          X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N', nO, nO*nO*nO, nV, &
+             1d0, t1    , size(t1,1), &
+                  X_vooo, size(X_vooo,1), &
+             0d0, Y_oooo, size(Y_oooo,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,A1,Y_oooo) &
+  !$omp private(u,v,i,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do v = 1, nO
+        do u = 1, nO
+          A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  deallocate(X_vooo,Y_oooo)
+
+  ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a)
+  call dgemm('N','N', nO, nO*nO*nO, nV, &
+             1d0, t1      , size(t1,1), &
+                  cc_space_v_vooo, size(cc_space_v_vooo,1), &
+             1d0, A1      , size(A1,1))
+
+  ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
+  call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
+             1d0, tau     , size(tau,1) * size(tau,2), &
+                  cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
+             1d0, A1      , size(A1,1) * size(A1,2))
+   
+end
+#+END_SRC
+
+**** B1
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_B1(nO,nV,t1,t2,B1)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: B1(nV, nV, nV, nV)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  !B1 = 0d0
+
+  !do gam = 1, nV
+  !  do beta = 1, nV
+  !    do b = 1, nV
+  !      do a = 1, nV
+  !        B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) 
+
+  !        do i = 1, nO
+  !          B1(a,b,beta,gam) = B1(a,b,beta,gam) &
+  !          - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) &
+  !          - cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
+  !        enddo
+  !        
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+  
+  double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:)
+  allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV))
+
+  ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) 
+  !$omp parallel &
+  !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) &
+  !$omp private(a,b,beta,gam) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  !$omp do collapse(3)
+  do i = 1, nO
+    do gam = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          X_vvvo(a,b,gam,i) = cc_space_v_vvov(a,b,i,gam)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) &
+  call dgemm('N','N', nV*nV*nV, nV, nO, &
+             -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), &
+                   t1      , size(t1,1), &
+              1d0, B1      , size(B1,1) * size(B1,2) * size(B1,3))
+
+  
+  ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
+  call dgemm('N','N', nV*nV*nV, nV, nO, &
+             -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), &
+                   t1    , size(t1,1), &
+              0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2) * size(Y_vvvv,3))
+
+  !$omp parallel &
+  !$omp shared(nV,B1,Y_vvvv) &
+  !$omp private(a,b,beta,gam) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do gam = 1, nV
+    do beta = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          B1(a,b,beta,gam) = B1(a,b,beta,gam) + Y_vvvv(a,b,gam,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  deallocate(X_vvvo,Y_vvvv)
+  
+end
+#+END_SRC
+
+**** g_occ
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV), H_oo(nO, nO)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: g_occ(nO, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  !g_occ = 0d0
+
+  !do i = 1, nO
+  !  do u = 1, nO
+  !    g_occ(u,i) = H_oo(u,i)
+  !    
+  !    do a = 1, nV
+  !      g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a)
+  !      
+  !      do j = 1, nO
+  !        g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a)
+  !      enddo
+  !         
+  !    enddo
+  !  enddo
+  !enddo
+
+  call dgemm('N','N',nO,nO,nV, &
+             1d0, t1, size(t1,1), &
+                  cc_space_f_vo, size(cc_space_f_vo,1), &
+             0d0, g_occ, size(g_occ,1))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) &
+  !$omp private(i,j,a,u) &
+  !$omp default(none)
+  !$omp do
+  do i = 1, nO
+    do u = 1, nO
+      g_occ(u,i) = g_occ(u,i) + H_oo(u,i)
+    enddo
+  enddo
+  !$omp end do
+  
+  !$omp do collapse(1)
+  do i = 1, nO
+    do j = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+end
+#+END_SRC
+
+**** g_vir
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV), H_vv(nV, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: g_vir(nV, nV)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  !g_vir = 0d0
+
+  !do beta = 1, nV
+  !  do a = 1, nV
+  !    g_vir(a,beta) = H_vv(a,beta)
+  !    
+  !    do i = 1, nO
+  !      g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta)
+  !      
+  !      do b = 1, nV
+  !        g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b)
+  !      enddo
+  !         
+  !    enddo
+  !  enddo
+  !enddo
+  
+  call dgemm('N','N',nV,nV,nO, &
+             -1d0, cc_space_f_vo , size(cc_space_f_vo,1), &
+                   t1   , size(t1,1), &
+              0d0, g_vir, size(g_vir,1))
+      
+  !$omp parallel &
+  !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) &
+  !$omp private(i,b,a,beta) &
+  !$omp default(none)
+  !$omp do
+  do beta = 1, nV
+    do a = 1, nV
+      g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta)
+    enddo
+  enddo
+  !$omp end do
+
+  !$omp do collapse(1)
+  do beta = 1, nV
+    do i = 1, nO
+      do b = 1, nV
+        do a = 1, nV
+          g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+end
+#+END_SRC
+
+**** J1
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(in)  :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO)
+  double precision, intent(in)  :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO)
+  double precision, intent(out) :: J1(nO, nV, nV, nO)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  !J1 = 0d0
+
+  !do i = 1, nO
+  !  do beta = 1, nV
+  !    do a = 1, nV
+  !      do u = 1, nO
+  !       J1(u,a,beta,i) = cc_space_v_ovvo(u,a,beta,i)
+
+  !        do j = 1, nO
+  !          J1(u,a,beta,i) = J1(u,a,beta,i) &
+  !          - cc_space_v_ovoo(u,a,j,i) * t1(j,beta)
+  !        enddo
+
+  !        do b = 1, nV
+  !          J1(u,a,beta,i) = J1(u,a,beta,i) &
+  !          + cc_space_v_vvvo(b,a,beta,i) * t1(u,b)    
+  !        enddo
+
+  !        do j = 1, nO
+  !          do b = 1, nV
+  !           J1(u,a,beta,i) = J1(u,a,beta,i) &
+  !           - cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) &
+  !           + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b)
+  !          enddo
+  !        enddo
+  !        
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:)
+  allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) &
+  !$omp private(i,j,a,u,beta) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = v_ovvo(u,a,beta,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do j = 1, nO
+    do i = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemm('N','N',nO*nV*nO,nV,nO, &
+            -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), &
+                  t1    , size(t1,1), &
+             0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,Y_ovov) &
+  !$omp private(i,beta,a,u) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  deallocate(X_ovoo)
+
+  ! v_vvvo(b,a,beta,i) * t1(u,b)
+  call dgemm('N','N',nO,nV*nV*nO,nV, &
+             1d0, t1    , size(t1,1), &
+                  v_vvvo, size(v_vvvo,1), &
+             1d0, J1    , size(J1,1))
+
+  !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) &
+  double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:)
+  allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO))
+  !$omp parallel &
+  !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) &
+  !$omp private(i,beta,a,u,b,j) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do b = 1, nV
+    do j = 1, nO
+      do beta = 1, nV
+        do u = 1, nO
+          Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do b = 1, nV
+    do j = 1, nO
+      do i = 1, nO
+        do a = 1, nV
+          X_voov(a,i,j,b) = v_vvoo(a,b,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','T',nO*nV,nV*nO,nO*nV, &
+             -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
+                   X_voov, size(X_voov,1) * size(X_voov,2), &
+              0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2))
+  deallocate(X_voov)
+
+  double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:)
+  allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO))
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) &
+  !$omp private(i,beta,a,u,j,b) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  
+  !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b)
+  !$omp do collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do i = 1, nO
+        do a = 1, nV
+          Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j))
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+  
+  !$omp do collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do beta = 1, nV
+        do u = 1, nO
+          X_ovvo(u,beta,b,j) = t2(u,j,beta,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+  
+  call dgemm('N','T',nO*nV,nV*nO,nV*nO, &
+             1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), &
+                  Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), &
+             0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,J1,Z_ovvo) &
+  !$omp private(i,beta,a,u) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do i = 1, nO
+    do beta = 1, nV
+      do a = 1, nV
+        do u = 1, nO
+          J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_ovvo,Z_ovvo,Y_ovov)  
+  
+end
+#+END_SRC
+
+**** K1
+#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f
+subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(in)  :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV)
+  double precision, intent(in)  :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO)
+  double precision, intent(out) :: K1(nO, nV, nO, nV)
+
+  double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam
+
+  !K1 = 0d0
+
+  !do beta = 1, nV
+  !  do i = 1, nO
+  !    do a = 1, nV
+  !      do u = 1, nO
+  !        K1(u,a,i,beta) = cc_space_v_ovov(u,a,i,beta)
+
+  !        do j = 1, nO
+  !          K1(u,a,i,beta) = K1(u,a,i,beta) &
+  !          - cc_space_v_ovoo(u,a,i,j) * t1(j,beta)
+  !        enddo
+
+  !        do b = 1, nV
+  !          K1(u,a,i,beta) = K1(u,a,i,beta) &
+  !          + cc_space_v_vvov(b,a,i,beta) * t1(u,b)    
+  !        enddo
+
+  !        do j = 1, nO
+  !          do b = 1, nV
+  !           K1(u,a,i,beta) = K1(u,a,i,beta) &
+  !           - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta))
+  !          enddo
+  !        enddo
+  !        
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO))
+  
+  !$omp parallel &
+  !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) &
+  !$omp private(i,beta,a,u,j,b) &
+  !$omp default(none)
+  !$omp do collapse(3)
+  do beta = 1, nV
+    do i = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          K1(u,a,i,beta) = v_ovov(u,a,i,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do i = 1, nO
+    do a = 1, nV
+      do j = 1, nO
+        do b = 1, nV
+          X(b,j,a,i) = - v_vvoo(b,a,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do beta = 1, nV
+        do u = 1, nO
+          Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  call dgemm('N','N',nO*nV*nO,nV,nO, &
+            -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), &
+                  t1    , size(t1,1), &
+            1d0, K1    , size(K1,1) * size(K1,2) * size(K1,3))
+
+  call dgemm('N','N',nO,nV*nO*nV,nV, &
+             1d0, t1    , size(t1,1), &
+                  v_vvov, size(v_vvov,1), &
+             1d0, K1    , size(K1,1))
+
+  ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i)
+  call dgemm('N','N',nV*nO,nO*nV,nV*nO, &
+             1d0, Y, size(Y,1) * size(Y,2), &
+                  X, size(X,1) * size(X,2), &
+             0d0, Z, size(Z,1) * size(Z,2))
+
+  !$omp parallel &
+  !$omp shared(nO,nV,K1,Z) &
+  !$omp private(i,beta,a,u) &
+  !$omp default(none)
+  !$omp do collapse(3)
+   do beta = 1, nV
+    do i = 1, nO
+      do a = 1, nV
+        do u = 1, nO
+          K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X,Y,Z)
+  
+end
+#+END_SRC
+
diff --git a/src/ccsd/org/ccsd_spin_orb.org b/src/ccsd/org/ccsd_spin_orb.org
new file mode 100644
index 00000000..8fb403c3
--- /dev/null
+++ b/src/ccsd/org/ccsd_spin_orb.org
@@ -0,0 +1,2352 @@
+* CCSD spin orb
+** Ref
+A direct product decomposition approach for symmetry exploitation in manybody
+methods. I. Energy calculations
+John F. Stanton, Jürgen Gauss, John D. Watts, and Rodney J. Bartlett
+The Journal of Chemical Physics 94, 4334 (1991)
+http://dx.doi.org/10.1063/1.460620A
+
+** Prog
+#+begin_src f90 :comments org :tangle ccsd_spin_orb.irp.f
+program ccsd
+
+  implicit none
+
+  BEGIN_DOC
+  ! CCSD in spin orbitals
+  END_DOC
+
+  read_wf = .True.
+  touch read_wf
+
+  call run_ccsd_spin_orb
+  
+end 
+#+end_src
+
+** Code
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine run_ccsd_spin_orb
+
+  implicit none
+
+  BEGIN_DOC
+  ! CCSD in spin orbitals
+  END_DOC
+
+  double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:)
+  double precision, allocatable :: r1(:,:), r2(:,:,:,:)
+  double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:)
+  double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:)
+  
+  double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:)
+  double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:)
+  double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:)
+  double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:)
+  double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:)
+  double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:)
+  double precision, allocatable :: v_vvvv(:,:,:,:)
+
+  double precision, allocatable :: all_err(:,:), all_t(:,:)
+
+  logical                       :: not_converged
+  integer, allocatable          :: list_occ(:,:), list_vir(:,:)
+  integer                       :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4)
+  integer                       :: nb_iter, i,j,a,b
+  double precision              :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi
+  integer(bit_kind)             :: det(N_int,2)
+
+  det = psi_det(:,:,cc_ref)
+  print*,'Reference determinant:'
+  call print_det(det,N_int)
+ 
+  ! Extract number of occ/vir alpha/beta spin orbitals
+  !call extract_n_spin(det,n_spin)
+  nOa = cc_nOa !n_spin(1)
+  nOb = cc_nOb !n_spin(2)
+  nVa = cc_nVa !n_spin(3)
+  nVb = cc_nVb !n_spin(4)
+
+  ! Total number of occ/vir spin orb
+  nO = cc_nOab !nOa + nOb
+  nV = cc_nVab !nVa + nVb
+  ! Debug
+  !print*,nO,nV
+
+  ! Number of occ/vir spin orb per spin
+  nO_S = cc_nO_S !(/nOa,nOb/)
+  nV_S = cc_nV_S !(/nVa,nVb/)
+  ! Debug
+  !print*,nO_S,nV_S
+
+  ! Maximal number of occ/vir 
+  nO_m = cc_nO_m !max(nOa, nOb)
+  nV_m = cc_nV_m !max(nVa, nVb)
+  ! Debug
+  !print*,nO_m,nV_m
+  
+  allocate(list_occ(nO_m,2), list_vir(nV_m,2))
+  list_occ = cc_list_occ_spin
+  list_vir = cc_list_vir_spin
+  ! Debug
+  !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir)
+  !print*,list_occ(:,1)
+  !print*,list_occ(:,2)
+  !print*,list_vir(:,1)
+  !print*,list_vir(:,2)
+
+  ! Allocation
+  allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV))
+  allocate(r1(nO,nV), r2(nO,nO,nV,nV))
+  allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV))
+  allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV))
+  allocate(v_oooo(nO,nO,nO,nO))
+  !allocate(v_vooo(nV,nO,nO,nO))
+  allocate(v_ovoo(nO,nV,nO,nO))
+  allocate(v_oovo(nO,nO,nV,nO))
+  allocate(v_ooov(nO,nO,nO,nV))
+  allocate(v_vvoo(nV,nV,nO,nO))
+  !allocate(v_vovo(nV,nO,nV,nO))
+  !allocate(v_voov(nV,nO,nO,nV))
+  allocate(v_ovvo(nO,nV,nV,nO))
+  allocate(v_ovov(nO,nV,nO,nV))
+  allocate(v_oovv(nO,nO,nV,nV))
+  !allocate(v_vvvo(nV,nV,nV,nO))
+  !allocate(v_vvov(nV,nV,nO,nV))
+  !allocate(v_vovv(nV,nO,nV,nV))
+  !allocate(v_ovvv(nO,nV,nV,nV))
+  !allocate(v_vvvv(nV,nV,nV,nV))
+  allocate(f_o(nO), f_v(nV))
+  allocate(f_oo(nO, nO))
+  allocate(f_ov(nO, nV))
+  allocate(f_vv(nV, nV))
+  
+  ! Allocation for the diis
+  if (cc_update_method == 'diis') then
+    allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth))
+    all_err = 0d0
+    all_t   = 0d0
+  endif
+
+  ! Fock elements
+  call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo)
+  call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov)
+  call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv)
+
+  ! Diag elements
+  do i = 1, nO
+    f_o(i) = f_oo(i,i)
+  enddo
+  do i = 1, nV
+    f_v(i) = f_vv(i,i)
+  enddo
+
+  ! Bi electronic integrals from list
+  call wall_time(ti)
+  ! OOOO
+  call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo)
+
+  ! OOO V
+  !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo)
+  call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo)
+  call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo)
+  call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov)
+
+  ! OO VV
+  call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo)
+  !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo)
+  !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov)
+  call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo)
+  call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov)
+  call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv)
+
+  ! O VVV
+  !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo)
+  !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov)
+  !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv)
+  !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv)
+
+  ! VVVV
+  !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv)
+  call wall_time(tf)
+  if (cc_dev) then
+    print*,'Load bi elec int:',tf-ti,'s'
+  endif
+
+  ! Init of T
+  t1 = 0d0
+  call guess_t1(nO,nV,f_o,f_v,f_ov,t1)
+  call guess_t2(nO,nV,f_o,f_v,v_oovv,t2)
+  call compute_tau_spin(nO,nV,t1,t2,tau)
+  call compute_tau_t_spin(nO,nV,t1,t2,tau_t)
+  
+  ! Loop init
+  nb_iter = 0
+  not_converged = .True.
+  r1 = 0d0
+  r2 = 0d0
+  max_r1 = 0d0
+  max_r2 = 0d0
+
+  call det_energy(det,uncorr_energy)
+  print*,'Det energy', uncorr_energy
+  call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy)
+  print*,'guess energy', uncorr_energy+energy, energy
+  
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+  write(*,'(A77)') ' |   It.  |       E(CCSD) (Ha) | Correlation (Ha) |  Conv. T1  |  Conv. T2  |'
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+
+  call wall_time(ta)
+
+  ! Loop
+  do while (not_converged)
+
+    ! Intermediates
+    call wall_time(tbi)
+    call wall_time(ti)
+    call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo)
+    call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov)
+    call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv)
+    call wall_time(tf)
+    if (cc_dev) then
+      print*,'Compute cFs:',tf-ti,'s'
+    endif
+    
+    call wall_time(ti)
+    call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
+    call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
+    !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv)
+    call wall_time(tf)
+    if (cc_dev) then
+      print*,'Compute cFs:',tf-ti,'s'
+    endif
+
+    ! Residuals
+    call wall_time(ti)
+    call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1)
+    call wall_time(tf)
+    if (cc_dev) then
+      print*,'Compute r1:',tf-ti,'s'
+    endif
+    call wall_time(ti)
+    call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2)
+    call wall_time(tf)
+    if (cc_dev) then
+      print*,'Compute r2:',tf-ti,'s'
+    endif
+
+    ! Max elements in the residuals
+    max_r1 = maxval(abs(r1(:,:)))
+    max_r2 = maxval(abs(r2(:,:,:,:)))
+    max_r  = max(max_r1,max_r2)
+
+    call wall_time(ti)
+    ! Update
+    if (cc_update_method == 'diis') then
+      !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+      !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+      call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t)
+
+    ! Standard update as T = T - Delta
+    elseif (cc_update_method == 'none') then
+      call update_t1(nO,nV,f_o,f_v,r1,t1)
+      call update_t2(nO,nV,f_o,f_v,r2,t2)
+    else
+      print*,'Unkonw cc_method_method: '//cc_update_method
+    endif
+
+    call compute_tau_spin(nO,nV,t1,t2,tau)
+    call compute_tau_t_spin(nO,nV,t1,t2,tau_t)
+    call wall_time(tf)
+    if (cc_dev) then
+      print*,'Update:',tf-ti,'s'
+    endif
+
+    ! Print
+    call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy)
+    call wall_time(tfi)
+    
+    write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', &
+         uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
+    if (cc_dev) then
+      print*,'Total:',tfi-tbi,'s'
+    endif
+
+    ! Convergence
+    nb_iter = nb_iter + 1
+    if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then
+      not_converged = .False.
+    endif
+
+  enddo
+  write(*,'(A77)') ' -----------------------------------------------------------------------------'
+  call wall_time(tb)
+  print*,'Time: ',tb-ta, ' s'
+  print*,''
+  if (max_r < cc_thresh_conv) then
+    write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations'
+  else
+    write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations'
+  endif
+  print*,''
+  write(*,'(A15,F18.12,A3)') ' E(CCSD)     = ', uncorr_energy+energy, ' Ha'
+  write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha'
+  write(*,'(A15,1pE10.2,A3)')' Conv        = ', max_r
+  print*,''
+
+  call write_t1(nO,nV,t1)
+  call write_t2(nO,nV,t2)
+
+  ! Deallocate
+  if (cc_update_method == 'diis') then
+     deallocate(all_err,all_t)
+  endif
+  deallocate(tau,tau_t)
+  deallocate(r1,r2)
+  deallocate(cF_oo,cF_ov,cF_vv)
+  deallocate(cW_oooo,cW_ovvo)!,cW_vvvv)
+  deallocate(v_oooo)
+  deallocate(v_ovoo,v_oovo)
+  deallocate(v_ovvo,v_ovov,v_oovv)
+  
+  if (cc_par_t .and. elec_alpha_num  +elec_beta_num > 2) then
+    double precision :: t_corr
+    print*,'CCSD(T) calculation...'
+    call wall_time(ta)
+    !allocate(v_vvvo(nV,nV,nV,nO))
+    !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, &
+    !   cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, &
+    !   cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
+    !   nV,nV,nV,nO, v_vvvo)
+
+    !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr)
+    call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr)
+    !print*,'Working on it...'
+    !call abort
+    call wall_time(tb)
+    print*,'Done'
+    print*,'Time: ',tb-ta, ' s'
+    print*,''
+    write(*,'(A15,F18.12,A3)') ' E(CCSD(T))  = ', uncorr_energy + energy + t_corr, ' Ha'
+    write(*,'(A15,F18.12,A3)') ' E(T)        = ', t_corr, ' Ha'
+    write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha'
+    print*,''
+  endif
+  print*,'Reference determinant:'
+  call print_det(det,N_int)
+  
+  deallocate(f_oo,f_ov,f_vv,f_o,f_v)
+  deallocate(v_ooov,v_vvoo,t1,t2)
+  !deallocate(v_ovvv,v_vvvo,v_vovv)
+  !deallocate(v_vvvv)
+  
+end
+#+end_src
+
+* Energy
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy)
+
+  implicit none
+
+  BEGIN_DOC
+  ! CCSD energy in spin orbitals
+  END_DOC
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: Fov(nO,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: energy
+
+  integer                       :: i,j,a,b
+
+
+  energy = 0d0
+
+  do i=1,nO
+      do a=1,nV
+      energy = energy + Fov(i,a) * t1(i,a)
+    end do
+  end do
+
+  do i=1,nO
+    do j=1,nO
+      do a=1,nV
+        do b=1,nV
+          energy = energy                                & 
+                 + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) &
+                 + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b)
+        end do
+      end do
+    end do
+  end do
+
+end
+#+end_src
+
+* T
+** Update
+*** Tau
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_tau_spin(nO,nV,t1,t2,tau)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: tau(nO,nO,nV,nV)
+  
+  integer                       :: i,j,k,l
+  integer                       :: a,b,c,d
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(tau,t1,t2,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do i=1,nO
+    do j=1,nO
+      do a=1,nV
+        do b=1,nV
+          tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+end
+#+end_src
+
+*** Tau_t
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_tau_t_spin(nO,nV,t1,t2,tau_t)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: tau_t(nO,nO,nV,nV)
+
+  integer                       :: i,j,k,l
+  integer                       :: a,b,c,d
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(tau_t,t1,t2,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do i=1,nO
+    do j=1,nO
+      do a=1,nV
+        do b=1,nV
+          tau_t(i,j,a,b) = t2(i,j,a,b) + 0.5d0*(t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a))
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+end
+#+end_src
+
+* R
+** R1
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: f_o(nO), f_v(nV)
+  double precision,intent(in)   :: Fov(nO,nV)
+  double precision,intent(in)   :: cF_oo(nO,nO)
+  double precision,intent(in)   :: cF_ov(nO,nV)
+  double precision,intent(in)   :: cF_vv(nV,nV)
+  double precision,intent(in)   :: v_oovo(nO,nO,nV,nO)
+  double precision,intent(in)   :: v_ovov(nO,nV,nO,nV)
+  !double precision,intent(in)   :: v_ovvv(nO,nV,nV,nV)
+
+  double precision,intent(out)  :: r1(nO,nV)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+  !double precision, allocatable :: X_vovv(:,:,:,:)
+  double precision, allocatable :: X_oovv(:,:,:,:)
+  double precision              :: accu
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r1,t1,t2,Fov,cF_vv,cF_ov, &
+  !$OMP v_ovov,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(1)
+  do a=1,nV
+    do i=1,nO
+      r1(i,a) = Fov(i,a)
+      do e=1,nV
+        do m=1,nO
+          r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e)
+        end do
+      end do
+      do f=1,nV
+        do n=1,nO
+          r1(i,a) = r1(i,a) - t1(n,f)*v_ovov(n,a,i,f)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  !do a=1,nV
+  !  do i=1,nO
+  !    do e=1,nV
+  !      r1(i,a) = r1(i,a) + t1(i,e)*cF_vv(a,e)
+  !    end do
+  !  end do
+  !end do
+  call dgemm('N','T', nO, nV, nV, &
+             1d0, t1   , size(t1,1), &
+                  cF_vv, size(cF_vv,1), &
+             1d0, r1   , size(r1,1))
+  
+  !do a=1,nV
+  !  do i=1,nO
+  !    do m=1,nO
+  !      r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i)
+  !    end do
+  !  end do
+  !end do
+  call dgemm('T','N', nO, nV, nO, &
+             -1d0, cF_oo, size(cF_oo,1), &
+                   t1   , size(t1,1), &
+              1d0, r1   , size(r1,1))
+
+  !do a=1,nV
+  !  do i=1,nO
+  !    do f=1,nV
+  !      do e=1,nV
+  !        do m=1,nO
+  !          r1(i,a) = r1(i,a) - 0.5d0*t2(i,m,e,f)*v_ovvv(m,a,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+
+  !allocate(X_vovv(nV,nO,nV,nV))
+  double precision, allocatable :: v_ovvf(:,:,:), X_vovf(:,:,:)
+  allocate(v_ovvf(nO,nV,nV),X_vovf(nV,nO,nV))
+
+  do f = 1, nV
+    call gen_v_spin_3idx(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, &
+           cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, &
+           nO,nV,nV, v_ovvf)
+    !$OMP PARALLEL &
+    !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) &
+    !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
+    !$OMP DEFAULT(NONE)
+    
+
+    !$OMP DO collapse(3)
+    !do f = 1, nV
+      do e = 1, nV
+         do m = 1, nO
+           do a = 1, nV
+             !X_vovv(a,m,e,f) = v_ovvv(m,a,e,f)
+             X_vovf(a,m,e) = v_ovvf(m,a,e)
+          enddo
+        enddo
+      enddo
+    !enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+      
+    call dgemm('N','T', nO, nV, nO*nV, &
+             -0.5d0, t2(1,1,1,f), size(t2,1), &
+                     X_vovf, size(X_vovf,1), &
+              1d0  , r1    , size(r1,1))
+  enddo
+  
+  !call dgemm('N','T', nO, nV, nO*nV*nV, &
+  !           -0.5d0, t2    , size(t2,1), &
+  !                   X_vovv, size(X_vovv,1), &
+  !            1d0  , r1    , size(r1,1))
+  
+  deallocate(X_vovf)
+  !deallocate(X_vovv)
+  allocate(X_oovv(nO,nO,nV,nV))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r1,t1,t2,X_oovv, &
+  !$OMP f_o,f_v,v_oovo,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
+  !$OMP DEFAULT(NONE)
+  
+  !do a=1,nV
+  !  do i=1,nO
+  !    do e=1,nV
+  !      do m=1,nO
+  !        do n=1,nO
+  !          r1(i,a) = r1(i,a) - 0.5d0*t2(m,n,a,e)*v_oovo(n,m,e,i)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  
+  !$OMP DO collapse(3)
+  do a = 1, nV
+    do e = 1, nV
+      do m = 1, nO
+        do n = 1, nO
+          X_oovv(n,m,e,a) = t2(m,n,a,e)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('T','N', nO, nV, nO*nO*nV, &
+             -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), &
+                     X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), &
+             1d0   , r1    , size(r1,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do a = 1, nV
+    do i = 1, nO
+      r1(i,a) = (f_o(i)-f_v(a)) * t1(i,a) - r1(i,a)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(X_oovv)
+
+end
+#+end_src
+
+** R2
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: cF_oo(nO,nO)
+  double precision,intent(in)   :: cF_ov(nO,nV)
+  double precision,intent(in)   :: cF_vv(nV,nV)
+  double precision,intent(in)   :: f_o(nO), f_v(nV)
+  double precision,intent(in)   :: cW_oooo(nO,nO,nO,nO)
+  !double precision,intent(in)   :: cW_vvvv(nV,nV,nV,nV)
+  double precision,intent(in)   :: cW_ovvo(nO,nV,nV,nO)
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: tau(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_ovoo(nO,nV,nO,nO)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_ovvo(nO,nV,nV,nO)
+  !double precision,intent(in)   :: v_vvvo(nV,nV,nV,nO)!, v_vovv(nV,nO,nV,nV)
+
+  double precision,intent(out)  :: r2(nO,nO,nV,nV)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+  double precision, allocatable :: X_vvoo(:,:,:,:)
+  !double precision, allocatable :: A_vvov(:,:,:,:)
+  double precision, allocatable :: X_oovv(:,:,:,:), Y_oovv(:,:,:,:)
+  double precision, allocatable :: A_vvoo(:,:,:,:), B_ovoo(:,:,:,:), C_ovov(:,:,:,:)
+  double precision, allocatable :: A_ovov(:,:,:,:), B_ovvo(:,:,:,:), X_ovvo(:,:,:,:)
+  double precision, allocatable :: A_vv(:,:)
+  double precision, allocatable :: A_oo(:,:), B_oovv(:,:,:,:)
+  double precision, allocatable :: A_vbov(:,:,:), X_vboo(:,:,:), v_vbvo(:,:,:)
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+  !        r2(i,j,a,b) = v_oovv(i,j,a,b)
+  !      end do
+  !    end do
+  !  end do
+  !end do
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          r2(i,j,a,b) = r2(i,j,a,b) + t2(i,j,a,e)*cF_vv(b,e)
+  !          r2(i,j,a,b) = r2(i,j,a,b) - t2(i,j,b,e)*cF_vv(a,e)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(X_oovv(nO,nO,nV,nV))
+  call dgemm('N','T',nO*nO*nV, nV, nV, &
+             1d0, t2    , size(t2,1) * size(t2,2) * size(t2,3), &
+                  cF_VV , size(cF_vv,1), &
+             0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = v_oovv(i,j,a,b) + X_oovv(i,j,a,b) - X_oovv(i,j,b,a)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  !deallocate(X_oovv)
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e)
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_vv(nV,nV))!, X_oovv(nO,nO,nV,nV))
+  call dgemm('T','N', nV, nV, nO, &
+             1d0, t1   , size(t1,1), &
+                  cF_ov, size(cF_ov,1), &
+             0d0, A_vv , size(A_vv,1))
+
+  call dgemm('N','T', nO*nO*nV, nV, nV, &
+             0.5d0, t2    , size(t2,1) * size(t2,2) * size(t2,3), &
+                    A_vv  , size(A_vv,1), &
+             0d0  , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) 
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+             
+  deallocate(A_vv)!,X_oovv)
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do m=1,nO
+  !          r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j)
+  !          r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(Y_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV))
+  !$OMP PARALLEL &
+  !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,m,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do i=1,nO
+        do m=1,nO
+          X_oovv(m,i,a,b) = t2(i,m,a,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dgemm('T','N', nO, nO*nV*nV, nO, &
+             1d0, cF_oo , size(cF_oo,1), &
+                  X_oovv, size(X_oovv,1), &
+             0d0, Y_oovv, size(Y_oovv,1))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) 
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  deallocate(Y_oovv)!,X_oovv)
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e)
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV))
+  
+  call dgemm('N','T', nO, nO, nV, &
+        1d0, t1   , size(t1,1), &
+             cF_ov, size(cF_ov,1), &
+        0d0, A_oo , size(A_oo,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(t2,B_oovv,nO,nV) &
+  !$OMP PRIVATE(i,m,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do i = 1, nO
+        do m = 1, nO
+          B_oovv(m,i,a,b) = t2(i,m,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('N','N', nO, nO*nV*nV, nO, &
+             0.5d0, A_oo, size(A_oo,1), &
+                    B_oovv, size(B_oovv,1), &
+             0d0  , X_oovv, size(X_oovv,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  deallocate(A_oo,B_oovv,X_oovv)
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do n=1,nO
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(m,n,a,b)*cW_oooo(m,n,i,j)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  call dgemm('T','N', nO*nO, nV*nV, nO*nO, &
+             0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), &
+                    tau    , size(tau,1) * size(tau,2), &
+             1d0  , r2     , size(r2,1) * size(r2,2))
+  
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do f=1,nV
+  !          do e=1,nV
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  !call dgemm('N','T', nO*nO, nV*nV, nV*nV, &
+  !           0.5d0, tau    , size(tau,1) * size(tau,2), &
+  !                  cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), &
+  !           1d0  , r2     , size(r2,1) * size(r2,2))
+  double precision :: ti,tf
+  call wall_time(ti)
+  call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2)
+  call wall_time(tf)
+  if (cc_dev) then
+    print*,'cW_vvvv:',tf-ti,'s'
+  endif
+  
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b)                                                 & 
+  !                        + t2(i,m,a,e)*cW_ovvo(m,b,e,j) &
+  !                        - t2(j,m,a,e)*cW_ovvo(m,b,e,i) &
+  !                        - t2(i,m,b,e)*cW_ovvo(m,a,e,j) &
+  !                        + t2(j,m,b,e)*cW_ovvo(m,a,e,i) &
+  !                        - t1(i,e)*t1(m,a)*v_ovvo(m,b,e,j) &
+  !                        + t1(j,e)*t1(m,a)*v_ovvo(m,b,e,i) &
+  !                        + t1(i,e)*t1(m,b)*v_ovvo(m,a,e,j) &
+  !                        - t1(j,e)*t1(m,b)*v_ovvo(m,a,e,i)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_ovov(nO,nV,nO,nV), B_ovvo(nO,nV,nV,nO), X_ovvo(nO,nV,nV,nO))
+  !$OMP PARALLEL &
+  !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do a = 1, nV
+    do i = 1, nO
+      do e = 1, nV
+        do m = 1, nO
+          A_ovov(m,e,i,a) = t2(i,m,a,e)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO NOWAIT
+  !$OMP DO collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do e = 1, nV
+        do m = 1, nO
+          B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) 
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('T','N', nO*nV, nV*nO, nO*nV, &
+             1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), &
+                  B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), &
+             0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,X_ovvo,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          r2(i,j,a,b) = r2(i,j,a,b) + X_ovvo(i,a,b,j) - X_ovvo(j,a,b,i) &
+                                    - X_ovvo(i,b,a,j) + X_ovvo(j,b,a,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(A_ovov,B_ovvo,X_ovvo)
+  allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do m = 1, nO
+    do j = 1, nO
+      do b = 1, nV
+        do e = 1, nV
+          A_vvoo(e,b,j,m) = v_ovvo(m,b,e,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('N','N', nO, nV*nO*nO, nV, &
+             1d0, t1    , size(t1,1), &
+                  A_vvoo, size(A_vvoo,1), &
+             0d0, B_ovoo, size(B_ovoo,1))
+  
+  call dgemm('N','N', nO*nV*nO, nV, nO, &
+             1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), &
+                  t1    , size(t1,1), &
+             0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2) * size(C_ovov,3))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,C_ovov,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - C_ovov(i,b,j,a) + C_ovov(j,b,i,a) &
+                                    + C_ovov(i,a,j,b) - C_ovov(j,a,i,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(A_vvoo, B_ovoo, C_ovov)
+                  
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          r2(i,j,a,b) = r2(i,j,a,b) + t1(i,e)*v_vvvo(a,b,e,j) - t1(j,e)*v_vvvo(a,b,e,i)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  !allocate(A_vvov(nV,nV,nO,nV), X_vvoo(nV,nV,nO,nO))
+  allocate(A_vbov(nV,nO,nV), X_vboo(nV,nO,nO), v_vbvo(nV,nV,nO))
+  do b = 1, nV
+
+    call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, &
+         cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
+         nV,nV,nO, v_vbvo)
+    
+    !$OMP PARALLEL &
+    !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) &
+    !$OMP PRIVATE(i,j,a,e,m) &
+    !$OMP DEFAULT(NONE)
+    
+    !$OMP DO collapse(2)
+    do e = 1, nV
+      do j = 1, nO
+        !do b = 1, nV
+          do a = 1, nV
+            !A_vvov(a,b,j,e) = v_vvvo(a,b,e,j)
+            A_vbov(a,j,e) = v_vbvo(a,e,j)
+          enddo
+        !enddo
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+
+    call dgemm('N','T', nV*nO, nO, nV, &
+               1d0, A_vbov, size(A_vbov,1) * size(A_vbov,2), &
+                    t1    , size(t1,1), &
+               0d0, X_vboo, size(X_vboo,1) * size(X_vboo,2))
+    !call dgemm('N','T', nV*nV*nO, nO, nV, &
+    !           1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), &
+    !                t1    , size(t1,1), &
+    !           0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3))
+    
+    !$OMP PARALLEL &
+    !$OMP SHARED(b,r2,X_vboo,nO,nV) &
+    !$OMP PRIVATE(i,j,a,e,m) &
+    !$OMP DEFAULT(NONE)
+    
+    !$OMP DO collapse(2)
+    !do b = 1, nV
+      do a = 1, nV
+        do j = 1, nO
+          do i = 1, NO
+             !r2(i,j,a,b ) = r2(i,j,a,b) + X_vvoo(a,b,j,i) - X_vvoo(a,b,i,j)
+             r2(i,j,a,b) = r2(i,j,a,b) + X_vboo(a,j,i) - X_vboo(a,i,j)
+          enddo
+        enddo
+      enddo
+    !enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+  enddo
+  
+  !deallocate(A_vvov)!,X_vvoo)
+  deallocate(A_vbov, X_vboo, v_vbvo)
+  allocate(X_vvoo(nV,nV,nO,nO))
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do m=1,nO
+  !          r2(i,j,a,b) = r2(i,j,a,b) - t1(m,a)*v_ovoo(m,b,i,j) + t1(m,b)*v_ovoo(m,a,i,j)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  !allocate(X_vvoo(nV,nV,nO,nO))
+  
+  call dgemm('T','N', nV, nV*nO*nO, nO, &
+             1d0, t1    , size(t1,1), &
+                  v_ovoo, size(v_ovoo,1), &
+             0d0, X_vvoo, size(X_vvoo,1))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - X_vvoo(a,b,i,j) + X_vvoo(b,a,i,j)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = (f_o(i)+f_o(j)-f_v(a)-f_v(b)) * t2(i,j,a,b) - r2(i,j,a,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(X_vvoo)
+
+end
+#+end_src
+
+* Use intermediates
+** Use cF_oo
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2)
+
+  implicit none
+
+  integer,intent(in)              :: nO,nV
+  double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)
+  double precision, intent(in)    :: F_oo(nO,nV), F_ov(nO,nV)
+  double precision, intent(in)    :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)
+  
+  double precision, allocatable   :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:)
+  integer                         :: i,j,m,a,b
+
+  allocate(cF_oo(nO,nO))
+  
+  call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo)
+  
+  !do a=1,nV
+  !  do i=1,nO
+  !    do m=1,nO
+  !      r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i)
+  !    end do
+  !  end do
+  !end do
+  call dgemm('T','N', nO, nV, nO, &
+             -1d0, cF_oo, size(cF_oo,1), &
+                   t1   , size(t1,1), &
+              1d0, r1   , size(r1,1))
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do m=1,nO
+  !          r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j)
+  !          r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  
+  allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV))
+  !$OMP PARALLEL &
+  !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,m,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do i=1,nO
+        do m=1,nO
+          X_oovv(m,i,a,b) = t2(i,m,a,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dgemm('T','N', nO, nO*nV*nV, nO, &
+             1d0, cF_oo , size(cF_oo,1), &
+                  X_oovv, size(X_oovv,1), &
+             0d0, Y_oovv, size(Y_oovv,1))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) 
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(cF_oo,X_oovv,Y_oovv)
+
+end
+#+end_src
+
+** Use cF_ov
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV
+  double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV)
+  double precision, intent(in)    :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)
+
+  double precision, allocatable   :: cF_ov(:,:), A_oo(:,:), A_vv(:,:)
+  double precision, allocatable   :: X_oovv(:,:,:,:), B_oovv(:,:,:,:)
+  integer                         :: i,j,a,b,e,m
+
+  allocate(cF_ov(nO,nV))
+  
+  call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r1,t2,cF_ov,nO,nV) &
+  !$OMP PRIVATE(i,a,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(1)
+  do a=1,nV
+    do i=1,nO
+      do e=1,nV
+        do m=1,nO
+          r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e)
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_vv(nV,nV), X_oovv(nO,nO,nV,nV))
+  call dgemm('T','N', nV, nV, nO, &
+             1d0, t1   , size(t1,1), &
+                  cF_ov, size(cF_ov,1), &
+             0d0, A_vv , size(A_vv,1))
+
+  call dgemm('N','T', nO*nO*nV, nV, nV, &
+             0.5d0, t2    , size(t2,1) * size(t2,2) * size(t2,3), &
+                    A_vv  , size(A_vv,1), &
+             0d0  , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,r2,X_oovv) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) 
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+             
+  deallocate(A_vv)
+  
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do e=1,nV
+  !          do m=1,nO
+  !            r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e)
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV))
+  
+  call dgemm('N','T', nO, nO, nV, &
+        1d0, t1   , size(t1,1), &
+             cF_ov, size(cF_ov,1), &
+        0d0, A_oo , size(A_oo,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(t2,B_oovv,nO,nV) &
+  !$OMP PRIVATE(i,m,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do i = 1, nO
+        do m = 1, nO
+          B_oovv(m,i,a,b) = t2(i,m,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('N','N', nO, nO*nV*nV, nO, &
+             0.5d0, A_oo, size(A_oo,1), &
+                    B_oovv, size(B_oovv,1), &
+             0d0  , X_oovv, size(X_oovv,1))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(r2,X_oovv,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b=1,nV
+    do a=1,nV
+      do j=1,nO
+        do i=1,nO
+          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(cF_ov,A_oo,B_oovv,X_oovv)
+  
+end
+#+end_src
+
+** Use cF_vv
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine use_cF_vv(nO,nV,t1,t2,r1,r2)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV
+  double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)
+
+  double precision, allocatable   :: cF_vv(:,:)
+  integer                         :: i,j,a,b,e,m
+
+  allocate(cF_vv(nV,nV))
+  
+  !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv)
+
+  deallocate(cF_vv)
+  
+end
+#+end_src
+
+** Use cW_vvvd
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV
+  double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
+  double precision, intent(in)    :: v_oovv(nO,nO,nV,nV)
+  !double precision, intent(in)    :: v_vovv(nV,nO,nV,nV)
+  
+  double precision, intent(inout) :: r2(nO,nO,nV,nV)
+
+  double precision, allocatable   :: cW_vvvf(:,:,:), v_vvvf(:,:,:), tau_f(:,:,:), v_vovf(:,:,:)
+  integer                         :: i,j,e,f
+  double precision                :: ti,tf
+
+  allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV))
+
+  !PROVIDE cc_nVab
+  
+  !do b=1,nV
+  !  do a=1,nV
+  !    do j=1,nO
+  !      do i=1,nO
+
+  !        do f=1,nV
+  !          do e=1,nV
+  !            r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  
+  do f = 1, nV
+    call wall_time(ti)
+    !$OMP PARALLEL &
+    !$OMP SHARED(tau,tau_f,f,nO,nV) &
+    !$OMP PRIVATE(i,j,e) &
+    !$OMP DEFAULT(NONE)
+    
+    !$OMP DO collapse(2)
+    do e = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          tau_f(i,j,e) = tau(i,j,e,f)
+        enddo
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+    call wall_time(tf)
+    if (cc_dev .and. f == 1) then
+      print*,'1st transpo', tf-ti
+    endif
+
+    call wall_time(ti)
+    call gen_v_spin_3idx(cc_nV_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nV_S,cc_nV_S,cc_nV_S, &
+         cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, &
+         nV,nV,nV, v_vvvf)
+    call wall_time(tf)
+    if (cc_dev .and. f == 1) then
+      print*,'vvvf', tf-ti
+    endif
+    call wall_time(ti)
+    call gen_v_spin_3idx(cc_nV_m,cc_nO_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nO_S,cc_nV_S,cc_nV_S, &
+         cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, &
+         nV,nO,nV, v_vovf)
+    call wall_time(tf)
+    if (cc_dev .and. f == 1) then
+      print*,'vovf', tf-ti
+    endif
+    
+    call wall_time(ti)
+    call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf)
+    call wall_time(tf)
+    if (cc_dev .and. f == 1) then
+      print*,'cW_vvvf', tf-ti
+    endif
+
+    call wall_time(ti)
+    call dgemm('N','T', nO*nO, nV*nV, nV, &
+               0.5d0, tau_f    , size(tau_f,1) * size(tau_f,2), &
+                      cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2), &
+               1d0  , r2     , size(r2,1) * size(r2,2))
+    call wall_time(tf)
+    if (cc_dev .and. f == 1) then
+      print*,'last dgemm', tf-ti
+    endif
+  enddo
+
+  deallocate(cW_vvvf,v_vvvf,v_vovf)
+  
+end  
+#+end_src
+
+* Intermediates
+** cF
+*** cF_oo
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: tau_t(nO,nO,nV,nV)
+  double precision,intent(in)   :: Foo(nO,nO)
+  double precision,intent(in)   :: Fov(nO,nV)
+  double precision,intent(in)   :: v_ooov(nO,nO,nO,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: cF_oo(nO,nO)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+  double precision,external     :: Kronecker_Delta
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) &
+  !$OMP PRIVATE(i,m,n,e) &
+  !$OMP DEFAULT(NONE)
+  
+  !do i=1,nO
+  !  do m=1,nO
+  !    cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i)
+  !  end do
+  !end do
+  !$OMP DO collapse(1)
+  do i=1,nO
+    do m=1,nO
+        cF_oo(m,i) = Foo(m,i)
+    end do
+  end do
+  !$OMP END DO
+  !$OMP DO
+  do i = 1, nO
+    cF_oo(i,i) = 0d0
+  end do
+  !$OMP END DO
+  
+  do e=1,nV
+    do n=1,nO
+      !$OMP DO collapse(1)
+      do i=1,nO
+        do m=1,nO
+          cF_oo(m,i) = cF_oo(m,i) + t1(n,e)*v_ooov(m,n,i,e)
+        end do
+      end do
+      !$OMP END DO
+    end do
+  end do
+  !$OMP END PARALLEL
+
+  !do i=1,nO
+  !  do m=1,nO
+  !    do e=1,nV
+  !      cF_oo(m,i) = cF_oo(m,i) + 0.5d0*t1(i,e)*Fov(m,e)
+  !    end do
+  !  end do
+  !end do
+  call dgemm('N','T', nO, nO, nV,&
+             0.5d0, Fov  , size(Fov,1), &
+                    t1   , size(t1,1), &
+             1d0  , cF_oo, size(cF_oo,1))
+
+  !do i=1,nO
+  !  do m=1,nO
+  !    do f=1,nV
+  !      do e=1,nV
+  !        do n=1,nO
+  !          cF_oo(m,i) = cF_oo(m,i) + 0.5d0*tau_t(i,n,e,f)*v_oovv(m,n,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  call dgemm('N','T', nO, nO, nO*nV*nV, &
+             0.5d0, v_oovv, size(v_oovv,1), &
+                    tau_t , size(tau_t,1), &
+             1d0  , cF_oo , size(cF_oo,1)) 
+  
+end
+#+end_src
+
+*** cF_ov
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: Fov(nO,nV),v_oovv(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: cF_ov(nO,nV)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) &
+  !$OMP PRIVATE(i,a,m,n,e,f) &
+  !$OMP DEFAULT(NONE)
+  
+  !cF_ov = Fov
+
+  !$OMP DO collapse(1)
+  do e=1,nV
+    do m=1,nO
+      cF_ov(m,e) = Fov(m,e)
+      do f=1,nV
+        do n=1,nO
+          cF_ov(m,e) = cF_ov(m,e) + t1(n,f)*v_oovv(m,n,e,f)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+end
+#+end_src
+
+*** cF_vv
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: tau_t(nO,nO,nV,nV)
+  double precision,intent(in)   :: Fov(nO,nV)
+  double precision,intent(in)   :: Fvv(nV,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+  !double precision,intent(in)   :: v_ovvv(nO,nV,nV,nV)
+
+  double precision,intent(out)  :: cF_vv(nV,nV)
+  
+  double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:)
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cF_vv,Fvv,nO,nV) &
+  !$OMP PRIVATE(e,a) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do e=1,nV
+    do a=1,nV
+      cF_vv(a,e) = Fvv(a,e)
+    end do
+  end do
+  !$OMP END DO
+  !$OMP DO
+  do e = 1, nV
+    cF_vv(e,e) = 0d0
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ 
+  !do e=1,nV
+  !  do a=1,nV
+  !    do m=1,nO
+  !      cF_vv(a,e) = cF_vv(a,e) - 0.5d0*t1(m,a)*Fov(m,e)
+  !    end do
+  !  end do
+  !end do
+  call dgemm('T','N', nV, nV, nO, &
+             -0.5d0, t1   , size(t1,1), &
+                     Fov  , size(Fov,1), &
+              1d0  , cF_vv, size(cF_vv,1))
+  
+  !do e=1,nV
+  !  do a=1,nV
+  !    do m=1,nO
+  !      do f=1,nV
+  !        cF_vv(a,e) = cF_vv(a,e) + t1(m,f)*v_ovvv(m,a,f,e)
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(v_ovfv(nO,nV,nV),X_ovfv(nO,nV,nV))
+  do f = 1, nV
+
+     call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, &
+                              cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, &
+                              nO,nV,nV, v_ovfv)
+
+    !$OMP PARALLEL &
+    !$OMP SHARED(nO,nV,v_ovfv,X_ovfv,f) &
+    !$OMP PRIVATE(m,a,e) &
+    !$OMP DEFAULT(NONE)
+    !$OMP DO collapse(2)
+    do e = 1, nV
+      do a = 1, nV
+        do m = 1, nO
+          !X_ovfv(m,a,e) = v_ovvv(m,a,f,e)
+          X_ovfv(m,a,e) = v_ovfv(m,a,e)
+        enddo
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+    call dgemv('T', nO, nV*nV, &
+               !1d0, v_ovvv(:,:,f,:), size(v_ovvv,1), &
+               1d0, X_ovfv, size(X_ovfv,1), &
+                    t1(1,f), 1, &
+               1d0, cF_vv, 1)
+  enddo
+  deallocate(v_ovfv,X_ovfv)
+
+  !do e=1,nV
+  !  do a=1,nV
+  !    do f=1,nV
+  !      do n=1,nO
+  !        do m=1,nO
+  !          cF_vv(a,e) = cF_vv(a,e) - 0.5d0*tau_t(m,n,a,f)*v_oovv(m,n,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  do f = 1, nV
+     call dgemm('T','N', nV, nV, nO*nO,&
+                -0.5d0, tau_t(1,1,1,f) , size(tau_t,1) * size(tau_t,2), &
+                        v_oovv(1,1,1,f), size(v_oovv,1) * size(v_oovv,2), &
+                1d0   , cF_vv, size(cF_vv,1))
+  enddo
+
+end
+#+end_src
+
+** cW
+*** cW_oooo
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: tau(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_oooo(nO,nO,nO,nO)
+  double precision,intent(in)   :: v_ooov(nO,nO,nO,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+
+  double precision,intent(out)  :: cW_oooo(nO,nO,nO,nO)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+  double precision, allocatable :: X_oooo(:,:,:,:)
+
+  ! oooo block  
+
+  !cW_oooo = v_oooo
+
+  !do j=1,nO
+  !  do i=1,nO
+  !    do n=1,nO
+  !      do m=1,nO
+
+  !        do e=1,nV
+  !          cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + t1(j,e)*v_ooov(m,n,i,e) - t1(i,e)*v_ooov(m,n,j,e)
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(X_oooo(nO,nO,nO,nO))
+  
+  call dgemm('N','T', nO*nO*nO, nO, nV, &
+             1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), &
+                  t1    , size(t1,1), &
+             0d0, X_oooo, size(X_oooo,1) * size(X_oooo,1) * size(X_oooo,3))
+  !$OMP PARALLEL &
+  !$OMP SHARED(cW_oooo,v_oooo,X_oooo,nO,nV) &
+  !$OMP PRIVATE(i,j,m,n) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do j=1,nO
+    do i=1,nO
+      do n=1,nO
+        do m=1,nO
+          cW_oooo(m,n,i,j) = v_oooo(m,n,i,j) + X_oooo(m,n,i,j) - X_oooo(m,n,j,i)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(X_oooo)
+  
+  !do m=1,nO
+  !  do n=1,nO
+  !    do i=1,nO
+  !      do j=1,nO
+  !         
+  !        do e=1,nV
+  !          do f=1,nV
+  !            cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+
+  call dgemm('N','T', nO*nO, nO*nO, nV*nV, &
+             0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), &
+                     tau    , size(tau,1) * size(tau,2), &
+             1.d0  , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2))
+  
+end
+#+end_src
+
+*** cW_ovvo
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
+
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: tau(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_oovo(nO,nO,nV,nO)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_ovvo(nO,nV,nV,nO)
+  !double precision,intent(in)   :: v_ovvv(nO,nV,nV,nV)
+
+  double precision,intent(out)  :: cW_ovvo(nO,nV,nV,nO)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,e,f
+  double precision, allocatable :: A_oovo(:,:,:,:), B_vovo(:,:,:,:)
+  double precision, allocatable :: A_voov(:,:,:,:), B_voov(:,:,:,:), C_ovov(:,:,:,:)
+  double precision, allocatable :: v_ovev(:,:,:), cW_oveo(:,:,:)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cW_ovvo,v_ovvo,nO,nV) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do j = 1, nO
+    do b = 1, nV
+      do a = 1, nV
+        do i = 1, nO
+          cW_ovvo(i,a,b,j) = v_ovvo(i,a,b,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  !do m=1,nO
+  !  do b=1,nV
+  !    do e=1,nV
+  !      do j=1,nO
+  !        do f=1,nV
+  !          cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + t1(j,f)*v_ovvv(m,b,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(v_ovev(nO,nV,nV),cW_oveo(nO,nV,nO))
+  do e = 1, nV
+
+    call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, &
+                              cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, &
+                              nO,nV,nV, v_ovev)
+     
+    call dgemm('N','T', nO*nV, nO, nV, &
+               1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), &
+                     t1     , size(t1,1), &
+               0.d0, cW_oveo, size(cW_oveo,1) * size(cW_oveo,2))
+    !$OMP PARALLEL &
+    !$OMP SHARED(e,cW_ovvo,cW_oveo,nO,nV) &
+    !$OMP PRIVATE(m,b,j) &
+    !$OMP DEFAULT(NONE)
+    !$OMP DO collapse(2)
+    do j = 1, nO
+      do b = 1, nV
+        do m = 1, nO
+          cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + cW_oveo(m,b,j)
+        enddo
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+  enddo
+  deallocate(v_ovev,cW_oveo)
+  !call dgemm('N','T', nO*nV*nV, nO, nV, &
+  !           1.d0, v_ovvv , size(v_ovvv,1) * size(v_ovvv,2) * size(v_ovvv,3), &
+  !                 t1     , size(t1,1), &
+  !           1.d0, cW_ovvo, size(cW_ovvo,1) * size(cW_ovvo,2) * size(cW_ovvo,3))
+
+  !do j=1,nO
+  !  do e=1,nV
+  !    do b=1,nV
+  !      do m=1,nO
+  !        do n=1,nO
+  !          cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - t1(n,b)*v_oovo(m,n,e,j)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  
+  allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(A_oovo,v_oovo,nO,nV) &
+  !$OMP PRIVATE(j,e,m,n) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do j=1,nO
+    do e=1,nV
+      do m=1,nO
+        do n=1,nO
+          A_oovo(n,m,e,j) = v_oovo(m,n,e,j)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('T','N', nV, nO*nV*nO, nO, &
+             1d0, t1    , size(t1,1), &
+                  A_oovo, size(A_oovo,1), &
+             0d0, B_vovo, size(B_vovo,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) &
+  !$OMP PRIVATE(j,e,m,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do j=1,nO
+    do e=1,nV
+      do b=1,nV
+        do m=1,nO
+          cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - B_vovo(b,m,e,j)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  deallocate(A_oovo,B_vovo)
+
+  !do j=1,nO
+  !  do e=1,nV
+  !    do b=1,nV
+  !      do m=1,nO
+  !        do f=1,nV
+  !          do n=1,nO
+  !            cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) &
+  !                            - ( 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) )*v_oovv(m,n,e,f)
+  !          end do
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  allocate(A_voov(nV,nO,nO,nV), B_voov(nV,nO,nO,nV), C_ovov(nO,nV,nO,nV))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) &
+  !$OMP PRIVATE(f,n,m,e,j,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do j = 1, nO
+      do n = 1, nO
+        do f = 1, nV
+          A_voov(f,n,j,b) = 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP DO collapse(3)
+  do e = 1, nV
+    do m = 1, nO
+      do n = 1, nO
+        do f = 1, nV
+          B_voov(f,n,m,e) = v_oovv(m,n,e,f)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  call dgemm('T','N', nO*nV, nV*nO, nV*nO, &
+             1d0, A_voov, size(A_voov,1) * size(A_voov,2), &
+                  B_voov, size(B_voov,1) * size(B_voov,2), &
+             0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2))
+  
+  deallocate(A_voov,B_voov)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) &
+  !$OMP PRIVATE(j,e,m,b) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do j = 1, nO
+    do e = 1, nV
+      do b = 1, nV
+        do m = 1, nO
+          cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - C_ovov(j,b,m,e)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(C_ovov)
+
+end
+#+end_src
+
+*** cW_vvvv
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv)
+ 
+  implicit none
+
+  integer,intent(in)            :: nO,nV
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: tau(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_vovv(nV,nO,nV,nV)
+  double precision,intent(in)   :: v_vvvv(nV,nV,nV,nV)
+
+  double precision,intent(out)  :: cW_vvvv(nV,nV,nV,nV)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,c,d,e,f
+  double precision, allocatable :: A_ovvv(:,:,:,:), B_vvvv(:,:,:,:)
+
+  allocate(A_ovvv(nO,nV,nV,nV), B_vvvv(nV,nV,nV,nV))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,cW_vvvv,A_ovvv,v_vovv,v_vvvv) &
+  !$OMP PRIVATE(a,b,c,d,e,f,m) &
+  !$OMP DEFAULT(NONE)
+
+  !$OMP DO collapse(3)
+  do d = 1, nV
+    do c = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO NOWAIT
+
+  !do f=1,nV
+  !  do e=1,nV
+  !    do b=1,nV
+  !      do a=1,nV
+  !        do m=1,nO
+  !          cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  !$OMP DO collapse(3)
+  do f=1,nV
+    do e=1,nV
+      do a=1,nV
+        do m=1,nO
+          A_ovvv(m,a,e,f) = v_vovv(a,m,e,f)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dgemm('T','N', nV, nV*nV*nV, nO, &
+             1d0, t1    , size(t1,1), &
+                  A_ovvv, size(A_ovvv,1), &
+             0d0, B_vvvv, size(B_vvvv,1))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,cW_vvvv,B_vvvv) &
+  !$OMP PRIVATE(a,b,c,d,e,f,m) &
+  !$OMP DEFAULT(NONE)
+
+  !$OMP DO collapse(3)
+  do f=1,nV
+    do e=1,nV
+      do b=1,nV
+        do a=1,nV
+          cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f)
+        end do
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  deallocate(A_ovvv,B_vvvv)
+
+  !do a=1,nV
+  !  do b=1,nV
+  !    do e=1,nV
+  !      do f=1,nV
+  !         
+  !        do m=1,nO
+  !          do n=1,nO
+  !            cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+  call dgemm('T','N', nV*nV, nV*nV, nO*nO, &
+             0.25d0, tau    , size(tau,1) * size(tau,2), &
+                     v_oovv , size(v_oovv,1) * size(v_oovv,2), &
+             1.d0  , cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2))
+
+end
+#+end_src
+
+*** cW_vvvf
+#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f
+subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf)
+ 
+  implicit none
+
+  integer,intent(in)            :: nO,nV,f
+  double precision,intent(in)   :: t1(nO,nV)
+  double precision,intent(in)   :: t2(nO,nO,nV,nV)
+  double precision,intent(in)   :: tau(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_oovv(nO,nO,nV,nV)
+  double precision,intent(in)   :: v_vovf(nV,nO,nV)
+  double precision,intent(in)   :: v_vvvf(nV,nV,nV)
+
+  double precision,intent(out)  :: cW_vvvf(nV,nV,nV)
+
+  integer                       :: i,j,m,n
+  integer                       :: a,b,c,d,e
+  double precision, allocatable :: A_ovvf(:,:,:), B_vvvf(:,:,:), v_oovf(:,:,:)
+  double precision :: ti,tf
+
+  allocate(A_ovvf(nO,nV,nV), B_vvvf(nV,nV,nV))
+  allocate(v_oovf(nO,nO,nV))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) &
+  !$OMP PRIVATE(a,b,c,d,e,m) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(2)
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        !cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d)
+        cW_vvvf(a,b,c) = v_vvvf(a,b,c)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO NOWAIT
+
+  !do f=1,nV
+  !  do e=1,nV
+  !    do b=1,nV
+  !      do a=1,nV
+  !        do m=1,nO
+  !          cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f)
+  !        end do
+  !      end do
+  !    end do
+  !  end do
+  !end do
+
+  !$OMP DO collapse(2)
+  do e=1,nV
+    do a=1,nV
+      do m=1,nO
+        !A_ovvv(m,a,e,f) = v_vovv(a,m,e,f)
+        !A_ovvf(m,a,e) = v_vovv(a,m,e,f)
+        A_ovvf(m,a,e) = v_vovf(a,m,e)
+      end do
+    end do
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dgemm('T','N', nV, nV*nV, nO, &
+             1d0, t1    , size(t1,1), &
+                  A_ovvf, size(A_ovvf,1), &
+             0d0, B_vvvf, size(B_vvvf,1))
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) &
+  !$OMP PRIVATE(a,b,c,d,e,m,n) &
+  !$OMP DEFAULT(NONE)
+
+  !$OMP DO collapse(3)
+  do e=1,nV
+    do b=1,nV
+      do a=1,nV
+        !cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f)
+        cW_vvvf(a,b,e) = cW_vvvf(a,b,e) - B_vvvf(b,a,e) + B_vvvf(a,b,e)
+      end do
+    end do
+  end do
+  !$OMP END DO NOWAIT
+  
+  !deallocate(A_ovvf,B_vvvf)
+
+  !do a=1,nV
+  !  do b=1,nV
+  !    do e=1,nV
+  !      do f=1,nV
+  !         
+  !        do m=1,nO
+  !          do n=1,nO
+  !            cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f)
+  !          end do
+  !        end do
+
+  !      end do
+  !    end do
+  !  end do
+  !end do
+
+  !$OMP DO collapse(2)
+  do e = 1, nV
+    do n = 1, nO
+      do m = 1, nO
+        v_oovf(m,n,e) = v_oovv(m,n,e,f)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL  
+  
+  call dgemm('T','N', nV*nV, nV, nO*nO, &
+             0.25d0, tau    , size(tau,1) * size(tau,2), &
+                     v_oovf , size(v_oovf,1) * size(v_oovf,2), &
+             1.d0  , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2))
+  
+  deallocate(v_oovf)
+  deallocate(A_ovvf,B_vvvf)
+
+end
+#+end_src
+
diff --git a/src/ccsd/org/ccsd_t_space_orb.org b/src/ccsd/org/ccsd_t_space_orb.org
new file mode 100644
index 00000000..8709d7be
--- /dev/null
+++ b/src/ccsd/org/ccsd_t_space_orb.org
@@ -0,0 +1,428 @@
+Ref:
+Integral-Direct and Parallel Implementation of the CCSD(T) Method:
+Algorithmic Developments and Large-Scale Applications
+László Gyevi-Nagy, Mihály Kállay, and Péter R. Nagy
+J. Chem. Theory Comput. 2020, 16, 1, 366–384
+https://doi.org/10.1021/acs.jctc.9b00957
+
+* Dumb way
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine ccsd_par_t_space(nO,nV,t1,t2,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: energy
+  
+  double precision, allocatable :: W(:,:,:,:,:,:)
+  double precision, allocatable :: V(:,:,:,:,:,:)
+  integer :: i,j,k,a,b,c
+  
+  allocate(W(nO,nO,nO,nV,nV,nV))
+  allocate(V(nO,nO,nO,nV,nV,nV))
+
+  call form_w(nO,nV,t2,W)   
+  call form_v(nO,nV,t1,W,V)
+
+  energy = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c))  !delta_ooovvv(i,j,k,a,b,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  
+  energy = energy / 3d0
+  
+  deallocate(V,W)
+end
+#+END_SRC
+
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine form_w(nO,nV,t2,W)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV)
+  
+  integer :: i,j,k,l,a,b,c,d
+
+  W = 0d0
+  do c = 1, nV
+    print*,'W:',c,'/',nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+
+              do d = 1, nV
+                W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+                ! chem (bd|ai)
+                ! phys <ba|di>
+                + cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) &
+                + cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj
+                + cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik
+                + cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij
+                + cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj
+                + cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik
+              enddo
+
+              do l = 1, nO
+                W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+                ! chem (ck|jl)
+                ! phys <cj|kl>
+                - cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) &
+                - cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj
+                - cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik
+                - cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij
+                - cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj
+                - cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik
+              enddo
+
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+end
+#+END_SRC
+
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine form_v(nO,nV,t1,w,v)
+
+implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: W(nO, nO, nO, nV, nV, nV)
+  double precision, intent(out) :: V(nO, nO, nO, nV, nV, nV)
+
+  integer :: i,j,k,a,b,c
+
+  V = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) &
+              + cc_space_v_vvoo(b,c,j,k) * t1(i,a) &
+              + cc_space_v_vvoo(a,c,i,k) * t1(j,b) &
+              + cc_space_v_vvoo(a,b,i,j) * t1(k,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+end
+#+END_SRC
+
+* Better way
+** Main
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO,nV), f_o(nO), f_v(nV)
+  double precision, intent(in)  :: t2(nO,nO,nV,nV)
+  double precision, intent(in)  :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO)
+  double precision, intent(out) :: energy
+  
+  double precision, allocatable :: W(:,:,:,:,:,:)
+  double precision, allocatable :: V(:,:,:,:,:,:)
+  double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:)
+  double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:)
+  double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:)
+  integer                       :: i,j,k,l,a,b,c,d
+  double precision              :: e,ta,tb, delta, delta_ijk
+ 
+  !allocate(W(nV,nV,nV,nO,nO,nO))
+  !allocate(V(nV,nV,nV,nO,nO,nO))
+  allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV))
+  allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO))
+  allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO))
+
+  ! Temporary arrays
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, &
+  !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) &
+  !$OMP PRIVATE(a,b,c,d,i,j,k,l) &
+  !$OMP DEFAULT(NONE)
+  
+  !v_vvvo(b,a,d,i) * t2(k,j,c,d) &
+  !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j)
+  
+  !$OMP DO collapse(3)
+  do i = 1, nO
+    do a = 1, nV
+      do b = 1, nV
+        do d = 1, nV
+          X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP DO collapse(3)
+  do j = 1, nO
+    do k = 1, nO
+      do c = 1, nV
+        do d = 1, nV
+          T_vvoo(d,c,k,j) = t2(k,j,c,d)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+ 
+  !v_vooo(c,j,k,l) * t2(i,l,a,b) &
+  !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) &
+
+  !$OMP DO collapse(3)
+  do k = 1, nO
+    do j = 1, nO
+      do c = 1, nV
+        do l = 1, nO
+           X_ovoo(l,c,j,k) = v_vooo(c,j,k,l)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP DO collapse(3)
+  do i = 1, nO
+    do b = 1, nV
+      do a = 1, nV
+        do l = 1, nO
+          T_ovvo(l,a,b,i) = t2(i,l,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+                     
+  !v_vvoo(b,c,j,k) * t1(i,a) &
+  !X_vvoo(b,c,k,j) * T1_vo(a,i) &
+  
+  !$OMP DO collapse(3)
+  do j = 1, nO
+    do k = 1, nO
+      do c = 1, nV
+        do b = 1, nV
+          X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP DO collapse(1)
+  do i = 1, nO
+    do a = 1, nV
+      T_vo(a,i) = t1(i,a)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call wall_time(ta)
+  energy = 0d0
+  do i = 1, nO
+    do j = 1, nO
+      do k = 1, nO
+        delta_ijk = f_o(i) + f_o(j) + f_o(k)
+        call form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_ijk)
+        call form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,W_ijk,V_ijk)
+        !$OMP PARALLEL &
+        !$OMP SHARED(energy,nV,i,j,k,W_ijk,V_ijk,f_o,f_v,delta_ijk) &
+        !$OMP PRIVATE(a,b,c,e,delta) &
+        !$OMP DEFAULT(NONE)
+        e = 0d0
+        !$OMP DO
+        do c = 1, nV
+          do b = 1, nV
+            do a = 1, nV
+              delta = 1d0 / (delta_ijk - f_v(a) - f_v(b) - f_v(c))
+              !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c))  !delta_ooovvv(i,j,k,a,b,c)
+              e = e + (4d0 * W_ijk(a,b,c) + W_ijk(b,c,a) + W_ijk(c,a,b)) &
+                       * (V_ijk(a,b,c) - V_ijk(c,b,a)) * delta
+            enddo
+          enddo
+        enddo
+        !$OMP END DO
+        !$OMP CRITICAL
+        energy = energy + e
+        !$OMP END CRITICAL
+        !$OMP END PARALLEL
+      enddo
+    enddo
+    call wall_time(tb)
+    write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s'
+  enddo
+  
+  energy = energy / 3d0
+
+  deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo)
+  !deallocate(V,W)
+end
+#+END_SRC
+
+** W_ijk
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV,i,j,k
+  !double precision, intent(in) :: t2(nO,nO,nV,nV)
+  double precision, intent(in)  :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO)
+  double precision, intent(in)  :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO)
+  double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO)
+  
+  integer :: l,a,b,c,d
+
+  !W = 0d0
+  !do i = 1, nO
+  !  do j = 1, nO
+  !    do k = 1, nO
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) &
+  !$OMP PRIVATE(a,b,c,d,l) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(2)
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        W(a,b,c) = 0d0
+
+        do d = 1, nV
+          !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+          W(a,b,c) = W(a,b,c) &
+          ! chem (bd|ai)
+          ! phys <ba|di>
+          !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) &
+          !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj
+          !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik
+          !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij
+          !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj
+          !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik
+          + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) &
+          + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj
+          + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik
+          + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij
+          + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj
+          + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik
+        enddo
+        
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP DO collapse(2)
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+         
+        do l = 1, nO
+          !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) &
+          W(a,b,c) = W(a,b,c) &
+          ! chem (ck|jl)
+          ! phys <cj|kl>
+          !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) &
+          !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj
+          !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik
+          !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij
+          !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj
+          !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik
+          - X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) &
+          - X_ovoo(l,b,k,j) * T_ovvo(l,a,c,i) & ! bc kj
+          - X_ovoo(l,b,i,j) * T_ovvo(l,c,a,k) & ! prev ac ik
+          - X_ovoo(l,a,j,i) * T_ovvo(l,c,b,k) & ! prev ab ij
+          - X_ovoo(l,a,k,i) * T_ovvo(l,b,c,j) & ! prev bc kj
+          - X_ovoo(l,c,i,k) * T_ovvo(l,b,a,j) ! prev ac ik
+        enddo
+
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  !    enddo
+  !  enddo
+  !enddo
+
+end
+#+END_SRC
+
+** V_ijk
+#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f
+subroutine form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,w,v)
+
+implicit none
+
+  integer, intent(in)           :: nO,nV,i,j,k
+  !double precision, intent(in)  :: t1(nO,nV)
+  double precision, intent(in)  :: T_vo(nV,nO)
+  double precision, intent(in)  :: X_vvoo(nV,nV,nO,nO)
+  double precision, intent(in)  :: W(nV,nV,nV)!,nO,nO,nO)
+  double precision, intent(out) :: V(nV,nV,nV)!,nO,nO,nO)
+
+  integer :: a,b,c
+
+  !V = 0d0
+  !do i = 1, nO
+  !  do j = 1, nO
+  !    do k = 1, nO
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) &
+  !$OMP PRIVATE(a,b,c) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(2)
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) &
+        V(a,b,c) = W(a,b,c) &
+        !+ cc_space_v_vvoo(b,c,j,k) * t1(i,a) &
+        !+ cc_space_v_vvoo(a,c,i,k) * t1(j,b) &
+        !+ cc_space_v_vvoo(a,b,i,j) * t1(k,c)
+        + X_vvoo(b,c,k,j) * T_vo(a,i) &
+        + X_vvoo(a,c,k,i) * T_vo(b,j) &
+        + X_vvoo(a,b,j,i) * T_vo(c,k)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+  !    enddo
+  !  enddo
+  !enddo
+
+end
+#+END_SRC
diff --git a/src/ccsd/org/ccsd_t_spin_orb.org b/src/ccsd/org/ccsd_t_spin_orb.org
new file mode 100644
index 00000000..c9a41abd
--- /dev/null
+++ b/src/ccsd/org/ccsd_t_spin_orb.org
@@ -0,0 +1,385 @@
+* CCSD(T) spin orb
+
+Ref:
+John D. Watts, Jürgen Gauss, and Rodney J. Bartlett
+J. Chem. Phys. 98, 8718 (1993)
+http://dx.doi.org/10.1063/1.464480
+
+** v1
+#+begin_src f90 :comments org :tangle ccsd_t_spin_orb.irp.f
+subroutine ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV)
+  double precision, intent(in)  :: f_o(nO), f_v(nV), f_ov(nO,nV)
+  double precision, intent(in)  :: v_ooov(nO,nO,nO,nV)
+  double precision, intent(in)  :: v_vvoo(nV,nV,nO,nO), v_vvvo(nV,nV,nV,nO)
+  double precision, intent(out) :: energy
+
+  double precision, allocatable :: t3(:,:,:,:,:,:), s(:,:)
+  double precision              :: e_t, e_st, e_dt, delta_abc, delta
+  integer                       :: i,j,k,l,m,a,b,c,d,e
+
+  allocate(t3(nO,nO,nO,nV,nV,nV), s(nO,nV))
+
+  t3 = 0d0
+  
+  ! T3
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        delta_abc = f_v(a) + f_v(b) + f_v(c)
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              delta = f_o(i) + f_o(j) + f_o(k) - delta_abc
+              do e = 1, nV
+                t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) &
+                  + t2(j,k,a,e) * v_vvvo(b,c,e,i) &
+                  - t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij)
+                  - t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik)
+                  - t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab)
+                  - t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac)
+                  + t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab)
+                  + t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac)
+                  + t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab)
+                  + t2(j,i,c,e) * v_vvvo(b,a,e,k)   ! + P(ik) P(ac)
+              enddo
+              do m = 1, nO
+                t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) &
+                  + t2(m,i,b,c) * v_ooov(j,k,m,a) &
+                  - t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij)
+                  - t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik)
+                  - t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab)
+                  - t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac)
+                  + t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab)
+                  + t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac)
+                  + t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab)
+                  + t2(m,k,b,a) * v_ooov(j,i,m,c)   ! + P(ik) P(ac)
+              enddo
+                t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) * (1d0 / delta)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  
+
+  ! E_T
+  e_t = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        delta_abc = f_v(a) + f_v(b) + f_v(c)
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              delta = f_o(i) + f_o(j) + f_o(k) - delta_abc
+              e_t = e_t + t3(i,j,k,a,b,c) * delta * t3(i,j,k,a,b,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  e_t = e_t / 36d0
+
+  ! E_ST
+  s = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3(i,j,k,a,b,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  e_st = 0d0
+  do a = 1, nV
+    do i = 1, nO
+      e_st = e_st + s(i,a) * t1(i,a)
+    enddo
+  enddo
+  e_st = e_st * 0.25d0
+
+  ! E_DT
+  e_dt = 0d0
+  do c = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              e_dt = e_dt + t2(i,j,a,b) * f_ov(k,c) * t3(i,j,k,a,b,c)
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  e_dt = e_dt * 0.25d0
+
+  ! (T)
+  !print*,e_t,e_st,e_dt
+  energy = e_t + e_st + e_dt
+
+  deallocate(t3,s)
+  
+end
+#+end_src
+
+** v2
+#+begin_src f90 :comments org :tangle ccsd_t_spin_orb.irp.f
+subroutine ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: t1(nO,nV), t2(nO,nO,nV,nV)
+  double precision, intent(in)  :: f_o(nO), f_v(nV), f_ov(nO,nV)
+  double precision, intent(in)  :: v_ooov(nO,nO,nO,nV)
+  double precision, intent(in)  :: v_vvoo(nV,nV,nO,nO)
+  double precision, intent(out) :: energy
+
+  double precision, allocatable :: t3_bc(:,:,:,:), s(:,:), e_t(:), e_dt(:)
+  double precision, allocatable :: A_vovv(:,:,:,:), v_vvvo(:,:,:,:)
+  double precision, allocatable :: T_voov(:,:,:,:), B_ooov(:,:,:,:)
+  double precision              :: e_st, delta_abc, delta, ta, tb
+  integer                       :: i,j,k,l,m,a,b,c,d,e
+
+  allocate(t3_bc(nO,nO,nO,nV), s(nO,nV), e_t(nV), e_dt(nV))
+  allocate(A_vovv(nV,nO,nV,nV),v_vvvo(nV,nV,nV,nO),T_voov(nV,nO,nO,nV),B_ooov(nO,nO,nO,nV))
+
+  call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, &
+       cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, &
+       cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
+       nV,nV,nV,nO, v_vvvo)
+  
+  ! Init 
+  s = 0d0
+  e_t = 0d0
+  e_st = 0d0
+  e_dt = 0d0
+
+  call wall_time(ta)
+  !$OMP PARALLEL &
+  !$OMP PRIVATE(i,j,k,m,a,b,c,e) &
+  !$OMP SHARED(A_vovv,ta,tb,t3_bc,s,e_t,e_st,e_dt,t2,v_vvvo,v_ooov, &
+  !$OMP v_vvoo,f_o,f_v,f_ov,delta,delta_abc,nO,nV,T_voov,B_ooov) &
+  !$OMP DEFAULT(NONE)
+
+  !$OMP DO collapse(3)
+  do c = 1, nV
+    do b = 1, nV
+      do i = 1, nO
+        do e = 1, nV
+          A_vovv(e,i,b,c) = v_vvvo(b,c,e,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$omp do collapse(3)
+  do a = 1, nV
+    do k = 1, nO
+      do j = 1, nO
+        do e = 1, nV
+          T_voov(e,j,k,a) = t2(j,k,a,e)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do nowait
+
+  !$omp do collapse(3)
+  do a = 1, nV
+    do k = 1, nO
+      do j = 1, nO
+        do m = 1, nO
+          B_ooov(m,j,k,a) = v_ooov(j,k,m,a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$omp end do
+
+  do c = 1, nV
+    do b = 1, nV
+
+      ! T3(:,:,:,:,b,c)
+      ! Init
+      !$OMP DO collapse(3)
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              t3_bc(i,j,k,a) = 0d0
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+      
+      !$OMP DO collapse(3)
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              do e = 1, nV
+                t3_bc(i,j,k,a) = t3_bc(i,j,k,a) &
+                   !+ t2(j,k,a,e) * v_vvvo(b,c,e,i) &
+                   !- t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij)
+                   !- t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik)
+                   !- t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab)
+                   !- t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac)
+                   !+ t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab)
+                   !+ t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac)
+                   !+ t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab)
+                   !+ t2(j,i,c,e) * v_vvvo(b,a,e,k)   ! + P(ik) P(ac)
+                   + T_voov(e,j,k,a) * A_vovv(e,i,b,c) &
+                   - T_voov(e,i,k,a) * A_vovv(e,j,b,c) & ! - P(ij)
+                   - T_voov(e,j,i,a) * A_vovv(e,k,b,c) & ! - P(ik)
+                   - T_voov(e,j,k,b) * A_vovv(e,i,a,c) & ! - P(ab)
+                   - T_voov(e,j,k,c) * A_vovv(e,i,b,a) & ! - P(ac)
+                   + T_voov(e,i,k,b) * A_vovv(e,j,a,c) & ! + P(ij) P(ab)
+                   + T_voov(e,i,k,c) * A_vovv(e,j,b,a) & ! + P(ij) P(ac)
+                   + T_voov(e,j,i,b) * A_vovv(e,k,a,c) & ! + P(ik) P(ab)
+                   + T_voov(e,j,i,c) * A_vovv(e,k,b,a)   ! + P(ik) P(ac)
+              enddo
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+      
+      !$OMP DO collapse(3)
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              do m = 1, nO
+                t3_bc(i,j,k,a) = t3_bc(i,j,k,a) &
+                   !+ t2(m,i,b,c) * v_ooov(j,k,m,a) &
+                   !- t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij)
+                   !- t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik)
+                   !- t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab)
+                   !- t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac)
+                   !+ t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab)
+                   !+ t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac)
+                   !+ t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab)
+                   !+ t2(m,k,b,a) * v_ooov(j,i,m,c)   ! + P(ik) P(ac)
+                   + t2(m,i,b,c) * B_ooov(m,j,k,a) &
+                   - t2(m,j,b,c) * B_ooov(m,i,k,a) & ! - P(ij)
+                   - t2(m,k,b,c) * B_ooov(m,j,i,a) & ! - P(ik)
+                   - t2(m,i,a,c) * B_ooov(m,j,k,b) & ! - P(ab)
+                   - t2(m,i,b,a) * B_ooov(m,j,k,c) & ! - P(ac)
+                   + t2(m,j,a,c) * B_ooov(m,i,k,b) & ! + P(ij) P(ab)
+                   + t2(m,j,b,a) * B_ooov(m,i,k,c) & ! + P(ij) P(ac)
+                   + t2(m,k,a,c) * B_ooov(m,j,i,b) & ! + P(ik) P(ab)
+                   + t2(m,k,b,a) * B_ooov(m,j,i,c)   ! + P(ik) P(ac)
+              enddo
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+
+      !$OMP DO
+      do a = 1, nV
+        delta_abc = f_v(a) + f_v(b) + f_v(c)
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+               delta = f_o(i) + f_o(j) + f_o(k) - delta_abc 
+               t3_bc(i,j,k,a) = t3_bc(i,j,k,a) * (1d0 / delta)
+             enddo
+           enddo
+         enddo
+       enddo
+      !$OMP END DO
+
+      ! E_T
+      !$OMP DO
+      do a = 1, nV
+        delta_abc = f_v(a) + f_v(b) + f_v(c)
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              delta = f_o(i) + f_o(j) + f_o(k) - delta_abc
+              e_t(a) = e_t(a) + t3_bc(i,j,k,a) * delta * t3_bc(i,j,k,a)
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO nowait
+
+      ! E_ST
+      !$OMP DO
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3_bc(i,j,k,a)
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO nowait
+
+      ! E_DT
+      !$OMP DO
+      do a = 1, nV
+        do k = 1, nO
+          do j = 1, nO
+            do i = 1, nO
+              e_dt(a) = e_dt(a) + t2(i,j,a,b) * f_ov(k,c) * t3_bc(i,j,k,a)
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+    enddo
+    !$OMP MASTER
+    call wall_time(tb)
+    write(*,'(A1,F6.2,A5,F10.2,A2)') ' ', dble(c)/dble(nV)*100d0, '% in ', tb-ta, ' s'
+    !$OMP END MASTER
+  enddo
+  !$OMP END PARALLEL
+
+  do a = 2, nV
+    e_t(1) = e_t(1) + e_t(a)
+  enddo
+  
+  do a = 2, nV
+    e_dt(1) = e_dt(1) + e_dt(a)
+  enddo
+
+  e_t = e_t / 36d0
+  
+  do a = 1, nV
+    do i = 1, nO
+      e_st = e_st + s(i,a) * t1(i,a)
+    enddo
+  enddo
+  e_st = e_st * 0.25d0
+
+  e_dt = e_dt * 0.25d0
+
+  ! (T)
+  !print*,e_t(1),e_st,e_dt(1)
+  energy = e_t(1) + e_st + e_dt(1)
+
+  deallocate(t3_bc,s)
+  
+end
+#+end_src
diff --git a/src/cipsi/NEED b/src/cipsi/NEED
index 85d01f79..5bd742bc 100644
--- a/src/cipsi/NEED
+++ b/src/cipsi/NEED
@@ -3,3 +3,4 @@ zmq
 mpi
 iterations
 csf
+mol_properties
diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f
index da77a527..5225c6df 100644
--- a/src/cipsi/cipsi.irp.f
+++ b/src/cipsi/cipsi.irp.f
@@ -70,8 +70,8 @@ subroutine run_cipsi
 
   do while (                                                         &
         (N_det < N_det_max) .and.                                    &
-        (sum(abs(pt2_data % pt2(1:N_states)) * state_average_weight(1:N_states)) > pt2_max) .and.               &
-        (sum(abs(pt2_data % variance(1:N_states)) * state_average_weight(1:N_states)) > variance_max) .and.     &
+        (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and.               &
+        (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and.     &
         (correlation_energy_ratio <= correlation_energy_ratio_max)   &
         )
       write(*,'(A)')  '--------------------------------------------------------------------------------'
@@ -108,6 +108,7 @@ subroutine run_cipsi
 
     call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det)
     call print_extrapolated_energy()
+    call print_mol_properties()
     N_iter += 1
 
     if (qp_stop()) exit
@@ -156,6 +157,7 @@ subroutine run_cipsi
       pt2_data, pt2_data_err, N_det,N_configuration,N_states,psi_s2)
     call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det)
     call print_extrapolated_energy()
+    call print_mol_properties()
   endif
   call pt2_dealloc(pt2_data)
   call pt2_dealloc(pt2_data_err)
diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f
index c7cee1ac..7909007a 100644
--- a/src/cipsi/pt2_stoch_routines.irp.f
+++ b/src/cipsi/pt2_stoch_routines.irp.f
@@ -131,7 +131,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
   PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
   PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted
   PROVIDE psi_det_hii selection_weight pseudo_sym
-  PROVIDE list_act list_inact list_core list_virt list_del seniority_max
+  PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
   PROVIDE excitation_beta_max  excitation_alpha_max excitation_max
 
   if (h0_type == 'CFG') then
@@ -290,9 +290,9 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
       call set_multiple_levels_omp(.False.)
 
 
-      print '(A)', '========== ======================= ===================== ===================== ==========='
-      print '(A)', ' Samples          Energy                Variance               Norm^2            Seconds'
-      print '(A)', '========== ======================= ===================== ===================== ==========='
+      print '(A)', '========== ==================== ================ ================ ================ ============= ==========='
+      print '(A)', ' Samples           Energy             PT2            Variance          Norm^2       Convergence    Seconds'
+      print '(A)', '========== ==================== ================ ================ ================ ============= ==========='
 
       PROVIDE global_selection_buffer
 
@@ -316,7 +316,8 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
       call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
       call set_multiple_levels_omp(.True.)
 
-      print '(A)', '========== ======================= ===================== ===================== ==========='
+      print '(A)', '========== ==================== ================ ================ ================ ============= ==========='
+
 
       do k=1,N_states
         pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate)
@@ -414,6 +415,17 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
   double precision :: rss
   double precision, external :: memory_of_double, memory_of_int
 
+  character(len=20) :: format_str1, str_error1, format_str2, str_error2
+  character(len=20) :: format_str3, str_error3, format_str4, str_error4
+  character(len=20) :: format_value1, format_value2, format_value3, format_value4
+  character(len=20) :: str_value1, str_value2, str_value3, str_value4
+  character(len=20) :: str_conv
+  double precision  :: value1, value2, value3, value4
+  double precision  :: error1, error2, error3, error4
+  integer           :: size1,size2,size3,size4
+
+  double precision :: conv_crit
+
   sending =.False.
 
   rss  = memory_of_int(pt2_n_tasks_max*2+N_det_generators*2)
@@ -537,14 +549,60 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
 
           if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then
             time1 = time
-            print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1)', c, &
-              pt2_data     % pt2(pt2_stoch_istate) +E, &
-              pt2_data_err % pt2(pt2_stoch_istate), &
-              pt2_data     % variance(pt2_stoch_istate), &
-              pt2_data_err % variance(pt2_stoch_istate), &
-              pt2_data     % overlap(pt2_stoch_istate,pt2_stoch_istate), &
-              pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), &
-              time-time0
+
+            value1 =  pt2_data     % pt2(pt2_stoch_istate) + E
+            error1 =  pt2_data_err % pt2(pt2_stoch_istate)           
+            value2 =  pt2_data     % pt2(pt2_stoch_istate)
+            error2 =  pt2_data_err % pt2(pt2_stoch_istate)           
+            value3 =  pt2_data     % variance(pt2_stoch_istate)           
+            error3 =  pt2_data_err % variance(pt2_stoch_istate)           
+            value4 =  pt2_data     % overlap(pt2_stoch_istate,pt2_stoch_istate)           
+            error4 =  pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate)           
+            
+            ! Max size of the values (FX.Y) with X=size
+            size1 = 15
+            size2 = 12
+            size3 = 12
+            size4 = 12
+
+            ! To generate the format: number(error)
+            call format_w_error(value1,error1,size1,8,format_value1,str_error1)
+            call format_w_error(value2,error2,size2,8,format_value2,str_error2)
+            call format_w_error(value3,error3,size3,8,format_value3,str_error3)
+            call format_w_error(value4,error4,size4,8,format_value4,str_error4)
+
+            ! value > string with the right format
+            write(str_value1,'('//format_value1//')') value1
+            write(str_value2,'('//format_value2//')') value2
+            write(str_value3,'('//format_value3//')') value3
+            write(str_value4,'('//format_value4//')') value4
+
+            ! Convergence criterion
+            conv_crit = dabs(pt2_data_err % pt2(pt2_stoch_istate)) /    &
+                  (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) )
+            write(str_conv,'(G10.3)') conv_crit
+
+            write(*,'(I10,X,X,A20,X,A16,X,A16,X,A16,X,A12,X,F10.1)') c,&
+            adjustl(adjustr(str_value1)//'('//str_error1(1:1)//')'),&
+            adjustl(adjustr(str_value2)//'('//str_error2(1:1)//')'),&
+            adjustl(adjustr(str_value3)//'('//str_error3(1:1)//')'),&
+            adjustl(adjustr(str_value4)//'('//str_error4(1:1)//')'),&
+            adjustl(str_conv),& 
+            time-time0
+
+            ! Old print
+            !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,1pE16.6,1pE16.6)', c, &
+            !  pt2_data     % pt2(pt2_stoch_istate) +E, &
+            !  pt2_data_err % pt2(pt2_stoch_istate), &
+            !  pt2_data     % variance(pt2_stoch_istate), &
+            !  pt2_data_err % variance(pt2_stoch_istate), &
+            !  pt2_data     % overlap(pt2_stoch_istate,pt2_stoch_istate), &
+            !  pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), &
+            !  time-time0, &
+            !  pt2_data     % pt2(pt2_stoch_istate), &
+            !  dabs(pt2_data_err % pt2(pt2_stoch_istate)) /    &
+            !      (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) )
+
             if (stop_now .or. (                                      &
                   (do_exit .and. (dabs(pt2_data_err % pt2(pt2_stoch_istate)) /    &
                   (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) ) <= relative_error))) ) then
@@ -844,6 +902,7 @@ END_PROVIDER
      if (tooth_width == 0.d0) then
        tooth_width = max(1.d-15,sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1))))
      endif
+     ASSERT(tooth_width > 0.d0)
      do i=pt2_n_0(t)+1, pt2_n_0(t+1)
        pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width
      end do
diff --git a/src/cipsi/run_pt2_slave.irp.f b/src/cipsi/run_pt2_slave.irp.f
index 30fc7ce0..debae596 100644
--- a/src/cipsi/run_pt2_slave.irp.f
+++ b/src/cipsi/run_pt2_slave.irp.f
@@ -31,11 +31,12 @@ subroutine run_pt2_slave(thread,iproc,energy)
 
   double precision, intent(in)    :: energy(N_states_diag)
   integer,  intent(in)            :: thread, iproc
-  if (N_det > 100000 ) then
-    call run_pt2_slave_large(thread,iproc,energy)
-  else
-    call run_pt2_slave_small(thread,iproc,energy)
-  endif
+  call run_pt2_slave_large(thread,iproc,energy)
+!  if (N_det > 100000 ) then
+!   call run_pt2_slave_large(thread,iproc,energy)
+! else
+!   call run_pt2_slave_small(thread,iproc,energy)
+! endif
 end
 
 subroutine run_pt2_slave_small(thread,iproc,energy)
@@ -66,6 +67,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
 
   double precision, external :: memory_of_double, memory_of_int
   integer :: bsize ! Size of selection buffers
+!  logical :: sending
 
   allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
   allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
@@ -83,6 +85,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
   buffer_ready = .False.
   n_tasks = 1
 
+!  sending = .False.
   done = .False.
   do while (.not.done)
 
@@ -116,13 +119,14 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
     do k=1,n_tasks
       call pt2_alloc(pt2_data(k),N_states)
       b%cur = 0
-!      double precision :: time2
-!      call wall_time(time2)
+!double precision :: time2
+!call wall_time(time2)
       call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k)))
-!      call wall_time(time1)
-!      print *,  i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
+!call wall_time(time1)
+!print *,  i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
     enddo
     call wall_time(time1)
+!print *,  '-->', i_generator(1), time1-time0, n_tasks
 
     integer, external :: tasks_done_to_taskserver
     if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
@@ -160,11 +164,6 @@ end subroutine
 subroutine run_pt2_slave_large(thread,iproc,energy)
  use selection_types
  use f77_zmq
- BEGIN_DOC
-! This subroutine can miss important determinants when the PT2 is completely
-! computed. It should be called only for large workloads where the PT2 is
-! interrupted before the end
- END_DOC
   implicit none
 
   double precision, intent(in)    :: energy(N_states_diag)
@@ -190,12 +189,8 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
 
   integer :: bsize ! Size of selection buffers
   logical :: sending
-  double precision :: time_shift
-
   PROVIDE global_selection_buffer global_selection_buffer_lock
 
-  call random_number(time_shift)
-  time_shift = time_shift*15.d0
 
   zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
 
@@ -213,9 +208,6 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
 
   sending = .False.
   done = .False.
-  double precision :: time0, time1
-  call wall_time(time0)
-  time0 = time0+time_shift
   do while (.not.done)
 
     integer, external :: get_tasks_from_taskserver
@@ -242,28 +234,25 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
       ASSERT (b%N == bsize)
     endif
 
+    double precision :: time0, time1
+    call wall_time(time0)
     call pt2_alloc(pt2_data,N_states)
     b%cur = 0
     call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator))
+    call wall_time(time1)
 
     integer, external :: tasks_done_to_taskserver
     if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
       done = .true.
     endif
     call sort_selection_buffer(b)
-
-    call wall_time(time1)
-!    if (time1-time0 > 15.d0) then
-      call omp_set_lock(global_selection_buffer_lock)
-      global_selection_buffer%mini = b%mini
-      call merge_selection_buffers(b,global_selection_buffer)
-      b%cur=0
-      call omp_unset_lock(global_selection_buffer_lock)
-      call wall_time(time0)
-!    endif
-
     call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
-    if ( iproc == 1 .or. i_generator < 100 .or. done) then
+    call omp_set_lock(global_selection_buffer_lock)
+    global_selection_buffer%mini = b%mini
+    call merge_selection_buffers(b,global_selection_buffer)
+    b%cur=0
+    call omp_unset_lock(global_selection_buffer_lock)
+    if ( iproc == 1 ) then
       call omp_set_lock(global_selection_buffer_lock)
       call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending)
       global_selection_buffer%cur = 0
diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f
index d4f184f3..6f40a809 100644
--- a/src/cipsi/selection.irp.f
+++ b/src/cipsi/selection.irp.f
@@ -312,9 +312,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   end do
   deallocate(indices)
 
-!  !$OMP CRITICAL
-!  print *,  'Step1: ', i_generator, preinteresting(0)
-!  !$OMP END CRITICAL
 
   allocate(banned(mo_num, mo_num,2), bannedOrb(mo_num, 2))
   allocate (mat(N_states, mo_num, mo_num))
@@ -466,17 +463,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
           fullinteresting(sze+1) = i
         end if
       end do
-
       allocate (fullminilist (N_int, 2, fullinteresting(0)), &
                     minilist (N_int, 2,     interesting(0)) )
-!      if(pert_2rdm)then
-!        allocate(coef_fullminilist_rev(N_states,fullinteresting(0)))
-!        do i=1,fullinteresting(0)
-!          do j = 1, N_states
-!            coef_fullminilist_rev(j,i) = psi_coef_sorted(fullinteresting(i),j)
-!          enddo
-!        enddo
-!      endif
 
       do i=1,fullinteresting(0)
         fullminilist(:,:,i) = psi_det_sorted(:,:,fullinteresting(i))
@@ -524,33 +512,19 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
 
             call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting)
             if(fullMatch) cycle
-! !$OMP CRITICAL
-!  print *,  'Step3: ', i_generator, h1, interesting(0)
-! !$OMP END CRITICAL
 
             call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting)
-
-!            if(.not.pert_2rdm)then
-             call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf)
-!            else
-!             call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0))
-!            endif
+            call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf)
           end if
         enddo
         if(s1 /= s2) monoBdo = .false.
       enddo
       deallocate(fullminilist,minilist)
-!      if(pert_2rdm)then
-!        deallocate(coef_fullminilist_rev)
-!      endif
     enddo
   enddo
   deallocate(preinteresting, prefullinteresting, interesting, fullinteresting)
   deallocate(banned, bannedOrb,mat)
 end subroutine
-
-
-
 subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf)
   use bitmasks
   use selection_types
@@ -571,7 +545,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
   double precision, external :: diag_H_mat_elem_fock
   double precision :: E_shift
   double precision :: s_weight(N_states,N_states)
-  logical, external :: is_in_wavefunction
   PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs
   do jstate=1,N_states
     do istate=1,N_states
@@ -607,18 +580,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
 ! to a determinant of the future. In that case, the determinant will be
 ! detected as already generated when generating in the future with a
 ! double excitation.
-!
-!      if (.not.do_singles) then
-!        if ((h1 == p1) .or. (h2 == p2)) then
-!          cycle
-!        endif
-!      endif
-!
-!      if (.not.do_doubles) then
-!        if ((h1 /= p1).and.(h2 /= p2)) then
-!          cycle
-!        endif
-!      endif
 ! -----
 
       if(bannedOrb(p2, s2)) cycle
@@ -751,7 +712,10 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
         if (delta_E < 0.d0) then
             tmp = -tmp
         endif
+
+        !e_pert(istate) = alpha_h_psi * alpha_h_psi / (E0(istate) - Hii)
         e_pert(istate) = 0.5d0 * (tmp - delta_E)
+
         if (dabs(alpha_h_psi) > 1.d-4) then
           coef(istate) = e_pert(istate) / alpha_h_psi
         else
@@ -864,6 +828,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
 !!!BEGIN_DEBUG
 !      ! To check if the pt2 is taking determinants already in the wf
 !      if (is_in_wavefunction(det(N_int,1),N_int)) then
+!        logical, external :: is_in_wavefunction
 !        print*, 'A determinant contributing to the pt2 is already in'
 !        print*, 'the wave function:'
 !        call  print_det(det(N_int,1),N_int)
@@ -971,13 +936,10 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere
 
         call get_mask_phase(psi_det_sorted(1,1,interesting(i)), phasemask,N_int)
         if(nt == 4) then
-!          call get_d2_reference(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
           call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
         else if(nt == 3) then
-!          call get_d1_reference(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
           call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
         else
-!          call get_d0_reference(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
           call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
         end if
     else if(nt == 4) then
@@ -1537,8 +1499,6 @@ subroutine past_d2(banned, p, sp)
   end if
 end
 
-
-
 subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting)
   use bitmasks
   implicit none
diff --git a/src/cipsi/slave_cipsi.irp.f b/src/cipsi/slave_cipsi.irp.f
index f96aaa6a..ddfc050e 100644
--- a/src/cipsi/slave_cipsi.irp.f
+++ b/src/cipsi/slave_cipsi.irp.f
@@ -311,7 +311,7 @@ subroutine run_slave_main
           if (mpi_master) then
             print *,  'Running PT2'
           endif
-          !$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target)
+          !$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1)
           i = omp_get_thread_num()
           call run_pt2_slave(0,i,pt2_e0_denominator)
           !$OMP END PARALLEL
diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f
index 5fc9db0f..35e80eb8 100644
--- a/src/cipsi/stochastic_cipsi.irp.f
+++ b/src/cipsi/stochastic_cipsi.irp.f
@@ -69,8 +69,8 @@ subroutine run_stochastic_cipsi
 
   do while (                                                         &
         (N_det < N_det_max) .and.                                    &
-        (sum(abs(pt2_data % pt2(1:N_states)) * state_average_weight(1:N_states)) > pt2_max) .and.               &
-        (sum(abs(pt2_data % variance(1:N_states)) * state_average_weight(1:N_states)) > variance_max) .and.     &
+        (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and.               &
+        (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and.     &
         (correlation_energy_ratio <= correlation_energy_ratio_max)   &
         )
       write(*,'(A)')  '--------------------------------------------------------------------------------'
@@ -98,6 +98,7 @@ subroutine run_stochastic_cipsi
 
     call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det)
     call print_extrapolated_energy()
+    call print_mol_properties()
     N_iter += 1
 
     if (qp_stop()) exit
@@ -136,6 +137,7 @@ subroutine run_stochastic_cipsi
        pt2_data , pt2_data_err, N_det, N_configuration, N_states, psi_s2)
     call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det)
     call print_extrapolated_energy()
+    call print_mol_properties()
   endif
   call pt2_dealloc(pt2_data)
   call pt2_dealloc(pt2_data_err)
diff --git a/src/cipsi_tc_bi_ortho/EZFIO.cfg b/src/cipsi_tc_bi_ortho/EZFIO.cfg
new file mode 100644
index 00000000..7fcf19eb
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/EZFIO.cfg
@@ -0,0 +1,36 @@
+[save_wf_after_selection]
+type: logical
+doc: If true, saves the wave function after the selection, before the diagonalization
+interface: ezfio,provider,ocaml
+default: False
+
+[seniority_max]
+type: integer
+doc: Maximum number of allowed open shells. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
+[excitation_ref]
+type: integer
+doc: 1: Hartree-Fock determinant, 2:All determinants of the dominant configuration
+interface: ezfio,ocaml,provider
+default: 1
+
+[excitation_max]
+type: integer
+doc: Maximum number of excitation with respect to the Hartree-Fock determinant. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
+[excitation_alpha_max]
+type: integer
+doc: Maximum number of excitation for alpha determinants with respect to the Hartree-Fock determinant. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
+[excitation_beta_max]
+type: integer
+doc: Maximum number of excitation for beta determinants with respect to the Hartree-Fock determinant. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
diff --git a/src/cipsi_tc_bi_ortho/NEED b/src/cipsi_tc_bi_ortho/NEED
new file mode 100644
index 00000000..4dd1af36
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/NEED
@@ -0,0 +1,6 @@
+mpi
+perturbation
+zmq
+iterations_tc
+csf
+tc_bi_ortho
diff --git a/src/cipsi_tc_bi_ortho/cipsi.irp.f b/src/cipsi_tc_bi_ortho/cipsi.irp.f
new file mode 100644
index 00000000..fb907cb3
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/cipsi.irp.f
@@ -0,0 +1,136 @@
+subroutine run_cipsi
+
+  BEGIN_DOC
+  ! Selected Full Configuration Interaction with deterministic selection and
+  ! stochastic PT2.
+  END_DOC
+
+  use selection_types
+
+  implicit none
+
+  integer                        :: i,j,k,ndet
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  double precision, allocatable  :: zeros(:)
+  integer                        :: to_select
+  logical, external :: qp_stop
+
+  double precision :: threshold_generators_save
+  double precision :: rss
+  double precision, external :: memory_of_double
+  double precision :: correlation_energy_ratio,E_denom,E_tc,norm
+
+  PROVIDE H_apply_buffer_allocated distributed_davidson
+
+  print*,'Diagonal elements of the Fock matrix '
+  do i = 1, mo_num
+   write(*,*)i,Fock_matrix_tc_mo_tot(i,i)
+  enddo
+
+  N_iter = 1
+  threshold_generators = 1.d0
+  SOFT_TOUCH threshold_generators
+
+  rss = memory_of_double(N_states)*4.d0
+  call check_mem(rss,irp_here)
+
+  allocate (zeros(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+
+  double precision               :: hf_energy_ref
+  logical                        :: has, print_pt2
+  double precision               :: relative_error
+
+  relative_error=PT2_relative_error
+
+  zeros = 0.d0
+  pt2_data % pt2  = -huge(1.e0)
+  pt2_data % rpt2 = -huge(1.e0)
+  pt2_data % overlap(:,:) = 0.d0
+  pt2_data % variance = huge(1.e0)
+
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  print_pt2 = .False.
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+
+  call ezfio_has_hartree_fock_energy(has)
+  if (has) then
+    call ezfio_get_hartree_fock_energy(hf_energy_ref)
+  else
+    hf_energy_ref = ref_bitmask_energy
+  endif
+
+  if (N_det > N_det_max) then
+    psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det)
+    psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states)
+    N_det = N_det_max
+    soft_touch N_det psi_det psi_coef
+    if (s2_eig) then
+      call make_s2_eigenfunction
+    endif
+    print_pt2 = .False.
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!    call routine_save_right
+  endif
+
+  correlation_energy_ratio = 0.d0
+
+  print_pt2 = .True.
+  do while (                                                         &
+        (N_det < N_det_max) .and.                                    &
+        (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max)          &
+        )
+      write(*,'(A)')  '--------------------------------------------------------------------------------'
+
+
+    to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
+    to_select = max(N_states_diag, to_select)
+
+    E_denom = E_tc ! TC Energy of the current wave function 
+    if (do_pt2) then
+      call pt2_dealloc(pt2_data)
+      call pt2_dealloc(pt2_data_err)
+      call pt2_alloc(pt2_data, N_states)
+      call pt2_alloc(pt2_data_err, N_states)
+      threshold_generators_save = threshold_generators
+      threshold_generators = 1.d0
+      SOFT_TOUCH threshold_generators
+      call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection
+      threshold_generators = threshold_generators_save
+      SOFT_TOUCH threshold_generators
+    else
+      call pt2_dealloc(pt2_data)
+      call pt2_alloc(pt2_data, N_states)
+      call ZMQ_selection(to_select, pt2_data)
+    endif
+
+    N_iter += 1
+
+    if (qp_stop()) exit
+
+    ! Add selected determinants
+    call copy_H_apply_buffer_to_wf()
+
+    if (save_wf_after_selection) then
+      call save_wavefunction
+    endif
+
+    PROVIDE  psi_coef
+    PROVIDE  psi_det
+    PROVIDE  psi_det_sorted_tc
+
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+    if (qp_stop()) exit
+  enddo
+
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+  call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+
+end
diff --git a/src/cipsi_tc_bi_ortho/energy.irp.f b/src/cipsi_tc_bi_ortho/energy.irp.f
new file mode 100644
index 00000000..16f4528e
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/energy.irp.f
@@ -0,0 +1,51 @@
+BEGIN_PROVIDER [ logical, initialize_pt2_E0_denominator ]
+ implicit none
+ BEGIN_DOC
+ ! If true, initialize pt2_E0_denominator
+ END_DOC
+ initialize_pt2_E0_denominator = .True.
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! E0 in the denominator of the PT2
+ END_DOC
+ integer :: i,j
+
+  pt2_E0_denominator = eigval_right_tc_bi_orth
+
+! if (initialize_pt2_E0_denominator) then
+!   if (h0_type == "EN") then
+!     pt2_E0_denominator(1:N_states) = psi_energy(1:N_states)
+!   else if (h0_type == "HF") then
+!     do i=1,N_states
+!       j = maxloc(abs(psi_coef(:,i)),1)
+!       pt2_E0_denominator(i) = psi_det_hii(j)
+!     enddo
+!   else if (h0_type == "Barycentric") then
+!     pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states)
+!   else if (h0_type == "CFG") then
+!     pt2_E0_denominator(1:N_states) = psi_energy(1:N_states)
+!   else
+!     print *,  h0_type, ' not implemented'
+!     stop
+!   endif
+!  do i=1,N_states
+!    call write_double(6,pt2_E0_denominator(i)+nuclear_repulsion, 'PT2 Energy denominator')
+!  enddo
+! else
+!   pt2_E0_denominator = -huge(1.d0)
+! endif
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, pt2_overlap, (N_states, N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Overlap between the perturbed wave functions
+ END_DOC
+ pt2_overlap(1:N_states,1:N_states) = 0.d0
+END_PROVIDER
+
diff --git a/src/cipsi_tc_bi_ortho/environment.irp.f b/src/cipsi_tc_bi_ortho/environment.irp.f
new file mode 100644
index 00000000..5c0e0820
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/environment.irp.f
@@ -0,0 +1,14 @@
+BEGIN_PROVIDER [ integer, nthreads_pt2 ]
+ implicit none
+ BEGIN_DOC
+ ! Number of threads for Davidson
+ END_DOC
+ nthreads_pt2 = nproc
+ character*(32) :: env
+ call getenv('QP_NTHREADS_PT2',env)
+ if (trim(env) /= '') then
+   read(env,*) nthreads_pt2
+   call write_int(6,nthreads_pt2,'Target number of threads for PT2')
+ endif
+END_PROVIDER
+
diff --git a/src/cipsi_tc_bi_ortho/fock_diag.irp.f b/src/cipsi_tc_bi_ortho/fock_diag.irp.f
new file mode 100644
index 00000000..af6849ab
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/fock_diag.irp.f
@@ -0,0 +1,95 @@
+subroutine build_fock_tmp_tc(fock_diag_tmp,det_ref,Nint)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+! Build the diagonal of the Fock matrix corresponding to a generator
+! determinant. $F_{00}$ is $\langle i|H|i \rangle = E_0$.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: det_ref(Nint,2)
+  double precision, intent(out)  :: fock_diag_tmp(2,mo_num+1)
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: ne(2), i, j, ii, jj
+  double precision               :: E0
+
+  ! Compute Fock matrix diagonal elements
+  call bitstring_to_list_ab(det_ref,occ,Ne,Nint)
+
+  fock_diag_tmp = 0.d0
+  E0 = 0.d0
+
+  if (Ne(1) /= elec_alpha_num) then
+    print *,  'Error in build_fock_tmp_tc (alpha)', Ne(1), Ne(2)
+    call debug_det(det_ref,N_int)
+    stop -1
+  endif
+  if (Ne(2) /= elec_beta_num) then
+    print *, 'Error in build_fock_tmp_tc (beta)', Ne(1), Ne(2)
+    call debug_det(det_ref,N_int)
+    stop -1
+  endif
+
+  ! Occupied MOs
+  do ii=1,elec_alpha_num
+    i = occ(ii,1)
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
+    E0 = E0 + mo_one_e_integrals(i,i)
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      if (i==j) cycle
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
+      E0 = E0 + 0.5d0*mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj(i,j)
+      E0 = E0 + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+  do ii=1,elec_beta_num
+    i = occ(ii,2)
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
+    E0 = E0 + mo_one_e_integrals(i,i)
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      if (i==j) cycle
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
+      E0 = E0 + 0.5d0*mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+
+  ! Virtual MOs
+  do i=1,mo_num
+    if (fock_diag_tmp(1,i) /= 0.d0) cycle
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+  do i=1,mo_num
+    if (fock_diag_tmp(2,i) /= 0.d0) cycle
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+
+  fock_diag_tmp(1,mo_num+1) = E0
+  fock_diag_tmp(2,mo_num+1) = E0
+
+end
diff --git a/src/cipsi_tc_bi_ortho/get_d.irp.f b/src/cipsi_tc_bi_ortho/get_d.irp.f
new file mode 100644
index 00000000..9421787e
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/get_d.irp.f
@@ -0,0 +1,1902 @@
+
+! ---
+
+double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint)
+
+  use bitmasks
+  implicit none
+
+  integer,           intent(in) :: Nint
+  integer,           intent(in) :: s1, s2, h1, h2, p1, p2
+  integer(bit_kind), intent(in) :: phasemask(Nint,2)
+
+  double precision, save        :: res(0:1) = (/1d0, -1d0/)
+
+  integer                       :: np
+  integer                       :: h1_int, h2_int
+  integer                       :: p1_int, p2_int
+  integer                       :: h1_bit, h2_bit
+  integer                       :: p1_bit, p2_bit
+  logical                       :: change
+
+  h1_int = shiftr(h1-1,bit_kind_shift)+1
+  h1_bit = h1 - shiftl(h1_int-1,bit_kind_shift)-1
+
+  h2_int = shiftr(h2-1,bit_kind_shift)+1
+  h2_bit = h2 - shiftl(h2_int-1,bit_kind_shift)-1
+
+  p1_int = shiftr(p1-1,bit_kind_shift)+1
+  p1_bit = p1 - shiftl(p1_int-1,bit_kind_shift)-1
+
+  p2_int = shiftr(p2-1,bit_kind_shift)+1
+  p2_bit = p2 - shiftl(p2_int-1,bit_kind_shift)-1
+
+  ! Put the phasemask bits at position 0, and add them all
+  h1_bit = int( shiftr( phasemask(h1_int,s1), h1_bit ) )
+  p1_bit = int( shiftr( phasemask(p1_int,s1), p1_bit ) )
+  h2_bit = int( shiftr( phasemask(h2_int,s2), h2_bit ) )
+  p2_bit = int( shiftr( phasemask(p2_int,s2), p2_bit ) )
+
+  np = h1_bit + p1_bit + h2_bit + p2_bit
+
+  if(p1 < h1) np = np + 1
+  if(p2 < h2) np = np + 1
+
+  if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1
+  get_phase_bi = res(iand(np,1))
+
+end function get_phase_bi
+
+! ---
+
+subroutine get_d3_htc(gen, bannedOrb, banned, mat_m, mat_p, mask, p, sp, rcoefs, lcoefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)   :: gen(N_int, 2), mask(N_int, 2)
+  integer,           intent(in)   :: p(0:4,2), sp
+  logical,           intent(in)   :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  double precision,  intent(in)   :: rcoefs(N_states), lcoefs(N_states)
+  double precision, intent(inout) :: mat_m(N_states, mo_num, mo_num), mat_p(N_states, mo_num, mo_num)
+
+  integer(bit_kind)               :: det(N_int, 2)
+  integer                         :: k, h1, h2, p1, p2, puti, putj
+  double precision                :: i_h_alpha, alpha_h_i 
+  logical                         :: ok
+
+  if(sp == 3) then ! AB
+
+    h1 = p(1,1)
+    h2 = p(1,2)
+    do p1 = 1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+      do p2 = 1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, 1)) cycle ! rentable?
+
+        call apply_particles(mask, 1, p1, 2, p2, det, ok, N_int)
+        call htilde_mu_mat_bi_ortho_tot(gen, det, N_int, i_h_alpha)
+        call htilde_mu_mat_bi_ortho_tot(det,gen, N_int, alpha_h_i)
+!        call hji_hij_mu_mat_tot(gen, det, N_int,i_h_alpha , alpha_h_i)
+        if( dabs(alpha_h_i) .gt. 0.d0) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k = 1, N_states
+            mat_p(k, p1, p2) = mat_p(k, p1, p2) + rcoefs(k) * alpha_h_i 
+          enddo
+        endif
+        if( dabs(i_h_alpha) .gt. 0.d0) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k = 1, N_states
+            mat_m(k, p1, p2) = mat_m(k, p1, p2) + lcoefs(k) * i_h_alpha 
+          enddo
+        endif
+
+      enddo
+    enddo
+
+  else ! AA BB
+
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti = 1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+      do putj = puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, 1)) cycle ! rentable?
+
+        call apply_particles(mask, sp, puti, sp, putj, det, ok, N_int)
+!        call hji_hij_mu_mat_tot(gen, det, N_int, i_h_alpha, alpha_h_i)
+        call htilde_mu_mat_bi_ortho_tot(gen, det, N_int, i_h_alpha)
+        call htilde_mu_mat_bi_ortho_tot( det,gen, N_int, alpha_h_i)
+        if( dabs(alpha_h_i) .gt. 0.d0) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k = 1, N_states
+            mat_p(k, puti, putj) = mat_p(k, puti, putj) + rcoefs(k) * alpha_h_i 
+          enddo
+        endif
+        if( dabs(i_h_alpha) .gt. 0.d0) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k = 1, N_states
+            mat_m(k, puti, putj) = mat_m(k, puti, putj) + lcoefs(k) * i_h_alpha 
+          enddo
+        endif
+
+      enddo
+    enddo
+
+  endif
+
+end subroutine get_d3_htc
+
+! ---
+
+subroutine get_d3_h(gen, bannedOrb, banned, mat, mask, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)   :: gen(N_int, 2), mask(N_int, 2)
+  integer,           intent(in)   :: p(0:4,2), sp
+  logical,           intent(in)   :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  double precision,  intent(in)   :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+
+  integer(bit_kind)               :: det(N_int, 2)
+  integer                         :: k, h1, h2, p1, p2, puti, putj
+  double precision                :: hij
+  logical                         :: ok
+
+  if(sp == 3) then ! AB
+
+    h1 = p(1,1)
+    h2 = p(1,2)
+    do p1 = 1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+      do p2 = 1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, 1)) cycle ! rentable?
+
+        call apply_particles(mask, 1, p1, 2, p2, det, ok, N_int)
+        call i_h_j(gen, det, N_int, hij)
+        if (hij == 0.d0) cycle
+        !DIR$ LOOP COUNT AVG(4)
+        do k = 1, N_states
+          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij
+        enddo
+
+      enddo
+    enddo
+
+  else ! AA BB
+
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti = 1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+      do putj = puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, 1)) cycle ! rentable?
+
+        call apply_particles(mask, sp, puti, sp, putj, det, ok, N_int)
+        call i_h_j(gen, det, N_int, hij)
+        !DIR$ LOOP COUNT AVG(4)
+        do k = 1, N_states
+          mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+        enddo
+
+      enddo
+    enddo
+
+  endif
+
+end subroutine get_d3_h
+
+! ---
+
+subroutine get_d2(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  double precision, intent(in) :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num), mat_r(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  double precision, external :: get_phase_bi
+
+  integer :: i, j, k, tip, ma, mi, puti, putj
+  integer :: h1, h2, p1, p2, i1, i2
+  double precision :: hij, hji, phase
+
+  integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
+  integer, parameter :: turn2(2) = (/2, 1/)
+  integer, parameter :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer :: bant
+  bant = 1
+
+  tip = p(0,1) * p(0,2)
+
+  ma = sp
+  print*,'in get_d2'
+  stop
+  if(p(0,1) > p(0,2)) ma = 1
+  if(p(0,1) < p(0,2)) ma = 2
+  mi = mod(ma, 2) + 1
+
+  if(sp == 3) then
+    print*,'in sp == 3'
+    if(ma == 2) bant = 2
+
+    if(tip == 3) then
+      puti = p(1, mi)
+      if(bannedOrb(puti, mi)) return
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+
+      do i = 1, 3
+        putj = p(i, ma)
+        if(banned(putj,puti,bant)) cycle
+        i1 = turn3(1,i)
+        i2 = turn3(2,i)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+
+        ! <p1 p2|1/r12|h1 h2> --> <p1 p2| w_ee^h + t^nh | h1 h2> --> < p2 p1 | H^tilde| h1 h2 >
+        ! 
+        !                      <p1 p2 | h1 h2>        -            <p2 p1 | h1 h2 >
+        ! < p2 p1 | H^tilde^dag| h1 h2 > = < h1 h2 | w_ee^h + t^nh | p1 p2 >
+        hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2, p1, h1, h2)
+        hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e(h2, h1, p1, p2)
+        if (hij == 0.d0) cycle
+
+        hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+        hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+
+        if(ma == 1) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hij
+            mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hji
+          enddo
+        else
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hij
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hji
+          enddo
+        end if
+      end do
+    else
+      h1 = h(1,1)
+      h2 = h(1,2)
+      do j = 1,2
+        putj = p(j, 2)
+        if(bannedOrb(putj, 2)) cycle
+        p2 = p(turn2(j), 2)
+        do i = 1,2
+          puti = p(i, 1)
+
+          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
+          p1 = p(turn2(i), 1)
+
+          hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2)
+          hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            hji = hji * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hij
+              mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hji
+            enddo
+          endif
+        end do
+      end do
+    end if
+
+  else
+    print*,'NOT in sp == 3'
+    if(tip == 0) then
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+      do i=1,3
+        puti = p(i, ma)
+        if(bannedOrb(puti,ma)) cycle
+        do j=i+1,4
+          putj = p(j, ma)
+          if(bannedOrb(putj,ma)) cycle
+          if(banned(puti,putj,1)) cycle
+
+          i1 = turn2d(1, i, j)
+          i2 = turn2d(2, i, j)
+          p1 = p(i1, ma)
+          p2 = p(i2, ma)
+          hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)
+          hji = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e(h2,h1, p1, p2)
+          if (hij == 0.d0) cycle
+
+          hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,1) * hij
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,2) * hji
+          enddo
+        end do
+      end do
+    else if(tip == 3) then
+      h1 = h(1, mi)
+      h2 = h(1, ma)
+      p1 = p(1, mi)
+      do i=1,3
+        puti = p(turn3(1,i), ma)
+        if(bannedOrb(puti,ma)) cycle
+        putj = p(turn3(2,i), ma)
+        if(bannedOrb(putj,ma)) cycle
+        if(banned(puti,putj,1)) cycle
+        p2 = p(i, ma)
+
+        hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2)
+        hji = mo_bi_ortho_tc_two_e(h1, h2, p1, p2)
+        if (hij == 0.d0) cycle
+
+        hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        hji = hji * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        if (puti < putj) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hij
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hji
+          enddo
+        else
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hij
+            mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hji
+          enddo
+        endif
+      end do
+    else ! tip == 4
+      puti = p(1, sp)
+      putj = p(2, sp)
+      if(.not. banned(puti,putj,1)) then
+        p1 = p(1, mi)
+        p2 = p(2, mi)
+        h1 = h(1, mi)
+        h2 = h(2, mi)
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2))
+        hji = (mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e(h2,h1, p1, p2))
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          hji = hji * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hij
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hji
+          enddo
+        end if
+      end if
+    end if
+  end if
+
+end subroutine get_d2
+
+! ---
+
+subroutine get_d1(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in)  :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision, intent(in)   :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision, external     :: get_phase_bi
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib, k, l
+  integer :: mm
+
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer                        :: bant
+  double precision, allocatable  :: hij_cache(:,:)
+  double precision               :: hij, tmp_row_ij(N_states, mo_num), tmp_row_ij2(N_states, mo_num)
+  double precision, allocatable  :: hji_cache(:,:)
+  double precision               :: hji, tmp_row_ji(N_states, mo_num), tmp_row_ji2(N_states, mo_num)
+
+  PROVIDE mo_integrals_map N_int
+
+  allocate (lbanned(mo_num, 2))
+  allocate (hij_cache(mo_num,2))
+  allocate (hji_cache(mo_num,2))
+  lbanned = bannedOrb
+  print*,'in get d1'
+  call debug_det(gen, N_int)
+
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+
+  bant = 1
+
+  if(sp == 3) then
+    print*,'in sp == 3'
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+    print*,puti, hfix,p1,p2
+    if(.not. bannedOrb(puti, mi)) then
+!      print*,'not banned'
+      do mm = 1, mo_num
+       hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,p2,mm,hfix)
+       hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,p1,mm,hfix)
+       hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2)
+       hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1)
+      enddo
+!      call get_mo_bi_ortho_tc_two_es(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map)
+!      call get_mo_bi_ortho_tc_two_es(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map)
+      tmp_row_ij = 0d0
+      tmp_row_ji = 0d0
+      do putj=1, hfix-1
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,1) - hij_cache(putj,2)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row_ij(k,putj) = tmp_row_ij(k,putj) + hij * coefs(k,2)
+          enddo
+        endif
+        hji = hji_cache(putj,1) - hji_cache(putj,2)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row_ji(k,putj) = tmp_row_ji(k,putj) + hji * coefs(k,1)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,2) - hij_cache(putj,1)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row_ij(k,putj) = tmp_row_ij(k,putj) + hij * coefs(k,2)
+          enddo
+        endif
+        hji = hji_cache(putj,2) - hji_cache(putj,1)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row_ji(k,putj) = tmp_row_ji(k,putj) + hji * coefs(k,1)
+          enddo
+        endif
+      end do
+
+      if(ma == 1) then
+        mat_p(1:N_states,1:mo_num,puti) = mat_p(1:N_states,1:mo_num,puti) + tmp_row_ij(1:N_states,1:mo_num)
+        mat_m(1:N_states,1:mo_num,puti) = mat_m(1:N_states,1:mo_num,puti) + tmp_row_ji(1:N_states,1:mo_num)
+      else
+        do l=1,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_p(k,puti,l) = mat_p(k,puti,l) + tmp_row_ij(k,l)
+            mat_m(k,puti,l) = mat_m(k,puti,l) + tmp_row_ji(k,l)
+          enddo
+        enddo
+      end if
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)
+    tmp_row_ij = 0d0
+    tmp_row_ij2 = 0d0
+    tmp_row_ji = 0d0
+    tmp_row_ji2 = 0d0
+!    call get_mo_bi_ortho_tc_two_es(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map)
+!    call get_mo_bi_ortho_tc_two_es(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map)
+    do mm = 1, mo_num
+     hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1)
+     hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2)
+     hji_cache(mm,1) = mo_bi_ortho_tc_two_e(pfix,p1,mm,hfix)
+     hji_cache(mm,2) = mo_bi_ortho_tc_two_e(pfix,p2,mm,hfix)
+    enddo
+    putj = p1
+    do puti = 1, mo_num !HOT
+
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,2)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row_ij(k,puti) = tmp_row_ij(k,puti) + hij * coefs(k,2)
+          enddo
+        endif
+        hji = hji_cache(puti,2)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row_ji(k,puti) = tmp_row_ji(k,puti) + hji * coefs(k,1)
+          enddo
+        endif
+      endif
+
+      putj = p2
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,1)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_row_ij2(k,puti) = tmp_row_ij2(k,puti) + hij * coefs(k,2)
+          enddo
+        endif
+        hji = hji_cache(puti,1)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_row_ji2(k,puti) = tmp_row_ji2(k,puti) + hji * coefs(k,1)
+          enddo
+        endif
+      endif
+
+    enddo
+
+    if(mi == 1) then
+      mat_p(:,:,p1) = mat_p(:,:,p1) + tmp_row_ij(:,:)
+      mat_p(:,:,p2) = mat_p(:,:,p2) + tmp_row_ij2(:,:)
+      mat_m(:,:,p1) = mat_m(:,:,p1) + tmp_row_ji(:,:)
+      mat_m(:,:,p2) = mat_m(:,:,p2) + tmp_row_ji2(:,:)
+    else
+      do l=1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_ij(k,l)
+          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_ij2(k,l)
+          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_ji(k,l)
+          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_ji2(k,l)
+        enddo
+      enddo
+    end if
+
+  else  ! sp /= 3
+    print*,'not in sp == 3'
+
+    if(p(0,ma) == 3) then
+      do i=1,3
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+!        call get_mo_bi_ortho_tc_two_es(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map)
+!        call get_mo_bi_ortho_tc_two_es(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map)
+        do mm = 1, mo_num
+         hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,p2,mm,hfix)
+         hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,p1,mm,hfix)
+         hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2)
+         hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1)
+        enddo
+        tmp_row_ij = 0d0
+        tmp_row_ji = 0d0
+        do putj=1,hfix-1
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,1) - hij_cache(putj,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_row_ij(:,putj) = tmp_row_ij(:,putj) + hij * coefs(:,1)
+          endif
+          hji = hji_cache(putj,1) - hji_cache(putj,2)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_row_ji(:,putj) = tmp_row_ji(:,putj) + hji * coefs(:,2)
+          endif
+        end do
+        do putj=hfix+1,mo_num
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,2) - hij_cache(putj,1)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_row_ij(:,putj) = tmp_row_ij(:,putj) + hij * coefs(:,1)
+          endif
+          hji = hji_cache(putj,2) - hji_cache(putj,1)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_row_ji(:,putj) = tmp_row_ji(:,putj) + hji * coefs(:,2)
+          endif
+        end do
+
+        mat_p(:, :puti-1, puti) = mat_p(:, :puti-1, puti) + tmp_row_ij(:,:puti-1)
+        mat_m(:, :puti-1, puti) = mat_m(:, :puti-1, puti) + tmp_row_ji(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_p(k, puti, l) = mat_p(k, puti,l) + tmp_row_ij(k,l)
+            mat_m(k, puti, l) = mat_m(k, puti,l) + tmp_row_ji(k,l)
+          enddo
+        enddo
+      end do
+    else
+      hfix = h(1,mi)
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+      tmp_row_ij = 0d0
+      tmp_row_ij2 = 0d0
+      tmp_row_ji = 0d0
+      tmp_row_ji2 = 0d0
+!      call get_mo_bi_ortho_tc_two_es(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map)
+!      call get_mo_bi_ortho_tc_two_es(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map)
+      do mm = 1, mo_num
+       hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,pfix,mm,hfix)
+       hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,pfix,mm,hfix)
+       hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,pfix)
+       hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,pfix)
+      enddo
+      putj = p2
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,1)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_row_ij(k,puti) = tmp_row_ij(k,puti) + hij * coefs(k,1)
+            enddo
+          endif
+          hji = hji_cache(puti,1)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_row_ji(k,puti) = tmp_row_ji(k,puti) + hji * coefs(k,2)
+            enddo
+          endif
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_row_ij2(k,puti) = tmp_row_ij2(k,puti) + hij * coefs(k,1)
+            enddo
+          endif
+          hji = hji_cache(puti,2)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_row_ji2(k,puti) = tmp_row_ji2(k,puti) + hji * coefs(k,2)
+            enddo
+          endif
+        end if
+      end do
+      mat_p(:,:p2-1,p2) = mat_p(:,:p2-1,p2) + tmp_row_ij(:,:p2-1)
+      mat_m(:,:p2-1,p2) = mat_m(:,:p2-1,p2) + tmp_row_ji(:,:p2-1)
+      do l=p2,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_ij(k,l)
+          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_ji(k,l)
+        enddo
+      enddo
+      mat_p(:,:p1-1,p1) = mat_p(:,:p1-1,p1) + tmp_row_ij2(:,:p1-1)
+      mat_m(:,:p1-1,p1) = mat_m(:,:p1-1,p1) + tmp_row_ji2(:,:p1-1)
+      do l=p1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_ij2(k,l)
+          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_ji2(k,l)
+        enddo
+      enddo
+    end if
+  end if
+  deallocate(lbanned,hij_cache)
+
+ !! MONO
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1 = 1, p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1
+      do i2 = ib, p(0,s2)
+        p1 = p(i1,s1)
+        p2 = p(i2,s2)
+        if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+!        call i_h_j(gen, det, N_int, hij)
+        !!!! GUESS ON THE ORDER OF DETS 
+        print*,'compute hij'
+!        hij = 0.d0
+!        hji = 0.d0
+        call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji)
+        call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij)
+        !DIR$ LOOP COUNT AVG(4)
+        do k = 1, N_states
+          mat_p(k, p1, p2) = mat_p(k, p1, p2) + coefs(k,1) * hij
+          mat_m(k, p1, p2) = mat_m(k, p1, p2) + coefs(k,2) * hji
+        enddo
+      enddo
+    enddo
+
+end subroutine get_d1
+
+! ---
+
+subroutine get_d0(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind) :: det(N_int, 2)
+  double precision, intent(in) :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_m(N_states, mo_num, mo_num)
+  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  integer :: i, j, k, s, h1, h2, p1, p2, puti, putj, mm
+  double precision :: hij, phase, hji
+  double precision, external :: get_phase_bi
+  logical :: ok
+
+  integer, parameter :: bant=1
+  double precision, allocatable :: hij_cache1(:), hij_cache2(:)
+  allocate (hij_cache1(mo_num),hij_cache2(mo_num))
+  double precision, allocatable :: hji_cache1(:), hji_cache2(:)
+  allocate (hji_cache1(mo_num),hji_cache2(mo_num))
+
+  print*,'in get d0'
+!  call debug_det(gen, N_int)
+
+  if(sp == 3) then ! AB
+    h1 = p(1,1)
+    h2 = p(1,2)
+!   print*,'in AB'
+    do p1=1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+!      call get_mo_bi_ortho_tc_two_es(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map)
+      do mm =1, mo_num
+       hji_cache1(mm) = mo_bi_ortho_tc_two_e(mm,p1,h2,h1)
+       hji_cache1(mm) = mo_bi_ortho_tc_two_e(h2,h1,mm,p1)
+      enddo
+      do p2=1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, bant)) cycle ! rentable?
+        if(p1 == h1 .or. p2 == h2) then
+!          print*,'in p1 == h1 or p2 == h2'
+          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
+!          call i_h_j(gen, det, N_int, hij)
+           !!! GUESS ON THE ORDER 
+          call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hji)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hij)
+        else
+!          print*,'ELSE '
+          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          hij = hij_cache1(p2) * phase
+          hji = hji_cache1(p2) * phase
+        end if
+        if (hij == 0.d0) cycle
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_p(k, p1, p2) = mat_p(k, p1, p2) + coefs(k,1) * hij  ! HOTSPOT
+          mat_m(k, p1, p2) = mat_m(k, p1, p2) + coefs(k,2) * hji  ! HOTSPOT
+        enddo
+      end do
+    end do
+
+  else ! AA BB
+!    print*, 'in AA BB' 
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti=1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+      do mm = 1, mo_num
+       hij_cache1(mm) = mo_bi_ortho_tc_two_e(p2,p1,mm,puti)
+       hij_cache2(mm) = mo_bi_ortho_tc_two_e(p1,p2,mm,puti)
+       hji_cache1(mm) = mo_bi_ortho_tc_two_e(mm,puti,p2,p1)
+       hji_cache2(mm) = mo_bi_ortho_tc_two_e(mm,puti,p1,p2)
+      enddo
+!      call get_mo_bi_ortho_tc_two_es(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map)
+!      call get_mo_bi_ortho_tc_two_es(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map)
+      do putj=puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, bant)) cycle ! rentable?
+        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
+!          call i_h_j(gen, det, N_int, hij)
+          !!! GUESS 
+          call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hij)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hji)
+          if (hij == 0.d0.or.hji == 0.d0) cycle
+        else
+          hji = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) -  mo_bi_ortho_tc_two_e(p2, p1, puti, putj))
+          hij = (mo_bi_ortho_tc_two_e(puti, putj, p1, p2) -  mo_bi_ortho_tc_two_e(puti, putj, p2, p1))
+          if (hij == 0.d0.or.hji==0.d0) cycle
+          hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+          hji = hji * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+        end if
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k,1) * hij
+          mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k,2) * hji
+        enddo
+      end do
+    end do
+  end if
+
+!  deallocate(hij_cache1,hij_cache2)
+!  deallocate(hji_cache1,hji_cache2)
+
+end subroutine get_d0
+
+! ---
+
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+!subroutine get_pm2(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
+!
+!  use bitmasks
+!
+!  implicit none
+!
+!  integer,           intent(in)   :: h(0:2,2), p(0:4,2), sp
+!  integer(bit_kind), intent(in)   :: mask(N_int, 2), gen(N_int, 2), phasemask(N_int,2)
+!  logical,           intent(in)   :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+!  double precision,  intent(in)   :: coefs(N_states)
+!  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
+!
+!  integer, parameter              :: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
+!  integer, parameter              :: turn2(2) = (/2, 1/)
+!  integer, parameter              :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+!
+!  integer                         :: i, j, k, tip, ma, mi, puti, putj
+!  integer                         :: h1, h2, p1, p2, i1, i2
+!  integer                         :: bant
+!  double precision                :: hij_p, hij_m, phase
+!
+!  double precision, external      :: get_phase_bi
+!  double precision, external      :: get_mo_bi_ortho_tc_two_e_tc_int, get_mo_bi_ortho_tc_two_e_tcdag_int 
+!
+!  PROVIDE mo_integrals_tc_int_map mo_integrals_tcdag_int_map
+!
+!  bant = 1
+!
+!  tip = p(0,1) * p(0,2)
+!
+!  ma = sp
+!  if(p(0,1) > p(0,2)) ma = 1
+!  if(p(0,1) < p(0,2)) ma = 2
+!  mi = mod(ma, 2) + 1
+!
+!  if(sp == 3) then
+!    if(ma == 2) bant = 2
+!    if(tip == 3) then
+!      puti = p(1, mi)
+!      if(bannedOrb(puti, mi)) return
+!      h1 = h(1, ma)
+!      h2 = h(2, ma)
+!
+!      do i = 1, 3
+!        putj = p(i, ma)
+!        if(banned(putj,puti,bant)) cycle
+!        i1 = turn3(1,i)
+!        i2 = turn3(2,i)
+!        p1 = p(i1, ma)
+!        p2 = p(i2, ma)
+!
+!        hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
+!              - get_mo_bi_ortho_tc_two_e_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
+!        hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
+!              - get_mo_bi_ortho_tc_two_e_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
+!
+!        if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle
+!
+!        hij_p = hij_p * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+!        hij_m = hij_m * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+!
+!        if(ma == 1) then
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k = 1, N_states
+!            mat_p(k, putj, puti) = mat_p(k, putj, puti) + coefs(k) * hij_p
+!            mat_m(k, putj, puti) = mat_m(k, putj, puti) + coefs(k) * hij_m
+!          enddo
+!        else
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k = 1, N_states
+!            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!          enddo
+!        end if
+!      end do
+!
+!    else
+!
+!      h1 = h(1,1)
+!      h2 = h(1,2)
+!      do j = 1,2
+!        putj = p(j, 2)
+!        if(bannedOrb(putj, 2)) cycle
+!        p2 = p(turn2(j), 2)
+!        do i = 1,2
+!          puti = p(i, 1)
+!
+!          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
+!          p1 = p(turn2(i), 1)
+!
+!          hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   )
+!          hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map)
+!
+!          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!            hij_p = hij_p * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+!            hij_m = hij_m * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+!            !DIR$ LOOP COUNT AVG(4)
+!            do k = 1, N_states
+!              mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!              mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!            enddo
+!          endif
+!        end do
+!      end do
+!    end if
+!
+!  else
+!    if(tip == 0) then
+!      h1 = h(1, ma)
+!      h2 = h(2, ma)
+!      do i=1,3
+!        puti = p(i, ma)
+!        if(bannedOrb(puti,ma)) cycle
+!        do j=i+1,4
+!          putj = p(j, ma)
+!          if(bannedOrb(putj,ma)) cycle
+!          if(banned(puti,putj,1)) cycle
+!
+!          i1 = turn2d(1, i, j)
+!          i2 = turn2d(2, i, j)
+!          p1 = p(i1, ma)
+!          p2 = p(i2, ma)
+!
+!          hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
+!                - get_mo_bi_ortho_tc_two_e_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
+!          hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
+!                - get_mo_bi_ortho_tc_two_e_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
+!
+!          if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle
+!
+!          hij_p = hij_p * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+!
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!          enddo
+!        end do
+!      end do
+!
+!    else if(tip == 3) then
+!      h1 = h(1, mi)
+!      h2 = h(1, ma)
+!      p1 = p(1, mi)
+!      do i=1,3
+!        puti = p(turn3(1,i), ma)
+!        if(bannedOrb(puti,ma)) cycle
+!        putj = p(turn3(2,i), ma)
+!        if(bannedOrb(putj,ma)) cycle
+!        if(banned(puti,putj,1)) cycle
+!        p2 = p(i, ma)
+!
+!        hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   )
+!        hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map)
+!
+!        if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle
+!
+!        hij_p = hij_p * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+!        hij_m = hij_m * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+!        if (puti < putj) then
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!          enddo
+!        else
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k, putj, puti) = mat_p(k, putj, puti) + coefs(k) * hij_p
+!            mat_m(k, putj, puti) = mat_m(k, putj, puti) + coefs(k) * hij_m
+!          enddo
+!        endif
+!      end do
+!    else ! tip == 4
+!      puti = p(1, sp)
+!      putj = p(2, sp)
+!      if(.not. banned(puti,putj,1)) then
+!        p1 = p(1, mi)
+!        p2 = p(2, mi)
+!        h1 = h(1, mi)
+!        h2 = h(2, mi)
+!
+!        hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
+!              - get_mo_bi_ortho_tc_two_e_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
+!        hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
+!              - get_mo_bi_ortho_tc_two_e_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
+!
+!        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!          hij_p = hij_p * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!          enddo
+!        end if
+!      end if
+!    end if
+!  end if
+!
+!end subroutine get_pm2
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+!subroutine get_pm1(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
+!
+!  use bitmasks
+!
+!  implicit none
+!
+!  integer(bit_kind)               :: det(N_int, 2)
+!  integer(bit_kind), intent(in)   :: mask(N_int, 2), gen(N_int, 2)
+!  integer(bit_kind), intent(in)   :: phasemask(N_int,2)
+!  logical,           intent(in)   :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+!  double precision,  intent(in)   :: coefs(N_states)
+!  integer,           intent(in)   :: h(0:2,2), p(0:4,2), sp
+!  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
+!
+!  double precision, external      :: get_phase_bi
+!  double precision, external      :: get_mo_bi_ortho_tc_two_e_tc_int, get_mo_bi_ortho_tc_two_e_tcdag_int
+!
+!  logical                         :: ok
+!  logical, allocatable            :: lbanned(:,:)
+!  integer                         :: bant
+!  integer                         :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+!  integer                         :: hfix, pfix, h1, h2, p1, p2, ib, k, l
+!  double precision                :: tmp_row_ij_p (N_states, mo_num), tmp_row_ij_m (N_states, mo_num)
+!  double precision                :: hij_p, hij_m, tmp_row_ij2_p(N_states, mo_num), tmp_row_ij2_m(N_states, mo_num)
+!  double precision, allocatable   :: hijp_cache(:,:), hijm_cache(:,:)
+!
+!  integer, parameter              :: turn2(2) = (/2,1/)
+!  integer, parameter              :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+!
+!  PROVIDE mo_integrals_tc_int_map mo_integrals_tcdag_int_map
+!
+!  allocate( lbanned(mo_num, 2) )
+!  allocate( hijp_cache(mo_num,2), hijm_cache(mo_num,2) )
+!  lbanned = bannedOrb
+!
+!  do i=1, p(0,1)
+!    lbanned(p(i,1), 1) = .true.
+!  end do
+!  do i=1, p(0,2)
+!    lbanned(p(i,2), 2) = .true.
+!  end do
+!
+!  ma = 1
+!  if(p(0,2) >= 2) ma = 2
+!  mi = turn2(ma)
+!
+!  bant = 1
+!
+!  if(sp == 3) then
+!    !move MA
+!    if(ma == 2) bant = 2
+!    puti = p(1,mi)
+!    hfix = h(1,ma)
+!    p1 = p(1,ma)
+!    p2 = p(2,ma)
+!    if(.not. bannedOrb(puti, mi)) then
+!
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p1, p2, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p2, p1, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p1, p2, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p2, p1, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
+!
+!      tmp_row_ij_p = 0d0
+!      tmp_row_ij_m = 0d0
+!      do putj=1, hfix-1
+!        if(lbanned(putj, ma)) cycle
+!        if(banned(putj, puti,bant)) cycle
+!
+!        hij_p = hijp_cache(putj,1) - hijp_cache(putj,2)
+!        hij_m = hijm_cache(putj,1) - hijm_cache(putj,2)
+!
+!        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!          hij_p = hij_p * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            tmp_row_ij_p(k,putj) = tmp_row_ij_p(k,putj) + hij_p * coefs(k)
+!            tmp_row_ij_m(k,putj) = tmp_row_ij_m(k,putj) + hij_m * coefs(k)
+!          enddo
+!        endif
+!      end do
+!      do putj=hfix+1, mo_num
+!        if(lbanned(putj, ma)) cycle
+!        if(banned(putj, puti,bant)) cycle
+!
+!        hij_p = hijp_cache(putj,2) - hijp_cache(putj,1)
+!        hij_m = hijm_cache(putj,2) - hijm_cache(putj,1)
+!
+!        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!          hij_p = hij_p * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            tmp_row_ij_p(k,putj) = tmp_row_ij_p(k,putj) + hij_p * coefs(k)
+!            tmp_row_ij_m(k,putj) = tmp_row_ij_m(k,putj) + hij_m * coefs(k)
+!          enddo
+!        endif
+!      end do
+!
+!      if(ma == 1) then
+!        mat_p(1:N_states,1:mo_num,puti) = mat_p(1:N_states,1:mo_num,puti) + tmp_row_ij_p(1:N_states,1:mo_num)
+!        mat_m(1:N_states,1:mo_num,puti) = mat_m(1:N_states,1:mo_num,puti) + tmp_row_ij_m(1:N_states,1:mo_num)
+!      else
+!        do l=1,mo_num
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k,puti,l) = mat_p(k,puti,l) + tmp_row_ij_p(k,l)
+!            mat_m(k,puti,l) = mat_m(k,puti,l) + tmp_row_ij_m(k,l)
+!          enddo
+!        enddo
+!      end if
+!    end if
+!
+!    !MOVE MI
+!    pfix = p(1,mi)
+!    tmp_row_ij_p  = 0d0
+!    tmp_row_ij_m  = 0d0
+!    tmp_row_ij2_p = 0d0
+!    tmp_row_ij2_m = 0d0
+!
+!    call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, pfix, p1, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!    call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, pfix, p2, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!    call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, pfix, p1, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
+!    call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, pfix, p2, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
+!
+!    putj = p1
+!    do puti=1,mo_num !HOT
+!      if(lbanned(puti,mi)) cycle
+!      !p1 fixed
+!      putj = p1
+!      if(.not. banned(putj,puti,bant)) then
+!
+!        hij_p = hijp_cache(puti,2)
+!        hij_m = hijm_cache(puti,2)
+!
+!        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!          hij_p = hij_p * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            tmp_row_ij_p(k,puti) = tmp_row_ij_p(k,puti) + hij_p * coefs(k)
+!            tmp_row_ij_m(k,puti) = tmp_row_ij_m(k,puti) + hij_m * coefs(k)
+!          enddo
+!        endif
+!      end if
+!
+!      putj = p2
+!      if(.not. banned(putj,puti,bant)) then
+!
+!        hij_p = hijp_cache(puti,1)
+!        hij_m = hijm_cache(puti,1)
+!
+!        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!          hij_p = hij_p * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+!          do k=1,N_states
+!            tmp_row_ij2_p(k,puti) = tmp_row_ij2_p(k,puti) + hij_p * coefs(k)
+!            tmp_row_ij2_m(k,puti) = tmp_row_ij2_m(k,puti) + hij_m * coefs(k)
+!          enddo
+!        endif
+!      end if
+!    end do
+!
+!    if(mi == 1) then
+!      mat_p(:,:,p1) = mat_p(:,:,p1) + tmp_row_ij_p (:,:)
+!      mat_p(:,:,p2) = mat_p(:,:,p2) + tmp_row_ij2_p(:,:)
+!      mat_m(:,:,p1) = mat_m(:,:,p1) + tmp_row_ij_m (:,:)
+!      mat_m(:,:,p2) = mat_m(:,:,p2) + tmp_row_ij2_m(:,:)
+!    else
+!      do l=1,mo_num
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_ij_p (k,l)
+!          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_ij2_p(k,l)
+!          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_ij_m (k,l)
+!          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_ij2_m(k,l)
+!        enddo
+!      enddo
+!    end if
+!
+!  else  ! sp /= 3
+!
+!    if(p(0,ma) == 3) then
+!      do i=1,3
+!        hfix = h(1,ma)
+!        puti = p(i, ma)
+!        p1 = p(turn3(1,i), ma)
+!        p2 = p(turn3(2,i), ma)
+!
+!        call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p1, p2, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!        call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p2, p1, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!        call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p1, p2, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
+!        call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p2, p1, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
+!
+!        tmp_row_ij_p = 0d0
+!        tmp_row_ij_m = 0d0
+!        do putj=1,hfix-1
+!          if(banned(putj,puti,1)) cycle
+!          if(lbanned(putj,ma)) cycle
+!
+!          hij_p = hijp_cache(putj,1) - hijp_cache(putj,2)
+!          hij_m = hijm_cache(putj,1) - hijm_cache(putj,2)
+!
+!          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!            hij_p = hij_p * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+!            hij_m = hij_m * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+!            tmp_row_ij_p(:,putj) = tmp_row_ij_p(:,putj) + hij_p * coefs(:)
+!            tmp_row_ij_m(:,putj) = tmp_row_ij_m(:,putj) + hij_m * coefs(:)
+!          endif
+!        end do
+!        do putj=hfix+1,mo_num
+!          if(banned(putj,puti,1)) cycle
+!          if(lbanned(putj,ma)) cycle
+!
+!          hij_p = hijp_cache(putj,2) - hijp_cache(putj,1)
+!          hij_m = hijm_cache(putj,2) - hijm_cache(putj,1)
+!
+!          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!            hij_p = hij_p * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+!            hij_m = hij_m * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+!            tmp_row_ij_p(:,putj) = tmp_row_ij_p(:,putj) + hij_p * coefs(:)
+!            tmp_row_ij_m(:,putj) = tmp_row_ij_m(:,putj) + hij_m * coefs(:)
+!          endif
+!        end do
+!
+!        mat_p(:, :puti-1, puti) = mat_p(:, :puti-1, puti) + tmp_row_ij_p(:,:puti-1)
+!        mat_m(:, :puti-1, puti) = mat_m(:, :puti-1, puti) + tmp_row_ij_m(:,:puti-1)
+!        do l=puti,mo_num
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k, puti, l) = mat_p(k, puti,l) + tmp_row_ij_p(k,l)
+!            mat_m(k, puti, l) = mat_m(k, puti,l) + tmp_row_ij_m(k,l)
+!          enddo
+!        enddo
+!      end do
+!    else
+!      hfix = h(1,mi)
+!      pfix = p(1,mi)
+!      p1 = p(1,ma)
+!      p2 = p(2,ma)
+!      tmp_row_ij_p  = 0d0
+!      tmp_row_ij_m  = 0d0
+!      tmp_row_ij2_p = 0d0
+!      tmp_row_ij2_m = 0d0
+!
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p1, pfix, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p2, pfix, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p1, pfix, mo_num, hijp_cache(1,1), mo_integrals_tcdag_int_map)
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p2, pfix, mo_num, hijp_cache(1,2), mo_integrals_tcdag_int_map)
+!
+!      putj = p2
+!      do puti=1,mo_num
+!        if(lbanned(puti,ma)) cycle
+!        putj = p2
+!        if(.not. banned(puti,putj,1)) then
+!
+!          hij_p = hijp_cache(puti,1)
+!          hij_m = hijm_cache(puti,1)
+!
+!          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!            hij_p = hij_p * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+!            hij_m = hij_m * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+!            !DIR$ LOOP COUNT AVG(4)
+!            do k=1,N_states
+!              tmp_row_ij_p(k,puti) = tmp_row_ij_p(k,puti) + hij_p * coefs(k)
+!              tmp_row_ij_m(k,puti) = tmp_row_ij_m(k,puti) + hij_m * coefs(k)
+!            enddo
+!          endif
+!        end if
+!
+!        putj = p1
+!        if(.not. banned(puti,putj,1)) then
+!          hij_p = hijp_cache(puti,2)
+!          hij_m = hijm_cache(puti,2)
+!          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!            hij_p = hij_p * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+!            hij_m = hij_m * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+!            do k=1,N_states
+!              tmp_row_ij2_p(k,puti) = tmp_row_ij2_p(k,puti) + hij_p * coefs(k)
+!              tmp_row_ij2_m(k,puti) = tmp_row_ij2_m(k,puti) + hij_m * coefs(k)
+!            enddo
+!          endif
+!        end if
+!      end do
+!      mat_p(:,:p2-1,p2) = mat_p(:,:p2-1,p2) + tmp_row_ij_p(:,:p2-1)
+!      mat_m(:,:p2-1,p2) = mat_m(:,:p2-1,p2) + tmp_row_ij_m(:,:p2-1)
+!      do l=p2,mo_num
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_ij_p(k,l)
+!          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_ij_m(k,l)
+!        enddo
+!      enddo
+!      mat_p(:,:p1-1,p1) = mat_p(:,:p1-1,p1) + tmp_row_ij2_p(:,:p1-1)
+!      mat_m(:,:p1-1,p1) = mat_m(:,:p1-1,p1) + tmp_row_ij2_m(:,:p1-1)
+!      do l=p1,mo_num
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_ij2_p(k,l)
+!          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_ij2_m(k,l)
+!        enddo
+!      enddo
+!    end if
+!  end if
+!  deallocate(lbanned,hijp_cache, hijm_cache)
+!
+! !! MONO
+!  if(sp == 3) then
+!    s1 = 1
+!    s2 = 2
+!  else
+!    s1 = sp
+!    s2 = sp
+!  end if
+!
+!  do i1 = 1, p(0,s1)
+!    ib = 1
+!    if(s1 == s2) ib = i1+1
+!    do i2 = ib, p(0,s2)
+!      p1 = p(i1,s1)
+!      p2 = p(i2,s2)
+!      if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+!      call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+!
+!      call htilde_mu_mat_tot   (gen, det, N_int, hij_p)
+!      call htildedag_mu_mat_tot(gen, det, N_int, hij_m)
+!
+!      !DIR$ LOOP COUNT AVG(4)
+!      do k = 1, N_states
+!        mat_p(k, p1, p2) = mat_p(k, p1, p2) + coefs(k) * hij_p
+!        mat_m(k, p1, p2) = mat_m(k, p1, p2) + coefs(k) * hij_m
+!      enddo
+!    enddo
+!  enddo
+!
+!end subroutine get_pm1
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+
+
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+!subroutine get_pm0(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
+!
+!  use bitmasks
+!  implicit none
+!
+!  integer(bit_kind)               :: det(N_int, 2)
+!  integer(bit_kind), intent(in)   :: gen(N_int, 2), mask(N_int, 2)
+!  integer(bit_kind), intent(in)   :: phasemask(N_int,2)
+!  integer,           intent(in)   :: h(0:2,2), p(0:4,2), sp
+!  logical,           intent(in)   :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+!  double precision,  intent(in)   :: coefs(N_states)
+!  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
+!
+!  double precision, external      :: get_phase_bi
+!  double precision, external      :: get_mo_bi_ortho_tc_two_e_tc_int, get_mo_bi_ortho_tc_two_e_tcdag_int 
+!  integer, parameter              :: bant=1
+!  integer                         :: i, j, k, s, h1, h2, p1, p2, puti, putj
+!  logical                         :: ok
+!  double precision                :: hij_p, hij_m, phase
+!  double precision, allocatable   :: hijp_cache1(:), hijp_cache2(:), hijm_cache1(:), hijm_cache2(:)
+!
+!  PROVIDE mo_integrals_tc_int_map mo_integrals_tcdag_int_map
+!
+!  allocate( hijp_cache1(mo_num) , hijp_cache2(mo_num) )
+!  allocate( hijm_cache1(mo_num) , hijm_cache2(mo_num) )
+!
+!  if(sp == 3) then ! AB
+!    h1 = p(1,1)
+!    h2 = p(1,2)
+!    do p1=1, mo_num
+!      if(bannedOrb(p1, 1)) cycle
+!
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (p1, h2, h1, mo_num, hijp_cache1, mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(p1, h2, h1, mo_num, hijm_cache1, mo_integrals_tcdag_int_map)
+!
+!      do p2 = 1, mo_num
+!        if(bannedOrb(p2,2)) cycle
+!        if(banned(p1, p2, bant)) cycle ! rentable?
+!        if(p1 == h1 .or. p2 == h2) then
+!          call apply_particles(mask, 1, p1, 2, p2, det, ok, N_int)
+!          call htilde_mu_mat_tot   (gen, det, N_int, hij_p)
+!          call htildedag_mu_mat_tot(gen, det, N_int, hij_m)
+!        else
+!          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+!          hij_p = hijp_cache1(p2) * phase
+!          hij_m = hijm_cache1(p2) * phase
+!        end if
+!        if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k = 1, N_states
+!          mat_p(k, p1, p2) = mat_p(k, p1, p2) + coefs(k) * hij_p  ! HOTSPOT
+!          mat_m(k, p1, p2) = mat_m(k, p1, p2) + coefs(k) * hij_m  ! HOTSPOT
+!        enddo
+!      end do
+!    end do
+!
+!  else ! AA BB
+!    p1 = p(1,sp)
+!    p2 = p(2,sp)
+!    do puti=1, mo_num
+!      if(bannedOrb(puti, sp)) cycle
+!
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (puti, p2, p1, mo_num, hijp_cache1, mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (puti, p1, p2, mo_num, hijp_cache2, mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(puti, p2, p1, mo_num, hijm_cache1, mo_integrals_tcdag_int_map)
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(puti, p1, p2, mo_num, hijm_cache2, mo_integrals_tcdag_int_map)
+!
+!      do putj=puti+1, mo_num
+!        if(bannedOrb(putj, sp)) cycle
+!        if(banned(puti, putj, bant)) cycle ! rentable?
+!        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+!          call apply_particles(mask, sp, puti, sp, putj, det, ok, N_int)
+!          call htilde_mu_mat_tot   (gen, det, N_int, hij_p)
+!          call htildedag_mu_mat_tot(gen, det, N_int, hij_m)
+!          if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle
+!        else
+!
+!          hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, puti, putj, mo_integrals_tc_int_map   ) &
+!                - get_mo_bi_ortho_tc_two_e_tc_int   (p2, p1, puti, putj, mo_integrals_tc_int_map   )
+!          hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, puti, putj, mo_integrals_tcdag_int_map) &
+!                - get_mo_bi_ortho_tc_two_e_tcdag_int(p2, p1, puti, putj, mo_integrals_tcdag_int_map)
+!
+!          if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle
+!
+!          hij_p = hij_p * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+!
+!        end if
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!          mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!        enddo
+!      end do
+!    end do
+!  end if
+!
+!  deallocate( hijp_cache1 , hijp_cache2 )
+!  deallocate( hijm_cache1 , hijm_cache2 )
+!
+!end subroutine get_pm0
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+
+! OLD unoptimized routines for debugging
+! ======================================
+
+subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind) :: det(N_int, 2)
+  double precision, intent(in) :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  integer :: i, j, s, h1, h2, p1, p2, puti, putj
+  double precision :: hij, phase
+  double precision, external :: get_phase_bi
+  logical :: ok
+
+  integer :: bant
+  bant = 1
+
+
+  if(sp == 3) then ! AB
+    h1 = p(1,1)
+    h2 = p(1,2)
+    do p1=1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+      do p2=1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, bant)) cycle ! rentable?
+        if(p1 == h1 .or. p2 == h2) then
+          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
+          call i_h_j(gen, det, N_int, hij)
+        else
+          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) * phase
+        end if
+        mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij
+      end do
+    end do
+  else ! AA BB
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti=1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+      do putj=puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, bant)) cycle ! rentable?
+        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
+          call i_h_j(gen, det, N_int, hij)
+        else
+          hij = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) -  mo_bi_ortho_tc_two_e(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+        end if
+        mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
+      end do
+    end do
+  end if
+
+end subroutine get_d0_reference
+
+! ---
+
+subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in)  :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision, intent(in)   :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision               :: hij, tmp_row_ij(N_states, mo_num), tmp_row_ij2(N_states, mo_num), hji
+  double precision, external     :: get_phase_bi
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib
+
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer                        :: bant
+
+
+  allocate (lbanned(mo_num, 2))
+  lbanned = bannedOrb
+
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+
+  bant = 1
+
+  if(sp == 3) then
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+    if(.not. bannedOrb(puti, mi)) then
+      tmp_row_ij = 0d0
+      do putj=1, hfix-1
+        if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, putj, hfix)-mo_bi_ortho_tc_two_e(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+        tmp_row_ij(1:N_states,putj) = tmp_row_ij(1:N_states,putj) + hij * coefs(1:N_states)
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, hfix, putj)-mo_bi_ortho_tc_two_e(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+        tmp_row_ij(1:N_states,putj) = tmp_row_ij(1:N_states,putj) + hij * coefs(1:N_states)
+      end do
+
+      if(ma == 1) then
+        mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row_ij(1:N_states,1:mo_num)
+      else
+        mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row_ij(1:N_states,1:mo_num)
+      end if
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)
+    tmp_row_ij = 0d0
+    tmp_row_ij2 = 0d0
+    do puti=1,mo_num
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = mo_bi_ortho_tc_two_e(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+        tmp_row_ij(:,puti) = tmp_row_ij(:,puti) + hij * coefs(:)
+      end if
+
+      putj = p2
+      if(.not. banned(putj,puti,bant)) then
+        hij = mo_bi_ortho_tc_two_e(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+        tmp_row_ij2(:,puti) = tmp_row_ij2(:,puti) + hij * coefs(:)
+      end if
+    end do
+
+    if(mi == 1) then
+      mat(:,:,p1) = mat(:,:,p1) + tmp_row_ij(:,:)
+      mat(:,:,p2) = mat(:,:,p2) + tmp_row_ij2(:,:)
+    else
+      mat(:,p1,:) = mat(:,p1,:) + tmp_row_ij(:,:)
+      mat(:,p2,:) = mat(:,p2,:) + tmp_row_ij2(:,:)
+    end if
+  else
+    if(p(0,ma) == 3) then
+      do i=1,3
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+        tmp_row_ij = 0d0
+        do putj=1,hfix-1
+          if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle
+          hij = (mo_bi_ortho_tc_two_e(p1, p2, putj, hfix)-mo_bi_ortho_tc_two_e(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          tmp_row_ij(:,putj) = tmp_row_ij(:,putj) + hij * coefs(:)
+        end do
+        do putj=hfix+1,mo_num
+          if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle
+          hij = (mo_bi_ortho_tc_two_e(p1, p2, hfix, putj)-mo_bi_ortho_tc_two_e(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          tmp_row_ij(:,putj) = tmp_row_ij(:,putj) + hij * coefs(:)
+        end do
+
+        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row_ij(:,:puti-1)
+        mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row_ij(:,puti:)
+      end do
+    else
+      hfix = h(1,mi)
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+      tmp_row_ij = 0d0
+      tmp_row_ij2 = 0d0
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = mo_bi_ortho_tc_two_e(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+          tmp_row_ij(:,puti) = tmp_row_ij(:,puti) + hij * coefs(:)
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = mo_bi_ortho_tc_two_e(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+          tmp_row_ij2(:,puti) = tmp_row_ij2(:,puti) + hij * coefs(:)
+        end if
+      end do
+      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row_ij(:,:p2-1)
+      mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row_ij(:,p2:)
+      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row_ij2(:,:p1-1)
+      mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row_ij2(:,p1:)
+    end if
+  end if
+  deallocate(lbanned)
+
+ !! MONO
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1=1,p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1
+      do i2=ib,p(0,s2)
+        p1 = p(i1,s1)
+        p2 = p(i2,s2)
+        if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+        call i_h_j(gen, det, N_int, hij)
+        mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij
+      end do
+    end do
+
+end subroutine get_d1_reference
+
+! ---
+
+subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(2,N_int)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  double precision, intent(in) :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  double precision, external :: get_phase_bi
+
+  integer :: i, j, tip, ma, mi, puti, putj
+  integer :: h1, h2, p1, p2, i1, i2, mm
+  double precision :: hij, phase, hji
+
+  integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
+  integer, parameter :: turn2(2) = (/2, 1/)
+  integer, parameter :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer :: bant
+  bant = 1
+
+  tip = p(0,1) * p(0,2)
+
+  ma = sp
+  if(p(0,1) > p(0,2)) ma = 1
+  if(p(0,1) < p(0,2)) ma = 2
+  mi = mod(ma, 2) + 1
+
+  if(sp == 3) then
+    if(ma == 2) bant = 2
+
+    if(tip == 3) then
+      puti = p(1, mi)
+      do i = 1, 3
+        putj = p(i, ma)
+        if(banned(putj,puti,bant)) cycle
+        i1 = turn3(1,i)
+        i2 = turn3(2,i)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        h1 = h(1, ma)
+        h2 = h(2, ma)
+
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+        if(ma == 1) then
+          mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij
+        else
+          mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
+        end if
+      end do
+    else
+      h1 = h(1,1)
+      h2 = h(1,2)
+      do j = 1,2
+        putj = p(j, 2)
+        p2 = p(turn2(j), 2)
+        do i = 1,2
+          puti = p(i, 1)
+
+          if(banned(puti,putj,bant)) cycle
+          p1 = p(turn2(i), 1)
+
+          hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int)
+          mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
+        end do
+      end do
+    end if
+
+  else
+    if(tip == 0) then
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+      do i=1,3
+      puti = p(i, ma)
+      do j=i+1,4
+        putj = p(j, ma)
+        if(banned(puti,putj,1)) cycle
+
+        i1 = turn2d(1, i, j)
+        i2 = turn2d(2, i, j)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int)
+        mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
+      end do
+      end do
+    else if(tip == 3) then
+      h1 = h(1, mi)
+      h2 = h(1, ma)
+      p1 = p(1, mi)
+      do i=1,3
+        puti = p(turn3(1,i), ma)
+        putj = p(turn3(2,i), ma)
+        if(banned(puti,putj,1)) cycle
+        p2 = p(i, ma)
+
+        hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int)
+        mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij
+      end do
+    else ! tip == 4
+      puti = p(1, sp)
+      putj = p(2, sp)
+      if(.not. banned(puti,putj,1)) then
+        p1 = p(1, mi)
+        p2 = p(2, mi)
+        h1 = h(1, mi)
+        h2 = h(2, mi)
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int)
+        mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
+      end if
+    end if
+  end if
+
+end subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+! ---
+
diff --git a/src/cipsi_tc_bi_ortho/get_d0_good.irp.f b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f
new file mode 100644
index 00000000..4270e7b8
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f
@@ -0,0 +1,139 @@
+subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs)
+  !todo: indices/conjg should be okay for complex
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind) :: det(N_int, 2)
+  double precision, intent(in) :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num)
+  double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  integer :: i, j, k, s, h1, h2, p1, p2, puti, putj, mm
+  double precision :: phase
+  double precision :: hij,hji
+  double precision, external :: get_phase_bi
+  logical :: ok
+
+  integer, parameter :: bant=1
+  double precision, allocatable :: hij_cache1(:), hij_cache2(:)
+  allocate (hij_cache1(mo_num),hij_cache2(mo_num))
+  double precision, allocatable :: hji_cache1(:), hji_cache2(:)
+  allocate (hji_cache1(mo_num),hji_cache2(mo_num))
+!  print*,'in get_d0_new'
+!  call debug_det(gen,N_int)
+!  print*,'coefs',coefs(1,:)
+
+  if(sp == 3) then ! AB
+    h1 = p(1,1)
+    h2 = p(1,2)
+    do p1=1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+!      call get_mo_two_e_integrals_complex(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map)
+      do mm = 1, mo_num
+       hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,p1,h2,h1)
+       hji_cache1(mm) = mo_bi_ortho_tc_two_e(h2,h1,mm,p1)
+      enddo
+      !!!!!!!!!! <alpha|H|psi>
+      do p2=1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, bant)) cycle ! rentable?
+        if(p1 == h1 .or. p2 == h2) then
+          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
+          ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this
+!          call i_h_j_complex(det, gen, N_int, hij)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij)
+        else
+          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          hij = hij_cache1(p2) * phase
+        end if
+        if (hij == (0.d0,0.d0)) cycle
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij  ! HOTSPOT
+        enddo
+      end do
+      !!!!!!!!!! <phi|H|alpha>
+      do p2=1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, bant)) cycle ! rentable?
+        if(p1 == h1 .or. p2 == h2) then
+          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
+          ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this
+!          call i_h_j_complex(det, gen, N_int, hij)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji)
+        else
+          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          hji = hji_cache1(p2) * phase
+        end if
+        if (hji == (0.d0,0.d0)) cycle
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji  ! HOTSPOT
+        enddo
+      end do
+    end do
+
+  else ! AA BB
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti=1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+!      call get_mo_two_e_integrals_complex(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map,mo_integrals_map_2)
+!      call get_mo_two_e_integrals_complex(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map,mo_integrals_map_2)
+      do mm = 1, mo_num
+       hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,puti,p2,p1)
+       hij_cache2(mm) = mo_bi_ortho_tc_two_e(mm,puti,p1,p2)
+       hji_cache1(mm) = mo_bi_ortho_tc_two_e(p2,p1,mm,puti)
+       hji_cache2(mm) = mo_bi_ortho_tc_two_e(p1,p2,mm,puti)
+      enddo
+      !!!!!!!!!! <alpha|H|psi>
+      do putj=puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, bant)) cycle ! rentable?
+        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
+          !call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this
+!          call i_h_j_complex(det, gen, N_int, hij)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij)
+          if (hij == 0.d0) cycle
+        else
+!          hij = (mo_two_e_integral_complex(p1, p2, puti, putj) -  mo_two_e_integral_complex(p2, p1, puti, putj))
+!          hij = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) -  mo_bi_ortho_tc_two_e(p2, p1, puti, putj))
+          hij = (mo_bi_ortho_tc_two_e(puti, putj, p1, p2) -  mo_bi_ortho_tc_two_e(puti, putj, p2, p1))
+          if (hij == 0.d0) cycle
+          hij = (hij) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+        end if
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij
+        enddo
+      end do
+
+      !!!!!!!!!! <phi|H|alpha>
+      do putj=puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, bant)) cycle ! rentable?
+        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji)
+          if (hji == 0.d0) cycle
+        else
+          hji = (mo_bi_ortho_tc_two_e( p1, p2, puti, putj) -  mo_bi_ortho_tc_two_e( p2, p1, puti, putj))
+          if (hji == 0.d0) cycle
+          hji = (hji) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+        end if
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji
+        enddo
+      end do
+    end do
+  end if
+
+  deallocate(hij_cache1,hij_cache2)
+end
+
diff --git a/src/cipsi_tc_bi_ortho/get_d1_good.irp.f b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f
new file mode 100644
index 00000000..bc19e7e4
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f
@@ -0,0 +1,454 @@
+subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs)
+  !todo: indices should be okay for complex?
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in)  :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision, intent(in)   :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num)
+  double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision, external     :: get_phase_bi
+  double precision, external     :: mo_two_e_integral_complex
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib, k, l, mm
+
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer                        :: bant
+  double precision, allocatable :: hij_cache(:,:)
+  double precision               :: hij, tmp_rowij(N_states, mo_num), tmp_rowij2(N_states, mo_num)
+  double precision, allocatable :: hji_cache(:,:)
+  double precision               :: hji, tmp_rowji(N_states, mo_num), tmp_rowji2(N_states, mo_num)
+!  PROVIDE mo_integrals_map N_int
+!  print*,'in get_d1_new'
+!  call debug_det(gen,N_int)
+!  print*,'coefs',coefs(1,:)
+
+  allocate (lbanned(mo_num, 2))
+  allocate (hij_cache(mo_num,2))
+  allocate (hji_cache(mo_num,2))
+  lbanned = bannedOrb
+
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+
+  bant = 1
+
+  if(sp == 3) then
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+    if(.not. bannedOrb(puti, mi)) then
+!      call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+!      call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      do mm = 1, mo_num
+       hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2)
+       hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1)
+       hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,p2,mm,hfix)
+       hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,p1,mm,hfix)
+      enddo
+      !! <alpha|H|psi>
+      tmp_rowij = 0.d0
+      do putj=1, hfix-1
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,1) - hij_cache(putj,2)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,2) - hij_cache(putj,1)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1)
+          enddo
+        endif
+      end do
+
+      if(ma == 1) then
+        mat_r(1:N_states,1:mo_num,puti) = mat_r(1:N_states,1:mo_num,puti) + tmp_rowij(1:N_states,1:mo_num)
+      else
+        do l=1,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k,puti,l) = mat_r(k,puti,l) + tmp_rowij(k,l)
+          enddo
+        enddo
+      end if
+
+      !! <phi|H|alpha>
+      tmp_rowji = 0.d0
+      do putj=1, hfix-1
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hji = hji_cache(putj,1) - hji_cache(putj,2)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hji = hji_cache(putj,2) - hji_cache(putj,1)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2)
+          enddo
+        endif
+      end do
+
+      if(ma == 1) then
+        mat_l(1:N_states,1:mo_num,puti) = mat_l(1:N_states,1:mo_num,puti) + tmp_rowji(1:N_states,1:mo_num)
+      else
+        do l=1,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k,puti,l) = mat_l(k,puti,l) + tmp_rowji(k,l)
+          enddo
+        enddo
+      end if
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)
+    tmp_rowij  = 0.d0
+    tmp_rowij2 = 0.d0
+    tmp_rowji  = 0.d0
+    tmp_rowji2 = 0.d0
+!    call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+!    call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+    do mm = 1, mo_num
+     hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1)
+     hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2)
+     hji_cache(mm,1) = mo_bi_ortho_tc_two_e(pfix,p1,mm,hfix)
+     hji_cache(mm,2) = mo_bi_ortho_tc_two_e(pfix,p2,mm,hfix)
+    enddo
+    putj = p1
+    !! <alpha|H|psi>
+    do puti=1,mo_num !HOT
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,2)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1)
+          enddo
+        endif
+      end if
+!      
+      putj = p2
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,1)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1)
+          enddo
+        endif
+      end if
+    end do
+
+    if(mi == 1) then
+      mat_r(:,:,p1) = mat_r(:,:,p1) + tmp_rowij(:,:)
+      mat_r(:,:,p2) = mat_r(:,:,p2) + tmp_rowij2(:,:)
+    else
+      do l=1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij(k,l)
+          mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij2(k,l)
+        enddo
+      enddo
+    end if
+
+    putj = p1
+    !! <phi|H|alpha>
+    do puti=1,mo_num !HOT
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hji = hji_cache(puti,2)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2)
+          enddo
+        endif
+      end if
+!      
+      putj = p2
+      if(.not. banned(putj,puti,bant)) then
+        hji = hji_cache(puti,1)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2)
+          enddo
+        endif
+      end if
+    end do
+
+    if(mi == 1) then
+      mat_l(:,:,p1) = mat_l(:,:,p1) + tmp_rowji(:,:)
+      mat_l(:,:,p2) = mat_l(:,:,p2) + tmp_rowji2(:,:)
+    else
+      do l=1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji(k,l)
+          mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji2(k,l)
+        enddo
+      enddo
+    end if
+
+  else  ! sp /= 3
+
+    if(p(0,ma) == 3) then
+      do i=1,3
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+!        call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+!        call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+        do mm = 1, mo_num
+         hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2)
+         hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1)
+         hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,p2,mm,hfix)
+         hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,p1,mm,hfix)
+        enddo
+    !! <alpha|H|psi>
+        tmp_rowij = 0.d0
+        do putj=1,hfix-1
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,1) - hij_cache(putj,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1)
+          endif
+        end do
+        do putj=hfix+1,mo_num
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,2) - hij_cache(putj,1)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1)
+          endif
+        end do
+
+        mat_r(:, :puti-1, puti) = mat_r(:, :puti-1, puti) + tmp_rowij(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, puti, l) = mat_r(k, puti,l) + tmp_rowij(k,l)
+          enddo
+        enddo
+    !! <phi|H|alpha>
+        tmp_rowji = 0.d0
+        do putj=1,hfix-1
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hji = hji_cache(putj,1) - hji_cache(putj,2)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2)
+          endif
+        end do
+        do putj=hfix+1,mo_num
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hji = hji_cache(putj,2) - hji_cache(putj,1)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2)
+          endif
+        end do
+
+        mat_l(:, :puti-1, puti) = mat_l(:, :puti-1, puti) + tmp_rowji(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, l) = mat_l(k, puti,l) + tmp_rowji(k,l)
+          enddo
+        enddo
+      end do
+    else
+      hfix = h(1,mi)
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+      tmp_rowij =  0.d0
+      tmp_rowij2 = 0.d0
+      tmp_rowji =  0.d0
+      tmp_rowji2 = 0.d0
+!      call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+!      call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      do mm = 1, mo_num
+       hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,pfix)
+       hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,pfix)
+       hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,pfix,mm,hfix)
+       hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,pfix,mm,hfix)
+      enddo
+      putj = p2
+    !! <alpha|H|psi>
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,1)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1)
+            enddo
+          endif
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1)
+            enddo
+          endif
+        end if
+      end do
+      mat_r(:,:p2-1,p2) = mat_r(:,:p2-1,p2) + tmp_rowij(:,:p2-1)
+      do l=p2,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij(k,l)
+        enddo
+      enddo
+      mat_r(:,:p1-1,p1) = mat_r(:,:p1-1,p1) + tmp_rowij2(:,:p1-1)
+      do l=p1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij2(k,l)
+        enddo
+      enddo
+
+
+    !! <phi|H|alpha>
+      putj = p2
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hji = hji_cache(puti,1)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2)
+            enddo
+          endif
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hji = hji_cache(puti,2)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2)
+            enddo
+          endif
+        end if
+      end do
+      mat_l(:,:p2-1,p2) = mat_l(:,:p2-1,p2) + tmp_rowji(:,:p2-1)
+      do l=p2,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji(k,l)
+        enddo
+      enddo
+      mat_l(:,:p1-1,p1) = mat_l(:,:p1-1,p1) + tmp_rowji2(:,:p1-1)
+      do l=p1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji2(k,l)
+        enddo
+      enddo
+    end if
+  end if
+  deallocate(lbanned,hij_cache, hji_cache)
+
+ !! MONO
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1=1,p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1
+      do i2=ib,p(0,s2)
+        p1 = p(i1,s1)
+        p2 = p(i2,s2)
+        if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+        ! gen is a selector; mask is ionized generator; det is alpha
+        ! hij is contribution to <psi|H|alpha>
+!        call i_h_j_complex(gen, det, N_int, hij)
+        call htilde_mu_mat_opt_bi_ortho_no_3e(det, gen, N_int, hij)
+        call htilde_mu_mat_opt_bi_ortho_no_3e(gen, det, N_int, hji)
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!          mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * dconjg(hij)
+          mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij
+          mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji
+        enddo
+      end do
+    end do
+end
+
diff --git a/src/cipsi_tc_bi_ortho/get_d2_good.irp.f b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f
new file mode 100644
index 00000000..0a08c808
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f
@@ -0,0 +1,308 @@
+
+subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs)
+  !todo: indices/conjg should be correct for complex
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  double precision, intent(in) :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num)
+  double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  double precision, external :: get_phase_bi
+
+  integer :: i, j, k, tip, ma, mi, puti, putj
+  integer :: h1, h2, p1, p2, i1, i2
+  double precision :: phase
+  double precision :: hij,hji
+
+  integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
+  integer, parameter :: turn2(2) = (/2, 1/)
+  integer, parameter :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer :: bant
+  bant = 1
+!  print*, 'in get_d2_new'
+!  call debug_det(gen,N_int)
+!  print*,'coefs',coefs(1,:)
+
+  tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles
+
+  ma = sp !1:(alpha,alpha); 2:(b,b); 3:(a,b)
+  if(p(0,1) > p(0,2)) ma = 1 ! more alpha particles than beta particles
+  if(p(0,1) < p(0,2)) ma = 2 ! fewer alpha particles than beta particles
+  mi = mod(ma, 2) + 1
+
+  if(sp == 3) then ! if one alpha and one beta xhole 
+    !(where xholes refer to the ionizations from the generator, not the holes occupied in the ionized generator)
+    if(ma == 2) bant = 2 ! if more beta particles than alpha particles
+
+    if(tip == 3) then ! if 3 of one particle spin and 1 of the other particle spin
+      puti = p(1, mi)
+      if(bannedOrb(puti, mi)) return
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+
+      !! <alpha|H|psi>
+      do i = 1, 3    ! loop over all 3 combinations of 2 particles with spin ma
+        putj = p(i, ma)
+        if(banned(putj,puti,bant)) cycle
+        i1 = turn3(1,i)
+        i2 = turn3(2,i)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        
+     ! |G> = |psi_{gen,i}>
+     ! |G'> = a_{x1} a_{x2} |G>
+     ! |alpha> = a_{puti}^{\dagger} a_{putj}^{\dagger} |G'>
+     ! |alpha> = t_{x1,x2}^{puti,putj} |G>
+     ! hij = <psi_{selectors,i}|H|alpha>
+     ! |alpha> = t_{p1,p2}^{h1,h2}|psi_{selectors,i}>
+        !todo: <i|H|j>  =  (<h1,h2|p1,p2> - <h1,h2|p2,p1>) * phase
+        !    <psi|H|j> +=  dconjg(c_i) * <i|H|j>
+        !      <j|H|i>  =  (<p1,p2|h1,h2> - <p2,p1|h1,h2>) * phase
+        !    <j|H|psi> +=  <j|H|i> * c_i
+!        hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2, p1, h1, h2)
+
+!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! 
+        ! take the transpose of what's written above because later use the complex conjugate 
+        hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e( h1, h2, p2, p1)
+        if (hij == 0.d0) cycle
+
+        ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!        hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+        hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+
+        if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij
+          enddo
+        else            ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij
+          enddo
+        end if
+      end do
+      !! <phi|H|alpha>
+      do i = 1, 3    ! loop over all 3 combinations of 2 particles with spin ma
+        putj = p(i, ma)
+        if(banned(putj,puti,bant)) cycle
+        i1 = turn3(1,i)
+        i2 = turn3(2,i)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        hji = mo_bi_ortho_tc_two_e(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e( p2, p1, h1, h2)
+        if (hji == 0.d0) cycle
+        hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+
+        if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji
+          enddo
+        else            ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji
+          enddo
+        end if
+      end do
+    else ! if 2 alpha and 2 beta particles
+      h1 = h(1,1)
+      h2 = h(1,2)
+      !! <alpha|H|psi>
+      do j = 1,2 ! loop over all 4 combinations of one alpha and one beta particle
+        putj = p(j, 2)
+        if(bannedOrb(putj, 2)) cycle
+        p2 = p(turn2(j), 2)
+        do i = 1,2
+          puti = p(i, 1)
+          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
+          p1 = p(turn2(i), 1)
+    ! hij = <psi_{selectors,i}|H|alpha> 
+!          hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2)
+!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! 
+        ! take the transpose of what's written above because later use the complex conjugate 
+          hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2 )
+          if (hij /= 0.d0) then
+            ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!            hij = dconjg(hij) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij
+            enddo
+          endif
+        end do
+      end do
+      !! <phi|H|alpha>
+      do j = 1,2 ! loop over all 4 combinations of one alpha and one beta particle
+        putj = p(j, 2)
+        if(bannedOrb(putj, 2)) cycle
+        p2 = p(turn2(j), 2)
+        do i = 1,2
+          puti = p(i, 1)
+          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
+          p1 = p(turn2(i), 1)
+          hji = mo_bi_ortho_tc_two_e( p1, p2, h1, h2)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji
+            enddo
+          endif
+        end do
+      end do
+    end if
+
+  else ! if holes are (a,a) or (b,b)
+    if(tip == 0) then ! if particles are (a,a,a,a) or (b,b,b,b)
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+      !! <alpha|H|psi>
+      do i=1,3
+        puti = p(i, ma)
+        if(bannedOrb(puti,ma)) cycle
+        do j=i+1,4
+          putj = p(j, ma)
+          if(bannedOrb(putj,ma)) cycle
+          if(banned(puti,putj,1)) cycle
+
+          i1 = turn2d(1, i, j)
+          i2 = turn2d(2, i, j)
+          p1 = p(i1, ma)
+          p2 = p(i2, ma)
+!          hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)
+!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! 
+        ! take the transpose of what's written above because later use the complex conjugate 
+          hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e(h1, h2, p2,p1 )
+          if (hij == 0.d0) cycle
+
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!          hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,1) * hij
+          enddo
+        end do
+      end do
+      !! <phi|H|alpha>
+      do i=1,3
+        puti = p(i, ma)
+        if(bannedOrb(puti,ma)) cycle
+        do j=i+1,4
+          putj = p(j, ma)
+          if(bannedOrb(putj,ma)) cycle
+          if(banned(puti,putj,1)) cycle
+          i1 = turn2d(1, i, j)
+          i2 = turn2d(2, i, j)
+          p1 = p(i1, ma)
+          p2 = p(i2, ma)
+          hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1,h1, h2 )
+          if (hji == 0.d0) cycle
+          hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,2) * hji
+          enddo
+        end do
+      end do
+    else if(tip == 3) then ! if particles are (a,a,a,b) (ma=1,mi=2) or (a,b,b,b) (ma=2,mi=1)
+      h1 = h(1, mi)
+      h2 = h(1, ma)
+      p1 = p(1, mi)
+      !! <alpha|H|psi>
+      do i=1,3
+        puti = p(turn3(1,i), ma)
+        if(bannedOrb(puti,ma)) cycle
+        putj = p(turn3(2,i), ma)
+        if(bannedOrb(putj,ma)) cycle
+        if(banned(puti,putj,1)) cycle
+        p2 = p(i, ma)
+
+!        hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2)
+!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! 
+        ! take the transpose of what's written above because later use the complex conjugate 
+        hij = mo_bi_ortho_tc_two_e(h1, h2,p1, p2 )
+        if (hij == 0.d0) cycle
+
+        ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!        hij = dconjg(hij) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        if (puti < putj) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij
+          enddo
+        else
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij
+          enddo
+        endif
+      end do
+      !! <phi|H|alpha>
+      do i=1,3
+        puti = p(turn3(1,i), ma)
+        if(bannedOrb(puti,ma)) cycle
+        putj = p(turn3(2,i), ma)
+        if(bannedOrb(putj,ma)) cycle
+        if(banned(puti,putj,1)) cycle
+        p2 = p(i, ma)
+        hji = mo_bi_ortho_tc_two_e(p1, p2,h1, h2)
+        if (hji == 0.d0) cycle
+        hji = hji * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        if (puti < putj) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji
+          enddo
+        else
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji
+          enddo
+        endif
+      end do
+    else ! tip == 4  (a,a,b,b)
+      puti = p(1, sp)
+      putj = p(2, sp)
+      if(.not. banned(puti,putj,1)) then
+        p1 = p(1, mi)
+        p2 = p(2, mi)
+        h1 = h(1, mi)
+        h2 = h(2, mi)
+      !! <alpha|H|psi>
+!        hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2))
+!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! 
+        ! take the transpose of what's written above because later use the complex conjugate 
+        hij = (mo_bi_ortho_tc_two_e(h1, h2,p1, p2) - mo_bi_ortho_tc_two_e(h1, h2, p2,p1))
+        if (hij /= 0.d0) then
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!          hij = dconjg(hij) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij
+          enddo
+        end if
+      !! <phi|H|alpha>
+        hji = (mo_bi_ortho_tc_two_e(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e( p2,p1, h1, h2))
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji
+          enddo
+        end if
+      end if
+    end if
+  end if
+end
diff --git a/src/cipsi_tc_bi_ortho/lock_2rdm.irp.f b/src/cipsi_tc_bi_ortho/lock_2rdm.irp.f
new file mode 100644
index 00000000..e69de29b
diff --git a/src/cipsi_tc_bi_ortho/pouet b/src/cipsi_tc_bi_ortho/pouet
new file mode 100644
index 00000000..a7a41454
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/pouet
@@ -0,0 +1,33 @@
+
+          if(dabs(psi_h_alpha*alpha_h_psi - psi_h_alpha_tmp*alpha_h_psi_tmp).gt.1.d-10)then
+!!!          print*,'---'
+!!!          print*,psi_h_alpha    *alpha_h_psi,    psi_h_alpha,    alpha_h_psi  
+!!!          print*,psi_h_alpha_tmp*alpha_h_psi_tmp,psi_h_alpha_tmp,alpha_h_psi_tmp  
+           call debug_det(det,N_int)
+            print*,dabs(psi_h_alpha*alpha_h_psi - psi_h_alpha_tmp*alpha_h_psi_tmp),psi_h_alpha    *alpha_h_psi,psi_h_alpha_tmp*alpha_h_psi_tmp
+            print*,'-- Good '
+            print*,   psi_h_alpha,    alpha_h_psi  
+            print*,'-- bad '
+            print*,psi_h_alpha_tmp,alpha_h_psi_tmp  
+            print*,'-- details good'
+          double precision :: accu_1, accu_2
+          accu_1 = 0.d0
+          accu_2 = 0.d0
+          do iii = 1, N_det
+            call get_excitation_degree( psi_det(1,1,iii), det, degree, N_int)
+            call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,iii), det, N_int, i_h_alpha)
+            call htilde_mu_mat_bi_ortho_tot(det, psi_det(1,1,iii), N_int, alpha_h_i)
+            print*,iii,degree,i_h_alpha,alpha_h_i
+            accu_1 += i_h_alpha
+            accu_2 += alpha_h_i
+            print*,accu_1,accu_2
+            
+          enddo
+!!!          if(dabs(psi_h_alpha*alpha_h_psi).gt.1.d-10)then
+!!!          print*,p1,p2
+!!!          print*,det(1,1), det(1,2)
+!!!          call debug_det(det,N_int)
+!!!          print*,psi_h_alpha    *alpha_h_psi,    psi_h_alpha,    alpha_h_psi  
+!!!          print*,psi_h_alpha_tmp*alpha_h_psi_tmp,psi_h_alpha_tmp,alpha_h_psi_tmp  
+!!!          print*, dabs(psi_h_alpha*alpha_h_psi - psi_h_alpha_tmp*alpha_h_psi_tmp),& 
+!!!                   psi_h_alpha    *alpha_h_psi,psi_h_alpha_tmp*alpha_h_psi_tmp
diff --git a/src/cipsi_tc_bi_ortho/pt2.irp.f b/src/cipsi_tc_bi_ortho/pt2.irp.f
new file mode 100644
index 00000000..13b4dff4
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/pt2.irp.f
@@ -0,0 +1,89 @@
+subroutine pt2_tc_bi_ortho
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Selected Full Configuration Interaction with Stochastic selection and PT2.
+  END_DOC
+  integer                        :: i,j,k,ndet
+  double precision, allocatable  :: zeros(:)
+  integer                        :: to_select
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  logical, external              :: qp_stop
+  logical                        :: print_pt2
+
+  double precision :: rss
+  double precision, external :: memory_of_double
+  double precision :: correlation_energy_ratio,E_denom,E_tc,norm
+  double precision, allocatable :: ept2(:), pt1(:),extrap_energy(:)
+  PROVIDE H_apply_buffer_allocated distributed_davidson mo_two_e_integrals_in_map
+
+  print*,'Diagonal elements of the Fock matrix '
+  do i = 1, mo_num
+   write(*,*)i,Fock_matrix_tc_mo_tot(i,i)
+  enddo
+  N_iter = 1
+  threshold_generators = 1.d0
+  SOFT_TOUCH threshold_generators
+
+  rss = memory_of_double(N_states)*4.d0
+  call check_mem(rss,irp_here)
+
+  allocate (zeros(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+
+  double precision               :: hf_energy_ref
+  logical                        :: has
+  double precision               :: relative_error
+
+  relative_error=PT2_relative_error
+
+  zeros = 0.d0
+  pt2_data % pt2   = -huge(1.e0)
+  pt2_data % rpt2  = -huge(1.e0)
+  pt2_data % overlap= 0.d0
+  pt2_data % variance = huge(1.e0)
+
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  print_pt2 = .False.
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  call routine_save_right
+
+  if (N_det > N_det_max) then
+    psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det)
+    psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states)
+    N_det = N_det_max
+    soft_touch N_det psi_det psi_coef
+    if (s2_eig) then
+      call make_s2_eigenfunction
+    endif
+    print_pt2 = .False.
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+  endif
+
+  allocate(ept2(1000),pt1(1000),extrap_energy(100))
+
+  correlation_energy_ratio = 0.d0
+
+! thresh_it_dav  = 5.d-5
+! soft_touch thresh_it_dav
+
+  print_pt2 = .True.
+  to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
+  to_select = max(N_states_diag, to_select)
+
+  E_denom = E_tc ! TC Energy of the current wave function 
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+  call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection
+
+  N_iter += 1
+
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+
+end
+
diff --git a/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f
new file mode 100644
index 00000000..284b2bc8
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f
@@ -0,0 +1,868 @@
+BEGIN_PROVIDER [ integer, pt2_stoch_istate ]
+ implicit none
+ BEGIN_DOC
+ ! State for stochatsic PT2
+ END_DOC
+ pt2_stoch_istate = 1
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ]
+&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ]
+  implicit none
+  logical, external :: testTeethBuilding
+  integer :: i,j
+  pt2_n_tasks_max = elec_alpha_num*elec_alpha_num + elec_alpha_num*elec_beta_num  - n_core_orb*2
+  pt2_n_tasks_max = min(pt2_n_tasks_max,1+N_det_generators/10000)
+  call write_int(6,pt2_n_tasks_max,'pt2_n_tasks_max')
+
+  pt2_F(:) = max(int(sqrt(float(pt2_n_tasks_max))),1)
+  do i=1,pt2_n_0(1+pt2_N_teeth/4)
+    pt2_F(i) = pt2_n_tasks_max*pt2_min_parallel_tasks
+  enddo
+  do i=1+pt2_n_0(pt2_N_teeth-pt2_N_teeth/4), pt2_n_0(pt2_N_teeth-pt2_N_teeth/10)
+    pt2_F(i) = pt2_min_parallel_tasks
+  enddo
+  do i=1+pt2_n_0(pt2_N_teeth-pt2_N_teeth/10), N_det_generators
+    pt2_F(i) = 1
+  enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, pt2_N_teeth ]
+&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ]
+  implicit none
+  logical, external :: testTeethBuilding
+
+  if(N_det_generators < 500) then
+    pt2_minDetInFirstTeeth = 1
+    pt2_N_teeth = 1
+  else
+    pt2_minDetInFirstTeeth = min(5, N_det_generators)
+    do pt2_N_teeth=100,2,-1
+      if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit
+    end do
+  end if
+  call write_int(6,pt2_N_teeth,'Number of comb teeth')
+END_PROVIDER
+
+
+logical function testTeethBuilding(minF, N)
+  implicit none
+  integer, intent(in) :: minF, N
+  integer :: n0, i
+  double precision :: u0, Wt, r
+
+  double precision, allocatable :: tilde_w(:), tilde_cW(:)
+  integer, external :: dress_find_sample
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+
+  rss = memory_of_double(2*N_det_generators+1)
+  call check_mem(rss,irp_here)
+
+  allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators))
+
+  double precision :: norm2
+  norm2 = 0.d0
+  do i=N_det_generators,1,-1
+    tilde_w(i)  = psi_coef_sorted_tc_gen(i,pt2_stoch_istate) * &
+                  psi_coef_sorted_tc_gen(i,pt2_stoch_istate)
+    norm2 = norm2 + tilde_w(i)
+  enddo
+
+  f = 1.d0/norm2
+  tilde_w(:) = tilde_w(:) * f
+
+  tilde_cW(0) = -1.d0
+  do i=1,N_det_generators
+    tilde_cW(i) = tilde_cW(i-1) + tilde_w(i)
+  enddo
+  tilde_cW(:) = tilde_cW(:) + 1.d0
+  deallocate(tilde_w)
+
+  n0 = 0
+  testTeethBuilding = .false.
+  double precision :: f
+  integer :: minFN
+  minFN = N_det_generators - minF * N
+  f = 1.d0/dble(N)
+  do
+    u0 = tilde_cW(n0)
+    r = tilde_cW(n0 + minF)
+    Wt = (1d0 - u0) * f
+    if (dabs(Wt) <= 1.d-3) then
+      exit
+    endif
+    if(Wt >= r - u0) then
+       testTeethBuilding = .true.
+       exit
+    end if
+    n0 += 1
+    if(n0 > minFN) then
+      exit
+    end if
+  end do
+  deallocate(tilde_cW)
+
+end function
+
+
+
+subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
+  use f77_zmq
+  use selection_types
+
+  implicit none
+
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket, zmq_socket_pull
+  integer, intent(in)            :: N_in
+!  integer, intent(inout)         :: N_in
+  double precision, intent(in)   :: relative_error, E(N_states)
+  type(pt2_type), intent(inout)  :: pt2_data, pt2_data_err
+!
+  integer                        :: i, N
+
+  double precision               :: state_average_weight_save(N_states), w(N_states,4)
+  integer(ZMQ_PTR), external     :: new_zmq_to_qp_run_socket
+  type(selection_buffer)         :: b
+
+  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp_tc psi_det_sorted_tc
+  PROVIDE psi_det_hii selection_weight pseudo_sym
+  PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
+  PROVIDE excitation_beta_max  excitation_alpha_max excitation_max
+
+  if (h0_type == 'CFG') then
+    PROVIDE psi_configuration_hii det_to_configuration
+  endif
+
+  if (N_det <= max(4,N_states) .or. pt2_N_teeth < 2) then
+    print*,'ZMQ_selection'
+    call ZMQ_selection(N_in, pt2_data)
+  else
+    print*,'else ZMQ_selection'
+
+    N = max(N_in,1) * N_states
+    state_average_weight_save(:) = state_average_weight(:)
+    if (int(N,8)*2_8 > huge(1)) then
+      print *,  irp_here, ': integer too large'
+      stop -1
+    endif
+    call create_selection_buffer(N, N*2, b)
+    ASSERT (associated(b%det))
+    ASSERT (associated(b%val))
+
+    do pt2_stoch_istate=1,N_states
+      state_average_weight(:) = 0.d0
+      state_average_weight(pt2_stoch_istate) = 1.d0
+      TOUCH state_average_weight pt2_stoch_istate selection_weight
+
+      PROVIDE nproc pt2_F mo_two_e_integrals_in_map mo_one_e_integrals pt2_w
+      PROVIDE pt2_u pt2_J pt2_R
+      call new_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
+
+      integer, external              :: zmq_put_psi
+      integer, external              :: zmq_put_N_det_generators
+      integer, external              :: zmq_put_N_det_selectors
+      integer, external              :: zmq_put_dvector
+      integer, external              :: zmq_put_ivector
+      if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
+        stop 'Unable to put psi on ZMQ server'
+      endif
+      if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then
+        stop 'Unable to put N_det_generators on ZMQ server'
+      endif
+      if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then
+        stop 'Unable to put N_det_selectors on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then
+        stop 'Unable to put energy on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then
+        stop 'Unable to put state_average_weight on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) then
+        stop 'Unable to put selection_weight on ZMQ server'
+      endif
+      if (zmq_put_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) then
+        stop 'Unable to put pt2_stoch_istate on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) then
+        stop 'Unable to put threshold_generators on ZMQ server'
+      endif
+
+
+      integer, external :: add_task_to_taskserver
+      character(300000) :: task
+
+      integer :: j,k,ipos,ifirst
+      ifirst=0
+
+      ipos=0
+      do i=1,N_det_generators
+        if (pt2_F(i) > 1) then
+          ipos += 1
+        endif
+      enddo
+      call write_int(6,sum(pt2_F),'Number of tasks')
+      call write_int(6,ipos,'Number of fragmented tasks')
+
+      ipos=1
+      do i= 1, N_det_generators
+        do j=1,pt2_F(pt2_J(i))
+          write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, pt2_J(i), N_in
+          ipos += 30
+          if (ipos > 300000-30) then
+            if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+              stop 'Unable to add task to task server'
+            endif
+            ipos=1
+            if (ifirst == 0) then
+              ifirst=1
+              if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+                print *,  irp_here, ': Failed in zmq_set_running'
+              endif
+            endif
+          endif
+        end do
+      enddo
+      if (ipos > 1) then
+        if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+          stop 'Unable to add task to task server'
+        endif
+      endif
+
+      integer, external :: zmq_set_running
+      if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+        print *,  irp_here, ': Failed in zmq_set_running'
+      endif
+
+
+      double precision :: mem_collector, mem, rss
+
+      call resident_memory(rss)
+
+      mem_collector = 8.d0 *                  & ! bytes
+            ( 1.d0*pt2_n_tasks_max            & ! task_id, index
+            + 0.635d0*N_det_generators        & ! f,d
+            + pt2_n_tasks_max*pt2_type_size(N_states) & ! pt2_data_task
+            + N_det_generators*pt2_type_size(N_states)  & ! pt2_data_I
+            + 4.d0*(pt2_N_teeth+1)            & ! S, S2, T2, T3
+            + 1.d0*(N_int*2.d0*N + N)         & ! selection buffer
+            + 1.d0*(N_int*2.d0*N + N)         & ! sort selection buffer
+            ) / 1024.d0**3
+
+      integer :: nproc_target, ii
+      nproc_target = nthreads_pt2
+      ii = min(N_det, (elec_alpha_num*(mo_num-elec_alpha_num))**2)
+
+      do
+        mem = mem_collector +                   & !
+              nproc_target * 8.d0 *             & ! bytes
+              ( 0.5d0*pt2_n_tasks_max           & ! task_id
+              + 64.d0*pt2_n_tasks_max           & ! task
+              + pt2_type_size(N_states)*pt2_n_tasks_max*N_states   & ! pt2, variance, overlap
+              + 1.d0*pt2_n_tasks_max            & ! i_generator, subset
+              + 1.d0*(N_int*2.d0*ii+ ii)        & ! selection buffer
+              + 1.d0*(N_int*2.d0*ii+ ii)        & ! sort selection buffer
+              + 2.0d0*(ii)                      & ! preinteresting, interesting,
+                                                  ! prefullinteresting, fullinteresting
+              + 2.0d0*(N_int*2*ii)              & ! minilist, fullminilist
+              + 1.0d0*(N_states*mo_num*mo_num)  & ! mat
+              ) / 1024.d0**3
+
+        if (nproc_target == 0) then
+          call check_mem(mem,irp_here)
+          nproc_target = 1
+          exit
+        endif
+
+        if (mem+rss < qp_max_mem) then
+          exit
+        endif
+
+        nproc_target = nproc_target - 1
+
+      enddo
+      call write_int(6,nproc_target,'Number of threads for PT2')
+      call write_double(6,mem,'Memory (Gb)')
+
+      call omp_set_max_active_levels(1)
+
+
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+      print '(A)', ' Samples          Energy                Variance               Norm^2          Seconds'
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+
+      PROVIDE global_selection_buffer
+
+      !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1)            &
+          !$OMP  PRIVATE(i)
+      i = omp_get_thread_num()
+      if (i==0) then
+
+        call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, pt2_data, pt2_data_err, b, N)
+        pt2_data % rpt2(pt2_stoch_istate) =  &
+          pt2_data % pt2(pt2_stoch_istate)/(1.d0+pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate))
+
+        !TODO : We should use here the correct formula for the error of X/Y
+        pt2_data_err % rpt2(pt2_stoch_istate) =  &
+          pt2_data_err % pt2(pt2_stoch_istate)/(1.d0 + pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate))
+
+      else
+        call pt2_slave_inproc(i)
+      endif
+      !$OMP END PARALLEL
+      call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
+      call omp_set_max_active_levels(8)
+
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+
+    do k=1,N_states
+      pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate)
+    enddo
+    SOFT_TOUCH pt2_overlap
+
+    enddo
+    FREE pt2_stoch_istate
+
+    ! Symmetrize overlap
+    do j=2,N_states
+     do i=1,j-1
+       pt2_overlap(i,j) = 0.5d0 * (pt2_overlap(i,j) + pt2_overlap(j,i))
+       pt2_overlap(j,i) = pt2_overlap(i,j)
+     enddo
+    enddo
+
+    print *, 'Overlap of perturbed states:'
+    do k=1,N_states
+      print *, pt2_overlap(k,:)
+    enddo
+    print *, '-------'
+
+    if (N_in > 0) then
+      b%cur = min(N_in,b%cur)
+      if (s2_eig) then
+        call make_selection_buffer_s2(b)
+      else
+        call remove_duplicates_in_selection_buffer(b)
+      endif
+      call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0)
+    endif
+    call delete_selection_buffer(b)
+
+    state_average_weight(:) = state_average_weight_save(:)
+    TOUCH state_average_weight
+    call update_pt2_and_variance_weights(pt2_data, N_states)
+  endif
+
+
+end subroutine
+
+
+subroutine pt2_slave_inproc(i)
+  implicit none
+  integer, intent(in)            :: i
+
+  PROVIDE global_selection_buffer
+  call run_pt2_slave(1,i,pt2_e0_denominator)
+end
+
+
+subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_err, b, N_)
+  use f77_zmq
+  use selection_types
+  use bitmasks
+  implicit none
+
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  double precision, intent(in)   :: relative_error, E
+  type(pt2_type), intent(inout)  :: pt2_data, pt2_data_err
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(in)            :: N_
+
+  type(pt2_type), allocatable    :: pt2_data_task(:)
+  type(pt2_type), allocatable    :: pt2_data_I(:)
+  type(pt2_type), allocatable    :: pt2_data_S(:)
+  type(pt2_type), allocatable    :: pt2_data_S2(:)
+  type(pt2_type)                 :: pt2_data_teeth
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+  integer, external :: zmq_delete_tasks_async_send
+  integer, external :: zmq_delete_tasks_async_recv
+  integer, external :: zmq_abort
+  integer, external :: pt2_find_sample_lr
+
+  PROVIDE pt2_stoch_istate
+
+  integer :: more, n, i, p, c, t, n_tasks, U
+  integer, allocatable :: task_id(:)
+  integer, allocatable :: index(:)
+
+  double precision :: v, x, x2, x3, avg, avg2, avg3(N_states), eqt, E0, v0, n0(N_states)
+  double precision :: eqta(N_states)
+  double precision :: time, time1, time0
+
+  integer, allocatable :: f(:)
+  logical, allocatable :: d(:)
+  logical :: do_exit, stop_now, sending
+  logical, external :: qp_stop
+  type(selection_buffer) :: b2
+
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+
+  sending =.False.
+
+  rss  = memory_of_int(pt2_n_tasks_max*2+N_det_generators*2)
+  rss += memory_of_double(N_states*N_det_generators)*3.d0
+  rss += memory_of_double(N_states*pt2_n_tasks_max)*3.d0
+  rss += memory_of_double(pt2_N_teeth+1)*4.d0
+  call check_mem(rss,irp_here)
+
+  ! If an allocation is added here, the estimate of the memory should also be
+  ! updated in ZMQ_pt2
+  allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators))
+  allocate(d(N_det_generators+1))
+  allocate(pt2_data_task(pt2_n_tasks_max))
+  allocate(pt2_data_I(N_det_generators))
+  allocate(pt2_data_S(pt2_N_teeth+1))
+  allocate(pt2_data_S2(pt2_N_teeth+1))
+
+
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+  call create_selection_buffer(N_, N_*2, b2)
+
+
+  pt2_data % pt2(pt2_stoch_istate) = -huge(1.)
+  pt2_data_err % pt2(pt2_stoch_istate) = huge(1.)
+  pt2_data % variance(pt2_stoch_istate) = huge(1.)
+  pt2_data_err % variance(pt2_stoch_istate) = huge(1.)
+  pt2_data % overlap(:,pt2_stoch_istate) = 0.d0
+  pt2_data_err % overlap(:,pt2_stoch_istate) = huge(1.)
+  n = 1
+  t = 0
+  U = 0
+  do i=1,pt2_n_tasks_max
+    call pt2_alloc(pt2_data_task(i),N_states)
+  enddo
+  do i=1,pt2_N_teeth+1
+    call pt2_alloc(pt2_data_S(i),N_states)
+    call pt2_alloc(pt2_data_S2(i),N_states)
+  enddo
+  do i=1,N_det_generators
+    call pt2_alloc(pt2_data_I(i),N_states)
+  enddo
+  f(:) = pt2_F(:)
+  d(:) = .false.
+  n_tasks = 0
+  E0 = E
+  v0 = 0.d0
+  n0(:) = 0.d0
+  more = 1
+  call wall_time(time0)
+  time1 = time0
+
+  do_exit = .false.
+  stop_now = .false.
+  do while (n <= N_det_generators)
+    if(f(pt2_J(n)) == 0) then
+      d(pt2_J(n)) = .true.
+      do while(d(U+1))
+        U += 1
+      end do
+
+      ! Deterministic part
+      do while(t <= pt2_N_teeth)
+        if(U >= pt2_n_0(t+1)) then
+          t=t+1
+          E0 = 0.d0
+          v0 = 0.d0
+          n0(:) = 0.d0
+          do i=pt2_n_0(t),1,-1
+            E0 += pt2_data_I(i) % pt2(pt2_stoch_istate)
+            v0 += pt2_data_I(i) % variance(pt2_stoch_istate)
+            n0(:) += pt2_data_I(i) % overlap(:,pt2_stoch_istate)
+          end do
+        else
+          exit
+        end if
+      end do
+
+      ! Add Stochastic part
+      c = pt2_R(n)
+      if(c > 0) then
+
+        call pt2_alloc(pt2_data_teeth,N_states)
+        do p=pt2_N_teeth, 1, -1
+          v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1))
+          i = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(p),pt2_n_0(p+1))
+          v = pt2_W_T / pt2_w(i)
+          call pt2_add ( pt2_data_teeth,  v,  pt2_data_I(i) )
+          call pt2_add ( pt2_data_S(p),  1.d0,  pt2_data_teeth )
+          call pt2_add2( pt2_data_S2(p), 1.d0,  pt2_data_teeth )
+        enddo
+        call pt2_dealloc(pt2_data_teeth)
+
+        avg  = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c)
+        avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c)
+        avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c)
+        if ((avg /= 0.d0) .or. (n == N_det_generators) ) then
+          do_exit = .true.
+        endif
+        if (qp_stop()) then
+          stop_now = .True.
+        endif
+        pt2_data % pt2(pt2_stoch_istate) = avg
+        pt2_data % variance(pt2_stoch_istate) = avg2
+        pt2_data % overlap(:,pt2_stoch_istate) = avg3(:)
+        call wall_time(time)
+        ! 1/(N-1.5) : see  Brugger, The American Statistician (23) 4 p. 32 (1969)
+        if(c > 2) then
+          eqt = dabs((pt2_data_S2(t) % pt2(pt2_stoch_istate) / c) - (pt2_data_S(t) % pt2(pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          eqt = sqrt(eqt / (dble(c) - 1.5d0))
+          pt2_data_err % pt2(pt2_stoch_istate) = eqt
+
+          eqt = dabs((pt2_data_S2(t) % variance(pt2_stoch_istate) / c) - (pt2_data_S(t) % variance(pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          eqt = sqrt(eqt / (dble(c) - 1.5d0))
+          pt2_data_err % variance(pt2_stoch_istate) = eqt
+
+          eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          eqta(:) = sqrt(eqta(:) / (dble(c) - 1.5d0))
+          pt2_data_err % overlap(:,pt2_stoch_istate) = eqta(:)
+
+
+          if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then
+            time1 = time
+            print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.4)', c, &
+              pt2_data     % pt2(pt2_stoch_istate) +E, &
+              pt2_data_err % pt2(pt2_stoch_istate), &
+              pt2_data     % variance(pt2_stoch_istate), &
+              pt2_data_err % variance(pt2_stoch_istate), &
+              pt2_data     % overlap(pt2_stoch_istate,pt2_stoch_istate), &
+              pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), &
+              time-time0
+            if (stop_now .or. (                                      &
+                  (do_exit .and. (dabs(pt2_data_err % pt2(pt2_stoch_istate)) /    &
+                  (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) ) <= relative_error))) ) then
+              if (zmq_abort(zmq_to_qp_run_socket) == -1) then
+                call sleep(10)
+                if (zmq_abort(zmq_to_qp_run_socket) == -1) then
+                  print *, irp_here, ': Error in sending abort signal (2)'
+                endif
+              endif
+            endif
+          endif
+        endif
+      end if
+      n += 1
+    else if(more == 0) then
+      exit
+    else
+      call pull_pt2_results(zmq_socket_pull, index, pt2_data_task, task_id, n_tasks, b2)
+      if(n_tasks > pt2_n_tasks_max)then
+       print*,'PB !!!'
+       print*,'If you see this, send a bug report with the following content'
+       print*,irp_here
+       print*,'n_tasks,pt2_n_tasks_max = ',n_tasks,pt2_n_tasks_max
+       stop -1
+      endif
+      if (zmq_delete_tasks_async_send(zmq_to_qp_run_socket,task_id,n_tasks,sending) == -1) then
+          stop 'PT2: Unable to delete tasks (send)'
+      endif
+      do i=1,n_tasks
+        if(index(i).gt.size(pt2_data_I,1).or.index(i).lt.1)then
+         print*,'PB !!!'
+         print*,'If you see this, send a bug report with the following content'
+         print*,irp_here
+         print*,'i,index(i),size(pt2_data_I,1) = ',i,index(i),size(pt2_data_I,1)
+         stop -1
+        endif
+        call pt2_add(pt2_data_I(index(i)),1.d0,pt2_data_task(i))
+        f(index(i)) -= 1
+      end do
+      do i=1, b2%cur
+        ! We assume the pulled buffer is sorted
+        if (b2%val(i) > b%mini) exit
+        call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i))
+      end do
+      if (zmq_delete_tasks_async_recv(zmq_to_qp_run_socket,more,sending) == -1) then
+          stop 'PT2: Unable to delete tasks (recv)'
+      endif
+    end if
+  end do
+  do i=1,N_det_generators
+    call pt2_dealloc(pt2_data_I(i))
+  enddo
+  do i=1,pt2_N_teeth+1
+    call pt2_dealloc(pt2_data_S(i))
+    call pt2_dealloc(pt2_data_S2(i))
+  enddo
+  do i=1,pt2_n_tasks_max
+    call pt2_dealloc(pt2_data_task(i))
+  enddo
+!print *,  'deleting b2'
+  call delete_selection_buffer(b2)
+!print *,  'sorting b'
+  call sort_selection_buffer(b)
+!print *,  'done'
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+
+end subroutine
+
+
+integer function pt2_find_sample(v, w)
+  implicit none
+  double precision, intent(in) :: v, w(0:N_det_generators)
+  integer, external :: pt2_find_sample_lr
+
+  pt2_find_sample = pt2_find_sample_lr(v, w, 0, N_det_generators)
+end function
+
+
+integer function pt2_find_sample_lr(v, w, l_in, r_in)
+  implicit none
+  double precision, intent(in) :: v, w(0:N_det_generators)
+  integer, intent(in) :: l_in,r_in
+  integer :: i,l,r
+
+  l=l_in
+  r=r_in
+
+  do while(r-l > 1)
+    i = shiftr(r+l,1)
+    if(w(i) < v) then
+      l = i
+    else
+      r = i
+    end if
+  end do
+  i = r
+  do r=i+1,N_det_generators
+    if (w(r) /= w(i)) then
+      exit
+    endif
+  enddo
+  pt2_find_sample_lr = r-1
+end function
+
+
+BEGIN_PROVIDER [ integer, pt2_n_tasks ]
+ implicit none
+ BEGIN_DOC
+ ! Number of parallel tasks for the Monte Carlo
+ END_DOC
+ pt2_n_tasks = N_det_generators
+END_PROVIDER
+
+BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)]
+  implicit none
+  integer, allocatable :: seed(:)
+  integer :: m,i
+  call random_seed(size=m)
+  allocate(seed(m))
+  do i=1,m
+    seed(i) = i
+  enddo
+  call random_seed(put=seed)
+  deallocate(seed)
+
+  call RANDOM_NUMBER(pt2_u)
+ END_PROVIDER
+
+ BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)]
+&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)]
+  implicit none
+  BEGIN_DOC
+! pt2_J contains the list of generators after ordering them according to the
+! Monte Carlo sampling.
+!
+! pt2_R(i) is the number of combs drawn when determinant i is computed.
+  END_DOC
+  integer                :: N_c, N_j
+  integer                :: U, t, i
+  double precision       :: v
+  integer, external      :: pt2_find_sample_lr
+
+  logical, allocatable :: pt2_d(:)
+  integer :: m,l,r,k
+  integer :: ncache
+  integer, allocatable :: ii(:,:)
+  double precision :: dt
+
+  ncache = min(N_det_generators,10000)
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = memory_of_int(ncache)*dble(pt2_N_teeth) + memory_of_int(N_det_generators)
+  call check_mem(rss,irp_here)
+
+  allocate(ii(pt2_N_teeth,ncache),pt2_d(N_det_generators))
+
+  pt2_R(:) = 0
+  pt2_d(:) = .false.
+  N_c = 0
+  N_j = pt2_n_0(1)
+  do i=1,N_j
+      pt2_d(i) = .true.
+      pt2_J(i) = i
+  end do
+
+  U = 0
+  do while(N_j < pt2_n_tasks)
+
+    if (N_c+ncache > N_det_generators) then
+      ncache = N_det_generators - N_c
+    endif
+
+    !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(dt,v,t,k)
+    do k=1, ncache
+      dt = pt2_u_0
+      do t=1, pt2_N_teeth
+        v = dt + pt2_W_T *pt2_u(N_c+k)
+        dt = dt + pt2_W_T
+        ii(t,k) = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(t),pt2_n_0(t+1))
+      end do
+    enddo
+    !$OMP END PARALLEL DO
+
+    do k=1,ncache
+      !ADD_COMB
+      N_c = N_c+1
+      do t=1, pt2_N_teeth
+        i = ii(t,k)
+        if(.not. pt2_d(i)) then
+          N_j += 1
+          pt2_J(N_j) = i
+          pt2_d(i) = .true.
+        end if
+      end do
+
+      pt2_R(N_j) = N_c
+
+      !FILL_TOOTH
+      do while(U < N_det_generators)
+        U += 1
+        if(.not. pt2_d(U)) then
+          N_j += 1
+          pt2_J(N_j) = U
+          pt2_d(U) = .true.
+          exit
+        end if
+      end do
+      if (N_j >= pt2_n_tasks) exit
+    end do
+  enddo
+
+  if(N_det_generators > 1) then
+    pt2_R(N_det_generators-1) = 0
+    pt2_R(N_det_generators) = N_c
+  end if
+
+  deallocate(ii,pt2_d)
+
+END_PROVIDER
+
+
+
+ BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ]
+&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ]
+&BEGIN_PROVIDER [ double precision, pt2_W_T ]
+&BEGIN_PROVIDER [ double precision, pt2_u_0 ]
+&BEGIN_PROVIDER [ integer,          pt2_n_0, (pt2_N_teeth+1) ]
+   implicit none
+   integer                        :: i, t
+   double precision, allocatable  :: tilde_w(:), tilde_cW(:)
+   double precision               :: r, tooth_width
+   integer, external              :: pt2_find_sample
+
+   double precision               :: rss
+   double precision, external     :: memory_of_double, memory_of_int
+   rss = memory_of_double(2*N_det_generators+1)
+   call check_mem(rss,irp_here)
+
+   if (N_det_generators == 1) then
+
+     pt2_w(1)   = 1.d0
+     pt2_cw(1)  = 1.d0
+     pt2_u_0    = 1.d0
+     pt2_W_T    = 0.d0
+     pt2_n_0(1) = 0
+     pt2_n_0(2) = 1
+
+   else
+
+     allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators))
+
+     tilde_cW(0) = 0d0
+
+     do i=1,N_det_generators
+       tilde_w(i)  = psi_coef_sorted_tc_gen(i,pt2_stoch_istate)**2 !+ 1.d-20
+     enddo
+
+     double precision               :: norm2
+     norm2 = 0.d0
+     do i=N_det_generators,1,-1
+       norm2 += tilde_w(i)
+     enddo
+
+     tilde_w(:) = tilde_w(:) / norm2
+
+     tilde_cW(0) = -1.d0
+     do i=1,N_det_generators
+       tilde_cW(i) = tilde_cW(i-1) + tilde_w(i)
+     enddo
+     tilde_cW(:) = tilde_cW(:) + 1.d0
+
+     pt2_n_0(1) = 0
+     do
+     pt2_u_0 = tilde_cW(pt2_n_0(1))
+     r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth)
+     pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth)
+     if(pt2_W_T >= r - pt2_u_0) then
+       exit
+     end if
+     pt2_n_0(1) += 1
+     if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then
+       print *, "teeth building failed"
+       stop -1
+     end if
+   end do
+
+   do t=2, pt2_N_teeth
+     r = pt2_u_0 + pt2_W_T * dble(t-1)
+     pt2_n_0(t) = pt2_find_sample(r, tilde_cW)
+   end do
+   pt2_n_0(pt2_N_teeth+1) = N_det_generators
+
+   pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1))
+   do t=1, pt2_N_teeth
+     tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t))
+     if (tooth_width == 0.d0) then
+       tooth_width = sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1)))
+     endif
+     ASSERT(tooth_width > 0.d0)
+     do i=pt2_n_0(t)+1, pt2_n_0(t+1)
+       pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width
+     end do
+   end do
+
+   pt2_cW(0) = 0d0
+   do i=1,N_det_generators
+     pt2_cW(i) = pt2_cW(i-1) + pt2_w(i)
+   end do
+   pt2_n_0(pt2_N_teeth+1) = N_det_generators
+
+ endif
+END_PROVIDER
+
+
+
+
+
diff --git a/src/cipsi_tc_bi_ortho/pt2_type.irp.f b/src/cipsi_tc_bi_ortho/pt2_type.irp.f
new file mode 100644
index 00000000..ee90d421
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/pt2_type.irp.f
@@ -0,0 +1,128 @@
+subroutine pt2_alloc(pt2_data,N)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(inout) :: pt2_data
+  integer, intent(in)           :: N
+  integer :: k
+
+  allocate(pt2_data % pt2(N)           &
+          ,pt2_data % variance(N)      &
+          ,pt2_data % rpt2(N)          &
+          ,pt2_data % overlap(N,N)     &
+          )
+
+  pt2_data % pt2(:)           = 0.d0
+  pt2_data % variance(:)      = 0.d0
+  pt2_data % rpt2(:)          = 0.d0
+  pt2_data % overlap(:,:)     = 0.d0
+
+end subroutine
+
+subroutine pt2_dealloc(pt2_data)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(inout) :: pt2_data
+  deallocate(pt2_data % pt2         &
+            ,pt2_data % variance    &
+            ,pt2_data % rpt2        &
+            ,pt2_data % overlap     &
+            )
+end subroutine
+
+subroutine pt2_add(p1, w, p2)
+  implicit none
+  use selection_types
+  BEGIN_DOC
+! p1 += w * p2
+  END_DOC
+  type(pt2_type), intent(inout) :: p1
+  double precision, intent(in)  :: w
+  type(pt2_type), intent(in)    :: p2
+
+  if (w == 1.d0) then
+
+    p1 % pt2(:)            = p1 % pt2(:)           + p2 % pt2(:)
+    p1 % rpt2(:)           = p1 % rpt2(:)          + p2 % rpt2(:)
+    p1 % variance(:)       = p1 % variance(:)      + p2 % variance(:)
+    p1 % overlap(:,:)      = p1 % overlap(:,:)     + p2 % overlap(:,:)
+
+  else
+
+    p1 % pt2(:)            = p1 % pt2(:)           + w * p2 % pt2(:)
+    p1 % rpt2(:)           = p1 % rpt2(:)          + w * p2 % rpt2(:)
+    p1 % variance(:)       = p1 % variance(:)      + w * p2 % variance(:)
+    p1 % overlap(:,:)      = p1 % overlap(:,:)     + w * p2 % overlap(:,:)
+
+  endif
+
+end subroutine
+
+
+subroutine pt2_add2(p1, w, p2)
+  implicit none
+  use selection_types
+  BEGIN_DOC
+! p1 += w * p2**2
+  END_DOC
+  type(pt2_type), intent(inout) :: p1
+  double precision, intent(in)  :: w
+  type(pt2_type), intent(in)    :: p2
+
+  if (w == 1.d0) then
+
+    p1 % pt2(:)           = p1 % pt2(:)           + p2 % pt2(:)           * p2 % pt2(:)
+    p1 % rpt2(:)          = p1 % rpt2(:)          + p2 % rpt2(:)          * p2 % rpt2(:)
+    p1 % variance(:)      = p1 % variance(:)      + p2 % variance(:)      * p2 % variance(:)
+    p1 % overlap(:,:)     = p1 % overlap(:,:)     + p2 % overlap(:,:)     * p2 % overlap(:,:)
+
+  else
+
+    p1 % pt2(:)           = p1 % pt2(:)           + w * p2 % pt2(:)           * p2 % pt2(:)
+    p1 % rpt2(:)          = p1 % rpt2(:)          + w * p2 % rpt2(:)          * p2 % rpt2(:)
+    p1 % variance(:)      = p1 % variance(:)      + w * p2 % variance(:)      * p2 % variance(:)
+    p1 % overlap(:,:)     = p1 % overlap(:,:)     + w * p2 % overlap(:,:)     * p2 % overlap(:,:)
+
+  endif
+
+end subroutine
+
+
+subroutine pt2_serialize(pt2_data, n, x)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(in)    :: pt2_data
+  integer, intent(in)           :: n
+  double precision, intent(out) :: x(*)
+
+  integer :: i,k,n2
+
+  n2 = n*n
+  x(1:n)           =  pt2_data % pt2(1:n)
+  k=n
+  x(k+1:k+n)     =  pt2_data % rpt2(1:n)
+  k=k+n
+  x(k+1:k+n)     =  pt2_data % variance(1:n)
+  k=k+n
+  x(k+1:k+n2)  =  reshape(pt2_data % overlap(1:n,1:n), (/ n2 /))
+
+end
+
+subroutine pt2_deserialize(pt2_data, n, x)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(inout) :: pt2_data
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: x(*)
+
+  integer :: i,k,n2
+
+  n2 = n*n
+  pt2_data % pt2(1:n)           =   x(1:n)
+  k=n
+  pt2_data % rpt2(1:n)          =   x(k+1:k+n)
+  k=k+n
+  pt2_data % variance(1:n)      =   x(k+1:k+n)
+  k=k+n
+  pt2_data % overlap(1:n,1:n) = reshape(x(k+1:k+n2), (/ n, n /))
+
+end
diff --git a/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f b/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f
new file mode 100644
index 00000000..aa6546e7
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f
@@ -0,0 +1,549 @@
+ use omp_lib
+ use selection_types
+ use f77_zmq
+BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ]
+ use omp_lib
+ implicit none
+ BEGIN_DOC
+ ! Global buffer for the OpenMP selection
+ END_DOC
+ call omp_init_lock(global_selection_buffer_lock)
+END_PROVIDER
+
+BEGIN_PROVIDER [ type(selection_buffer), global_selection_buffer ]
+ use omp_lib
+ implicit none
+ BEGIN_DOC
+ ! Global buffer for the OpenMP selection
+ END_DOC
+ call omp_set_lock(global_selection_buffer_lock)
+ call delete_selection_buffer(global_selection_buffer)
+ call create_selection_buffer(N_det_generators, 2*N_det_generators, &
+    global_selection_buffer)
+ call omp_unset_lock(global_selection_buffer_lock)
+END_PROVIDER
+
+
+subroutine run_pt2_slave(thread,iproc,energy)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  double precision, intent(in)    :: energy(N_states_diag)
+  integer,  intent(in)            :: thread, iproc
+  if (N_det > 100000 ) then
+    call run_pt2_slave_large(thread,iproc,energy)
+  else
+    call run_pt2_slave_small(thread,iproc,energy)
+  endif
+end
+
+subroutine run_pt2_slave_small(thread,iproc,energy)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  double precision, intent(in)    :: energy(N_states_diag)
+  integer,  intent(in)            :: thread, iproc
+  integer                         :: rc, i
+
+  integer                        :: worker_id, ctask, ltask
+  character*(512), allocatable   :: task(:)
+  integer, allocatable           :: task_id(:)
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket
+  integer(ZMQ_PTR)               :: zmq_socket_push
+
+  type(selection_buffer) :: b
+  logical :: done, buffer_ready
+
+  type(pt2_type), allocatable :: pt2_data(:)
+  integer :: n_tasks, k, N
+  integer, allocatable :: i_generator(:), subset(:)
+
+  double precision, external :: memory_of_double, memory_of_int
+  integer :: bsize ! Size of selection buffers
+
+  allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
+  allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  b%N = 0
+  buffer_ready = .False.
+  n_tasks = 1
+
+  done = .False.
+  do while (.not.done)
+
+    n_tasks = max(1,n_tasks)
+    n_tasks = min(pt2_n_tasks_max,n_tasks)
+
+    integer, external :: get_tasks_from_taskserver
+    if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
+      exit
+    endif
+    done = task_id(n_tasks) == 0
+    if (done) then
+      n_tasks = n_tasks-1
+    endif
+    if (n_tasks == 0) exit
+
+    do k=1,n_tasks
+      call sscanf_ddd(task(k), subset(k), i_generator(k), N)
+    enddo
+    if (b%N == 0) then
+      ! Only first time
+      bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
+      call create_selection_buffer(bsize, bsize*2, b)
+      buffer_ready = .True.
+    else
+      ASSERT (b%N == bsize)
+    endif
+
+    double precision :: time0, time1
+    call wall_time(time0)
+    do k=1,n_tasks
+      call pt2_alloc(pt2_data(k),N_states)
+      b%cur = 0
+      call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k)))
+    enddo
+    call wall_time(time1)
+
+    integer, external :: tasks_done_to_taskserver
+    if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
+      done = .true.
+    endif
+    call sort_selection_buffer(b)
+    call push_pt2_results(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks)
+    do k=1,n_tasks
+      call pt2_dealloc(pt2_data(k))
+    enddo
+    b%cur=0
+
+!    ! Try to adjust n_tasks around nproc/2 seconds per job
+    n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))
+    n_tasks = min(n_tasks, pt2_n_tasks_max)
+!    n_tasks = 1
+  end do
+
+  integer, external :: disconnect_from_taskserver
+  do i=1,300
+    if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit
+    call usleep(500)
+    print *,  'Retry disconnect...'
+  end do
+
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  if (buffer_ready) then
+    call delete_selection_buffer(b)
+  endif
+  deallocate(pt2_data)
+end subroutine
+
+
+subroutine run_pt2_slave_large(thread,iproc,energy)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  double precision, intent(in)    :: energy(N_states_diag)
+  integer,  intent(in)            :: thread, iproc
+  integer                         :: rc, i
+
+  integer                        :: worker_id, ctask, ltask
+  character*(512)                :: task
+  integer                        :: task_id(1)
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket
+  integer(ZMQ_PTR)               :: zmq_socket_push
+
+  type(selection_buffer) :: b
+  logical :: done, buffer_ready
+
+  type(pt2_type) :: pt2_data
+  integer :: n_tasks, k, N
+  integer :: i_generator, subset
+
+  integer :: bsize ! Size of selection buffers
+  logical :: sending
+  double precision :: time_shift
+
+  PROVIDE global_selection_buffer global_selection_buffer_lock
+
+  call random_number(time_shift)
+  time_shift = time_shift*15.d0
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  b%N = 0
+  buffer_ready = .False.
+  n_tasks = 1
+
+  sending = .False.
+  done = .False.
+  double precision :: time0, time1
+  call wall_time(time0)
+  time0 = time0+time_shift
+  do while (.not.done)
+
+    integer, external :: get_tasks_from_taskserver
+    if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
+      exit
+    endif
+    done = task_id(1) == 0
+    if (done) then
+      n_tasks = n_tasks-1
+    endif
+    if (n_tasks == 0) exit
+
+    call sscanf_ddd(task, subset, i_generator, N)
+    if( pt2_F(i_generator) <= 0 .or. pt2_F(i_generator) > N_det ) then
+      print *,  irp_here
+      stop 'bug in selection'
+    endif
+    if (b%N == 0) then
+      ! Only first time
+      bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
+      call create_selection_buffer(bsize, bsize*2, b)
+      buffer_ready = .True.
+    else
+      ASSERT (b%N == bsize)
+    endif
+
+    call pt2_alloc(pt2_data,N_states)
+    b%cur = 0
+    call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator))
+
+    integer, external :: tasks_done_to_taskserver
+    if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
+      done = .true.
+    endif
+    call sort_selection_buffer(b)
+
+    call wall_time(time1)
+!    if (time1-time0 > 15.d0) then
+      call omp_set_lock(global_selection_buffer_lock)
+      global_selection_buffer%mini = b%mini
+      call merge_selection_buffers(b,global_selection_buffer)
+      b%cur=0
+      call omp_unset_lock(global_selection_buffer_lock)
+      call wall_time(time0)
+!    endif
+
+    call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
+    if ( iproc == 1 .or. i_generator < 100 .or. done) then
+      call omp_set_lock(global_selection_buffer_lock)
+      call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending)
+      global_selection_buffer%cur = 0
+      call omp_unset_lock(global_selection_buffer_lock)
+    else
+      call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), b, (/task_id/), 1,sending)
+    endif
+
+    call pt2_dealloc(pt2_data)
+  end do
+  call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
+
+  integer, external :: disconnect_from_taskserver
+  do i=1,300
+    if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit
+    call sleep(1)
+    print *,  'Retry disconnect...'
+  end do
+
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  if (buffer_ready) then
+    call delete_selection_buffer(b)
+  endif
+  FREE global_selection_buffer
+end subroutine
+
+
+subroutine push_pt2_results(zmq_socket_push, index, pt2_data, b, task_id, n_tasks)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  type(pt2_type), intent(in)     :: pt2_data(n_tasks)
+  integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks)
+  type(selection_buffer), intent(inout) :: b
+
+  logical :: sending
+  sending = .False.
+  call push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending)
+  call push_pt2_results_async_recv(zmq_socket_push, b%mini, sending)
+end subroutine
+
+
+subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  type(pt2_type), intent(in)     :: pt2_data(n_tasks)
+  integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks)
+  type(selection_buffer), intent(inout) :: b
+  logical, intent(inout) :: sending
+  integer :: rc, i
+  integer*8 :: rc8
+  double precision, allocatable :: pt2_serialized(:,:)
+
+  if (sending) then
+    print *,  irp_here, ': sending is true'
+    stop -1
+  endif
+  sending = .True.
+
+  rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 1
+    return
+  else if(rc /= 4) then
+    stop 'push'
+  endif
+
+
+  rc = f77_zmq_send( zmq_socket_push, index, 4*n_tasks, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 2
+    return
+  else if(rc /= 4*n_tasks) then
+    stop 'push'
+  endif
+
+
+  allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) )
+  do i=1,n_tasks
+    call pt2_serialize(pt2_data(i),N_states,pt2_serialized(1,i))
+  enddo
+
+  rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE)
+  deallocate(pt2_serialized)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 3
+    return
+  else if(rc /= size(pt2_serialized)*8) then
+    stop 'push'
+  endif
+
+
+  rc = f77_zmq_send( zmq_socket_push, task_id, n_tasks*4, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 6
+    return
+  else if(rc /= 4*n_tasks) then
+    stop 'push'
+  endif
+
+
+  if (b%cur == 0) then
+
+    rc = f77_zmq_send( zmq_socket_push, b%cur, 4, 0)
+    if (rc == -1) then
+      print *,  irp_here, ': error sending result'
+      stop 7
+      return
+    else if(rc /= 4) then
+      stop 'push'
+    endif
+
+  else
+
+    rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)
+    if (rc == -1) then
+      print *,  irp_here, ': error sending result'
+      stop 7
+      return
+    else if(rc /= 4) then
+      stop 'push'
+    endif
+
+
+    rc8 = f77_zmq_send8( zmq_socket_push, b%val, 8_8*int(b%cur,8), ZMQ_SNDMORE)
+    if (rc8 == -1_8) then
+      print *,  irp_here, ': error sending result'
+      stop 8
+      return
+    else if(rc8 /= 8_8*int(b%cur,8)) then
+      stop 'push'
+    endif
+
+
+    rc8 = f77_zmq_send8( zmq_socket_push, b%det, int(bit_kind*N_int*2,8)*int(b%cur,8), 0)
+    if (rc8 == -1_8) then
+      print *,  irp_here, ': error sending result'
+      stop 9
+      return
+    else if(rc8 /= int(N_int*2*8,8)*int(b%cur,8)) then
+      stop 'push'
+    endif
+
+  endif
+
+end subroutine
+
+subroutine push_pt2_results_async_recv(zmq_socket_push,mini,sending)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  integer(ZMQ_PTR), intent(in)    :: zmq_socket_push
+  double precision, intent(out) :: mini
+  logical, intent(inout) :: sending
+  integer :: rc
+
+  if (.not.sending) return
+
+! Activate is zmq_socket_push is a REQ
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  character*(2) :: ok
+  rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 10
+    return
+  else if ((rc /= 2).and.(ok(1:2) /= 'ok')) then
+    print *,  irp_here//': error in receiving ok'
+    stop -1
+  endif
+  rc = f77_zmq_recv( zmq_socket_push, mini, 8, 0)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 11
+    return
+  else if (rc /= 8) then
+    print *,  irp_here//': error in receiving mini'
+    stop 12
+  endif
+IRP_ENDIF
+  sending = .False.
+end subroutine
+
+
+
+subroutine pull_pt2_results(zmq_socket_pull, index, pt2_data, task_id, n_tasks, b)
+ use selection_types
+ use f77_zmq
+  implicit none
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  type(pt2_type), intent(inout)  :: pt2_data(*)
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(out) :: index(*)
+  integer, intent(out) :: n_tasks, task_id(*)
+  integer :: rc, rn, i
+  integer*8 :: rc8
+  double precision, allocatable :: pt2_serialized(:,:)
+
+  rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4) then
+    stop 'pull'
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, index, 4*n_tasks, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4*n_tasks) then
+    stop 'pull'
+  endif
+
+  allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) )
+  rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized)*n_tasks, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 8*size(pt2_serialized)) then
+    stop 'pull'
+  endif
+
+  do i=1,n_tasks
+    call pt2_deserialize(pt2_data(i),N_states,pt2_serialized(1,i))
+  enddo
+  deallocate(pt2_serialized)
+
+  rc = f77_zmq_recv( zmq_socket_pull, task_id, n_tasks*4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4*n_tasks) then
+    stop 'pull'
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, b%cur, 4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4) then
+    stop 'pull'
+  endif
+
+  if (b%cur > 0) then
+
+    rc8 = f77_zmq_recv8( zmq_socket_pull, b%val, 8_8*int(b%cur,8), 0)
+    if (rc8 == -1_8) then
+      n_tasks = 1
+      task_id(1) = 0
+    else if(rc8 /= 8_8*int(b%cur,8)) then
+      stop 'pull'
+    endif
+
+    rc8 = f77_zmq_recv8( zmq_socket_pull, b%det, int(bit_kind*N_int*2,8)*int(b%cur,8), 0)
+    if (rc8 == -1_8) then
+      n_tasks = 1
+      task_id(1) = 0
+    else if(rc8 /= int(N_int*2*8,8)*int(b%cur,8)) then
+      stop 'pull'
+    endif
+
+  endif
+
+! Activate is zmq_socket_pull is a REP
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, ZMQ_SNDMORE)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if (rc /= 2) then
+    print *,  irp_here//': error in sending ok'
+    stop -1
+  endif
+  rc = f77_zmq_send( zmq_socket_pull, b%mini, 8, 0)
+IRP_ENDIF
+
+end subroutine
+
diff --git a/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f b/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f
new file mode 100644
index 00000000..d351cc79
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f
@@ -0,0 +1,255 @@
+subroutine run_selection_slave(thread, iproc, energy)
+
+  use f77_zmq
+  use selection_types
+
+  implicit none
+
+  double precision, intent(in) :: energy(N_states)
+  integer,          intent(in) :: thread, iproc
+
+  integer                      :: rc, i
+  integer                      :: worker_id, task_id(1), ctask, ltask
+  character*(512)              :: task
+  integer(ZMQ_PTR)             :: zmq_to_qp_run_socket
+  integer(ZMQ_PTR)             :: zmq_socket_push
+  integer(ZMQ_PTR), external   :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR), external   :: new_zmq_push_socket
+  type(selection_buffer)       :: buf, buf2
+  type(pt2_type)               :: pt2_data
+  logical                      :: done, buffer_ready
+
+  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+  PROVIDE psi_bilinear_matrix_transp_order N_int pt2_F pseudo_sym
+  PROVIDE psi_selectors_coef_transp_tc psi_det_sorted_tc weight_selection
+
+  call pt2_alloc(pt2_data,N_states)
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  buf%N = 0
+  buffer_ready = .False.
+  ctask = 1
+
+  do
+    integer, external :: get_task_from_taskserver
+    if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) == -1) then
+      exit
+    endif
+    done = task_id(ctask) == 0
+    if (done) then
+      ctask = ctask - 1
+    else
+      integer :: i_generator, N, subset, bsize
+      call sscanf_ddd(task, subset, i_generator, N)
+      if(buf%N == 0) then
+        ! Only first time
+        call create_selection_buffer(N, N*2, buf)
+        buffer_ready = .True.
+      else
+        if (N /= buf%N) then
+          print *, 'N=', N
+          print *, 'buf%N=', buf%N
+          print *, 'bug in ', irp_here
+          stop '-1'
+        end if
+      end if
+      call select_connected(i_generator, energy, pt2_data, buf,subset, pt2_F(i_generator))
+    endif
+
+    integer, external :: task_done_to_taskserver
+
+    if(done .or. ctask == size(task_id)) then
+      do i=1, ctask
+         if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then
+           call usleep(100)
+          if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then
+            ctask = 0
+            done = .true.
+            exit
+          endif
+         endif
+      end do
+      if(ctask > 0) then
+        call sort_selection_buffer(buf)
+!        call merge_selection_buffers(buf,buf2)
+        call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask)
+        call pt2_dealloc(pt2_data)
+        call pt2_alloc(pt2_data,N_states)
+!        buf%mini = buf2%mini
+        buf%cur = 0
+      end if
+      ctask = 0
+    end if
+
+    if(done) exit
+    ctask = ctask + 1
+  end do
+
+  if(ctask > 0) then
+    call sort_selection_buffer(buf)
+!    call merge_selection_buffers(buf,buf2)
+    call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask)
+!    buf%mini = buf2%mini
+    buf%cur = 0
+  end if
+  ctask = 0
+  call pt2_dealloc(pt2_data)
+
+  integer, external :: disconnect_from_taskserver
+  if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
+    continue
+  endif
+
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  if (buffer_ready) then
+    call delete_selection_buffer(buf)
+!    call delete_selection_buffer(buf2)
+  endif
+end subroutine
+
+
+subroutine push_selection_results(zmq_socket_push, pt2_data, b, task_id, ntasks)
+  use f77_zmq
+  use selection_types
+  implicit none
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  type(pt2_type), intent(in)     :: pt2_data
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(in) :: ntasks, task_id(*)
+  integer :: rc
+  double precision, allocatable :: pt2_serialized(:)
+
+  rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)
+  if(rc /= 4) then
+    print *,  'f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)'
+  endif
+
+
+  allocate(pt2_serialized (pt2_type_size(N_states)) )
+  call pt2_serialize(pt2_data,N_states,pt2_serialized)
+
+  rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 3
+    return
+  else if(rc /= size(pt2_serialized)*8) then
+    stop 'push'
+  endif
+  deallocate(pt2_serialized)
+
+  if (b%cur > 0) then
+
+      rc = f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE)
+      if(rc /= 8*b%cur) then
+        print *,  'f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE)'
+      endif
+
+      rc = f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE)
+      if(rc /= bit_kind*N_int*2*b%cur) then
+        print *,  'f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE)'
+      endif
+
+  endif
+
+  rc = f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)
+  if(rc /= 4) then
+    print *,  'f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)'
+  endif
+
+  rc = f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)
+  if(rc /= 4*ntasks) then
+    print *,  'f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)'
+  endif
+
+! Activate is zmq_socket_push is a REQ
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  character*(2) :: ok
+  rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
+  if ((rc /= 2).and.(ok(1:2) /= 'ok')) then
+    print *,  irp_here//': error in receiving ok'
+    stop -1
+  endif
+IRP_ENDIF
+
+end subroutine
+
+
+subroutine pull_selection_results(zmq_socket_pull, pt2_data, val, det, N, task_id, ntasks)
+  use f77_zmq
+  use selection_types
+  implicit none
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  type(pt2_type), intent(inout) :: pt2_data
+  double precision, intent(out) :: val(*)
+  integer(bit_kind), intent(out) :: det(N_int, 2, *)
+  integer, intent(out) :: N, ntasks, task_id(*)
+  integer :: rc, rn, i
+  double precision, allocatable :: pt2_serialized(:)
+
+  rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0)
+  if(rc /= 4) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, N, 4, 0)'
+  endif
+
+  allocate(pt2_serialized (pt2_type_size(N_states)) )
+  rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized), 0)
+  if (rc == -1) then
+    ntasks = 1
+    task_id(1) = 0
+  else if(rc /= 8*size(pt2_serialized)) then
+    stop 'pull'
+  endif
+
+  call pt2_deserialize(pt2_data,N_states,pt2_serialized)
+  deallocate(pt2_serialized)
+
+  if (N>0) then
+      rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)
+      if(rc /= 8*N) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0)
+      if(rc /= bit_kind*N_int*2*N) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0)'
+      endif
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)
+  if(rc /= 4) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)'
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)
+  if(rc /= 4*ntasks) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)'
+  endif
+
+! Activate is zmq_socket_pull is a REP
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0)
+  if (rc /= 2) then
+    print *,  irp_here//': error in sending ok'
+    stop -1
+  endif
+IRP_ENDIF
+end subroutine
+
+
+
diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f
new file mode 100644
index 00000000..4c271a4b
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/selection.irp.f
@@ -0,0 +1,1091 @@
+use bitmasks
+
+subroutine get_mask_phase(det1, pm, Nint)
+  use bitmasks
+  implicit none
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: det1(Nint,2)
+  integer(bit_kind), intent(out) :: pm(Nint,2)
+  integer(bit_kind) :: tmp1, tmp2
+  integer :: i
+  tmp1 = 0_8
+  tmp2 = 0_8
+  select case (Nint)
+
+BEGIN_TEMPLATE
+    case ($Nint)
+      do i=1,$Nint
+        pm(i,1) = ieor(det1(i,1), shiftl(det1(i,1), 1))
+        pm(i,2) = ieor(det1(i,2), shiftl(det1(i,2), 1))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 2))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 2))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 4))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 4))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 8))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 8))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 16))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 16))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 32))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 32))
+        pm(i,1) = ieor(pm(i,1), tmp1)
+        pm(i,2) = ieor(pm(i,2), tmp2)
+        if(iand(popcnt(det1(i,1)), 1) == 1) tmp1 = not(tmp1)
+        if(iand(popcnt(det1(i,2)), 1) == 1) tmp2 = not(tmp2)
+      end do
+SUBST [ Nint ]
+1;;
+2;;
+3;;
+4;;
+END_TEMPLATE
+    case default
+      do i=1,Nint
+        pm(i,1) = ieor(det1(i,1), shiftl(det1(i,1), 1))
+        pm(i,2) = ieor(det1(i,2), shiftl(det1(i,2), 1))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 2))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 2))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 4))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 4))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 8))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 8))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 16))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 16))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 32))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 32))
+        pm(i,1) = ieor(pm(i,1), tmp1)
+        pm(i,2) = ieor(pm(i,2), tmp2)
+        if(iand(popcnt(det1(i,1)), 1) == 1) tmp1 = not(tmp1)
+        if(iand(popcnt(det1(i,2)), 1) == 1) tmp2 = not(tmp2)
+      end do
+  end select
+
+end subroutine
+
+
+subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset)
+  use bitmasks
+  use selection_types
+  implicit none
+  integer, intent(in)    :: i_generator, subset, csubset
+  type(selection_buffer), intent(inout) :: b
+  type(pt2_type), intent(inout) :: pt2_data
+  integer :: k,l
+  double precision, intent(in)   :: E0(N_states)
+
+  integer(bit_kind)              :: hole_mask(N_int,2), particle_mask(N_int,2)
+
+  double precision, allocatable  :: fock_diag_tmp(:,:)
+
+  allocate(fock_diag_tmp(2,mo_num+1))
+
+  call build_fock_tmp_tc(fock_diag_tmp, psi_det_generators(1,1,i_generator), N_int)
+
+  do k=1,N_int
+      hole_mask(k,1) = iand(generators_bitmask(k,1,s_hole), psi_det_generators(k,1,i_generator))
+      hole_mask(k,2) = iand(generators_bitmask(k,2,s_hole), psi_det_generators(k,2,i_generator))
+      particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) )
+      particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) )
+  enddo
+  call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b,subset,csubset)
+  deallocate(fock_diag_tmp)
+end subroutine select_connected
+
+
+
+double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint)
+  use bitmasks
+  implicit none
+
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: phasemask(Nint,2)
+  integer, intent(in) :: s1, s2, h1, h2, p1, p2
+  logical :: change
+  integer :: np
+  double precision, save :: res(0:1) = (/1d0, -1d0/)
+
+  integer :: h1_int, h2_int
+  integer :: p1_int, p2_int
+  integer :: h1_bit, h2_bit
+  integer :: p1_bit, p2_bit
+  h1_int = shiftr(h1-1,bit_kind_shift)+1
+  h1_bit = h1 - shiftl(h1_int-1,bit_kind_shift)-1
+
+  h2_int = shiftr(h2-1,bit_kind_shift)+1
+  h2_bit = h2 - shiftl(h2_int-1,bit_kind_shift)-1
+
+  p1_int = shiftr(p1-1,bit_kind_shift)+1
+  p1_bit = p1 - shiftl(p1_int-1,bit_kind_shift)-1
+
+  p2_int = shiftr(p2-1,bit_kind_shift)+1
+  p2_bit = p2 - shiftl(p2_int-1,bit_kind_shift)-1
+
+
+  ! Put the phasemask bits at position 0, and add them all
+  h1_bit = int(shiftr(phasemask(h1_int,s1),h1_bit))
+  p1_bit = int(shiftr(phasemask(p1_int,s1),p1_bit))
+  h2_bit = int(shiftr(phasemask(h2_int,s2),h2_bit))
+  p2_bit = int(shiftr(phasemask(p2_int,s2),p2_bit))
+
+  np = h1_bit + p1_bit + h2_bit + p2_bit
+
+  if(p1 < h1) np = np + 1
+  if(p2 < h2) np = np + 1
+
+  if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1
+  get_phase_bi = res(iand(np,1))
+end
+
+
+subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock_diag_tmp, E0, pt2_data, buf, subset, csubset)
+  use bitmasks
+  use selection_types
+  implicit none
+  BEGIN_DOC
+  !  WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted_tc
+  END_DOC
+
+  integer,           intent(in)         :: i_generator, subset, csubset
+  integer(bit_kind), intent(in)         :: hole_mask(N_int,2), particle_mask(N_int,2)
+  double precision,  intent(in)         :: fock_diag_tmp(mo_num)
+  double precision,  intent(in)         :: E0(N_states)
+  type(pt2_type),         intent(inout) :: pt2_data
+  type(selection_buffer), intent(inout) :: buf
+
+  double precision, parameter           :: norm_thr = 1.d-16
+
+  integer                               :: h1, h2, s1, s2, s3, i1, i2, ib, sp, k, i, j, nt, ii, sze
+  integer                               :: maskInd
+  integer                               :: N_holes(2), N_particles(2)
+  integer                               :: hole_list(N_int*bit_kind_size,2)
+  integer                               :: particle_list(N_int*bit_kind_size,2)
+  integer                               :: l_a, nmax, idx
+  integer                               :: nb_count, maskInd_save
+  integer(bit_kind)                     :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2)
+  integer(bit_kind)                     :: mobMask(N_int, 2), negMask(N_int, 2)
+  logical                               :: fullMatch, ok
+  logical                               :: monoAdo, monoBdo
+  logical                               :: monoBdo_save
+  logical                               :: found
+
+  integer, allocatable                  :: preinteresting(:), prefullinteresting(:)
+  integer, allocatable                  :: interesting(:), fullinteresting(:)
+  integer, allocatable                  :: tmp_array(:)
+  integer, allocatable                  :: indices(:), exc_degree(:), iorder(:)
+  integer(bit_kind), allocatable        :: minilist(:, :, :), fullminilist(:, :, :)
+  logical, allocatable                  :: banned(:,:,:), bannedOrb(:,:)
+  double precision, allocatable         :: coef_fullminilist_rev(:,:)
+  double precision, allocatable         :: mat(:,:,:), mat_l(:,:,:), mat_r(:,:,:)
+
+
+  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp_tc
+
+  PROVIDE banned_excitation
+
+  monoAdo = .true.
+  monoBdo = .true.
+
+
+  do k=1,N_int
+    hole    (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1))
+    hole    (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2))
+    particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1))
+    particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2))
+  enddo
+
+  call bitstring_to_list_ab(hole    , hole_list    , N_holes    , N_int)
+  call bitstring_to_list_ab(particle, particle_list, N_particles, N_int)
+
+  allocate( indices(N_det), exc_degree( max(N_det_alpha_unique, N_det_beta_unique) ) )
+
+  ! Pre-compute excitation degrees wrt alpha determinants
+  k=1
+  do i=1,N_det_alpha_unique
+    call get_excitation_degree_spin(psi_det_alpha_unique(1,i),       &
+        psi_det_generators(1,1,i_generator), exc_degree(i), N_int)
+  enddo
+
+  ! Iterate on 0SD beta, and find alphas 0SDTQ such that exc_degree <= 4
+  do j=1,N_det_beta_unique
+    call get_excitation_degree_spin(psi_det_beta_unique(1,j),        &
+        psi_det_generators(1,2,i_generator), nt, N_int)
+    if (nt > 2) cycle
+    do l_a=psi_bilinear_matrix_columns_loc(j), psi_bilinear_matrix_columns_loc(j+1)-1
+      i = psi_bilinear_matrix_rows(l_a)
+      if(nt + exc_degree(i) <= 4) then
+        idx = psi_det_sorted_tc_order(psi_bilinear_matrix_order(l_a))
+!       if (psi_average_norm_contrib_sorted_tc(idx) > norm_thr) then
+          indices(k) = idx
+          k = k + 1
+!       endif
+      endif
+    enddo
+  enddo
+
+  ! Pre-compute excitation degrees wrt beta determinants
+  do i=1,N_det_beta_unique
+    call get_excitation_degree_spin(psi_det_beta_unique(1,i), psi_det_generators(1,2,i_generator), exc_degree(i), N_int)
+  enddo
+
+  ! Iterate on 0S alpha, and find betas TQ such that exc_degree <= 4
+  ! Remove also contributions < 1.d-20)
+  do j=1,N_det_alpha_unique
+    call get_excitation_degree_spin(psi_det_alpha_unique(1,j), psi_det_generators(1,1,i_generator), nt, N_int)
+    if (nt > 1) cycle
+    do l_a = psi_bilinear_matrix_transp_rows_loc(j), psi_bilinear_matrix_transp_rows_loc(j+1)-1
+      i = psi_bilinear_matrix_transp_columns(l_a)
+      if(exc_degree(i) < 3) cycle
+      if(nt + exc_degree(i) <= 4) then
+        idx = psi_det_sorted_tc_order(                                  &
+            psi_bilinear_matrix_order(                               &
+            psi_bilinear_matrix_transp_order(l_a)))
+!       if(psi_average_norm_contrib_sorted_tc(idx) > norm_thr) then
+          indices(k) = idx
+          k = k + 1
+!       endif
+      endif
+    enddo
+  enddo
+
+  deallocate(exc_degree)
+  nmax = k - 1
+
+  call isort_noidx(indices,nmax)
+
+  ! Start with 32 elements. Size will double along with the filtering.
+  allocate(preinteresting(0:32), prefullinteresting(0:32), interesting(0:32), fullinteresting(0:32))
+  preinteresting(:) = 0
+  prefullinteresting(:) = 0
+
+  do i = 1, N_int
+    negMask(i,1) = not(psi_det_generators(i,1,i_generator))
+    negMask(i,2) = not(psi_det_generators(i,2,i_generator))
+  enddo
+
+  do k = 1, nmax
+
+    i = indices(k)
+    mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_tc(1,1,i))
+    mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_tc(1,2,i))
+    nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+    do j = 2, N_int
+      mobMask(j,1) = iand(negMask(j,1), psi_det_sorted_tc(j,1,i))
+      mobMask(j,2) = iand(negMask(j,2), psi_det_sorted_tc(j,2,i))
+      nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
+    enddo
+
+    if(nt <= 4) then
+      if(i <= N_det_selectors) then
+        sze = preinteresting(0)
+        if(sze+1 == size(preinteresting)) then
+          allocate(tmp_array(0:sze))
+          tmp_array(0:sze) = preinteresting(0:sze)
+          deallocate(preinteresting)
+          allocate(preinteresting(0:2*sze))
+          preinteresting(0:sze) = tmp_array(0:sze)
+          deallocate(tmp_array)
+        endif
+        preinteresting(0) = sze+1
+        preinteresting(sze+1) = i
+      elseif(nt <= 2) then
+        sze = prefullinteresting(0)
+        if(sze+1 == size(prefullinteresting)) then
+          allocate (tmp_array(0:sze))
+          tmp_array(0:sze) = prefullinteresting(0:sze)
+          deallocate(prefullinteresting)
+          allocate(prefullinteresting(0:2*sze))
+          prefullinteresting(0:sze) = tmp_array(0:sze)
+          deallocate(tmp_array)
+        endif
+        prefullinteresting(0) = sze+1
+        prefullinteresting(sze+1) = i
+      endif
+    endif
+
+  enddo
+  deallocate(indices)
+
+  allocate( banned(mo_num, mo_num,2), bannedOrb(mo_num, 2) )
+  allocate( mat(N_states, mo_num, mo_num) )
+  allocate( mat_l(N_states, mo_num, mo_num), mat_r(N_states, mo_num, mo_num) )
+  maskInd = -1
+
+
+
+
+  do s1 = 1, 2
+    do i1 = N_holes(s1), 1, -1   ! Generate low excitations first
+
+      found = .False.
+      monoBdo_save = monoBdo
+      maskInd_save = maskInd
+      do s2 = s1, 2
+        ib = 1
+        if(s1 == s2) ib = i1+1
+        do i2 = N_holes(s2), ib, -1
+          maskInd = maskInd + 1
+          if(mod(maskInd, csubset) == (subset-1)) then
+            found = .True.
+          end if
+        enddo
+        if(s1 /= s2) monoBdo = .false.
+      enddo
+
+      if (.not.found) cycle
+      monoBdo = monoBdo_save
+      maskInd = maskInd_save
+
+      h1 = hole_list(i1,s1)
+      call apply_hole(psi_det_generators(1,1,i_generator), s1, h1, pmask, ok, N_int)
+
+      negMask = not(pmask)
+
+      interesting(0) = 0
+      fullinteresting(0) = 0
+
+      do ii = 1, preinteresting(0)
+        i = preinteresting(ii)
+        select case(N_int)
+          case(1)
+            mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_tc(1,1,i))
+            mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_tc(1,2,i))
+            nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+          case(2)
+            mobMask(1:2,1) = iand(negMask(1:2,1), psi_det_sorted_tc(1:2,1,i))
+            mobMask(1:2,2) = iand(negMask(1:2,2), psi_det_sorted_tc(1:2,2,i))
+            nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) +     &
+                popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2))
+          case(3)
+            mobMask(1:3,1) = iand(negMask(1:3,1), psi_det_sorted_tc(1:3,1,i))
+            mobMask(1:3,2) = iand(negMask(1:3,2), psi_det_sorted_tc(1:3,2,i))
+            nt = 0
+            do j = 3, 1, -1
+              if (mobMask(j,1) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 1))
+                if (nt > 4) exit
+              endif
+              if (mobMask(j,2) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 2))
+                if (nt > 4) exit
+              endif
+            enddo
+          case(4)
+            mobMask(1:4,1) = iand(negMask(1:4,1), psi_det_sorted_tc(1:4,1,i))
+            mobMask(1:4,2) = iand(negMask(1:4,2), psi_det_sorted_tc(1:4,2,i))
+            nt = 0
+            do j = 4, 1, -1
+              if (mobMask(j,1) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 1))
+                if (nt > 4) exit
+              endif
+              if (mobMask(j,2) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 2))
+                if (nt > 4) exit
+              endif
+            enddo
+          case default
+            mobMask(1:N_int,1) = iand(negMask(1:N_int,1), psi_det_sorted_tc(1:N_int,1,i))
+            mobMask(1:N_int,2) = iand(negMask(1:N_int,2), psi_det_sorted_tc(1:N_int,2,i))
+            nt = 0
+            do j = N_int, 1, -1
+              if (mobMask(j,1) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 1))
+                if (nt > 4) exit
+              endif
+              if (mobMask(j,2) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 2))
+                if (nt > 4) exit
+              endif
+            enddo
+        end select
+
+        if(nt <= 4) then
+          sze = interesting(0)
+          if(sze+1 == size(interesting)) then
+            allocate (tmp_array(0:sze))
+            tmp_array(0:sze) = interesting(0:sze)
+            deallocate(interesting)
+            allocate(interesting(0:2*sze))
+            interesting(0:sze) = tmp_array(0:sze)
+            deallocate(tmp_array)
+          endif
+          interesting(0) = sze+1
+          interesting(sze+1) = i
+          if(nt <= 2) then
+            sze = fullinteresting(0)
+            if(sze+1 == size(fullinteresting)) then
+              allocate (tmp_array(0:sze))
+              tmp_array(0:sze) = fullinteresting(0:sze)
+              deallocate(fullinteresting)
+              allocate(fullinteresting(0:2*sze))
+              fullinteresting(0:sze) = tmp_array(0:sze)
+              deallocate(tmp_array)
+            endif
+            fullinteresting(0) = sze+1
+            fullinteresting(sze+1) = i
+          endif
+        endif
+
+      enddo
+
+      do ii = 1, prefullinteresting(0)
+        i = prefullinteresting(ii)
+        nt = 0
+        mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_tc(1,1,i))
+        mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_tc(1,2,i))
+        nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+        if (nt > 2) cycle
+        do j=N_int,2,-1
+          mobMask(j,1) = iand(negMask(j,1), psi_det_sorted_tc(j,1,i))
+          mobMask(j,2) = iand(negMask(j,2), psi_det_sorted_tc(j,2,i))
+          nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
+          if (nt > 2) exit
+        end do
+
+        if(nt <= 2) then
+          sze = fullinteresting(0)
+          if (sze+1 == size(fullinteresting)) then
+            allocate (tmp_array(0:sze))
+            tmp_array(0:sze) = fullinteresting(0:sze)
+            deallocate(fullinteresting)
+            allocate(fullinteresting(0:2*sze))
+            fullinteresting(0:sze) = tmp_array(0:sze)
+            deallocate(tmp_array)
+          endif
+          fullinteresting(0) = sze+1
+          fullinteresting(sze+1) = i
+        endif
+      enddo
+      allocate( fullminilist (N_int, 2, fullinteresting(0)), &
+                    minilist (N_int, 2,     interesting(0))  )
+
+      do i = 1, fullinteresting(0)
+        do k = 1, N_int
+          fullminilist(k,1,i) = psi_selectors(k,1,fullinteresting(i))
+          fullminilist(k,2,i) = psi_selectors(k,2,fullinteresting(i))
+        enddo
+      enddo
+
+      do i = 1, interesting(0)
+        do k = 1, N_int
+          minilist(k,1,i) = psi_selectors(k,1,interesting(i))
+          minilist(k,2,i) = psi_selectors(k,2,interesting(i))
+        enddo
+      enddo
+
+      do s2 = s1, 2
+        sp = s1
+
+        if(s1 /= s2) sp = 3
+
+        ib = 1
+        if(s1 == s2) ib = i1+1
+        monoAdo = .true.
+        do i2 = N_holes(s2), ib, -1   ! Generate low excitations first
+
+          h2 = hole_list(i2,s2)
+          call apply_hole(pmask, s2,h2, mask, ok, N_int)
+          banned(:,:,1) = banned_excitation(:,:)
+          banned(:,:,2) = banned_excitation(:,:)
+          do j = 1, mo_num
+            bannedOrb(j, 1) = .true.
+            bannedOrb(j, 2) = .true.
+          enddo
+          do s3 = 1, 2
+            do i = 1, N_particles(s3)
+              bannedOrb(particle_list(i,s3), s3) = .false.
+            enddo
+          enddo
+          if(s1 /= s2) then
+            if(monoBdo) then
+              bannedOrb(h1,s1) = .false.
+            endif
+            if(monoAdo) then
+              bannedOrb(h2,s2) = .false.
+              monoAdo = .false.
+            endif
+          endif
+
+          maskInd = maskInd + 1
+          if(mod(maskInd, csubset) == (subset-1)) then
+
+            call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting)
+            if(fullMatch) cycle
+
+            call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting, mat_l, mat_r)
+
+            call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, mat_l, mat_r)
+          endif
+
+        enddo
+
+        if(s1 /= s2) monoBdo = .false.
+      enddo
+
+      deallocate(fullminilist, minilist)
+
+    enddo
+  enddo
+
+  deallocate(preinteresting, prefullinteresting, interesting, fullinteresting)
+  deallocate(banned, bannedOrb,mat)
+  deallocate(mat_l, mat_r)
+
+end subroutine select_singles_and_doubles
+
+! ---
+
+subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting)
+
+  use bitmasks
+  implicit none
+
+  BEGIN_DOC
+  ! Identify the determinants in det which are in the internal space. These are
+  ! the determinants that can be produced by creating two particles on the mask.
+  END_DOC
+
+  integer, intent(in) :: i_gen, N
+  integer, intent(in) :: interesting(0:N)
+  integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N)
+  logical, intent(inout) :: banned(mo_num, mo_num)
+  logical, intent(out) :: fullMatch
+
+  integer :: i, j, na, nb, list(3)
+  integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2)
+
+  fullMatch = .false.
+
+  do i=1,N_int
+    negMask(i,1) = not(mask(i,1))
+    negMask(i,2) = not(mask(i,2))
+  end do
+
+  genl : do i=1, N
+    ! If det(i) can't be generated by the mask, cycle
+    do j=1, N_int
+      if(iand(det(j,1,i), mask(j,1)) /= mask(j, 1)) cycle genl
+      if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl
+    end do
+
+    ! If det(i) < det(i_gen), it hs already been considered
+    if(interesting(i) < i_gen) then
+      fullMatch = .true.
+      return
+    end if
+
+    ! Identify the particles
+    do j=1, N_int
+      myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1))
+      myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2))
+    end do
+
+    call bitstring_to_list_in_selection(myMask(1,1), list(1), na, N_int)
+    call bitstring_to_list_in_selection(myMask(1,2), list(na+1), nb, N_int)
+    banned(list(1), list(2)) = .true.
+  end do genl
+
+end subroutine spot_isinwf
+
+! ---
+
+subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting, mat_l, mat_r)
+
+  BEGIN_DOC
+  ! Computes the contributions A(r,s) by
+  ! comparing the external determinant to all the internal determinants det(i).
+  ! an applying two particles (r,s) to the mask.
+  END_DOC
+
+  use bitmasks
+  implicit none
+
+  integer, intent(in)             :: sp, i_gen, N_sel
+  integer, intent(in)             :: interesting(0:N_sel)
+  integer(bit_kind),intent(in)    :: mask(N_int, 2), det(N_int, 2, N_sel)
+  logical, intent(inout)          :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num), mat_r(N_states, mo_num, mo_num)
+
+  integer                         :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt
+  integer(bit_kind)               :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2)
+  integer(bit_kind)               :: phasemask(N_int,2)
+
+
+  PROVIDE psi_selectors_coef_transp_tc psi_det_sorted_tc
+
+
+  mat   = 0d0
+  mat_l = 0d0
+  mat_r = 0d0
+
+  do i = 1, N_int
+    negMask(i,1) = not(mask(i,1))
+    negMask(i,2) = not(mask(i,2))
+  end do
+
+!  print*,'in selection '
+  do i = 1, N_sel
+!    call debug_det(det(1,1,i),N_int)
+!    print*,i,dabs(psi_selectors_coef_transp_tc(1,2,i) * psi_selectors_coef_transp_tc(1,1,i))
+    if(interesting(i) < 0) then
+      stop 'prefetch interesting(i) and det(i)'
+    endif
+
+    mobMask(1,1) = iand(negMask(1,1), det(1,1,i))
+    mobMask(1,2) = iand(negMask(1,2), det(1,2,i))
+    nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+
+    if(nt > 4) cycle
+
+    do j = 2, N_int
+      mobMask(j,1) = iand(negMask(j,1), det(j,1,i))
+      mobMask(j,2) = iand(negMask(j,2), det(j,2,i))
+      nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
+    enddo
+
+    if(nt > 4) cycle
+
+    if (interesting(i) == i_gen) then
+      if(sp == 3) then
+        do k = 1, mo_num
+          do j = 1, mo_num
+            banned(j,k,2) = banned(k,j,1)
+          enddo
+        enddo
+      else
+        do k = 1, mo_num
+          do l = k+1, mo_num
+            banned(l,k,1) = banned(k,l,1)
+          enddo
+        enddo
+      endif
+    endif
+
+    if (interesting(i) >= i_gen) then
+
+        call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
+        call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
+
+        perMask(1,1) = iand(mask(1,1), not(det(1,1,i)))
+        perMask(1,2) = iand(mask(1,2), not(det(1,2,i)))
+        do j=2,N_int
+          perMask(j,1) = iand(mask(j,1), not(det(j,1,i)))
+          perMask(j,2) = iand(mask(j,2), not(det(j,2,i)))
+        end do
+
+        call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int)
+        call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int)
+
+        call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int)
+        if(nt == 4) then
+          call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i)))
+        elseif(nt == 3) then
+          call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i)))
+        else
+          call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i)))
+        endif
+    elseif(nt == 4) then
+        call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
+        call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
+        call past_d2(banned, p, sp)
+    elseif(nt == 3) then
+        call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
+        call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
+        call past_d1(bannedOrb, p)
+    endif
+  enddo
+
+end subroutine splash_pq
+
+! ---
+
+subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, mat_l, mat_r)
+
+  use bitmasks
+  use selection_types
+  implicit none
+
+  integer,          intent(in)          :: i_generator, sp, h1, h2
+  double precision, intent(in)          :: mat(N_states, mo_num, mo_num)
+  double precision, intent(in)          :: mat_l(N_states, mo_num, mo_num), mat_r(N_states, mo_num, mo_num)
+  logical,          intent(in)          :: bannedOrb(mo_num, 2), banned(mo_num, mo_num)
+  double precision, intent(in)          :: fock_diag_tmp(mo_num)
+  double precision, intent(in)          :: E0(N_states)
+  type(pt2_type),         intent(inout) :: pt2_data
+  type(selection_buffer), intent(inout) :: buf
+
+  integer                               :: iii, s, degree
+  integer                               :: s1, s2, p1, p2, ib, j, istate, jstate
+  integer                               :: info, k , iwork(N_states+1)
+  integer(bit_kind)                     :: occ(N_int,2), n
+  integer(bit_kind)                     :: mask(N_int, 2), det(N_int, 2)
+  logical                               :: do_cycle, ok, do_diag
+  double precision                      :: delta_E, val, Hii, w, tmp, alpha_h_psi
+  double precision                      :: E_shift
+  double precision                      :: i_h_alpha, alpha_h_i, psi_h_alpha
+  double precision                      :: e_pert(N_states), coef(N_states)
+  double precision                      :: s_weight(N_states,N_states)
+  double precision                      :: eigvalues(N_states+1)
+  double precision                      :: work(1+6*(N_states+1)+2*(N_states+1)**2)
+
+  integer,          external            :: number_of_holes, number_of_particles
+  logical,          external            :: is_a_two_holes_two_particles
+  logical,          external            :: is_a_1h1p
+  double precision, external            :: diag_H_mat_elem_fock
+
+
+  PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs
+
+  do jstate = 1, N_states
+    do istate = 1, N_states
+      s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate))
+    enddo
+  enddo
+
+  if(sp == 3) then
+    s1 = 1
+    s2 = 2
+  else
+    s1 = sp
+    s2 = sp
+  end if
+  call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int)
+  E_shift = 0.d0
+
+  if (h0_type == 'CFG') then
+    j = det_to_configuration(i_generator)
+    E_shift = psi_det_Hii(i_generator) - psi_configuration_Hii(j)
+  endif
+
+  do p1 = 1, mo_num
+
+    if(bannedOrb(p1, s1)) cycle
+    ib = 1
+    if(sp /= 3) ib = p1+1
+
+    do p2 = ib, mo_num
+
+      if(bannedOrb(p2, s2)) cycle
+      if(banned(p1,p2)) cycle
+
+      ! TODO ??
+      !if(pseudo_sym)then
+      !  if(dabs(mat(1, p1, p2)).lt.thresh_sym)then
+      !    w = 0.d0
+      !  endif
+      !endif
+
+      ! MANU: ERREUR dans les calculs puisque < I | H | J > = 0
+      ! n'implique pas < I | H_TC | J > = 0 ??
+      !val = maxval(abs(mat(1:N_states, p1, p2)))
+      !if( val == 0d0) cycle
+
+      call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+
+      if(do_only_cas) then
+        if( number_of_particles(det) > 0 ) cycle
+        if( number_of_holes(det) > 0 ) cycle
+      endif
+
+      if(do_ddci) then
+        if(is_a_two_holes_two_particles(det)) cycle
+      endif
+
+      if(do_only_1h1p) then
+        if(.not.is_a_1h1p(det)) cycle
+      endif
+
+      if(seniority_max >= 0) then
+        s = 0
+        do k = 1, N_int
+          s = s + popcnt(ieor(det(k,1),det(k,2)))
+        enddo
+        if (s > seniority_max) cycle
+      endif
+
+      if(excitation_max >= 0) then
+        do_cycle = .True.
+        if(excitation_ref == 1) then
+          call get_excitation_degree(HF_bitmask, det(1,1), degree, N_int)
+          do_cycle = do_cycle .and. (degree > excitation_max)
+        elseif(excitation_ref == 2) then
+          do k = 1, N_dominant_dets_of_cfgs
+            call get_excitation_degree(dominant_dets_of_cfgs(1,1,k), det(1,1), degree, N_int)
+            do_cycle = do_cycle .and. (degree > excitation_max)
+          enddo
+        endif
+        if(do_cycle) cycle
+      endif
+
+      if(excitation_alpha_max >= 0) then
+        do_cycle = .True.
+        if(excitation_ref == 1) then
+          call get_excitation_degree_spin(HF_bitmask, det(1,1), degree, N_int)
+          do_cycle = do_cycle .and. (degree > excitation_max)
+        elseif (excitation_ref == 2) then
+          do k = 1, N_dominant_dets_of_cfgs
+            call get_excitation_degree_spin(dominant_dets_of_cfgs(1,1,k), det(1,1), degree, N_int)
+            do_cycle = do_cycle .and. (degree > excitation_alpha_max)
+          enddo
+        endif
+        if(do_cycle) cycle
+      endif
+
+      if(excitation_beta_max >= 0) then
+        do_cycle = .True.
+        if(excitation_ref == 1) then
+          call get_excitation_degree_spin(HF_bitmask, det(1,2), degree, N_int)
+          do_cycle = do_cycle .and. (degree > excitation_max)
+        elseif(excitation_ref == 2) then
+          do k = 1, N_dominant_dets_of_cfgs
+            call get_excitation_degree(dominant_dets_of_cfgs(1,2,k), det(1,2), degree, N_int)
+            do_cycle = do_cycle .and. (degree > excitation_beta_max)
+          enddo
+        endif
+        if(do_cycle) cycle
+      endif
+
+
+      w = 0.d0
+
+      e_pert = 0.d0
+      coef = 0.d0
+      do_diag = .False.
+
+      ! psi_det_generators  --> |i> of psi_0
+      ! psi_coef_generators --> c_i of psi_0
+      !
+      ! <alpha|H|psi_0> = \sum_i c_i <alpha|H|i>
+
+        ! -------------------------------------------
+        ! Non hermitian
+        ! c_alpha = <alpha|H(j)|psi_0>/delta_E(alpha)
+        ! e_alpha = c_alpha * <psi_0|H(j)|alpha>
+        ! <alpha|H|psi_0> and <psi_0|H|alpha>
+        ! <det|H(j)|psi_0> and transpose
+        ! -------------------------------------------
+
+!        call htilde_mu_mat_bi_ortho_tot(det, det, N_int, Hii)
+        double precision               :: hmono, htwoe, hthree
+        call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii)
+        do istate = 1,N_states
+          delta_E = E0(istate) - Hii + E_shift
+          double precision               :: alpha_h_psi_tmp, psi_h_alpha_tmp, error
+          if(debug_tc_pt2 == 1)then !! Using the old version
+            psi_h_alpha = 0.d0
+            alpha_h_psi = 0.d0
+            do iii = 1, N_det_selectors
+              call htilde_mu_mat_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha)
+              call htilde_mu_mat_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i)
+              call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int)
+              if(degree == 0)then
+               print*,'problem !!!'
+               print*,'a determinant is already in the wave function !!'
+               print*,'it corresponds to the selector number ',iii
+               call debug_det(det,N_int)
+               stop
+              endif
+!              call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha)
+!              call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i)
+              psi_h_alpha += i_h_alpha * psi_selectors_coef_tc(iii,2,1) ! left function
+              alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function
+            enddo
+          else if(debug_tc_pt2 == 2)then !! debugging the new version
+            psi_h_alpha_tmp = mat_l(istate, p1, p2) ! new version
+            alpha_h_psi_tmp = mat_r(istate, p1, p2) ! new version
+            psi_h_alpha = 0.d0
+            alpha_h_psi = 0.d0
+            do iii = 1, N_det_selectors ! old version
+              call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha)
+              call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i)
+              psi_h_alpha += i_h_alpha * psi_selectors_coef_tc(iii,2,1) ! left function
+              alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function
+            enddo
+            if(dabs(psi_h_alpha*alpha_h_psi/delta_E).gt.1.d-10)then
+              error = dabs(psi_h_alpha * alpha_h_psi - psi_h_alpha_tmp * alpha_h_psi_tmp)/dabs(psi_h_alpha * alpha_h_psi)
+              if(error.gt.1.d-2)then
+                print*,'error =',error,psi_h_alpha * alpha_h_psi/delta_E,psi_h_alpha_tmp * alpha_h_psi_tmp/delta_E
+              endif
+            endif
+          else
+           psi_h_alpha = mat_l(istate, p1, p2)
+           alpha_h_psi = mat_r(istate, p1, p2)
+          endif
+          val = 4.d0 * psi_h_alpha * alpha_h_psi 
+          tmp = dsqrt(delta_E * delta_E + val)
+!          if (delta_E < 0.d0) then
+!              tmp = -tmp
+!          endif
+          e_pert(istate) = 0.25 * val / delta_E
+!          e_pert(istate) = 0.5d0 * (tmp - delta_E)
+          if(dsqrt(dabs(tmp)).gt.1.d-4.and.dabs(alpha_h_psi).gt.1.d-4)then
+           coef(istate)   = e_pert(istate) / psi_h_alpha 
+          else
+           coef(istate)   = alpha_h_psi / delta_E 
+          endif
+
+          if(selection_tc == 1)then
+           if(e_pert(istate).lt.0.d0)then 
+            e_pert(istate)=0.d0
+           else 
+            e_pert(istate)=-e_pert(istate)
+           endif
+          else if(selection_tc == -1)then
+           if(e_pert(istate).gt.0.d0)e_pert(istate)=0.d0
+          endif
+          
+!         if(selection_tc     ==  1 )then
+!          if(e_pert(istate).lt.0.d0)then
+!           e_pert(istate) = 0.d0
+!          endif
+!         else if(selection_tc == -1)then
+!          if(e_pert(istate).gt.0.d0)then
+!           e_pert(istate) = 0.d0
+!          endif
+!         endif
+        enddo
+
+
+
+      do istate = 1, N_states
+
+        alpha_h_psi = mat_r(istate, p1, p2)
+        psi_h_alpha = mat_l(istate, p1, p2)
+
+        pt2_data % overlap(:,istate) = pt2_data % overlap(:,istate) + coef(:) * coef(istate)
+        pt2_data % variance(istate)  = pt2_data % variance(istate) + dabs(e_pert(istate))
+        pt2_data % pt2(istate)       = pt2_data % pt2(istate)      + e_pert(istate)
+
+        select case (weight_selection)
+          case(5)
+            ! Variance selection
+            if (h0_type == 'CFG') then
+              w = min(w, - psi_h_alpha * alpha_h_psi * s_weight(istate,istate)) &
+                / c0_weight(istate)
+            else
+              w = min(w, - psi_h_alpha * alpha_h_psi * s_weight(istate,istate))
+            endif
+          case(6)
+            if (h0_type == 'CFG') then
+              w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) &
+                / c0_weight(istate)
+            else
+              w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate))
+            endif
+          case default
+            ! Energy selection
+            if (h0_type == 'CFG') then
+              !w = min(w, e_pert(istate) * s_weight(istate,istate)) / c0_weight(istate)
+              w = min(w, -dabs(e_pert(istate)) * s_weight(istate,istate)) / c0_weight(istate)
+            else
+              !w = min(w, e_pert(istate) * s_weight(istate,istate))
+              w = min(w, -dabs( e_pert(istate) ) * s_weight(istate,istate))
+            endif
+        endselect
+      enddo
+
+      if(h0_type == 'CFG') then
+        do k = 1, N_int
+          occ(k,1) = ieor(det(k,1), det(k,2))
+          occ(k,2) = iand(det(k,1), det(k,2))
+        enddo
+        call configuration_to_dets_size(occ, n, elec_alpha_num, N_int)
+        n = max(n,1)
+        w *= dsqrt(dble(n))
+      endif
+
+      if(w <= buf%mini) then
+        call add_to_selection_buffer(buf, det, w)
+      endif
+
+    enddo ! end do p2
+  enddo   ! end do p1
+
+end subroutine fill_buffer_double
+
+! ---
+
+
+subroutine past_d1(bannedOrb, p)
+
+  use bitmasks
+  implicit none
+
+  logical, intent(inout) :: bannedOrb(mo_num, 2)
+  integer, intent(in) :: p(0:4, 2)
+  integer :: i,s
+
+  do s = 1, 2
+    do i = 1, p(0, s)
+      bannedOrb(p(i, s), s) = .true.
+    end do
+  end do
+
+end subroutine past_d1
+
+! ---
+
+subroutine past_d2(banned, p, sp)
+
+  use bitmasks
+  implicit none
+
+  logical, intent(inout) :: banned(mo_num, mo_num)
+  integer, intent(in) :: p(0:4, 2), sp
+  integer :: i,j
+
+  if(sp == 3) then
+    do j=1,p(0,2)
+      do i=1,p(0,1)
+        banned(p(i,1), p(j,2)) = .true.
+      end do
+    end do
+  else
+    do i=1,p(0, sp)
+      do j=1,i-1
+        banned(p(j,sp), p(i,sp)) = .true.
+        banned(p(i,sp), p(j,sp)) = .true.
+      end do
+    end do
+  end if
+
+end subroutine past_d2
+
+! ---
+
+subroutine bitstring_to_list_in_selection( string, list, n_elements, Nint)
+
+  BEGIN_DOC
+  ! Gives the inidices(+1) of the bits set to 1 in the bit string
+  END_DOC
+
+  use bitmasks
+  implicit none
+
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: string(Nint)
+  integer, intent(out)           :: list(Nint*bit_kind_size)
+  integer, intent(out)           :: n_elements
+
+  integer                        :: i, ishift
+  integer(bit_kind)              :: l
+
+  n_elements = 0
+  ishift = 2
+  do i=1,Nint
+    l = string(i)
+    do while (l /= 0_bit_kind)
+      n_elements = n_elements+1
+      list(n_elements) = ishift+popcnt(l-1_bit_kind) - popcnt(l)
+      l = iand(l,l-1_bit_kind)
+    enddo
+    ishift = ishift + bit_kind_size
+  enddo
+
+end subroutine bitstring_to_list_in_selection
+
+! ---
+
diff --git a/src/cipsi_tc_bi_ortho/selection_buffer.irp.f b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f
new file mode 100644
index 00000000..0bd51464
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f
@@ -0,0 +1,424 @@
+
+subroutine create_selection_buffer(N, size_in, res)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Allocates the memory for a selection buffer.
+! The arrays have dimension size_in and the maximum number of elements is N
+  END_DOC
+
+  integer, intent(in) :: N, size_in
+  type(selection_buffer), intent(out) :: res
+
+  integer :: siz
+  siz = max(size_in,1)
+
+  double precision :: rss
+  double precision, external :: memory_of_double
+  rss = memory_of_double(siz)*(N_int*2+1)
+  call check_mem(rss,irp_here)
+
+  allocate(res%det(N_int, 2, siz), res%val(siz))
+
+  res%val(:) = 0d0
+  res%det(:,:,:) = 0_8
+  res%N = N
+  res%mini = 0d0
+  res%cur = 0
+end subroutine
+
+subroutine delete_selection_buffer(b)
+  use selection_types
+  implicit none
+  type(selection_buffer), intent(inout) :: b
+  if (associated(b%det)) then
+    deallocate(b%det)
+  endif
+  if (associated(b%val)) then
+    deallocate(b%val)
+  endif
+  NULLIFY(b%det)
+  NULLIFY(b%val)
+  b%cur = 0
+  b%mini = 0.d0
+  b%N = 0
+end
+
+
+subroutine add_to_selection_buffer(b, det, val)
+  use selection_types
+  implicit none
+
+  type(selection_buffer), intent(inout) :: b
+  integer(bit_kind), intent(in) :: det(N_int, 2)
+  double precision, intent(in) :: val
+  integer :: i
+
+  if(b%N > 0 .and. val <= b%mini) then
+    b%cur += 1
+    b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2)
+    b%val(b%cur) = val
+    if(b%cur == size(b%val)) then
+      call sort_selection_buffer(b)
+    end if
+  end if
+end subroutine
+
+subroutine merge_selection_buffers(b1, b2)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Merges the selection buffers b1 and b2 into b2
+  END_DOC
+  type(selection_buffer), intent(inout) :: b1
+  type(selection_buffer), intent(inout) :: b2
+  integer(bit_kind), pointer     :: detmp(:,:,:)
+  double precision, pointer      :: val(:)
+  integer                        :: i, i1, i2, k, nmwen, sze
+  if (b1%cur == 0) return
+  do while (b1%val(b1%cur) > b2%mini)
+    b1%cur = b1%cur-1
+    if (b1%cur == 0) then
+      return
+    endif
+  enddo
+  nmwen = min(b1%N, b1%cur+b2%cur)
+  double precision :: rss
+  double precision, external :: memory_of_double
+  sze = max(size(b1%val), size(b2%val))
+  rss = memory_of_double(sze) + 2*N_int*memory_of_double(sze)
+  call check_mem(rss,irp_here)
+  allocate(val(sze), detmp(N_int, 2, sze))
+  i1=1
+  i2=1
+  do i=1,nmwen
+    if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then
+      exit
+    else if (i1 > b1%cur) then
+        val(i) = b2%val(i2)
+        detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2)
+        detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2)
+        i2=i2+1
+    else if (i2 > b2%cur) then
+        val(i) = b1%val(i1)
+        detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1)
+        detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1)
+        i1=i1+1
+    else
+      if (b1%val(i1) <= b2%val(i2)) then
+        val(i) = b1%val(i1)
+        detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1)
+        detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1)
+        i1=i1+1
+      else
+        val(i) = b2%val(i2)
+        detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2)
+        detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2)
+        i2=i2+1
+      endif
+    endif
+  enddo
+  deallocate(b2%det, b2%val)
+  do i=nmwen+1,b2%N
+    val(i) = 0.d0
+    detmp(1:N_int,1:2,i) = 0_bit_kind
+  enddo
+  b2%det => detmp
+  b2%val => val
+!  if(selection_tc == 1)then
+!   b2%mini = max(b2%mini,b2%val(b2%N))
+!  else
+   b2%mini = min(b2%mini,b2%val(b2%N))
+!  endif
+  b2%cur = nmwen
+end
+
+
+subroutine sort_selection_buffer(b)
+  use selection_types
+  implicit none
+
+  type(selection_buffer), intent(inout) :: b
+  integer, allocatable :: iorder(:)
+  integer(bit_kind), pointer :: detmp(:,:,:)
+  integer :: i, nmwen
+  logical, external :: detEq
+  if (b%N == 0 .or. b%cur == 0) return
+  nmwen = min(b%N, b%cur)
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = memory_of_int(b%cur) + 2*N_int*memory_of_double(size(b%det,3))
+  call check_mem(rss,irp_here)
+  allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3)))
+  do i=1,b%cur
+    iorder(i) = i
+  end do
+  call dsort(b%val, iorder, b%cur)
+  do i=1, nmwen
+    detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i))
+    detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i))
+  end do
+  deallocate(b%det,iorder)
+  b%det => detmp
+!  if(selection_tc == 1)then
+!   b%mini = max(b%mini,b%val(b%N))
+!  else
+   b%mini = min(b%mini,b%val(b%N))
+!  endif
+  b%cur = nmwen
+end subroutine
+
+subroutine make_selection_buffer_s2(b)
+  use selection_types
+  type(selection_buffer), intent(inout) :: b
+
+  integer(bit_kind), allocatable :: o(:,:,:)
+  double precision, allocatable  :: val(:)
+
+  integer :: n_d
+  integer :: i,k,sze,n_alpha,j,n
+  logical                        :: dup
+
+  ! Sort
+  integer, allocatable           :: iorder(:)
+  integer*8, allocatable         :: bit_tmp(:)
+  integer*8, external            :: configuration_search_key
+  integer(bit_kind), allocatable :: tmp_array(:,:,:)
+  logical, allocatable           :: duplicate(:)
+
+  n_d = b%cur
+  double precision :: rss
+  double precision, external :: memory_of_double
+  rss = (4*N_int+4)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+  allocate(o(N_int,2,n_d), iorder(n_d), duplicate(n_d), bit_tmp(n_d), &
+           tmp_array(N_int,2,n_d), val(n_d) )
+
+  do i=1,n_d
+    do k=1,N_int
+      o(k,1,i) = ieor(b%det(k,1,i), b%det(k,2,i))
+      o(k,2,i) = iand(b%det(k,1,i), b%det(k,2,i))
+    enddo
+    iorder(i) = i
+    bit_tmp(i) = configuration_search_key(o(1,1,i),N_int)
+  enddo
+
+  deallocate(b%det)
+
+  call i8sort(bit_tmp,iorder,n_d)
+
+  do i=1,n_d
+    do k=1,N_int
+      tmp_array(k,1,i) = o(k,1,iorder(i))
+      tmp_array(k,2,i) = o(k,2,iorder(i))
+    enddo
+    val(i) = b%val(iorder(i))
+    duplicate(i) = .False.
+  enddo
+
+  ! Find duplicates
+  do i=1,n_d-1
+    if (duplicate(i)) then
+      cycle
+    endif
+    j = i+1
+    do while (bit_tmp(j)==bit_tmp(i))
+      if (duplicate(j)) then
+        j+=1
+        if (j>n_d) then
+          exit
+        endif
+        cycle
+      endif
+      dup = .True.
+      do k=1,N_int
+        if ( (tmp_array(k,1,i) /= tmp_array(k,1,j))                   &
+              .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then
+          dup = .False.
+          exit
+        endif
+      enddo
+      if (dup) then
+        val(i) = max(val(i), val(j))
+        duplicate(j) = .True.
+      endif
+      j+=1
+      if (j>n_d) then
+        exit
+      endif
+    enddo
+  enddo
+
+  deallocate (b%val)
+  ! Copy filtered result
+  integer :: n_p
+  n_p=0
+  do i=1,n_d
+    if (duplicate(i)) then
+      cycle
+    endif
+    n_p = n_p + 1
+    do k=1,N_int
+      o(k,1,n_p) = tmp_array(k,1,i)
+      o(k,2,n_p) = tmp_array(k,2,i)
+    enddo
+    val(n_p) = val(i)
+  enddo
+
+  ! Sort by importance
+  do i=1,n_p
+    iorder(i) = i
+  end do
+  call dsort(val,iorder,n_p)
+  do i=1,n_p
+    do k=1,N_int
+      tmp_array(k,1,i) = o(k,1,iorder(i))
+      tmp_array(k,2,i) = o(k,2,iorder(i))
+    enddo
+  enddo
+  do i=1,n_p
+    do k=1,N_int
+      o(k,1,i) = tmp_array(k,1,i)
+      o(k,2,i) = tmp_array(k,2,i)
+    enddo
+  enddo
+
+  ! Create determinants
+  n_d = 0
+  do i=1,n_p
+    call configuration_to_dets_size(o(1,1,i),sze,elec_alpha_num,N_int)
+    n_d = n_d + sze
+    if (n_d > b%cur) then
+!      if (n_d - b%cur > b%cur - n_d + sze) then
+!        n_d = n_d - sze
+!      endif
+      exit
+    endif
+  enddo
+
+  rss = (4*N_int+2)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+  allocate(b%det(N_int,2,2*n_d), b%val(2*n_d))
+  k=1
+  do i=1,n_p
+    n=n_d
+    call configuration_to_dets_size(o(1,1,i),n,elec_alpha_num,N_int)
+    call configuration_to_dets(o(1,1,i),b%det(1,1,k),n,elec_alpha_num,N_int)
+    do j=k,k+n-1
+      b%val(j) = val(i)
+    enddo
+    k = k+n
+    if (k > n_d) exit
+  enddo
+  deallocate(o)
+  b%cur = n_d
+  b%N = n_d
+end
+
+
+
+
+subroutine remove_duplicates_in_selection_buffer(b)
+  use selection_types
+  type(selection_buffer), intent(inout) :: b
+
+  integer(bit_kind), allocatable :: o(:,:,:)
+  double precision, allocatable  :: val(:)
+
+  integer :: n_d
+  integer :: i,k,sze,n_alpha,j,n
+  logical                        :: dup
+
+  ! Sort
+  integer, allocatable           :: iorder(:)
+  integer*8, allocatable         :: bit_tmp(:)
+  integer*8, external            :: det_search_key
+  integer(bit_kind), allocatable :: tmp_array(:,:,:)
+  logical, allocatable           :: duplicate(:)
+
+  n_d = b%cur
+  logical                        :: found_duplicates
+  double precision               :: rss
+  double precision, external     :: memory_of_double
+  rss = (4*N_int+4)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+
+  found_duplicates = .False.
+  allocate(iorder(n_d), duplicate(n_d), bit_tmp(n_d), &
+           tmp_array(N_int,2,n_d), val(n_d) )
+
+  do i=1,n_d
+    iorder(i) = i
+    bit_tmp(i) = det_search_key(b%det(1,1,i),N_int)
+  enddo
+
+  call i8sort(bit_tmp,iorder,n_d)
+
+  do i=1,n_d
+    do k=1,N_int
+      tmp_array(k,1,i) = b%det(k,1,iorder(i))
+      tmp_array(k,2,i) = b%det(k,2,iorder(i))
+    enddo
+    val(i) = b%val(iorder(i))
+    duplicate(i) = .False.
+  enddo
+
+  ! Find duplicates
+  do i=1,n_d-1
+    if (duplicate(i)) then
+      cycle
+    endif
+    j = i+1
+    do while (bit_tmp(j)==bit_tmp(i))
+      if (duplicate(j)) then
+        j+=1
+        if (j>n_d) then
+          exit
+        endif
+        cycle
+      endif
+      dup = .True.
+      do k=1,N_int
+        if ( (tmp_array(k,1,i) /= tmp_array(k,1,j))                   &
+              .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then
+          dup = .False.
+          exit
+        endif
+      enddo
+      if (dup) then
+        duplicate(j) = .True.
+        found_duplicates = .True.
+      endif
+      j+=1
+      if (j>n_d) then
+        exit
+      endif
+    enddo
+  enddo
+
+  if (found_duplicates) then
+
+    ! Copy filtered result
+    integer :: n_p
+    n_p=0
+    do i=1,n_d
+      if (duplicate(i)) then
+        cycle
+      endif
+      n_p = n_p + 1
+      do k=1,N_int
+        b%det(k,1,n_p) = tmp_array(k,1,i)
+        b%det(k,2,n_p) = tmp_array(k,2,i)
+      enddo
+      val(n_p) = val(i)
+    enddo
+    b%cur=n_p
+    b%N=n_p
+
+  endif
+
+end
+
+
+
diff --git a/src/cipsi_tc_bi_ortho/selection_types.f90 b/src/cipsi_tc_bi_ortho/selection_types.f90
new file mode 100644
index 00000000..58ce0e03
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/selection_types.f90
@@ -0,0 +1,25 @@
+module selection_types
+  type selection_buffer
+    integer :: N, cur
+    integer(8)      , pointer :: det(:,:,:)
+    double precision, pointer :: val(:)
+    double precision          :: mini
+  endtype
+
+  type pt2_type
+    double precision, allocatable :: pt2(:)
+    double precision, allocatable :: rpt2(:)
+    double precision, allocatable :: variance(:)
+    double precision, allocatable :: overlap(:,:)
+  endtype
+
+  contains
+
+  integer function pt2_type_size(N)
+    implicit none
+    integer, intent(in) :: N
+    pt2_type_size = (3*n + n*n)
+  end function
+
+end module
+
diff --git a/src/cipsi_tc_bi_ortho/selection_weight.irp.f b/src/cipsi_tc_bi_ortho/selection_weight.irp.f
new file mode 100644
index 00000000..3c09e59a
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/selection_weight.irp.f
@@ -0,0 +1,134 @@
+BEGIN_PROVIDER [ double precision, pt2_match_weight, (N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Weights adjusted along the selection to make the PT2 contributions
+ ! of each state coincide.
+ END_DOC
+ pt2_match_weight(:) = 1.d0
+END_PROVIDER
+
+
+
+BEGIN_PROVIDER [ double precision, variance_match_weight, (N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Weights adjusted along the selection to make the variances
+ ! of each state coincide.
+ END_DOC
+ variance_match_weight(:) = 1.d0
+END_PROVIDER
+
+
+
+subroutine update_pt2_and_variance_weights(pt2_data, N_st)
+  implicit none
+  use selection_types
+  BEGIN_DOC
+! Updates the PT2- and Variance- matching weights.
+  END_DOC
+  integer, intent(in)          :: N_st
+  type(pt2_type), intent(in)   :: pt2_data
+  double precision             :: pt2(N_st)
+  double precision             :: variance(N_st)
+
+  double precision :: avg, element, dt, x
+  integer          :: k
+  pt2(:)      = pt2_data % pt2(:)
+  variance(:) = pt2_data % variance(:)
+
+  avg = sum(pt2(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero
+
+  dt = 8.d0 !* selection_factor
+  do k=1,N_st
+    element = exp(dt*(pt2(k)/avg - 1.d0))
+    element = min(2.0d0 , element)
+    element = max(0.5d0 , element)
+    pt2_match_weight(k) *= element
+  enddo
+
+
+  avg = sum(variance(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero
+
+  do k=1,N_st
+    element = exp(dt*(variance(k)/avg -1.d0))
+    element = min(2.0d0 , element)
+    element = max(0.5d0 , element)
+    variance_match_weight(k) *= element
+  enddo
+
+  if (N_det < 100) then
+    ! For tiny wave functions, weights are 1.d0
+    pt2_match_weight(:) = 1.d0
+    variance_match_weight(:) = 1.d0
+  endif
+
+  threshold_davidson_pt2 = min(1.d-6, &
+     max(threshold_davidson, 1.e-1 * PT2_relative_error * minval(abs(pt2(1:N_states)))) )
+
+  SOFT_TOUCH pt2_match_weight variance_match_weight threshold_davidson_pt2
+end
+
+
+
+
+BEGIN_PROVIDER [ double precision, selection_weight, (N_states) ]
+   implicit none
+   BEGIN_DOC
+   ! Weights used in the selection criterion
+   END_DOC
+   select case (weight_selection)
+
+     case (0)
+      print *,  'Using input weights in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * state_average_weight(1:N_states)
+
+     case (1)
+      print *,  'Using 1/c_max^2 weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states)
+
+     case (2)
+      print *,  'Using pt2-matching weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * pt2_match_weight(1:N_states)
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+
+     case (3)
+      print *,  'Using variance-matching weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (4)
+      print *,  'Using variance- and pt2-matching weights in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * sqrt(variance_match_weight(1:N_states) * pt2_match_weight(1:N_states))
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (5)
+      print *,  'Using variance-matching weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (6)
+      print *,  'Using CI coefficient-based selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states)
+
+     case (7)
+      print *,  'Input weights multiplied by variance- and pt2-matching'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * sqrt(variance_match_weight(1:N_states) * pt2_match_weight(1:N_states)) * state_average_weight(1:N_states)
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (8)
+      print *,  'Input weights multiplied by pt2-matching'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * pt2_match_weight(1:N_states) * state_average_weight(1:N_states)
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+
+     case (9)
+      print *,  'Input weights multiplied by variance-matching'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states) * state_average_weight(1:N_states)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+    end select
+     print *, '# Total weight ', real(selection_weight(:),4)
+
+END_PROVIDER
+
diff --git a/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f
new file mode 100644
index 00000000..6343bf8b
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f
@@ -0,0 +1,348 @@
+subroutine run_slave_cipsi
+
+  BEGIN_DOC
+  ! Helper program for distributed parallelism
+  END_DOC
+
+  implicit none
+
+  call omp_set_max_active_levels(1)
+  distributed_davidson = .False.
+  read_wf = .False.
+  SOFT_TOUCH read_wf distributed_davidson
+  call provide_everything
+  call switch_qp_run_to_master
+  call run_slave_main
+end
+
+subroutine provide_everything
+  PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag
+
+  PROVIDE pt2_e0_denominator mo_num N_int ci_energy mpi_master zmq_state zmq_context
+  PROVIDE psi_det psi_coef threshold_generators state_average_weight
+  PROVIDE N_det_selectors pt2_stoch_istate N_det selection_weight pseudo_sym
+end
+
+
+subroutine run_slave_main
+
+  use f77_zmq
+
+  implicit none
+  IRP_IF MPI
+    include 'mpif.h'
+  IRP_ENDIF
+
+  integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR) :: zmq_to_qp_run_socket
+  double precision :: energy(N_states)
+  character*(64) :: states(10)
+  character*(64) :: old_state
+  integer :: rc, i, ierr
+  double precision :: t0, t1
+
+  integer, external              :: zmq_get_dvector, zmq_get_N_det_generators
+  integer, external              :: zmq_get8_dvector
+  integer, external              :: zmq_get_ivector
+  integer, external              :: zmq_get_psi, zmq_get_N_det_selectors, zmq_get_psi_bilinear
+  integer, external              :: zmq_get_psi_notouch
+  integer, external              :: zmq_get_N_states_diag
+
+  zmq_context = f77_zmq_ctx_new ()
+  states(1) = 'selection'
+  states(2) = 'davidson'
+  states(3) = 'pt2'
+  old_state = 'Waiting'
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  PROVIDE psi_det psi_coef threshold_generators state_average_weight mpi_master
+  PROVIDE zmq_state N_det_selectors pt2_stoch_istate N_det pt2_e0_denominator
+  PROVIDE N_det_generators N_states N_states_diag pt2_e0_denominator mpi_rank
+
+  IRP_IF MPI
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  do
+
+    if (mpi_master) then
+      call wait_for_states(states,zmq_state,size(states))
+      if (zmq_state(1:64) == old_state(1:64)) then
+        call usleep(200)
+        cycle
+      else
+        old_state(1:64) = zmq_state(1:64)
+      endif
+      print *,  trim(zmq_state)
+    endif
+
+    IRP_IF MPI_DEBUG
+      print *,  irp_here, mpi_rank
+      call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+    IRP_ENDIF
+    IRP_IF MPI
+      call MPI_BCAST (zmq_state, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        print *,  irp_here, 'error in broadcast of zmq_state'
+      endif
+    IRP_ENDIF
+
+    if(zmq_state(1:7) == 'Stopped') then
+      exit
+    endif
+
+
+    if (zmq_state(1:9) == 'selection') then
+
+      ! Selection
+      ! ---------
+
+      call wall_time(t0)
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_psi')
+      IRP_ENDIF
+      if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector threshold_generators')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector energy')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_generators')
+      IRP_ENDIF
+      if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_selectors')
+      IRP_ENDIF
+      if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector state_average_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector selection_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) cycle
+      pt2_e0_denominator(1:N_states) = energy(1:N_states)
+      TOUCH pt2_e0_denominator state_average_weight threshold_generators selection_weight psi_det psi_coef
+
+      if (mpi_master) then
+        print *,  'N_det', N_det
+        print *,  'N_det_generators', N_det_generators
+        print *,  'N_det_selectors', N_det_selectors
+        print *,  'pt2_e0_denominator', pt2_e0_denominator
+        print *,  'pt2_stoch_istate', pt2_stoch_istate
+        print *,  'state_average_weight', state_average_weight
+        print *,  'selection_weight', selection_weight
+      endif
+      call wall_time(t1)
+      call write_double(6,(t1-t0),'Broadcast time')
+
+      IRP_IF MPI_DEBUG
+        call mpi_print('Entering OpenMP section')
+      IRP_ENDIF
+      !$OMP PARALLEL PRIVATE(i)
+      i = omp_get_thread_num()
+      call run_selection_slave(0,i,energy)
+      !$OMP END PARALLEL
+      print *,  mpi_rank, ': Selection done'
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call mpi_print('----------')
+
+    else if (zmq_state(1:8) == 'davidson') then
+
+      ! Davidson
+      ! --------
+
+      call wall_time(t0)
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_states_diag')
+      IRP_ENDIF
+      if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_psi')
+      IRP_ENDIF
+      if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+
+      call wall_time(t1)
+      call write_double(6,(t1-t0),'Broadcast time')
+
+      !---
+      call omp_set_max_active_levels(8)
+      call davidson_slave_tcp(0)
+      call omp_set_max_active_levels(1)
+      print *,  mpi_rank, ': Davidson done'
+      !---
+
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call mpi_print('----------')
+
+    else if (zmq_state(1:3) == 'pt2') then
+
+      ! PT2
+      ! ---
+
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call wall_time(t0)
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_psi')
+      IRP_ENDIF
+      if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_generators')
+      IRP_ENDIF
+      if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_selectors')
+      IRP_ENDIF
+      if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector threshold_generators')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector energy')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_ivector pt2_stoch_istate')
+      IRP_ENDIF
+      if (zmq_get_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector state_average_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector selection_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) cycle
+      pt2_e0_denominator(1:N_states) = energy(1:N_states)
+      SOFT_TOUCH pt2_e0_denominator state_average_weight pt2_stoch_istate threshold_generators selection_weight psi_det psi_coef N_det_generators N_det_selectors
+
+
+      call wall_time(t1)
+      call write_double(6,(t1-t0),'Broadcast time')
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+
+
+      IRP_IF MPI_DEBUG
+        call mpi_print('Entering OpenMP section')
+      IRP_ENDIF
+      if (.true.) then
+        integer :: nproc_target, ii
+        double precision :: mem_collector, mem, rss
+
+        call resident_memory(rss)
+
+        nproc_target = nthreads_pt2
+        ii = min(N_det, (elec_alpha_num*(mo_num-elec_alpha_num))**2)
+
+        do
+          mem = rss +                             & !
+                nproc_target * 8.d0 *             & ! bytes
+                ( 0.5d0*pt2_n_tasks_max           & ! task_id
+                + 64.d0*pt2_n_tasks_max           & ! task
+                + 3.d0*pt2_n_tasks_max*N_states   & ! pt2, variance, norm
+                + 1.d0*pt2_n_tasks_max            & ! i_generator, subset
+                + 3.d0*(N_int*2.d0*ii+ ii)        & ! selection buffer
+                + 1.d0*(N_int*2.d0*ii+ ii)        & ! sort selection buffer
+                + 2.0d0*(ii)                      & ! preinteresting, interesting,
+                                                    ! prefullinteresting, fullinteresting
+                + 2.0d0*(N_int*2*ii)              & ! minilist, fullminilist
+                + 1.0d0*(N_states*mo_num*mo_num)  & ! mat
+                ) / 1024.d0**3
+
+          if (nproc_target == 0) then
+            call check_mem(mem,irp_here)
+            nproc_target = 1
+            exit
+          endif
+
+          if (mem+rss < qp_max_mem) then
+            exit
+          endif
+
+          nproc_target = nproc_target - 1
+
+        enddo
+        
+        if (N_det > 100000) then
+
+          if (mpi_master) then
+            print *,  'N_det', N_det
+            print *,  'N_det_generators', N_det_generators
+            print *,  'N_det_selectors', N_det_selectors
+            print *,  'pt2_e0_denominator', pt2_e0_denominator
+            print *,  'pt2_stoch_istate', pt2_stoch_istate
+            print *,  'state_average_weight', state_average_weight
+            print *,  'selection_weight', selection_weight
+            print *,  'Number of threads', nproc_target
+          endif
+
+          if (h0_type == 'CFG') then
+            PROVIDE det_to_configuration
+          endif
+
+          PROVIDE global_selection_buffer pt2_N_teeth pt2_F N_det_generators
+          PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+          PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+          PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+          PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted_tc
+
+          PROVIDE psi_det_hii selection_weight pseudo_sym pt2_min_parallel_tasks
+
+          if (mpi_master) then
+            print *,  'Running PT2'
+          endif
+          !$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1)
+          i = omp_get_thread_num()
+          call run_pt2_slave(0,i,pt2_e0_denominator)
+          !$OMP END PARALLEL
+          FREE state_average_weight
+          print *,  mpi_rank, ': PT2 done'
+          print *,  '-------'
+
+        endif
+      endif
+
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call mpi_print('----------')
+
+    endif
+
+  end do
+  IRP_IF MPI
+    call MPI_finalize(ierr)
+  IRP_ENDIF
+end
+
+
+
diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
new file mode 100644
index 00000000..1f4fe849
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
@@ -0,0 +1,152 @@
+subroutine run_stochastic_cipsi
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Selected Full Configuration Interaction with Stochastic selection and PT2.
+  END_DOC
+  integer                        :: i,j,k,ndet
+  double precision, allocatable  :: zeros(:)
+  integer                        :: to_select
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  logical, external              :: qp_stop
+  logical                        :: print_pt2
+
+  double precision :: rss
+  double precision, external :: memory_of_double
+  double precision :: correlation_energy_ratio,E_denom,E_tc,norm
+  double precision, allocatable :: ept2(:), pt1(:),extrap_energy(:)
+  PROVIDE H_apply_buffer_allocated distributed_davidson 
+
+  print*,'Diagonal elements of the Fock matrix '
+  do i = 1, mo_num
+   write(*,*)i,Fock_matrix_tc_mo_tot(i,i)
+  enddo
+  N_iter = 1
+  threshold_generators = 1.d0
+  SOFT_TOUCH threshold_generators
+
+  rss = memory_of_double(N_states)*4.d0
+  call check_mem(rss,irp_here)
+
+  allocate (zeros(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+
+  double precision               :: hf_energy_ref
+  logical                        :: has
+  double precision               :: relative_error
+
+  relative_error=PT2_relative_error
+
+  zeros = 0.d0
+  pt2_data % pt2   = -huge(1.e0)
+  pt2_data % rpt2  = -huge(1.e0)
+  pt2_data % overlap= 0.d0
+  pt2_data % variance = huge(1.e0)
+
+  !!!! WARNING  !!!! SEEMS TO BE PROBLEM WTH make_s2_eigenfunction !!!! THE DETERMINANTS CAN APPEAR TWICE IN THE WFT DURING SELECTION
+!  if (s2_eig) then
+!    call make_s2_eigenfunction
+!  endif
+  print_pt2 = .False.
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  call routine_save_right
+
+
+!  if (N_det > N_det_max) then
+!    psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det)
+!    psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states)
+!    N_det = N_det_max
+!    soft_touch N_det psi_det psi_coef
+!   if (s2_eig) then
+!     call make_s2_eigenfunction
+!   endif
+!    print_pt2 = .False.
+!    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!    call routine_save_right
+!  endif
+
+  allocate(ept2(1000),pt1(1000),extrap_energy(100))
+
+  correlation_energy_ratio = 0.d0
+
+! thresh_it_dav  = 5.d-5
+! soft_touch thresh_it_dav
+
+  print_pt2 = .True.
+  do while (                                                         &
+        (N_det < N_det_max) .and.                                    &
+        (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max)          &
+        )
+      print*,'maxval(abs(pt2_data % pt2(1:N_states)))',maxval(abs(pt2_data % pt2(1:N_states)))
+      print*,pt2_max
+      write(*,'(A)')  '--------------------------------------------------------------------------------'
+
+
+    to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
+    to_select = max(N_states_diag, to_select)
+
+    E_denom = E_tc ! TC Energy of the current wave function 
+    call pt2_dealloc(pt2_data)
+    call pt2_dealloc(pt2_data_err)
+    call pt2_alloc(pt2_data, N_states)
+    call pt2_alloc(pt2_data_err, N_states)
+    call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection
+!    stop
+
+    N_iter += 1
+
+    if (qp_stop()) exit
+
+    ! Add selected determinants
+    call copy_H_apply_buffer_to_wf_tc()
+
+    PROVIDE  psi_l_coef_bi_ortho psi_r_coef_bi_ortho
+    PROVIDE  psi_det
+    PROVIDE  psi_det_sorted_tc
+
+    ept2(N_iter-1) = E_tc + nuclear_repulsion + (pt2_data % pt2(1))/norm
+    pt1(N_iter-1) = dsqrt(pt2_data % overlap(1,1))
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!    stop
+    if (qp_stop()) exit
+  enddo
+!  print*,'data to extrapolate '
+!  do i = 2, N_iter
+!   print*,'iteration ',i
+!   print*,'pt1,Ept2',pt1(i),ept2(i)
+!   call get_extrapolated_energy(i-1,ept2(i),pt1(i),extrap_energy(i))
+!   do j = 2, i
+!    print*,'j,e,energy',j,extrap_energy(j)
+!   enddo
+!  enddo
+
+! thresh_it_dav  = 5.d-6
+! soft_touch thresh_it_dav
+
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+  call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  if (.not.qp_stop()) then
+!    if (N_det < N_det_max) then
+!     thresh_it_dav  = 5.d-7
+!     soft_touch thresh_it_dav
+!     call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!    endif
+!
+!    call pt2_dealloc(pt2_data)
+!    call pt2_dealloc(pt2_data_err)
+!    call pt2_alloc(pt2_data, N_states)
+!    call pt2_alloc(pt2_data_err, N_states)
+!    call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2
+!    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  endif
+!  call pt2_dealloc(pt2_data)
+!  call pt2_dealloc(pt2_data_err)
+!  call routine_save_right
+
+end
+
diff --git a/src/cipsi_tc_bi_ortho/zmq_selection.irp.f b/src/cipsi_tc_bi_ortho/zmq_selection.irp.f
new file mode 100644
index 00000000..dc3e0f27
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/zmq_selection.irp.f
@@ -0,0 +1,235 @@
+subroutine ZMQ_selection(N_in, pt2_data)
+  use f77_zmq
+  use selection_types
+
+  implicit none
+
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket , zmq_socket_pull
+  integer, intent(in)            :: N_in
+  type(selection_buffer)         :: b
+  integer                        :: i, l, N
+  integer, external              :: omp_get_thread_num
+  type(pt2_type), intent(inout)  :: pt2_data
+
+  PROVIDE psi_det psi_coef N_det qp_max_mem N_states pt2_F s2_eig N_det_generators
+
+  N = max(N_in,1)
+  N = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
+  if (.True.) then
+    PROVIDE pt2_e0_denominator nproc
+    PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+    PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+    PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+    PROVIDE psi_bilinear_matrix_transp_order selection_weight pseudo_sym
+    PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
+    PROVIDE excitation_beta_max  excitation_alpha_max excitation_max
+
+    call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'selection')
+
+    integer, external              :: zmq_put_psi
+    integer, external              :: zmq_put_N_det_generators
+    integer, external              :: zmq_put_N_det_selectors
+    integer, external              :: zmq_put_dvector
+
+    if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
+      stop 'Unable to put psi on ZMQ server'
+    endif
+    if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then
+      stop 'Unable to put N_det_generators on ZMQ server'
+    endif
+    if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then
+      stop 'Unable to put N_det_selectors on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then
+      stop 'Unable to put energy on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then
+      stop 'Unable to put state_average_weight on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) then
+      stop 'Unable to put selection_weight on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) then
+      stop 'Unable to put threshold_generators on ZMQ server'
+    endif
+    call create_selection_buffer(N, N*2, b)
+  endif
+
+  integer, external :: add_task_to_taskserver
+  character(len=100000)           :: task
+  integer :: j,k,ipos
+  ipos=1
+  task = ' '
+
+ 
+ do i= 1, N_det_generators
+    do j=1,pt2_F(i)
+      write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, i, N
+      ipos += 30
+      if (ipos > 100000-30) then
+        if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+          stop 'Unable to add task to task server'
+        endif
+        ipos=1
+      endif
+    end do
+  enddo
+  if (ipos > 1) then
+    if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+      stop 'Unable to add task to task server'
+    endif
+  endif
+  N = max(N_in,1)
+
+
+  ASSERT (associated(b%det))
+  ASSERT (associated(b%val))
+
+  integer, external :: zmq_set_running
+  if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+    print *,  irp_here, ': Failed in zmq_set_running'
+  endif
+
+  integer :: nproc_target
+  if (N_det < 3*nproc) then
+    nproc_target = N_det/4
+  else
+    nproc_target = nproc
+  endif
+  double precision :: mem
+  mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 +  3.d0 + 5.d0) / (1024.d0**3)
+  call write_double(6,mem,'Estimated memory/thread (Gb)')
+  if (qp_max_mem > 0) then
+    nproc_target = max(1,int(dble(qp_max_mem)/(0.1d0 + mem)))
+    nproc_target = min(nproc_target,nproc)
+  endif
+
+  f(:) = 1.d0
+  if (.not.do_pt2) then
+    double precision :: f(N_states), u_dot_u
+    do k=1,min(N_det,N_states)
+     f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors)
+    enddo
+  endif
+
+  !$OMP PARALLEL DEFAULT(shared)  SHARED(b, pt2_data)  PRIVATE(i) NUM_THREADS(nproc_target+1)
+  i = omp_get_thread_num()
+  if (i==0) then
+    call selection_collector(zmq_socket_pull, b, N, pt2_data)
+  else
+    call selection_slave_inproc(i)
+  endif
+  !$OMP END PARALLEL
+
+  call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'selection')
+  if (N_in > 0) then
+    if (s2_eig) then
+      call make_selection_buffer_s2(b)
+    endif
+    call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0)
+  endif
+  call delete_selection_buffer(b)
+
+  do k=1,N_states
+    pt2_data % pt2(k) = pt2_data % pt2(k) * f(k)
+    pt2_data % variance(k) = pt2_data % variance(k) * f(k)
+    do l=1,N_states
+      pt2_data % overlap(k,l) = pt2_data % overlap(k,l) * dsqrt(f(k)*f(l))
+      pt2_data % overlap(l,k) = pt2_data % overlap(l,k) * dsqrt(f(k)*f(l))
+    enddo
+
+    pt2_data % rpt2(k) =  &
+       pt2_data % pt2(k)/(1.d0 + pt2_data % overlap(k,k))
+  enddo
+
+  pt2_overlap(:,:) = pt2_data % overlap(:,:)
+
+  print *, 'Overlap of perturbed states:'
+  do l=1,N_states
+    print *, pt2_overlap(l,:)
+  enddo
+  print *, '-------'
+  SOFT_TOUCH pt2_overlap
+  call update_pt2_and_variance_weights(pt2_data, N_states)
+
+end subroutine
+
+
+subroutine selection_slave_inproc(i)
+  implicit none
+  integer, intent(in)            :: i
+
+  call run_selection_slave(1,i,pt2_e0_denominator)
+end
+
+subroutine selection_collector(zmq_socket_pull, b, N, pt2_data)
+  use f77_zmq
+  use selection_types
+  use bitmasks
+  implicit none
+
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(in)            :: N
+  type(pt2_type), intent(inout)  :: pt2_data
+  type(pt2_type)                 :: pt2_data_tmp
+
+  double precision               :: pt2_mwen(N_states)
+  double precision               :: variance_mwen(N_states)
+  double precision               :: norm2_mwen(N_states)
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_pull_socket
+
+  integer :: msg_size, rc, more
+  integer :: acc, i, j, robin, ntask
+  double precision, pointer :: val(:)
+  integer(bit_kind), pointer :: det(:,:,:)
+  integer, allocatable :: task_id(:)
+  type(selection_buffer) :: b2
+
+
+
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+  call create_selection_buffer(N, N*2, b2)
+  integer :: k
+  double precision :: rss
+  double precision, external :: memory_of_int
+  rss = memory_of_int(N_det_generators)
+  call check_mem(rss,irp_here)
+  allocate(task_id(N_det_generators))
+  more = 1
+  pt2_data % pt2(:)      = 0d0
+  pt2_data % variance(:) = 0.d0
+  pt2_data % overlap(:,:) = 0.d0
+  call pt2_alloc(pt2_data_tmp,N_states)
+  do while (more == 1)
+    call pull_selection_results(zmq_socket_pull, pt2_data_tmp, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask)
+
+    call pt2_add(pt2_data, 1.d0, pt2_data_tmp)
+    do i=1, b2%cur
+      call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i))
+      if (b2%val(i) > b%mini) exit
+    end do
+
+    do i=1, ntask
+      if(task_id(i) == 0) then
+          print *,  "Error in collector"
+      endif
+      integer, external :: zmq_delete_task
+      if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) == -1) then
+        stop 'Unable to delete task'
+      endif
+    end do
+  end do
+  call pt2_dealloc(pt2_data_tmp)
+
+
+  call delete_selection_buffer(b2)
+  call sort_selection_buffer(b)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+end subroutine
+
diff --git a/src/cisd/30.cisd.bats b/src/cisd/30.cisd.bats
index 69b862b0..6b8fddb6 100644
--- a/src/cisd/30.cisd.bats
+++ b/src/cisd/30.cisd.bats
@@ -10,6 +10,7 @@ function run() {
   qp set determinants n_states  2
   qp set davidson threshold_davidson 1.e-12
   qp set davidson n_states_diag 24
+  qp run cis
   qp run cisd 
   energy1="$(qp get cisd energy | tr '[]' ' ' | cut -d ',' -f 1)"
   energy2="$(qp get cisd energy | tr '[]' ' ' | cut -d ',' -f 2)"
@@ -20,26 +21,31 @@ function run() {
 
 @test "B-B" { # 
   qp set_file b2_stretched.ezfio
+  qp set_frozen_core
   run  -49.120607088648597 -49.055152453388231
 }
 
 @test "SiH2_3B1" { # 1.53842s 3.53856s
   qp set_file sih2_3b1.ezfio
+  qp set_frozen_core
   run -290.015949171697 -289.805036176618
 }
 
 @test "HBO" { # 4.42968s  19.6099s
   qp set_file hbo.ezfio
+  qp set_frozen_core
   run -100.2019254455993 -99.79484127741013 
 }
 
 @test "HCO" { # 6.6077s 28.6801s
   qp set_file hco.ezfio
+  qp set_frozen_core
   run -113.39088802205114 -113.22204293108558
 }
 
 @test "H2O" { # 7.0651s 30.6642s
   qp set_file h2o.ezfio
+  qp set_frozen_core
   run -76.22975602077072 -75.80609108747208 
 }
 
@@ -50,6 +56,7 @@ function run() {
 @test "H2S" { # 7.42152s 32.5461s
   [[ -n $TRAVIS ]] && skip
   qp set_file h2s.ezfio
+  qp set_frozen_core
   run -398.853701416768 -398.519020035337
 }
 
@@ -70,6 +77,7 @@ function run() {
 @test "OH" { # 18.2159s 1.28453m
   [[ -n $TRAVIS ]] && skip
   qp set_file oh.ezfio
+  qp set_frozen_core
   run -75.6087472926588 -75.5370393736601
 }
 
@@ -83,6 +91,7 @@ function run() {
 @test "SiH3" { # 20.2202s 1.38648m
   [[ -n $TRAVIS ]] && skip
   qp set_file sih3.ezfio
+  qp set_frozen_core
   run -5.57096611856522  -5.30950347928823
 }
 
@@ -103,6 +112,7 @@ function run() {
 @test "H3COH" { # 24.7248s 1.85043m
   [[ -n $TRAVIS ]] && skip
   qp set_file h3coh.ezfio
+  qp set_frozen_core
   run -115.204958752377 -114.755913828245
 }
 
@@ -117,6 +127,7 @@ function run() {
 @test "ClF" { # 30.3225s
   [[ -n $TRAVIS ]] && skip
   qp set_file clf.ezfio
+  qp set_frozen_core
   run -559.162476603880  -558.792395927088
 }
 
@@ -130,6 +141,7 @@ function run() {
 @test "ClO" { # 37.6949s
   [[ -n $TRAVIS ]] && skip
   qp set_file clo.ezfio
+  qp set_frozen_core
   run -534.5404021326773 -534.3818725793897 
 }
 
@@ -150,6 +162,7 @@ function run() {
 @test "SO" { # 51.2476s
   [[ -n $TRAVIS ]] && skip
   qp set_file so.ezfio
+  qp set_frozen_core
   run -26.0131812819785 -25.7053111980226
 }
 
diff --git a/src/cisd/cisd.irp.f b/src/cisd/cisd.irp.f
index fca3b10e..5f167686 100644
--- a/src/cisd/cisd.irp.f
+++ b/src/cisd/cisd.irp.f
@@ -69,7 +69,9 @@ subroutine run
   do i = 1,N_states
     k = maxloc(dabs(psi_coef_sorted(1:N_det,i)),dim=1)
     delta_E  = CI_electronic_energy(i) - diag_h_mat_elem(psi_det_sorted(1,1,k),N_int)
-    cisdq(i) = CI_energy(i) + delta_E * (1.d0 - psi_coef_sorted(k,i)**2)
+    if (elec_alpha_num + elec_beta_num >= 4) then
+      cisdq(i) = CI_energy(i) + delta_E * (1.d0 - psi_coef_sorted(k,i)**2)
+    endif
   enddo
   print *,  'N_det = ', N_det
   print*,''
@@ -78,26 +80,43 @@ subroutine run
   do i = 1,N_states
     print *,  i, CI_energy(i)
   enddo
-  print*,''
-  print*,'******************************'
-  print *,  'CISD+Q Energies'
-  do i = 1,N_states
-    print *,  i, cisdq(i)
-  enddo
+  if (elec_alpha_num + elec_beta_num >= 4) then
+    print*,''
+    print*,'******************************'
+    print *,  'CISD+Q Energies'
+    do i = 1,N_states
+      print *,  i, cisdq(i)
+    enddo
+  endif
   if (N_states > 1) then
-    print*,''
-    print*,'******************************'
-    print*,'Excitation energies (au)    (CISD+Q)'
-    do i = 2, N_states
-      print*, i ,CI_energy(i) - CI_energy(1), cisdq(i) - cisdq(1)
-    enddo
-    print*,''
-    print*,'******************************'
-    print*,'Excitation energies (eV)    (CISD+Q)'
-    do i = 2, N_states
-      print*, i ,(CI_energy(i) - CI_energy(1))/0.0367502d0, &
-        (cisdq(i) - cisdq(1)) / 0.0367502d0
-    enddo
+    if (elec_alpha_num + elec_beta_num >= 4) then
+      print*,''
+      print*,'******************************'
+      print*,'Excitation energies (au)    (CISD+Q)'
+      do i = 2, N_states
+        print*, i ,CI_energy(i) - CI_energy(1), cisdq(i) - cisdq(1)
+      enddo
+      print*,''
+      print*,'******************************'
+      print*,'Excitation energies (eV)    (CISD+Q)'
+      do i = 2, N_states
+        print*, i ,(CI_energy(i) - CI_energy(1)) * ha_to_ev, &
+          (cisdq(i) - cisdq(1)) * ha_to_ev
+      enddo
+    else
+      print*,''
+      print*,'******************************'
+      print*,'Excitation energies (au)    (CISD)'
+      do i = 2, N_states
+        print*, i ,CI_energy(i) - CI_energy(1)
+      enddo
+      print*,''
+      print*,'******************************'
+      print*,'Excitation energies (eV)    (CISD)'
+      do i = 2, N_states
+        print*, i ,(CI_energy(i) - CI_energy(1)) * ha_to_ev
+      enddo
+    endif
   endif
 
 end
diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f
new file mode 100644
index 00000000..670b2395
--- /dev/null
+++ b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f
@@ -0,0 +1,500 @@
+
+! ---
+
+subroutine davidson_general_diag_dressed_ext_rout_nonsym_b1space(u_in, H_jj, Dress_jj,energies, sze, N_st, N_st_diag_in, converged, hcalc)
+
+  use mmap_module
+
+  BEGIN_DOC
+  ! Generic modified-Davidson diagonalization 
+  !
+  ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
+  !
+  ! u_in : guess coefficients on the various states. Overwritten on exit by right eigenvectors
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > N_st
+  !
+  ! Initial guess vectors are not necessarily orthonormal
+  !
+  ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output)
+  END_DOC
+
+  implicit none
+
+  integer,           intent(in)   :: sze, N_st, N_st_diag_in
+  double precision,  intent(in)   :: H_jj(sze),Dress_jj(sze)
+  logical,          intent(inout) :: converged
+  double precision, intent(inout) :: u_in(sze,N_st_diag_in)
+  double precision, intent(out)   :: energies(N_st)
+  external                           hcalc
+
+  character*(16384)               :: write_buffer
+  integer                         :: iter, N_st_diag
+  integer                         :: i, j, k, l, m
+  integer                         :: iter2, itertot
+  logical                         :: disk_based
+  integer                         :: shift, shift2, itermax
+  integer                         :: nproc_target
+  integer                         :: order(N_st_diag_in)
+  double precision                :: to_print(2,N_st)
+  double precision                :: r1, r2, alpha
+  double precision                :: cpu, wall
+  double precision                :: cmax
+  double precision                :: energy_shift(N_st_diag_in*davidson_sze_max)
+  double precision, allocatable   :: U(:,:)
+  double precision, allocatable   :: y(:,:), h(:,:), lambda(:)
+  double precision, allocatable   :: residual_norm(:)
+
+  double precision                :: lambda_tmp
+  integer,          allocatable   :: i_omax(:)
+  double precision, allocatable   :: U_tmp(:), overlap(:)
+
+  double precision, allocatable :: W(:,:)
+  !double precision, pointer       :: W(:,:)
+  double precision, external      :: u_dot_v, u_dot_u
+
+
+  include 'constants.include.F'
+
+  N_st_diag = N_st_diag_in 
+!  print*,'trial vector'
+   do i = 1, sze
+    if(isnan(u_in(i,1)))then
+     print*,'pb in input vector of davidson_general_ext_rout_nonsym_b1space'
+     print*,i,u_in(i,1)
+     stop
+    else if (dabs(u_in(i,1)).lt.1.d-16)then
+     u_in(i,1) = 0.d0
+    endif
+   enddo
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda
+  if(N_st_diag*3 > sze) then
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_full to ', N_st_diag*3
+    stop -1
+  endif
+
+  itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1
+
+  provide threshold_nonsym_davidson 
+  call write_time(6)
+  write(6,'(A)') ''
+  write(6,'(A)') 'Davidson Diagonalization'
+  write(6,'(A)') '------------------------'
+  write(6,'(A)') ''
+
+
+  ! Find max number of cores to fit in memory
+  ! -----------------------------------------
+
+  nproc_target = nproc
+  double precision :: rss
+  integer :: maxab
+  maxab = sze 
+
+  m=1
+  disk_based = .False.
+  call resident_memory(rss)
+  do
+    r1 = 8.d0 *                                   &! bytes
+         ( dble(sze)*(N_st_diag*itermax)          &! U
+         + 1.d0*dble(sze*m)*(N_st_diag*itermax)   &! W
+         + 2.d0*(N_st_diag*itermax)**2            &! h,y
+         + 2.d0*(N_st_diag*itermax)               &! s2,lambda
+         + 1.d0*(N_st_diag)                       &! residual_norm
+                                                   ! In H_S2_u_0_nstates_zmq
+         + 3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on collector
+         + 3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on slave
+         + 0.5d0*maxab                            &! idx0 in H_S2_u_0_nstates_openmp_work_*
+         + nproc_target *                         &! In OMP section
+           ( 1.d0*(N_int*maxab)                   &! buffer
+           + 3.5d0*(maxab) )                      &! singles_a, singles_b, doubles, idx
+         ) / 1024.d0**3
+
+    if(nproc_target == 0) then
+      call check_mem(r1, irp_here)
+      nproc_target = 1
+      exit
+    endif
+
+    if(r1+rss < qp_max_mem) then
+      exit
+    endif
+
+    if(itermax > 4) then
+      itermax = itermax - 1
+    else if (m==1.and.disk_based_davidson) then
+      m = 0
+      disk_based = .True.
+      itermax = 6
+    else
+      nproc_target = nproc_target - 1
+    endif
+
+  enddo
+
+  nthreads_davidson = nproc_target
+  TOUCH nthreads_davidson
+
+  call write_int(6, N_st, 'Number of states')
+  call write_int(6, N_st_diag, 'Number of states in diagonalization')
+  call write_int(6, sze, 'Number of basis functions')
+  call write_int(6, nproc_target, 'Number of threads for diagonalization')
+  call write_double(6, r1, 'Memory(Gb)')
+  if(disk_based) then
+    print *, 'Using swap space to reduce RAM'
+  endif
+
+  !---------------
+
+  write(6,'(A)') ''
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = 'Iter'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//'       Energy         Residual '
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+
+  ! ---
+
+
+  allocate( W(sze,N_st_diag*itermax) )
+
+  allocate(                                                          &
+      ! Large
+      U(sze,N_st_diag*itermax),                                      &
+      ! Small
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      lambda(N_st_diag*itermax),                                     & 
+      residual_norm(N_st_diag),                                      &
+      i_omax(N_st)                                                   &
+  )
+
+  U = 0.d0
+  h = 0.d0
+  y = 0.d0
+  lambda = 0.d0
+  residual_norm = 0.d0
+
+
+  ASSERT (N_st > 0)
+  ASSERT (N_st_diag >= N_st)
+  ASSERT (sze > 0)
+
+  ! Davidson iterations
+  ! ===================
+
+  converged = .False.
+
+  ! Initialize from N_st to N_st_diag with gaussian random numbers
+  ! to be sure to have overlap with any eigenvectors
+  do k = N_st+1, N_st_diag
+    u_in(k,k) = 10.d0
+    do i = 1, sze
+      call random_number(r1)
+      call random_number(r2)
+      r1 = dsqrt(-2.d0*dlog(r1))
+      r2 = dtwo_pi*r2
+      u_in(i,k) = r1*dcos(r2)
+    enddo
+  enddo
+  ! Normalize all states 
+  do k = 1, N_st_diag
+    call normalize(u_in(1,k), sze)
+  enddo
+
+  ! Copy from the guess input "u_in" to the working vectors "U"
+  do k = 1, N_st_diag
+    do i = 1, sze
+      U(i,k) = u_in(i,k)
+    enddo
+  enddo
+
+  ! ---
+
+  itertot = 0
+
+  do while (.not.converged)
+
+    itertot = itertot + 1
+    if(itertot == 8) then
+      exit
+    endif
+
+    do iter = 1, itermax-1
+
+      shift  = N_st_diag * (iter-1)
+      shift2 = N_st_diag * iter
+
+      if( (iter > 1) .or. (itertot == 1) ) then
+
+        ! Gram-Schmidt to orthogonalize all new guess with the previous vectors 
+        call ortho_qr(U, size(U, 1), sze, shift2)
+        call ortho_qr(U, size(U, 1), sze, shift2)
+
+        ! W = H U
+        call hcalc(W(1,shift+1), U(1,shift+1), N_st_diag, sze)
+        call dress_calc(W(1,shift+1), Dress_jj, U(1,shift+1), N_st_diag, sze)
+
+      else
+
+        ! Already computed in update below
+        continue
+      endif
+
+      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
+      ! -------------------------------------------
+      call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 &
+                , U, size(U, 1), W, size(W, 1)        &
+                , 0.d0, h, size(h, 1) )
+
+
+      ! Diagonalize h y = lambda y
+      ! ---------------------------
+      call diag_nonsym_right(shift2, h(1,1), size(h, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1))
+
+
+      ! Express eigenvectors of h in the determinant basis:
+      ! ---------------------------------------------------
+
+      ! y(:,k) = rk
+      ! U(:,k) = Bk 
+      ! U(:,shift2+k) = Rk = Bk x rk
+      call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+                , U, size(U, 1), y, size(y, 1)           & 
+                , 0.d0, U(1,shift2+1), size(U, 1) )
+
+      do k = 1, N_st_diag
+        call normalize(U(1,shift2+k), sze)
+      enddo
+
+      ! ---
+      ! select the max overlap
+
+      !
+      ! start test ------------------------------------------------------------------------
+      !
+      !double precision, allocatable :: Utest(:,:), Otest(:)
+      !allocate( Utest(sze,shift2), Otest(shift2) )
+
+      !call dgemm( 'N', 'N', sze, shift2, shift2, 1.d0 &
+      !          , U, size(U, 1), y, size(y, 1), 0.d0, Utest(1,1), size(Utest, 1) )
+      !do k = 1, shift2
+      !  call normalize(Utest(1,k), sze)
+      !enddo
+      !do j = 1, sze
+      !  write(455, '(100(1X, F16.10))') (Utest(j,k), k=1,shift2)
+      !enddo
+
+      !do k = 1, shift2 
+      !  Otest(k) = 0.d0
+      !  do i = 1, sze
+      !    Otest(k) += Utest(i,k) * u_in(i,1)
+      !  enddo
+      !  Otest(k) = dabs(Otest(k))
+      !  print *, ' Otest =', k, Otest(k), lambda(k)
+      !enddo
+     
+      !deallocate(Utest, Otest)
+      !
+      ! end test ------------------------------------------------------------------------
+      !
+
+      ! TODO 
+      ! state_following is more efficient
+      do l = 1, N_st
+
+        allocate( overlap(N_st_diag) )
+
+        do k = 1, N_st_diag
+          overlap(k) = 0.d0
+          do i = 1, sze
+            overlap(k) = overlap(k) + U(i,shift2+k) * u_in(i,l)
+          enddo
+          overlap(k) = dabs(overlap(k))
+          !print *, ' overlap =', k, overlap(k)
+        enddo
+
+        lambda_tmp = 0.d0 
+        do k = 1, N_st_diag
+          if(overlap(k) .gt. lambda_tmp) then 
+            i_omax(l)  = k
+            lambda_tmp = overlap(k)
+          endif
+        enddo
+
+        deallocate(overlap)
+
+        if(lambda_tmp .lt. 0.7d0) then
+          print *, ' very small overlap ...', l, i_omax(l)
+          print *, ' max overlap = ', lambda_tmp
+          stop
+        endif
+
+        if(i_omax(l) .ne. l) then
+          print *, ' !!! WARNONG !!!'
+          print *, ' index of state', l, i_omax(l)
+        endif
+      enddo
+
+      ! y(:,k) = rk
+      ! W(:,k) = H x Bk 
+      ! W(:,shift2+k) = H x Bk x rk
+      !               = Wk
+      call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+                , W, size(W, 1), y, size(y, 1)           &
+                , 0.d0, W(1,shift2+1), size(W, 1) )
+
+      ! ---
+
+      ! Compute residual vector and davidson step
+      ! -----------------------------------------
+
+      !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      do k = 1, N_st_diag
+        do i = 1, sze
+          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2)
+        enddo
+        if(k <= N_st) then
+          l = k
+          residual_norm(k) = u_dot_u(U(1,shift2+l), sze)
+          to_print(1,k)    = lambda(l) 
+          to_print(2,k)    = residual_norm(l)
+        endif
+      enddo
+      !$OMP END PARALLEL DO
+      !residual_norm(1) = u_dot_u(U(1,shift2+1), sze)
+      !to_print(1,1) = lambda(1) 
+      !to_print(2,1) = residual_norm(1)
+
+
+      if( (itertot > 1) .and. (iter == 1) ) then
+        !don't print 
+        continue
+      else
+        write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:2,1:N_st)
+      endif
+
+      ! Check convergence
+      if(iter > 1) then
+        converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson
+      endif   
+      
+      do k = 1, N_st
+        if(residual_norm(k) > 1.e8) then
+          print *, 'Davidson failed'
+          stop -1
+        endif
+      enddo
+      if(converged) then
+        exit
+      endif
+
+      logical, external :: qp_stop
+      if(qp_stop()) then
+        converged = .True.
+        exit
+      endif
+
+    enddo ! loop over iter
+
+
+    ! Re-contract U and update W
+    ! --------------------------------
+
+    call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0  &
+              , W, size(W, 1), y, size(y, 1)            &
+              , 0.d0, u_in, size(u_in, 1) )
+    do k = 1, N_st_diag
+      do i = 1, sze
+        W(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+              , U, size(U, 1), y, size(y, 1)           &
+              , 0.d0, u_in, size(u_in, 1) )
+    do k = 1, N_st_diag
+      do i = 1, sze
+        U(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    call ortho_qr(U, size(U, 1), sze, N_st_diag)
+    call ortho_qr(U, size(U, 1), sze, N_st_diag)
+    do j = 1, N_st_diag
+      k = 1
+      do while( (k < sze) .and. (U(k,j) == 0.d0) )
+        k = k+1
+      enddo
+      if(U(k,j) * u_in(k,j) < 0.d0) then
+        do i = 1, sze
+          W(i,j) = -W(i,j)
+        enddo
+      endif
+    enddo
+
+  enddo ! loop over while
+
+  ! ---
+
+  do k = 1, N_st
+    energies(k) = lambda(k)
+  enddo
+  write_buffer = '====='
+  do i = 1, N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') trim(write_buffer)
+  write(6,'(A)') ''
+  call write_time(6)
+
+  deallocate(W)
+  deallocate(U, h, y, lambda, residual_norm, i_omax)
+
+  FREE nthreads_davidson
+
+end subroutine davidson_general_ext_rout_nonsym_b1space
+
+! ---
+
+subroutine dress_calc(v,dress,u,N_st,sze)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computed the action of the diagonal dressing dress
+  !
+  ! WARNING :: v is not initialiazed !!!
+  END_DOC
+  integer, intent(in)              :: N_st,sze
+  double precision, intent(in)     :: u(sze,N_st),dress(sze)
+  double precision, intent(inout)  :: v(sze,N_st)
+  integer :: i,istate
+  
+  do istate = 1, N_st
+   do i = 1, sze
+    v(i,istate) += dress(i) * u(i,istate)
+   enddo
+  enddo
+end
+
+
+
+
+
+
diff --git a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f
new file mode 100644
index 00000000..1bed60fe
--- /dev/null
+++ b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f
@@ -0,0 +1,473 @@
+
+! ---
+
+subroutine davidson_general_ext_rout_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc)
+
+  use mmap_module
+
+  BEGIN_DOC
+  ! Generic modified-Davidson diagonalization 
+  !
+  ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
+  !
+  ! u_in : guess coefficients on the various states. Overwritten on exit by right eigenvectors
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > N_st
+  !
+  ! Initial guess vectors are not necessarily orthonormal
+  !
+  ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output)
+  END_DOC
+
+  implicit none
+
+  integer,           intent(in)   :: sze, N_st, N_st_diag_in
+  double precision,  intent(in)   :: H_jj(sze)
+  logical,          intent(inout) :: converged
+  double precision, intent(inout) :: u_in(sze,N_st_diag_in)
+  double precision, intent(out)   :: energies(N_st)
+  external                           hcalc
+
+  character*(16384)               :: write_buffer
+  integer                         :: iter, N_st_diag
+  integer                         :: i, j, k, l, m
+  integer                         :: iter2, itertot
+  logical                         :: disk_based
+  integer                         :: shift, shift2, itermax
+  integer                         :: nproc_target
+  integer                         :: order(N_st_diag_in)
+  double precision                :: to_print(2,N_st)
+  double precision                :: r1, r2, alpha
+  double precision                :: cpu, wall
+  double precision                :: cmax
+  double precision                :: energy_shift(N_st_diag_in*davidson_sze_max)
+  double precision, allocatable   :: U(:,:)
+  double precision, allocatable   :: y(:,:), h(:,:), lambda(:)
+  double precision, allocatable   :: residual_norm(:)
+
+  double precision                :: lambda_tmp
+  integer,          allocatable   :: i_omax(:)
+  double precision, allocatable   :: U_tmp(:), overlap(:)
+
+  double precision, allocatable :: W(:,:)
+  !double precision, pointer       :: W(:,:)
+  double precision, external      :: u_dot_v, u_dot_u
+
+
+  include 'constants.include.F'
+
+  N_st_diag = N_st_diag_in 
+!  print*,'trial vector'
+   do i = 1, sze
+    if(isnan(u_in(i,1)))then
+     print*,'pb in input vector of davidson_general_ext_rout_nonsym_b1space'
+     print*,i,u_in(i,1)
+     stop
+    else if (dabs(u_in(i,1)).lt.1.d-16)then
+     u_in(i,1) = 0.d0
+    endif
+   enddo
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda
+  if(N_st_diag*3 > sze) then
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_full to ', N_st_diag*3
+    stop -1
+  endif
+
+  itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1
+
+  provide threshold_nonsym_davidson 
+  call write_time(6)
+  write(6,'(A)') ''
+  write(6,'(A)') 'Davidson Diagonalization'
+  write(6,'(A)') '------------------------'
+  write(6,'(A)') ''
+
+
+  ! Find max number of cores to fit in memory
+  ! -----------------------------------------
+
+  nproc_target = nproc
+  double precision :: rss
+  integer :: maxab
+  maxab = sze 
+
+  m=1
+  disk_based = .False.
+  call resident_memory(rss)
+  do
+    r1 = 8.d0 *                                   &! bytes
+         ( dble(sze)*(N_st_diag*itermax)          &! U
+         + 1.d0*dble(sze*m)*(N_st_diag*itermax)   &! W
+         + 2.d0*(N_st_diag*itermax)**2            &! h,y
+         + 2.d0*(N_st_diag*itermax)               &! s2,lambda
+         + 1.d0*(N_st_diag)                       &! residual_norm
+                                                   ! In H_S2_u_0_nstates_zmq
+         + 3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on collector
+         + 3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on slave
+         + 0.5d0*maxab                            &! idx0 in H_S2_u_0_nstates_openmp_work_*
+         + nproc_target *                         &! In OMP section
+           ( 1.d0*(N_int*maxab)                   &! buffer
+           + 3.5d0*(maxab) )                      &! singles_a, singles_b, doubles, idx
+         ) / 1024.d0**3
+
+    if(nproc_target == 0) then
+      call check_mem(r1, irp_here)
+      nproc_target = 1
+      exit
+    endif
+
+    if(r1+rss < qp_max_mem) then
+      exit
+    endif
+
+    if(itermax > 4) then
+      itermax = itermax - 1
+!    else if (m==1.and.disk_based_davidson) then
+!      m = 0
+!      disk_based = .True.
+!      itermax = 6
+    else
+      nproc_target = nproc_target - 1
+    endif
+
+  enddo
+
+  nthreads_davidson = nproc_target
+  TOUCH nthreads_davidson
+
+  call write_int(6, N_st, 'Number of states')
+  call write_int(6, N_st_diag, 'Number of states in diagonalization')
+  call write_int(6, sze, 'Number of basis functions')
+  call write_int(6, nproc_target, 'Number of threads for diagonalization')
+  call write_double(6, r1, 'Memory(Gb)')
+  if(disk_based) then
+    print *, 'Using swap space to reduce RAM'
+  endif
+
+  !---------------
+
+  write(6,'(A)') ''
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = 'Iter'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//'       Energy         Residual '
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+
+  ! ---
+
+
+  allocate( W(sze,N_st_diag*itermax) )
+
+  allocate(                                                          &
+      ! Large
+      U(sze,N_st_diag*itermax),                                      &
+      ! Small
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      lambda(N_st_diag*itermax),                                     & 
+      residual_norm(N_st_diag),                                      &
+      i_omax(N_st)                                                   &
+  )
+
+  U = 0.d0
+  h = 0.d0
+  y = 0.d0
+  lambda = 0.d0
+  residual_norm = 0.d0
+
+
+  ASSERT (N_st > 0)
+  ASSERT (N_st_diag >= N_st)
+  ASSERT (sze > 0)
+
+  ! Davidson iterations
+  ! ===================
+
+  converged = .False.
+
+  ! Initialize from N_st to N_st_diag with gaussian random numbers
+  ! to be sure to have overlap with any eigenvectors
+  do k = N_st+1, N_st_diag
+    u_in(k,k) = 10.d0
+    do i = 1, sze
+      call random_number(r1)
+      call random_number(r2)
+      r1 = dsqrt(-2.d0*dlog(r1))
+      r2 = dtwo_pi*r2
+      u_in(i,k) = r1*dcos(r2)
+    enddo
+  enddo
+  ! Normalize all states 
+  do k = 1, N_st_diag
+    call normalize(u_in(1,k), sze)
+  enddo
+
+  ! Copy from the guess input "u_in" to the working vectors "U"
+  do k = 1, N_st_diag
+    do i = 1, sze
+      U(i,k) = u_in(i,k)
+    enddo
+  enddo
+
+  ! ---
+
+  itertot = 0
+
+  do while (.not.converged)
+
+    itertot = itertot + 1
+    if(itertot == 8) then
+      exit
+    endif
+
+    do iter = 1, itermax-1
+
+      shift  = N_st_diag * (iter-1)
+      shift2 = N_st_diag * iter
+
+      if( (iter > 1) .or. (itertot == 1) ) then
+
+        ! Gram-Schmidt to orthogonalize all new guess with the previous vectors 
+        call ortho_qr(U, size(U, 1), sze, shift2)
+        call ortho_qr(U, size(U, 1), sze, shift2)
+
+        ! W = H U
+        call hcalc(W(1,shift+1), U(1,shift+1), N_st_diag, sze)
+
+      else
+
+        ! Already computed in update below
+        continue
+      endif
+
+      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
+      ! -------------------------------------------
+      call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 &
+                , U, size(U, 1), W, size(W, 1)        &
+                , 0.d0, h, size(h, 1) )
+
+
+      ! Diagonalize h y = lambda y
+      ! ---------------------------
+      call diag_nonsym_right(shift2, h(1,1), size(h, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1))
+
+
+      ! Express eigenvectors of h in the determinant basis:
+      ! ---------------------------------------------------
+
+      ! y(:,k) = rk
+      ! U(:,k) = Bk 
+      ! U(:,shift2+k) = Rk = Bk x rk
+      call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+                , U, size(U, 1), y, size(y, 1)           & 
+                , 0.d0, U(1,shift2+1), size(U, 1) )
+
+      do k = 1, N_st_diag
+        call normalize(U(1,shift2+k), sze)
+      enddo
+
+      ! ---
+      ! select the max overlap
+
+      !
+      ! start test ------------------------------------------------------------------------
+      !
+      !double precision, allocatable :: Utest(:,:), Otest(:)
+      !allocate( Utest(sze,shift2), Otest(shift2) )
+
+      !call dgemm( 'N', 'N', sze, shift2, shift2, 1.d0 &
+      !          , U, size(U, 1), y, size(y, 1), 0.d0, Utest(1,1), size(Utest, 1) )
+      !do k = 1, shift2
+      !  call normalize(Utest(1,k), sze)
+      !enddo
+      !do j = 1, sze
+      !  write(455, '(100(1X, F16.10))') (Utest(j,k), k=1,shift2)
+      !enddo
+
+      !do k = 1, shift2 
+      !  Otest(k) = 0.d0
+      !  do i = 1, sze
+      !    Otest(k) += Utest(i,k) * u_in(i,1)
+      !  enddo
+      !  Otest(k) = dabs(Otest(k))
+      !  print *, ' Otest =', k, Otest(k), lambda(k)
+      !enddo
+     
+      !deallocate(Utest, Otest)
+      !
+      ! end test ------------------------------------------------------------------------
+      !
+
+      ! TODO 
+      ! state_following is more efficient
+      do l = 1, N_st
+
+        allocate( overlap(N_st_diag) )
+
+        do k = 1, N_st_diag
+          overlap(k) = 0.d0
+          do i = 1, sze
+            overlap(k) = overlap(k) + U(i,shift2+k) * u_in(i,l)
+          enddo
+          overlap(k) = dabs(overlap(k))
+          !print *, ' overlap =', k, overlap(k)
+        enddo
+
+        lambda_tmp = 0.d0 
+        do k = 1, N_st_diag
+          if(overlap(k) .gt. lambda_tmp) then 
+            i_omax(l)  = k
+            lambda_tmp = overlap(k)
+          endif
+        enddo
+
+        deallocate(overlap)
+
+        if(lambda_tmp .lt. 0.7d0) then
+          print *, ' very small overlap ...', l, i_omax(l)
+          print *, ' max overlap = ', lambda_tmp
+          stop
+        endif
+
+        if(i_omax(l) .ne. l) then
+          print *, ' !!! WARNONG !!!'
+          print *, ' index of state', l, i_omax(l)
+        endif
+      enddo
+
+      ! y(:,k) = rk
+      ! W(:,k) = H x Bk 
+      ! W(:,shift2+k) = H x Bk x rk
+      !               = Wk
+      call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+                , W, size(W, 1), y, size(y, 1)           &
+                , 0.d0, W(1,shift2+1), size(W, 1) )
+
+      ! ---
+
+      ! Compute residual vector and davidson step
+      ! -----------------------------------------
+
+      !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      do k = 1, N_st_diag
+        do i = 1, sze
+          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2)
+        enddo
+        if(k <= N_st) then
+          l = k
+          residual_norm(k) = u_dot_u(U(1,shift2+l), sze)
+          to_print(1,k)    = lambda(l) 
+          to_print(2,k)    = residual_norm(l)
+        endif
+      enddo
+      !$OMP END PARALLEL DO
+      !residual_norm(1) = u_dot_u(U(1,shift2+1), sze)
+      !to_print(1,1) = lambda(1) 
+      !to_print(2,1) = residual_norm(1)
+
+
+      if( (itertot > 1) .and. (iter == 1) ) then
+        !don't print 
+        continue
+      else
+        write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:2,1:N_st)
+      endif
+
+      ! Check convergence
+      if(iter > 1) then
+        converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson
+      endif   
+      
+      do k = 1, N_st
+        if(residual_norm(k) > 1.e8) then
+          print *, 'Davidson failed'
+          stop -1
+        endif
+      enddo
+      if(converged) then
+        exit
+      endif
+
+      logical, external :: qp_stop
+      if(qp_stop()) then
+        converged = .True.
+        exit
+      endif
+
+    enddo ! loop over iter
+
+
+    ! Re-contract U and update W
+    ! --------------------------------
+
+    call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0  &
+              , W, size(W, 1), y, size(y, 1)            &
+              , 0.d0, u_in, size(u_in, 1) )
+    do k = 1, N_st_diag
+      do i = 1, sze
+        W(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+              , U, size(U, 1), y, size(y, 1)           &
+              , 0.d0, u_in, size(u_in, 1) )
+    do k = 1, N_st_diag
+      do i = 1, sze
+        U(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    call ortho_qr(U, size(U, 1), sze, N_st_diag)
+    call ortho_qr(U, size(U, 1), sze, N_st_diag)
+    do j = 1, N_st_diag
+      k = 1
+      do while( (k < sze) .and. (U(k,j) == 0.d0) )
+        k = k+1
+      enddo
+      if(U(k,j) * u_in(k,j) < 0.d0) then
+        do i = 1, sze
+          W(i,j) = -W(i,j)
+        enddo
+      endif
+    enddo
+
+  enddo ! loop over while
+
+  ! ---
+
+  do k = 1, N_st
+    energies(k) = lambda(k)
+  enddo
+  write_buffer = '====='
+  do i = 1, N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') trim(write_buffer)
+  write(6,'(A)') ''
+  call write_time(6)
+
+  deallocate(W)
+  deallocate(U, h, y, lambda, residual_norm, i_omax)
+
+  FREE nthreads_davidson
+
+end subroutine davidson_general_ext_rout_nonsym_b1space
+
+! ---
diff --git a/src/davidson/EZFIO.cfg b/src/davidson/EZFIO.cfg
index 92c41b4c..bfa55526 100644
--- a/src/davidson/EZFIO.cfg
+++ b/src/davidson/EZFIO.cfg
@@ -4,6 +4,12 @@ doc: Thresholds of Davidson's algorithm if threshold_davidson_from_pt2 is false.
 interface: ezfio,provider,ocaml
 default: 1.e-10
 
+[threshold_nonsym_davidson]
+type: Threshold
+doc: Thresholds of non-symetric Davidson's algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
 [threshold_davidson_from_pt2]
 type: logical
 doc: Thresholds of Davidson's algorithm is set to E(rPT2)*threshold_davidson_from_pt2
diff --git a/src/davidson/input.irp.f b/src/davidson/input.irp.f
index aba88ae9..b37c87d0 100644
--- a/src/davidson/input.irp.f
+++ b/src/davidson/input.irp.f
@@ -30,10 +30,5 @@ BEGIN_PROVIDER [ integer, n_states_diag  ]
     endif
   IRP_ENDIF
 
-  call write_time(6)
-  if (mpi_master) then
-    write(6, *) 'Read  n_states_diag'
-  endif
-
 END_PROVIDER
 
diff --git a/src/determinants/determinants.irp.f b/src/determinants/determinants.irp.f
index b6b11485..6960a4d4 100644
--- a/src/determinants/determinants.irp.f
+++ b/src/determinants/determinants.irp.f
@@ -589,6 +589,67 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef)
   endif
 end
 
+subroutine save_wavefunction_general_unormalized(ndet,nstates,psidet,dim_psicoef,psicoef)
+  implicit none
+  BEGIN_DOC
+  !  Save the wave function into the |EZFIO| file
+  END_DOC
+  use bitmasks
+  include 'constants.include.F'
+  integer, intent(in)            :: ndet,nstates,dim_psicoef
+  integer(bit_kind), intent(in)  :: psidet(N_int,2,ndet)
+  double precision, intent(in)   :: psicoef(dim_psicoef,nstates)
+  integer*8, allocatable         :: psi_det_save(:,:,:)
+  double precision, allocatable  :: psi_coef_save(:,:)
+
+  double precision               :: accu_norm
+  integer                        :: i,j,k, ndet_qp_edit
+
+  if (mpi_master) then
+    ndet_qp_edit = min(ndet,N_det_qp_edit)
+
+    call ezfio_set_determinants_N_int(N_int)
+    call ezfio_set_determinants_bit_kind(bit_kind)
+    call ezfio_set_determinants_N_det(ndet)
+    call ezfio_set_determinants_N_det_qp_edit(ndet_qp_edit)
+    call ezfio_set_determinants_n_states(nstates)
+    call ezfio_set_determinants_mo_label(mo_label)
+
+    allocate (psi_det_save(N_int,2,ndet))
+    do i=1,ndet
+      do j=1,2
+        do k=1,N_int
+          psi_det_save(k,j,i) = transfer(psidet(k,j,i),1_8)
+        enddo
+      enddo
+    enddo
+    call ezfio_set_determinants_psi_det(psi_det_save)
+    call ezfio_set_determinants_psi_det_qp_edit(psi_det_save)
+    deallocate (psi_det_save)
+
+    allocate (psi_coef_save(ndet,nstates))
+    do k=1,nstates
+      do i=1,ndet
+        psi_coef_save(i,k) = psicoef(i,k)
+      enddo
+    enddo
+
+    call ezfio_set_determinants_psi_coef(psi_coef_save)
+    deallocate (psi_coef_save)
+
+    allocate (psi_coef_save(ndet_qp_edit,nstates))
+    do k=1,nstates
+      do i=1,ndet_qp_edit
+        psi_coef_save(i,k) = psicoef(i,k)
+      enddo
+    enddo
+
+    call ezfio_set_determinants_psi_coef_qp_edit(psi_coef_save)
+    deallocate (psi_coef_save)
+
+    call write_int(6,ndet,'Saved determinants')
+  endif
+end
 
 
 subroutine save_wavefunction_specified(ndet,nstates,psidet,psicoef,ndetsave,index_det_save)
diff --git a/src/determinants/tr_density_matrix.irp.f b/src/determinants/tr_density_matrix.irp.f
new file mode 100644
index 00000000..1e94edcb
--- /dev/null
+++ b/src/determinants/tr_density_matrix.irp.f
@@ -0,0 +1,313 @@
+BEGIN_PROVIDER [double precision, one_e_tr_dm_mo, (mo_num, mo_num, N_states, N_states)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! One body transition density matrix for all pairs of states n and m, < Psi^n | a_i^\dagger a_a | Psi^m >
+  END_DOC
+
+  integer                        :: j,k,l,m,k_a,k_b,n
+  integer                        :: occ(N_int*bit_kind_size,2)
+  double precision               :: ck, cl, ckl
+  double precision               :: phase
+  integer                        :: h1,h2,p1,p2,s1,s2, degree
+  integer(bit_kind)              :: tmp_det(N_int,2), tmp_det2(N_int)
+  integer                        :: exc(0:2,2),n_occ(2)
+  double precision, allocatable  :: tmp_a(:,:,:,:), tmp_b(:,:,:,:)
+  integer                        :: krow, kcol, lrow, lcol
+
+  PROVIDE psi_det
+
+  one_e_tr_dm_mo = 0d0
+ 
+  !$OMP PARALLEL DEFAULT(NONE)                                      &
+      !$OMP PRIVATE(j,k,k_a,k_b,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc,&
+      !$OMP  tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)&
+      !$OMP SHARED(psi_det,psi_coef,N_int,N_states,elec_alpha_num,  &
+      !$OMP  elec_beta_num,one_e_tr_dm_mo,N_det,&
+      !$OMP  mo_num,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns,&
+      !$OMP  psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns,&
+      !$OMP  psi_bilinear_matrix_order_reverse, psi_det_alpha_unique, psi_det_beta_unique,&
+      !$OMP  psi_bilinear_matrix_values, psi_bilinear_matrix_transp_values,&
+      !$OMP  N_det_alpha_unique,N_det_beta_unique,irp_here)
+  allocate(tmp_a(mo_num,mo_num,N_states,N_states), tmp_b(mo_num,mo_num,N_states,N_states) )
+  tmp_a = 0.d0
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_a=1,N_det
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow)
+    tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol)
+
+    ! Diagonal part
+    ! -------------
+
+    call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int)
+    do m=1,N_states
+      do n = 1, N_states
+        ck = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(k_a,n)
+        do l=1,elec_alpha_num
+          j = occ(l,1)
+          tmp_a(j,j,m,n) += ck
+        enddo
+      enddo
+    enddo
+
+    if (k_a == N_det) cycle
+    l = k_a+1
+    lrow = psi_bilinear_matrix_rows(l)
+    lcol = psi_bilinear_matrix_columns(l)
+    ! Fix beta determinant, loop over alphas
+    do while ( lcol == kcol )
+      tmp_det2(:) = psi_det_alpha_unique(:, lrow)
+      call get_excitation_degree_spin(tmp_det(1,1),tmp_det2,degree,N_int)
+      if (degree == 1) then
+        exc = 0
+        call get_single_excitation_spin(tmp_det(1,1),tmp_det2,exc,phase,N_int)
+        call decode_exc_spin(exc,h1,p1,h2,p2)
+        do m=1,N_states
+          do n = 1, N_states
+            ckl = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(l,n) * phase
+            tmp_a(h1,p1,m,n) += ckl
+            ckl = psi_bilinear_matrix_values(k_a,n)*psi_bilinear_matrix_values(l,m) * phase
+            tmp_a(p1,h1,m,n) += ckl
+          enddo
+        enddo
+      endif
+      l = l+1
+      if (l>N_det) exit
+      lrow = psi_bilinear_matrix_rows(l)
+      lcol = psi_bilinear_matrix_columns(l)
+    enddo
+
+  enddo
+  !$OMP END DO NOWAIT
+
+  !$OMP CRITICAL
+  one_e_tr_dm_mo(:,:,:,:) = one_e_tr_dm_mo(:,:,:,:) + tmp_a(:,:,:,:)
+  !$OMP END CRITICAL
+  deallocate(tmp_a)
+  !$OMP BARRIER
+
+  tmp_b = 0.d0
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_b=1,N_det
+    krow = psi_bilinear_matrix_transp_rows(k_b)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_transp_columns(k_b)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow)
+    tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol)
+
+    ! Diagonal part
+    ! -------------
+
+    call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int)
+    do m=1,N_states
+      do n = 1, N_states
+        ck = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(k_b,n)
+        do l=1,elec_beta_num
+          j = occ(l,2)
+          tmp_b(j,j,m,n) += ck
+        enddo
+      enddo
+    enddo
+
+    if (k_b == N_det) cycle
+    l = k_b+1
+    lrow = psi_bilinear_matrix_transp_rows(l)
+    lcol = psi_bilinear_matrix_transp_columns(l)
+    ! Fix beta determinant, loop over alphas
+    do while ( lrow == krow )
+      tmp_det2(:) = psi_det_beta_unique(:, lcol)
+      call get_excitation_degree_spin(tmp_det(1,2),tmp_det2,degree,N_int)
+      if (degree == 1) then
+        exc = 0
+        call get_single_excitation_spin(tmp_det(1,2),tmp_det2,exc,phase,N_int)
+        call decode_exc_spin(exc,h1,p1,h2,p2)
+        do m=1,N_states
+          do n = 1, N_states
+            ckl = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(l,n) * phase
+            tmp_b(h1,p1,m,n) += ckl
+            ckl = psi_bilinear_matrix_transp_values(k_b,n)*psi_bilinear_matrix_transp_values(l,m) * phase
+            tmp_b(p1,h1,m,n) += ckl
+          enddo
+        enddo
+      endif
+      l = l+1
+      if (l>N_det) exit
+      lrow = psi_bilinear_matrix_transp_rows(l)
+      lcol = psi_bilinear_matrix_transp_columns(l)
+    enddo
+
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP CRITICAL
+  one_e_tr_dm_mo(:,:,:,:)  = one_e_tr_dm_mo(:,:,:,:)  + tmp_b(:,:,:,:)
+  !$OMP END CRITICAL
+
+  deallocate(tmp_b)
+  !$OMP END PARALLEL
+
+END_PROVIDER
+ BEGIN_PROVIDER [ double precision, one_e_tr_dm_mo_alpha, (mo_num,mo_num,N_states,N_states) ]
+&BEGIN_PROVIDER [ double precision, one_e_tr_dm_mo_beta, (mo_num,mo_num,N_states,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! $\alpha$ and $\beta$ one-body transition density matrices for all pairs of states
+  END_DOC
+
+  integer                        :: j,k,l,m,n,k_a,k_b
+  integer                        :: occ(N_int*bit_kind_size,2)
+  double precision               :: ck, cl, ckl
+  double precision               :: phase
+  integer                        :: h1,h2,p1,p2,s1,s2, degree
+  integer(bit_kind)              :: tmp_det(N_int,2), tmp_det2(N_int)
+  integer                        :: exc(0:2,2),n_occ(2)
+  double precision, allocatable  :: tmp_a(:,:,:,:), tmp_b(:,:,:,:)
+  integer                        :: krow, kcol, lrow, lcol
+
+  PROVIDE psi_det
+
+  one_e_tr_dm_mo_alpha = 0.d0
+  one_e_tr_dm_mo_beta  = 0.d0
+  !$OMP PARALLEL DEFAULT(NONE)                                      &
+      !$OMP PRIVATE(j,k,k_a,k_b,l,m,n,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc,&
+      !$OMP  tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)&
+      !$OMP SHARED(psi_det,psi_coef,N_int,N_states,elec_alpha_num,  &
+      !$OMP  elec_beta_num,one_e_tr_dm_mo_alpha,one_e_tr_dm_mo_beta,N_det,&
+      !$OMP  mo_num,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns,&
+      !$OMP  psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns,&
+      !$OMP  psi_bilinear_matrix_order_reverse, psi_det_alpha_unique, psi_det_beta_unique,&
+      !$OMP  psi_bilinear_matrix_values, psi_bilinear_matrix_transp_values,&
+      !$OMP  N_det_alpha_unique,N_det_beta_unique,irp_here)
+  allocate(tmp_a(mo_num,mo_num,N_states,N_states), tmp_b(mo_num,mo_num,N_states,N_states) )
+  tmp_a = 0.d0
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_a=1,N_det
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow)
+    tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol)
+
+    ! Diagonal part
+    ! -------------
+
+    call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int)
+    do m=1,N_states
+      do n = 1, N_states
+        ck = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(k_a,n)
+        do l=1,elec_alpha_num
+          j = occ(l,1)
+          tmp_a(j,j,m,n) += ck
+        enddo
+      enddo
+    enddo
+
+    if (k_a == N_det) cycle
+    l = k_a+1
+    lrow = psi_bilinear_matrix_rows(l)
+    lcol = psi_bilinear_matrix_columns(l)
+    ! Fix beta determinant, loop over alphas
+    do while ( lcol == kcol )
+      tmp_det2(:) = psi_det_alpha_unique(:, lrow)
+      call get_excitation_degree_spin(tmp_det(1,1),tmp_det2,degree,N_int)
+      if (degree == 1) then
+        exc = 0
+        call get_single_excitation_spin(tmp_det(1,1),tmp_det2,exc,phase,N_int)
+        call decode_exc_spin(exc,h1,p1,h2,p2)
+        do m=1,N_states
+          do n = 1, N_states
+            ckl = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(l,n) * phase
+            tmp_a(h1,p1,m,n) += ckl
+            tmp_a(p1,h1,m,n) += ckl
+          enddo
+        enddo
+      endif
+      l = l+1
+      if (l>N_det) exit
+      lrow = psi_bilinear_matrix_rows(l)
+      lcol = psi_bilinear_matrix_columns(l)
+    enddo
+
+  enddo
+  !$OMP END DO NOWAIT
+
+  !$OMP CRITICAL
+  one_e_tr_dm_mo_alpha(:,:,:,:) = one_e_tr_dm_mo_alpha(:,:,:,:) + tmp_a(:,:,:,:)
+  !$OMP END CRITICAL
+  deallocate(tmp_a)
+
+  tmp_b = 0.d0
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_b=1,N_det
+    krow = psi_bilinear_matrix_transp_rows(k_b)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_transp_columns(k_b)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow)
+    tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol)
+
+    ! Diagonal part
+    ! -------------
+
+    call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int)
+    do m=1,N_states
+      do n = 1, N_states
+        ck = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(k_b,n)
+        do l=1,elec_beta_num
+          j = occ(l,2)
+          tmp_b(j,j,m,n) += ck
+        enddo
+      enddo
+    enddo
+
+    if (k_b == N_det) cycle
+    l = k_b+1
+    lrow = psi_bilinear_matrix_transp_rows(l)
+    lcol = psi_bilinear_matrix_transp_columns(l)
+    ! Fix beta determinant, loop over alphas
+    do while ( lrow == krow )
+      tmp_det2(:) = psi_det_beta_unique(:, lcol)
+      call get_excitation_degree_spin(tmp_det(1,2),tmp_det2,degree,N_int)
+      if (degree == 1) then
+        exc = 0
+        call get_single_excitation_spin(tmp_det(1,2),tmp_det2,exc,phase,N_int)
+        call decode_exc_spin(exc,h1,p1,h2,p2)
+        do m=1,N_states
+          do n = 1, N_states
+            ckl = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(l,n) * phase
+            tmp_b(h1,p1,m,n) += ckl
+            tmp_b(p1,h1,m,n) += ckl
+          enddo
+        enddo
+      endif
+      l = l+1
+      if (l>N_det) exit
+      lrow = psi_bilinear_matrix_transp_rows(l)
+      lcol = psi_bilinear_matrix_transp_columns(l)
+    enddo
+
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP CRITICAL
+  one_e_tr_dm_mo_beta(:,:,:,:)  = one_e_tr_dm_mo_beta(:,:,:,:)  + tmp_b(:,:,:,:)
+  !$OMP END CRITICAL
+
+  deallocate(tmp_b)
+  !$OMP END PARALLEL
+
+END_PROVIDER
+
diff --git a/src/dft_utils_func/rho_ab_to_rho_tot.irp.f b/src/dft_utils_func/rho_ab_to_rho_tot.irp.f
index 919543fe..d4d5a536 100644
--- a/src/dft_utils_func/rho_ab_to_rho_tot.irp.f
+++ b/src/dft_utils_func/rho_ab_to_rho_tot.irp.f
@@ -66,10 +66,27 @@ subroutine v_rho_oc_to_v_rho_ab(v_rho_o,v_rho_c,v_rho_a,v_rho_b)
  END_DOC
  double precision, intent(in)  :: v_rho_o,v_rho_c
  double precision, intent(out) :: v_rho_a,v_rho_b
+! print*,'in v_rho_oc_to_v_rho_ab'
+! print*, v_rho_c , v_rho_o
  v_rho_a = v_rho_c + v_rho_o
  v_rho_b = v_rho_c - v_rho_o
 end
 
+subroutine v_grad_rho_ab_to_v_grad_rho_oc(v_grad_rho_a_2,v_grad_rho_b_2,v_grad_rho_a_b,v_grad_rho_o_2,v_grad_rho_c_2,v_grad_rho_o_c)
+ implicit none
+ double precision, intent(in) :: v_grad_rho_a_2,v_grad_rho_b_2,v_grad_rho_a_b
+ double precision, intent(out)  :: v_grad_rho_o_2,v_grad_rho_c_2,v_grad_rho_o_c
+ BEGIN_DOC
+! convert  (v_grad_rho_a_2, v_grad_rho_b_2, v_grad_rho_a.grad_rho_b) 
+!
+! to       (v_grad_rho_c_2, v_grad_rho_o_2, v_grad_rho_o.grad_rho_c) 
+!
+! rho_c = total density, rho_o spin density
+ END_DOC
+ v_grad_rho_c_2 = 0.25d0 * (v_grad_rho_a_2 + v_grad_rho_b_2 + v_grad_rho_a_b)
+ v_grad_rho_o_2 = 0.25d0 * (v_grad_rho_a_2 + v_grad_rho_b_2 - v_grad_rho_a_b)
+ v_grad_rho_o_c = 0.25d0 * (2d0 * v_grad_rho_a_2 - 2d0 * v_grad_rho_b_2    )
+end
 
 
 subroutine v_grad_rho_oc_to_v_grad_rho_ab(v_grad_rho_o_2,v_grad_rho_c_2,v_grad_rho_o_c,v_grad_rho_a_2,v_grad_rho_b_2,v_grad_rho_a_b)
@@ -88,21 +105,3 @@ subroutine v_grad_rho_oc_to_v_grad_rho_ab(v_grad_rho_o_2,v_grad_rho_c_2,v_grad_r
  v_grad_rho_a_b = -2d0 * v_grad_rho_o_2 + 2d0 * v_grad_rho_c_2
 end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f
index 38478d21..b8beea76 100644
--- a/src/dft_utils_in_r/ao_in_r.irp.f
+++ b/src/dft_utils_in_r/ao_in_r.irp.f
@@ -169,4 +169,43 @@
  enddo
  END_PROVIDER
 
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)]
+ implicit none                                                                                                                                                                                             
+ BEGIN_DOC
+ ! aos_in_r_array_extra(i,j)        = value of the ith ao on the jth grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i,r,aos_array,j) & 
+ !$OMP SHARED(aos_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra)
+ do i = 1, n_points_extra_final_grid
+  r(1) = final_grid_points_extra(1,i)
+  r(2) = final_grid_points_extra(2,i)
+  r(3) = final_grid_points_extra(3,i)
+  call give_all_aos_at_r(r,aos_array)
+  do j = 1, ao_num
+   aos_in_r_array_extra(j,i) = aos_array(j)
+  enddo
+ enddo
+ !$OMP END PARALLEL DO
+
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ do i = 1, n_points_extra_final_grid
+  do j = 1, ao_num
+   aos_in_r_array_extra_transp(i,j) = aos_in_r_array_extra(j,i)
+  enddo
+ enddo
+
+ END_PROVIDER
 
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
new file mode 100644
index 00000000..39ea0cdf
--- /dev/null
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -0,0 +1,155 @@
+
+BEGIN_PROVIDER [ double precision, ao_abs_int_grid, (ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_abs_int_grid(i) = \int dr |phi_i(r) |
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_abs_int_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+    contrib = dabs(aos_in_r_array(i,ipoint)) * weight
+    ao_abs_int_grid(i) += contrib 
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_overlap_abs_grid, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_overlap_abs_grid(j,i) = \int dr |phi_i(r) phi_j(r)| 
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_overlap_abs_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    ao_overlap_abs_grid(j,i) += contrib 
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_center(1:3,j,i) = \int dr |phi_i(r) phi_j(r)| x/y/z / \int |phi_i(r) phi_j(r)|
+!
+! if \int |phi_i(r) phi_j(r)| < 1.d-10 then ao_prod_center = 10000.
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_prod_center = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    do m = 1, 3
+     ao_prod_center(m,j,i) += contrib * r(m)
+    enddo
+   enddo
+  enddo
+ enddo
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+    do m = 1, 3
+     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+    enddo
+   else
+    do m = 1, 3
+     ao_prod_center(m,j,i) = 10000.d0
+    enddo
+   endif
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_abs_r, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_abs_r(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)|
+!
+ END_DOC
+ ao_prod_abs_r = 0.d0
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3),contrib_x2
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    contrib_x2 = 0.d0
+    do m = 1, 3
+     contrib_x2 += (r(m) - ao_prod_center(m,j,i)) * (r(m) - ao_prod_center(m,j,i)) 
+    enddo
+    contrib_x2 = dsqrt(contrib_x2)
+    ao_prod_abs_r(j,i) += contrib * contrib_x2
+   enddo
+  enddo
+ enddo
+
+
+END_PROVIDER 
+
+ BEGIN_PROVIDER [double precision, ao_prod_sigma, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! Gaussian exponent reproducing the product |chi_i(r) chi_j(r)| 
+!
+! Therefore |chi_i(r) chi_j(r)|  \approx e^{-ao_prod_sigma(j,i) (r - ao_prod_center(1:3,j,i))**2}
+ END_DOC
+ integer :: i,j
+ double precision :: pi,alpha
+ pi = dacos(-1.d0)
+ do i = 1, ao_num
+  do j = 1, ao_num
+!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-5)then
+     alpha = 1.d0/pi * (2.d0*ao_overlap_abs_grid(j,i)/ao_prod_abs_r(j,i))**2
+     ao_prod_sigma(j,i) = alpha
+!   endif
+  enddo
+ enddo
+ END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! ao_prod_dist_grid(j,i,ipoint) = distance between the center of |phi_i(r) phi_j(r)| and the grid point r(ipoint)
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: distance,r(3)
+ do ipoint = 1, n_points_final_grid
+  r(:) = final_grid_points(:,ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    distance = 0.d0
+    do m = 1, 3
+     distance += (ao_prod_center(m,j,i) - r(m))*(ao_prod_center(m,j,i) - r(m))
+    enddo
+    distance = dsqrt(distance)
+    ao_prod_dist_grid(j,i,ipoint)  = distance
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+
+!BEGIN_PROVIDER [ double precision, ao_abs_prod_j1b, (ao_num, ao_num)]
+! implicit none
+!
+!END_PROVIDER 
diff --git a/src/dft_utils_in_r/dm_in_r.irp.f b/src/dft_utils_in_r/dm_in_r.irp.f
index 53e15b06..e4e56ebf 100644
--- a/src/dft_utils_in_r/dm_in_r.irp.f
+++ b/src/dft_utils_in_r/dm_in_r.irp.f
@@ -45,6 +45,8 @@
    call density_and_grad_alpha_beta_and_all_aos_and_grad_aos_at_r(r,dm_a,dm_b,  dm_a_grad, dm_b_grad, aos_array, grad_aos_array)
 
    ! alpha/beta density 
+   dm_a(istate) = max(dm_a(istate),1.d-12)
+   dm_b(istate) = max(dm_b(istate),1.d-12)
    one_e_dm_and_grad_alpha_in_r(4,i,istate)  =  dm_a(istate)
    one_e_dm_and_grad_beta_in_r(4,i,istate)   =  dm_b(istate)
 
@@ -80,6 +82,7 @@
   enddo
  enddo
  !$OMP END PARALLEL DO
+! print*,'density and gradients provided'
 
 END_PROVIDER
 
diff --git a/src/fci_tc_bi/13.fci_tc_bi_ortho.bats b/src/fci_tc_bi/13.fci_tc_bi_ortho.bats
new file mode 100644
index 00000000..7f5d0a9f
--- /dev/null
+++ b/src/fci_tc_bi/13.fci_tc_bi_ortho.bats
@@ -0,0 +1,26 @@
+#!/usr/bin/env bats
+
+source $QP_ROOT/tests/bats/common.bats.sh
+source $QP_ROOT/quantum_package.rc
+
+
+function run_O() {
+  qp set_file O_tc_scf 
+  FILE=O_tc_scf/tc_bi_ortho/psi_l_coef_bi_ortho.gz
+  if test -f "$FILE"; then
+    rm O_tc_scf/tc_bi_ortho/psi*
+  fi
+  qp set determinants n_det_max 20000
+  file=${EZFIO_FILE}.fci_tc_bi_ortho.out
+  qp run fci_tc_bi_ortho | tee $file
+  eref=-74.971188861115309
+  energy="$(grep 'E(before) +rPT2   =' $file | tail -1 | cut -d '=' -f 2)"
+  eq $energy $eref 1e-4
+}
+
+
+@test "O" {
+ run_O 
+}
+
+
diff --git a/src/fci_tc_bi/EZFIO.cfg b/src/fci_tc_bi/EZFIO.cfg
new file mode 100644
index 00000000..a2552c74
--- /dev/null
+++ b/src/fci_tc_bi/EZFIO.cfg
@@ -0,0 +1,17 @@
+[energy]
+type: double precision
+doc: Calculated Selected |FCI| energy
+interface: ezfio
+size: (determinants.n_states)
+
+[energy_pt2]
+type: double precision
+doc: Calculated |FCI| energy + |PT2|
+interface: ezfio
+size: (determinants.n_states)
+
+[cipsi_tc]
+type: character*(32)
+doc: TODO
+interface: ezfio,provider,ocaml
+default: h_tc
diff --git a/src/fci_tc_bi/NEED b/src/fci_tc_bi/NEED
new file mode 100644
index 00000000..000b0deb
--- /dev/null
+++ b/src/fci_tc_bi/NEED
@@ -0,0 +1,3 @@
+tc_bi_ortho
+davidson_undressed
+cipsi_tc_bi_ortho
diff --git a/src/fci_tc_bi/class.irp.f b/src/fci_tc_bi/class.irp.f
new file mode 100644
index 00000000..b4a68ac2
--- /dev/null
+++ b/src/fci_tc_bi/class.irp.f
@@ -0,0 +1,12 @@
+ BEGIN_PROVIDER [ logical, do_only_1h1p ]
+&BEGIN_PROVIDER [ logical, do_only_cas  ]
+&BEGIN_PROVIDER [ logical, do_ddci ]
+ implicit none
+ BEGIN_DOC
+ ! In the FCI case, all those are always false
+ END_DOC
+ do_only_1h1p = .False.
+ do_only_cas  = .False.
+ do_ddci = .False.
+END_PROVIDER
+
diff --git a/src/fci_tc_bi/copy_wf.irp.f b/src/fci_tc_bi/copy_wf.irp.f
new file mode 100644
index 00000000..c46f20d2
--- /dev/null
+++ b/src/fci_tc_bi/copy_wf.irp.f
@@ -0,0 +1,215 @@
+
+use bitmasks
+
+subroutine copy_H_apply_buffer_to_wf_tc
+  use omp_lib
+  implicit none
+  BEGIN_DOC
+! Copies the H_apply buffer to psi_coef.
+! After calling this subroutine, N_det, psi_det and psi_coef need to be touched
+  END_DOC
+  integer(bit_kind), allocatable :: buffer_det(:,:,:)
+  double precision, allocatable  :: buffer_r_coef(:,:), buffer_l_coef(:,:)
+  integer                        :: i,j,k
+  integer                        :: N_det_old
+
+  PROVIDE H_apply_buffer_allocated
+
+
+  ASSERT (N_int > 0)
+  ASSERT (N_det > 0)
+
+  allocate ( buffer_det(N_int,2,N_det), buffer_r_coef(N_det,N_states), buffer_l_coef(N_det,N_states)  )
+
+  ! Backup determinants
+  j=0
+  do i=1,N_det
+!    if (pruned(i)) cycle  ! Pruned determinants
+    j+=1
+    ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num)
+    ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num)
+    buffer_det(:,:,j) = psi_det(:,:,i)
+  enddo
+  N_det_old = j
+
+  ! Backup coefficients
+  do k=1,N_states
+    j=0
+    do i=1,N_det
+!      if (pruned(i)) cycle  ! Pruned determinants
+      j += 1
+      buffer_r_coef(j,k) = psi_r_coef_bi_ortho(i,k)
+      buffer_l_coef(j,k) = psi_l_coef_bi_ortho(i,k)
+    enddo
+    ASSERT ( j == N_det_old )
+  enddo
+
+  ! Update N_det
+  N_det = N_det_old
+  do j=0,nproc-1
+    N_det = N_det + H_apply_buffer(j)%N_det
+  enddo
+
+  ! Update array sizes
+  if (psi_det_size < N_det) then
+    psi_det_size = N_det
+    TOUCH psi_det_size
+  endif
+
+  ! Restore backup in resized array
+  do i=1,N_det_old
+    psi_det(:,:,i) = buffer_det(:,:,i)
+    ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num)
+    ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num )
+  enddo
+  do k=1,N_states
+    do i=1,N_det_old
+      psi_r_coef_bi_ortho(i,k) = buffer_r_coef(i,k)
+      psi_l_coef_bi_ortho(i,k) = buffer_l_coef(i,k)
+    enddo
+  enddo
+
+  ! Copy new buffers
+
+  !$OMP PARALLEL DEFAULT(SHARED)                                     &
+      !$OMP PRIVATE(j,k,i) FIRSTPRIVATE(N_det_old)                   &
+      !$OMP SHARED(N_int,H_apply_buffer,psi_det,psi_r_coef_bi_ortho,psi_l_coef_bi_ortho,N_states,psi_det_size)
+  j=0
+  !$ j=omp_get_thread_num()
+  do k=0,j-1
+    N_det_old += H_apply_buffer(k)%N_det
+  enddo
+  do i=1,H_apply_buffer(j)%N_det
+    do k=1,N_int
+      psi_det(k,1,i+N_det_old) = H_apply_buffer(j)%det(k,1,i)
+      psi_det(k,2,i+N_det_old) = H_apply_buffer(j)%det(k,2,i)
+    enddo
+    ASSERT (sum(popcnt(psi_det(:,1,i+N_det_old))) == elec_alpha_num)
+    ASSERT (sum(popcnt(psi_det(:,2,i+N_det_old))) == elec_beta_num )
+  enddo
+  do k=1,N_states
+    do i=1,H_apply_buffer(j)%N_det
+      psi_r_coef_bi_ortho(i+N_det_old,k) = H_apply_buffer(j)%coef(i,k)
+      psi_l_coef_bi_ortho(i+N_det_old,k) = 0.d0
+    enddo
+  enddo
+  !$OMP BARRIER
+  H_apply_buffer(j)%N_det = 0
+  !$OMP END PARALLEL
+  SOFT_TOUCH N_det psi_det psi_r_coef_bi_ortho psi_l_coef_bi_ortho 
+
+  logical :: found_duplicates
+  call remove_duplicates_in_psi_det_tc(found_duplicates)
+  call bi_normalize(psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,N_det,size(psi_l_coef_bi_ortho,1),N_states)
+  SOFT_TOUCH N_det psi_det psi_r_coef_bi_ortho psi_l_coef_bi_ortho
+
+end
+
+subroutine remove_duplicates_in_psi_det_tc(found_duplicates)
+  implicit none
+  logical, intent(out) :: found_duplicates
+  BEGIN_DOC
+! Removes duplicate determinants in the wave function.
+  END_DOC
+  integer                        :: i,j,k
+  integer(bit_kind), allocatable :: bit_tmp(:)
+  logical,allocatable            :: duplicate(:)
+  logical                        :: dup
+
+  allocate (duplicate(N_det), bit_tmp(N_det))
+
+  found_duplicates = .False.
+
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,dup)
+
+  !$OMP DO
+  do i=1,N_det
+    integer, external            :: det_search_key
+    !$DIR FORCEINLINE
+    bit_tmp(i) = det_search_key(psi_det_sorted_bit_tc(1,1,i),N_int)
+    duplicate(i) = .False.
+  enddo
+  !$OMP END DO
+
+  !$OMP DO schedule(dynamic,1024)
+  do i=1,N_det-1
+    if (duplicate(i)) then
+      cycle
+    endif
+    j = i+1
+    do while (bit_tmp(j)==bit_tmp(i))
+      if (duplicate(j)) then
+        j = j+1
+        if (j > N_det) then
+          exit
+        else
+          cycle
+        endif
+      endif
+      dup = .True.
+      do k=1,N_int
+        if ( (psi_det_sorted_bit_tc(k,1,i) /= psi_det_sorted_bit_tc(k,1,j) ) &
+        .or. (psi_det_sorted_bit_tc(k,2,i) /= psi_det_sorted_bit_tc(k,2,j) ) ) then
+          dup = .False.
+          exit
+        endif
+      enddo
+      if (dup) then
+        duplicate(j) = .True.
+        found_duplicates = .True.
+      endif
+      j += 1
+      if (j > N_det) then
+        exit
+      endif
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  if (found_duplicates) then
+    k=0
+    do i=1,N_det
+      if (.not.duplicate(i)) then
+        k += 1
+        psi_det(:,:,k) = psi_det_sorted_bit_tc (:,:,i)
+        psi_r_coef_bi_ortho(k,:)  = psi_r_coef_sorted_bit(i,:)
+        psi_l_coef_bi_ortho(k,:)  = psi_l_coef_sorted_bit(i,:)
+      else
+        if (sum(abs(psi_r_coef_sorted_bit(i,:))) /= 0.d0 ) then
+          psi_r_coef_bi_ortho(k,:)  = psi_r_coef_sorted_bit(i,:)
+          psi_l_coef_bi_ortho(k,:)  = psi_l_coef_sorted_bit(i,:)
+        endif
+      endif
+    enddo
+    N_det = k
+    psi_det_sorted_bit_tc(:,:,1:N_det) = psi_det(:,:,1:N_det)
+    psi_r_coef_sorted_bit(1:N_det,:) = psi_r_coef_bi_ortho(1:N_det,:)
+    psi_l_coef_sorted_bit(1:N_det,:) = psi_l_coef_bi_ortho(1:N_det,:)
+    TOUCH N_det psi_det psi_det_sorted_bit_tc c0_weight psi_r_coef_sorted_bit psi_l_coef_sorted_bit
+  endif
+  psi_det = psi_det_sorted_tc
+  psi_r_coef_bi_ortho = psi_r_coef_sorted_bi_ortho
+  psi_l_coef_bi_ortho = psi_l_coef_sorted_bi_ortho
+  SOFT_TOUCH psi_det psi_r_coef_bi_ortho psi_l_coef_bi_ortho psi_det_sorted_bit_tc psi_r_coef_sorted_bit psi_l_coef_sorted_bit
+  deallocate (duplicate,bit_tmp)
+end
+
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_bit_tc, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_r_coef_sorted_bit, (N_det,N_states) ]
+&BEGIN_PROVIDER [ double precision, psi_l_coef_sorted_bit, (N_det,N_states) ]
+   implicit none
+   BEGIN_DOC
+   ! Determinants on which we apply $\langle i|H|psi \rangle$ for perturbation.
+   ! They are sorted by determinants interpreted as integers. Useful
+   ! to accelerate the search of a random determinant in the wave
+   ! function.
+   END_DOC
+
+   call sort_dets_by_det_search_key(N_det, psi_det, psi_r_coef_bi_ortho, size(psi_r_coef_bi_ortho,1),       &
+       psi_det_sorted_bit_tc, psi_r_coef_sorted_bit, N_states)
+   call sort_dets_by_det_search_key(N_det, psi_det, psi_l_coef_bi_ortho, size(psi_l_coef_bi_ortho,1),       &
+       psi_det_sorted_bit_tc, psi_l_coef_sorted_bit, N_states)
+
+END_PROVIDER
diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f
new file mode 100644
index 00000000..c8369e93
--- /dev/null
+++ b/src/fci_tc_bi/diagonalize_ci.irp.f
@@ -0,0 +1,99 @@
+
+subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+  use selection_types
+  implicit none
+  integer, intent(inout)              :: ndet      ! number of determinants from before 
+  double precision, intent(inout)     :: E_tc,norm ! E and norm from previous wave function 
+  type(pt2_type)  , intent(in)        :: pt2_data  ! PT2 from previous wave function 
+  logical, intent(in) :: print_pt2
+  BEGIN_DOC
+!  Replace the coefficients of the CI states by the coefficients of the
+!  eigenstates of the CI matrix
+  END_DOC
+  integer :: i,j
+ double precision :: pt2_tmp,pt1_norm,rpt2_tmp,abs_pt2
+ pt2_tmp = pt2_data % pt2(1)
+ abs_pt2 = pt2_data % variance(1)
+ pt1_norm = pt2_data % overlap(1,1)
+ rpt2_tmp = pt2_tmp/(1.d0 + pt1_norm)
+  print*,'*****'
+  print*,'New wave function information'
+  print*,'N_det tc               = ',N_det
+  print*,'norm_ground_left_right_bi_orth = ',norm_ground_left_right_bi_orth
+  print*,'eigval_right_tc = ',eigval_right_tc_bi_orth(1)
+  print*,'Ndet, E_tc = ',N_det,eigval_right_tc_bi_orth(1)
+  print*,'*****'
+  if(print_pt2)then
+   print*,'*****'
+   print*,'previous wave function info'
+   print*,'norm(before)      = ',norm
+   print*,'E(before)         = ',E_tc 
+   print*,'PT1 norm          = ',dsqrt(pt1_norm)
+   print*,'PT2               = ',pt2_tmp
+   print*,'rPT2              = ',rpt2_tmp
+   print*,'|PT2|             = ',abs_pt2
+   print*,'Positive PT2      = ',(pt2_tmp + abs_pt2)*0.5d0
+   print*,'Negative PT2      = ',(pt2_tmp - abs_pt2)*0.5d0
+   print*,'E(before) + PT2   = ',E_tc + pt2_tmp/norm
+   print*,'E(before) +rPT2   = ',E_tc + rpt2_tmp/norm
+   write(*,'(A28,X,I10,X,100(F16.8,X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc  + pt2_tmp/norm,E_tc  + rpt2_tmp/norm,abs_pt2
+   print*,'*****'
+  endif
+  E_tc  = eigval_right_tc_bi_orth(1)
+  norm  = norm_ground_left_right_bi_orth
+  ndet  = N_det
+  do j=1,N_states
+    do i=1,N_det
+      psi_l_coef_bi_ortho(i,j) = leigvec_tc_bi_orth(i,j)
+      psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j)
+      psi_coef(i,j) = dabs(psi_l_coef_bi_ortho(i,j) * psi_r_coef_bi_ortho(i,j))   
+    enddo
+  enddo
+  SOFT_TOUCH eigval_left_tc_bi_orth  eigval_right_tc_bi_orth leigvec_tc_bi_orth reigvec_tc_bi_orth norm_ground_left_right_bi_orth 
+  SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho psi_coef  
+
+  call save_tc_bi_ortho_wavefunction
+end
+
+subroutine print_CI_dressed(ndet, E_tc,norm,pt2_data,print_pt2)
+  use selection_types
+  implicit none
+  integer, intent(inout)              :: ndet      ! number of determinants from before 
+  double precision, intent(inout)     :: E_tc,norm ! E and norm from previous wave function 
+  type(pt2_type)  , intent(in)        :: pt2_data  ! PT2 from previous wave function 
+  logical, intent(in) :: print_pt2
+  BEGIN_DOC
+!  Replace the coefficients of the CI states by the coefficients of the
+!  eigenstates of the CI matrix
+  END_DOC
+  integer :: i,j
+  print*,'*****'
+  print*,'New wave function information'
+  print*,'N_det tc               = ',N_det
+  print*,'norm_ground_left_right_bi_orth = ',norm_ground_left_right_bi_orth
+  print*,'eigval_right_tc = ',eigval_right_tc_bi_orth(1)
+  print*,'Ndet, E_tc = ',N_det,eigval_right_tc_bi_orth(1)
+  print*,'*****'
+  if(print_pt2)then
+   print*,'*****'
+   print*,'previous wave function info'
+   print*,'norm(before)      = ',norm
+   print*,'E(before)         = ',E_tc
+   print*,'PT1 norm          = ',dsqrt(pt2_data % overlap(1,1))
+   print*,'E(before) + PT2   = ',E_tc + (pt2_data % pt2(1))/norm
+   print*,'PT2               = ',pt2_data % pt2(1)
+   print*,'Ndet, E_tc, E+PT2 = ',ndet,E_tc,E_tc + (pt2_data % pt2(1))/norm,dsqrt(pt2_data % overlap(1,1))
+   print*,'*****'
+  endif
+  E_tc  = eigval_right_tc_bi_orth(1)
+  norm  = norm_ground_left_right_bi_orth
+  ndet  = N_det
+  do j=1,N_states
+    do i=1,N_det
+      psi_coef(i,j) = reigvec_tc_bi_orth(i,j)
+    enddo
+  enddo
+  SOFT_TOUCH  eigval_left_tc_bi_orth  eigval_right_tc_bi_orth  leigvec_tc_bi_orth  norm_ground_left_right_bi_orth  psi_coef  reigvec_tc_bi_orth 
+
+end
+
diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f
new file mode 100644
index 00000000..84ac8166
--- /dev/null
+++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f
@@ -0,0 +1,85 @@
+program fci
+  implicit none
+  BEGIN_DOC
+  ! Selected Full Configuration Interaction with stochastic selection
+  ! and PT2.
+  !
+  ! This program performs a |CIPSI|-like selected |CI| using a
+  ! stochastic scheme for both the selection of the important Slater
+  ! determinants and the computation of the |PT2| correction. This
+  ! |CIPSI|-like algorithm will be performed for the lowest states of
+  ! the variational space (see :option:`determinants n_states`). The
+  ! |FCI| program will stop when reaching at least one the two following
+  ! conditions:
+  !
+  ! * number of Slater determinants > :option:`determinants n_det_max`
+  ! * abs(|PT2|) less than :option:`perturbation pt2_max`
+  !
+  ! The following other options can be of interest:
+  !
+  ! :option:`determinants read_wf`
+  !   When set to |false|, the program starts with a ROHF-like Slater
+  !   determinant as a guess wave function. When set to |true|, the
+  !   program starts with the wave function(s) stored in the |EZFIO|
+  !   directory as guess wave function(s).
+  !
+  ! :option:`determinants s2_eig`
+  !   When set to |true|, the selection will systematically add all the
+  !   necessary Slater determinants in order to have a pure spin wave
+  !   function with an |S^2| value corresponding to
+  !   :option:`determinants expected_s2`.
+  !
+  ! For excited states calculations, it is recommended to start with
+  ! :ref:`cis` or :ref:`cisd` guess wave functions, eventually in
+  ! a restricted set of |MOs|, and to set :option:`determinants s2_eig`
+  ! to |true|.
+  !
+  END_DOC
+
+
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid 
+  pruning = -1.d0
+  touch pruning
+!  pt2_relative_error = 0.01d0
+!  touch pt2_relative_error
+  call run_cipsi_tc
+
+end
+
+
+subroutine run_cipsi_tc
+
+  implicit none
+
+  if (.not.is_zmq_slave) then
+    PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e
+    if(elec_alpha_num+elec_beta_num.ge.3)then
+      if(three_body_h_tc)then
+        call provide_all_three_ints_bi_ortho
+      endif
+    endif
+    ! ---
+
+    if (do_pt2) then
+      call run_stochastic_cipsi
+    else
+      call run_cipsi
+    endif
+
+  else
+    PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks
+    if(elec_alpha_num+elec_beta_num.ge.3)then
+      if(three_body_h_tc)then
+        call provide_all_three_ints_bi_ortho
+      endif
+    endif
+    ! ---
+
+    call run_slave_cipsi
+
+  endif
+
+end
diff --git a/src/fci_tc_bi/generators.irp.f b/src/fci_tc_bi/generators.irp.f
new file mode 100644
index 00000000..bf972423
--- /dev/null
+++ b/src/fci_tc_bi/generators.irp.f
@@ -0,0 +1,56 @@
+use bitmasks
+
+BEGIN_PROVIDER [ integer, N_det_generators ]
+ implicit none
+ BEGIN_DOC
+ ! For Single reference wave functions, the number of generators is 1 : the
+ ! Hartree-Fock determinant
+ END_DOC
+ integer :: i
+ double precision :: norm
+ call write_time(6)
+ norm = 1.d0
+ N_det_generators = N_det
+ do i=1,N_det
+   norm = norm - psi_average_norm_contrib_sorted_tc(i)
+   if (norm - 1.d-10 < 1.d0 - threshold_generators) then
+     N_det_generators = i
+     exit
+   endif
+ enddo
+ N_det_generators = max(N_det_generators,1)
+ call write_int(6,N_det_generators,'Number of generators')
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_generators, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_coef_generators, (psi_det_size,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! For Single reference wave functions, the generator is the
+ ! Hartree-Fock determinant
+ END_DOC
+ psi_det_generators(1:N_int,1:2,1:N_det) = psi_det_sorted_tc(1:N_int,1:2,1:N_det)
+ psi_coef_generators(1:N_det,1:N_states) = psi_coef_sorted_tc(1:N_det,1:N_states)
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc_gen, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc_gen, (psi_det_size,N_states) ]
+&BEGIN_PROVIDER [ integer, psi_det_sorted_tc_gen_order,     (psi_det_size)  ]
+
+ implicit none
+ BEGIN_DOC
+ ! For Single reference wave functions, the generator is the
+ ! Hartree-Fock determinant
+ END_DOC
+ psi_det_sorted_tc_gen =  psi_det_sorted_tc
+ psi_coef_sorted_tc_gen = psi_coef_sorted_tc
+ psi_det_sorted_tc_gen_order = psi_det_sorted_tc_order
+ integer :: i
+! do i = 1,N_det
+!  print*,'i = ',i
+!  call debug_det(psi_det_sorted_tc(1,1,i),N_int)
+! enddo
+END_PROVIDER
+
+
diff --git a/src/fci_tc_bi/save_energy.irp.f b/src/fci_tc_bi/save_energy.irp.f
new file mode 100644
index 00000000..7c41d00f
--- /dev/null
+++ b/src/fci_tc_bi/save_energy.irp.f
@@ -0,0 +1,9 @@
+subroutine save_energy(E,pt2)
+  implicit none
+  BEGIN_DOC
+! Saves the energy in |EZFIO|.
+  END_DOC
+  double precision, intent(in) :: E(N_states), pt2(N_states)
+  call ezfio_set_fci_tc_energy(E(1:N_states))
+  call ezfio_set_fci_tc_energy_pt2(E(1:N_states)+pt2(1:N_states))
+end
diff --git a/src/fci_tc_bi/scripts_fci_tc/CH2.xyz b/src/fci_tc_bi/scripts_fci_tc/CH2.xyz
new file mode 100644
index 00000000..9fa57f4b
--- /dev/null
+++ b/src/fci_tc_bi/scripts_fci_tc/CH2.xyz
@@ -0,0 +1,6 @@
+3
+
+C                    6.000000     0.000000     0.000000     0.173480    
+H                    1.000000     0.000000    -0.861500    -0.520430
+H                    1.000000     0.000000     0.861500    -0.520430
+
diff --git a/src/fci_tc_bi/scripts_fci_tc/FH.xyz b/src/fci_tc_bi/scripts_fci_tc/FH.xyz
new file mode 100644
index 00000000..9a1563f4
--- /dev/null
+++ b/src/fci_tc_bi/scripts_fci_tc/FH.xyz
@@ -0,0 +1,5 @@
+2 
+
+H                    0.000000     0.000000    -0.825120
+F                    0.000000     0.000000     0.091680
+
diff --git a/src/fci_tc_bi/scripts_fci_tc/extract_tables.sh b/src/fci_tc_bi/scripts_fci_tc/extract_tables.sh
new file mode 100755
index 00000000..a585884e
--- /dev/null
+++ b/src/fci_tc_bi/scripts_fci_tc/extract_tables.sh
@@ -0,0 +1,16 @@
+
+input=h2o
+basis=dz
+EZFIO=${input}_${basis}_bi_ortho
+file=${EZFIO}.tc_fci.out 
+grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" ${file} | cut -d "=" -f 2 > data_${EZFIO}
+file=${EZFIO}.tc_fci_normal_order.out
+grep "Ndet,E,E+PT2,E+RPT2=" ${file} | cut -d "=" -f 2 > data_${EZFIO}_normal
+
+#EZFIO=${input}_${basis}_ortho
+#file=${EZFIO}.tc_fci.out 
+#grep "Ndet, E_tc, E+PT2 =" ${file} | cut -d "=" -f 2 > data_${EZFIO}
+#file=${EZFIO}.tc_fci_normal_order.out
+#grep "Ndet, E_tc, E+PT2 =" ${file} | cut -d "=" -f 2 > data_${EZFIO}_normal
+
+#zip data_${input}_${basis}.zip data*
diff --git a/src/fci_tc_bi/scripts_fci_tc/h2o.sh b/src/fci_tc_bi/scripts_fci_tc/h2o.sh
new file mode 100644
index 00000000..d0afca30
--- /dev/null
+++ b/src/fci_tc_bi/scripts_fci_tc/h2o.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# This is a sample PBS script
+# temps CPU a ajuster au calcul
+   #PBS -l cput=2000:00:00
+   #PBS -l nodes=1:ppn=16
+# memoire a ajuster au calcul
+   #PBS -l vmem=100gb
+# a changer
+# Pour savoir sur quel noeud on est
+#echo $HOSTNAME
+# Startdir = ou sont les fichiers d'input, par defaut HOMEDIR
+#
+StartDir=$PBS_O_WORKDIR
+echo $StartDir
+#
+# SCRATCHDIR = espace temporaire (local au noeud et a vider apres le calcul)
+# NE PAS MODIFIER
+ulimit -s unlimited
+export SCRATCHDIR=/scratch/$USER/$PBS_JOBID
+#
+cd $StartDir
+
+
+############################################################################
+#### EXAMPLE OF SCRIPT TO RUN A CIPSI CALCULATION ON 5 STATES ON THE Ne^+ CATION
+#### USING NATURAL ORBITALS OF A SMALL CIPSI AS MOS 
+#### ALL STATES WILL HAVE THE SAME SPIN SIMETRY : A DOUBLET 
+
+####### YOU PUT THE PATH TO YOUR 
+QP_ROOT=/home_lct/eginer/programs/qp2
+source ${QP_ROOT}/quantum_package.rc 
+####### YOU LOAD SOME LIBRARIES 
+alias python3='/programmes/installation/Python/3.7.1/bin/python3'
+type -a python3
+
+export OMP_NUM_THREADS=16
+
+module load intel2016_OMPI-V2
+
+source ~/programs/qp2/quantum_package.rc
+./script.sh h2o dz O 1
diff --git a/src/fci_tc_bi/scripts_fci_tc/h2o.xyz b/src/fci_tc_bi/scripts_fci_tc/h2o.xyz
new file mode 100644
index 00000000..dee51ffc
--- /dev/null
+++ b/src/fci_tc_bi/scripts_fci_tc/h2o.xyz
@@ -0,0 +1,6 @@
+3
+
+O      0.000000     0.000000     0.000000
+H      0.000000     0.000000     0.957200
+H     -0.926627     0.000000    -0.239987
+
diff --git a/src/fci_tc_bi/scripts_fci_tc/script.sh b/src/fci_tc_bi/scripts_fci_tc/script.sh
new file mode 100755
index 00000000..58585658
--- /dev/null
+++ b/src/fci_tc_bi/scripts_fci_tc/script.sh
@@ -0,0 +1,31 @@
+source /home_lct/eginer/qp2/quantum_package.rc
+input=$1
+ basis=$2
+ atom=$3
+ mul=$4
+ EXPORT_OMP_NUM_THREADS=16
+ dir=${input}_${basis}
+ mkdir ${dir}
+ cp ${input}.xyz ${dir}/
+ cd $dir
+ EZFIO=${input}_${basis}_bi_ortho
+ qp create_ezfio -b "${atom}:cc-pcv${basis}|H:cc-pv${basis}" ${input}.xyz -m $mul -o $EZFIO
+ qp run scf
+ # Getting THE GOOD VALUE OF MU
+ qp run print_mu_av_tc  | tee ${EZFIO_FILE}.mu_av.out
+ mu=`grep "average_mu_rs_c_lda  =" ${EZFIO_FILE}.mu_av.out | cut -d "=" -f 2`
+ qp set ao_two_e_erf_ints mu_erf $mu
+ # Carrying the BI-ORTHO TC-SCF 
+ qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out
+ #   Three body terms without normal order
+ ### THREE E TERMS FOR FCI
+ qp set tc_keywords three_body_h_tc True  
+ qp set tc_keywords double_normal_ord False
+ qp set perturbation pt2_max 0.003
+ qp run fci_tc_bi_ortho | tee ${EZFIO_FILE}.tc_fci.out
+ #   Three body terms with normal order
+ qp set tc_keywords double_normal_ord True
+ qp run fci_tc_bi_ortho | tee ${EZFIO_FILE}.tc_fci_normal_order.out
+
+cd ../
+
diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f
new file mode 100644
index 00000000..4d3de7d0
--- /dev/null
+++ b/src/fci_tc_bi/selectors.irp.f
@@ -0,0 +1,70 @@
+use bitmasks
+
+BEGIN_PROVIDER [ double precision, threshold_selectors ]
+ implicit none
+ BEGIN_DOC
+ ! Thresholds on selectors (fraction of the square of the norm)
+ END_DOC
+ threshold_selectors = dsqrt(threshold_generators)
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, N_det_selectors]
+  implicit none
+  BEGIN_DOC
+  ! For Single reference wave functions, the number of selectors is 1 : the
+  ! Hartree-Fock determinant
+  END_DOC
+  integer                        :: i
+  double precision               :: norm, norm_max
+  call write_time(6)
+  N_det_selectors = N_det
+  call write_int(6,N_det_selectors,'Number of selectors')
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_selectors, (N_int,2,psi_selectors_size) ]
+&BEGIN_PROVIDER [ double precision, psi_selectors_coef, (psi_selectors_size,N_states) ]
+&BEGIN_PROVIDER [ double precision, psi_selectors_coef_tc, (psi_selectors_size,2,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Determinants on which we apply <i|H|psi> for perturbation.
+  END_DOC
+  integer                        :: i,k
+
+  do i=1,N_det_selectors
+    do k=1,N_int
+      psi_selectors(k,1,i) = psi_det_sorted_tc(k,1,i)
+      psi_selectors(k,2,i) = psi_det_sorted_tc(k,2,i)
+    enddo
+  enddo
+  do k=1,N_states
+    do i=1,N_det_selectors
+      psi_selectors_coef(i,k)      = psi_coef_sorted_tc_gen(i,k)
+      psi_selectors_coef_tc(i,1,k) = psi_l_coef_sorted_bi_ortho(i,k)
+      psi_selectors_coef_tc(i,2,k) = psi_r_coef_sorted_bi_ortho(i,k)
+    enddo
+  enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
+&BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp_tc, (N_states,2,psi_selectors_size) ]
+  implicit none
+  BEGIN_DOC
+  ! Transposed psi_selectors
+  END_DOC
+  integer                        :: i,k
+
+  do i=1,N_det_selectors
+    do k=1,N_states
+      psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k)
+      psi_selectors_coef_transp_tc(k,1,i) = psi_selectors_coef_tc(i,1,k)
+      psi_selectors_coef_transp_tc(k,2,i) = psi_selectors_coef_tc(i,2,k)
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, psi_selectors_size ]
+ implicit none
+ psi_selectors_size = psi_det_size
+END_PROVIDER
+
diff --git a/src/fci_tc_bi/zmq.irp.f b/src/fci_tc_bi/zmq.irp.f
new file mode 100644
index 00000000..cb2df483
--- /dev/null
+++ b/src/fci_tc_bi/zmq.irp.f
@@ -0,0 +1,103 @@
+BEGIN_TEMPLATE
+
+integer function zmq_put_$X(zmq_to_qp_run_socket,worker_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Put $X on the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer                        :: rc
+  character*(256)                :: msg
+
+  zmq_put_$X = 0
+
+  write(msg,'(A,1X,I8,1X,A200)') 'put_data '//trim(zmq_state), worker_id, '$X'
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
+  if (rc /= len(trim(msg))) then
+    zmq_put_$X = -1
+    return
+  endif
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,$X,4,0)
+  if (rc /= 4) then
+    zmq_put_$X = -1
+    return
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+  if (msg(1:rc) /= 'put_data_reply ok') then
+    zmq_put_$X = -1
+    return
+  endif
+
+end
+
+integer function zmq_get_$X(zmq_to_qp_run_socket, worker_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Get $X from the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer                        :: rc
+  character*(256)                :: msg
+
+  PROVIDE zmq_state
+  zmq_get_$X = 0
+  if (mpi_master) then
+
+    write(msg,'(A,1X,I8,1X,A200)') 'get_data '//trim(zmq_state), worker_id, '$X'
+    rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
+    if (rc /= len(trim(msg))) then
+      zmq_get_$X = -1
+      go to 10
+    endif
+
+    rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+    if (msg(1:14) /= 'get_data_reply') then
+      zmq_get_$X = -1
+      go to 10
+    endif
+
+    rc = f77_zmq_recv(zmq_to_qp_run_socket,$X,4,0)
+    if (rc /= 4) then
+      zmq_get_$X = -1
+      go to 10
+    endif
+
+  endif
+
+  10 continue
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+
+    call MPI_BCAST (zmq_get_$X, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      print *,  irp_here//': Unable to broadcast N_det_generators'
+      stop -1
+    endif
+    call MPI_BCAST ($X, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      print *,  irp_here//': Unable to broadcast N_det_generators'
+      stop -1
+    endif
+  IRP_ENDIF
+
+end
+
+SUBST [ X ]
+
+N_det_generators ;;
+N_det_selectors ;;
+
+END_TEMPLATE
+
diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats
index 65117b76..df566032 100644
--- a/src/hartree_fock/10.hf.bats
+++ b/src/hartree_fock/10.hf.bats
@@ -18,6 +18,44 @@ function run() {
 }
 
 
+function run_pt_charges() {
+  thresh=1.e-5
+  cp ${QP_ROOT}/src/nuclei/write_pt_charges.py . 
+  cat > hcn.xyz << EOF
+3
+HCN molecule
+C    0.0    0.0    0.0
+H    0.0    0.0    1.064
+N    0.0    0.0    -1.156
+EOF
+
+cat > hcn_charges.xyz << EOF
+0.5    2.0   0.0   0.0
+0.5   -2.0   0.0   0.0
+EOF
+
+EZFIO=hcn_pt_charges
+rm -rf $EZFIO
+qp create_ezfio -b def2-svp hcn.xyz -o $EZFIO
+qp run scf
+mv hcn_charges.xyz ${EZFIO}_point_charges.xyz
+python write_pt_charges.py ${EZFIO}
+qp set nuclei point_charges True
+qp run scf | tee ${EZFIO}.pt_charges.out
+  energy="$(ezfio get hartree_fock energy)"
+good=-92.76613324421798
+  eq $energy $good $thresh
+rm -rf $EZFIO
+}
+
+@test "point charges" { 
+ run_pt_charges
+}
+
+@test "HCN" { # 7.792500 8.51926s
+  run hcn.ezfio -92.88717500035233
+}
+
 @test "B-B" { # 3s
   run b2_stretched.ezfio -48.9950585434279
 }
@@ -91,9 +129,6 @@ function run() {
   run ch4.ezfio -40.19961807784367
 }
 
-@test "HCN" { # 7.792500 8.51926s
-  run hcn.ezfio -92.88717500035233
-}
 
 @test "N2" { # 8.648100 13.754s
   run n2.ezfio  -108.9834897852979
diff --git a/src/iterations_tc/EZFIO.cfg b/src/iterations_tc/EZFIO.cfg
new file mode 100644
index 00000000..2a5e94a7
--- /dev/null
+++ b/src/iterations_tc/EZFIO.cfg
@@ -0,0 +1,24 @@
+[n_iter]
+interface: ezfio
+doc: Number of saved iterations
+type:integer
+default: 1
+
+[n_det_iterations]
+interface: ezfio, provider
+doc: Number of determinants at each iteration
+type: integer
+size: (100)
+ 
+[energy_iterations]
+interface: ezfio, provider
+doc: The variational energy at each iteration
+type: double precision 
+size: (determinants.n_states,100)
+
+[pt2_iterations]
+interface: ezfio, provider
+doc: The |PT2| correction at each iteration
+type: double precision 
+size: (determinants.n_states,100)
+
diff --git a/src/iterations_tc/NEED b/src/iterations_tc/NEED
new file mode 100644
index 00000000..e69de29b
diff --git a/src/iterations_tc/io.irp.f b/src/iterations_tc/io.irp.f
new file mode 100644
index 00000000..821f5e84
--- /dev/null
+++ b/src/iterations_tc/io.irp.f
@@ -0,0 +1,37 @@
+BEGIN_PROVIDER [ integer, n_iter  ]
+  implicit none
+  BEGIN_DOC
+! number of iterations
+  END_DOC
+
+  logical                        :: has
+  PROVIDE ezfio_filename
+  if (mpi_master) then
+
+      double precision :: zeros(N_states,100)
+      integer :: izeros(100)
+      zeros = 0.d0
+      izeros = 0
+      call ezfio_set_iterations_n_iter(0)
+      call ezfio_set_iterations_energy_iterations(zeros)
+      call ezfio_set_iterations_pt2_iterations(zeros)
+      call ezfio_set_iterations_n_det_iterations(izeros)
+      n_iter = 1
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST( n_iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read n_iter with MPI'
+    endif
+  IRP_ENDIF
+
+  call write_time(6)
+
+END_PROVIDER
+
diff --git a/src/iterations_tc/iterations.irp.f b/src/iterations_tc/iterations.irp.f
new file mode 100644
index 00000000..2f1cf0c1
--- /dev/null
+++ b/src/iterations_tc/iterations.irp.f
@@ -0,0 +1,43 @@
+BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Extrapolated energy, using E_var = f(PT2) where PT2=0
+ END_DOC
+! integer :: i
+ extrapolated_energy = 0.D0
+END_PROVIDER 
+
+ subroutine get_extrapolated_energy(Niter,ept2,pt1,extrap_energy)
+ implicit none
+ integer, intent(in)  :: Niter
+ double precision, intent(in) :: ept2(Niter),pt1(Niter),extrap_energy(Niter)
+ call extrapolate_data(Niter,ept2,pt1,extrap_energy)
+ end
+
+subroutine save_iterations(e_, pt2_,n_)
+  implicit none
+  BEGIN_DOC
+! Update the energy in the EZFIO file.
+  END_DOC
+  integer, intent(in) :: n_
+  double precision, intent(in) :: e_(N_states), pt2_(N_states)
+  integer :: i
+
+  if (N_iter == 101) then
+    do i=2,N_iter-1
+      energy_iterations(1:N_states,N_iter-1) = energy_iterations(1:N_states,N_iter)
+      pt2_iterations(1:N_states,N_iter-1) = pt2_iterations(1:N_states,N_iter) 
+    enddo
+    N_iter = N_iter-1
+    TOUCH N_iter
+  endif
+
+  energy_iterations(1:N_states,N_iter) = e_(1:N_states)
+     pt2_iterations(1:N_states,N_iter) = pt2_(1:N_states)
+  n_det_iterations(N_iter) = n_
+  call ezfio_set_iterations_N_iter(N_iter)
+  call ezfio_set_iterations_energy_iterations(energy_iterations)
+  call ezfio_set_iterations_pt2_iterations(pt2_iterations)
+  call ezfio_set_iterations_n_det_iterations(n_det_iterations)
+end
+
diff --git a/src/iterations_tc/print_extrapolation.irp.f b/src/iterations_tc/print_extrapolation.irp.f
new file mode 100644
index 00000000..cb46fb67
--- /dev/null
+++ b/src/iterations_tc/print_extrapolation.irp.f
@@ -0,0 +1,46 @@
+subroutine print_extrapolated_energy
+  implicit none
+  BEGIN_DOC
+! Print the extrapolated energy in the output
+  END_DOC
+
+  integer :: i,k
+
+  if (N_iter< 2) then
+    return
+  endif
+  write(*,'(A)') ''
+  write(*,'(A)') 'Extrapolated energies'
+  write(*,'(A)') '------------------------'
+  write(*,'(A)') ''
+
+  print *,  ''
+  print *,  'State ', 1
+  print *,  ''
+  write(*,*)  '=========== ', '==================='
+  write(*,*)  'minimum PT2 ', 'Extrapolated energy'
+  write(*,*)  '=========== ', '==================='
+  do k=2,min(N_iter,8)
+    write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter+1-k), extrapolated_energy(k,1)
+  enddo
+  write(*,*)  '=========== ', '==================='
+
+  do i=2, min(N_states,N_det)
+    print *,  ''
+    print *,  'State ', i
+    print *,  ''
+    write(*,*)  '=========== ', '=================== ', '=================== ', '==================='
+    write(*,*)  'minimum PT2 ', 'Extrapolated energy ', '  Excitation (a.u)  ', '  Excitation (eV)  '
+    write(*,*)  '=========== ', '=================== ', '=================== ', '==================='
+    do k=2,min(N_iter,8)
+      write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,N_iter+1-k), extrapolated_energy(k,i), &
+          extrapolated_energy(k,i) - extrapolated_energy(k,1), &
+          (extrapolated_energy(k,i) - extrapolated_energy(k,1) ) * 27.211396641308d0
+    enddo
+    write(*,*)  '=========== ', '=================== ', '=================== ', '==================='
+  enddo
+
+  print *,  ''
+
+end subroutine
+
diff --git a/src/iterations_tc/print_summary.irp.f b/src/iterations_tc/print_summary.irp.f
new file mode 100644
index 00000000..8e6285e2
--- /dev/null
+++ b/src/iterations_tc/print_summary.irp.f
@@ -0,0 +1,104 @@
+subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_configuration_,n_st,s2_)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Print the extrapolated energy in the output
+  END_DOC
+
+  integer, intent(in)            :: n_det_, n_configuration_, n_st
+  double precision, intent(in)   :: e_(n_st), s2_(n_st)
+  type(pt2_type)  , intent(in)   :: pt2_data, pt2_data_err
+  integer                        :: i, k
+  integer                        :: N_states_p
+  character*(9)                  :: pt2_string
+  character*(512)                :: fmt
+
+  if (do_pt2) then
+    pt2_string = '        '
+  else
+    pt2_string = '(approx)'
+  endif
+
+  N_states_p = min(N_det_,n_st)
+
+  print *, ''
+  print '(A,I12)',  'Summary at N_det = ', N_det_
+  print '(A)',      '-----------------------------------'
+  print *, ''
+
+  write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))'
+  write(*,fmt)
+  write(fmt,*) '(13X,', N_states_p, '(6X,A7,1X,I6,10X))'
+  write(*,fmt) ('State',k, k=1,N_states_p)
+  write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))'
+  write(*,fmt)
+  write(fmt,*) '(A13,', N_states_p, '(1X,F14.8,15X))'
+  write(*,fmt) '# E          ', e_(1:N_states_p)
+  if (N_states_p > 1) then
+    write(*,fmt) '# Excit. (au)', e_(1:N_states_p)-e_(1)
+    write(*,fmt) '# Excit. (eV)', (e_(1:N_states_p)-e_(1))*27.211396641308d0
+  endif
+  write(fmt,*) '(A13,', 2*N_states_p, '(1X,F14.8))'
+  write(*,fmt) '# PT2 '//pt2_string, (pt2_data % pt2(k), pt2_data_err % pt2(k), k=1,N_states_p)
+  write(*,fmt) '# rPT2'//pt2_string, (pt2_data % rpt2(k), pt2_data_err % rpt2(k), k=1,N_states_p)
+  write(*,'(A)') '#'
+  write(*,fmt) '# E+PT2      ', (e_(k)+pt2_data % pt2(k),pt2_data_err % pt2(k), k=1,N_states_p)
+  write(*,fmt) '# E+rPT2     ', (e_(k)+pt2_data % rpt2(k),pt2_data_err % rpt2(k), k=1,N_states_p)
+  if (N_states_p > 1) then
+    write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1)), &
+      dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1)), k=1,N_states_p)
+    write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1))*27.211396641308d0, &
+      dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1))*27.211396641308d0, k=1,N_states_p)
+  endif
+  write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))'
+  write(*,fmt)
+  print *,  ''
+
+  print *,  'N_det             = ', N_det_
+  print *,  'N_states          = ', n_st
+  if (s2_eig) then
+    print *,  'N_cfg             = ', N_configuration_
+    if (only_expected_s2) then
+      print *,  'N_csf             = ', N_csf
+    endif
+  endif
+  print *,  ''
+
+  do k=1, N_states_p
+    print*,'* State ',k
+    print *,  '< S^2 >         = ', s2_(k)
+    print *,  'E               = ', e_(k)
+    print *,  'Variance        = ', pt2_data % variance(k), ' +/- ', pt2_data_err % variance(k)
+    print *,  'PT norm         = ', dsqrt(pt2_data % overlap(k,k)), ' +/- ', 0.5d0*dsqrt(pt2_data % overlap(k,k)) * pt2_data_err % overlap(k,k) / (pt2_data % overlap(k,k))
+    print *,  'PT2             = ', pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k)
+    print *,  'rPT2            = ', pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k)
+    print *,  'E+PT2 '//pt2_string//' = ', e_(k)+pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k)
+    print *,  'E+rPT2'//pt2_string//' = ', e_(k)+pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k)
+    print *,  ''
+  enddo
+
+  print *,  '-----'
+  if(n_st.gt.1)then
+    print *, 'Variational Energy difference (au | eV)'
+    do i=2, N_states_p
+      print*,'Delta E = ', (e_(i) - e_(1)), &
+        (e_(i) - e_(1)) * 27.211396641308d0
+    enddo
+    print *,  '-----'
+    print*, 'Variational + perturbative Energy difference (au | eV)'
+    do i=2, N_states_p
+      print*,'Delta E = ', (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))), &
+        (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))) * 27.211396641308d0
+    enddo
+    print *,  '-----'
+    print*, 'Variational + renormalized perturbative Energy difference (au | eV)'
+    do i=2, N_states_p
+      print*,'Delta E = ', (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))), &
+        (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))) * 27.211396641308d0
+    enddo
+  endif
+
+!  call print_energy_components()
+
+end subroutine
+
diff --git a/src/mo_basis/mo_class.irp.f b/src/mo_basis/mo_class.irp.f
index 95fbb443..7705e414 100644
--- a/src/mo_basis/mo_class.irp.f
+++ b/src/mo_basis/mo_class.irp.f
@@ -1,8 +1,3 @@
-! DO NOT MODIFY BY HAND
-! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py
-! from file /home/eginer/programs/qp2/src/mo_basis/EZFIO.cfg
-
-
 BEGIN_PROVIDER [ character*(32), mo_class , (mo_num) ]
   implicit none
   BEGIN_DOC
@@ -35,6 +30,4 @@ BEGIN_PROVIDER [ character*(32), mo_class , (mo_num) ]
     endif
   IRP_ENDIF
 
-  call write_time(6)
-
 END_PROVIDER
diff --git a/src/mo_two_e_erf_ints/map_integrals_erf.irp.f b/src/mo_two_e_erf_ints/map_integrals_erf.irp.f
index 73050ec5..3405ec2b 100644
--- a/src/mo_two_e_erf_ints/map_integrals_erf.irp.f
+++ b/src/mo_two_e_erf_ints/map_integrals_erf.irp.f
@@ -235,11 +235,11 @@ subroutine get_mo_two_e_integrals_erf_ij(k,l,sze,out_array,map)
 
   logical :: integral_is_in_map
   if (key_kind == 8) then
-    call i8radix_sort(hash,iorder,kk,-1)
+    call i8sort(hash,iorder,kk)
   else if (key_kind == 4) then
-    call iradix_sort(hash,iorder,kk,-1)
+    call isort(hash,iorder,kk)
   else if (key_kind == 2) then
-    call i2radix_sort(hash,iorder,kk,-1)
+    call i2sort(hash,iorder,kk)
   endif
 
   call map_get_many(mo_integrals_erf_map, hash, tmp_val, kk)
@@ -290,11 +290,11 @@ subroutine get_mo_two_e_integrals_erf_i1j1(k,l,sze,out_array,map)
 
   logical :: integral_is_in_map
   if (key_kind == 8) then
-    call i8radix_sort(hash,iorder,kk,-1)
+    call i8sort(hash,iorder,kk)
   else if (key_kind == 4) then
-    call iradix_sort(hash,iorder,kk,-1)
+    call isort(hash,iorder,kk)
   else if (key_kind == 2) then
-    call i2radix_sort(hash,iorder,kk,-1)
+    call i2sort(hash,iorder,kk)
   endif
 
   call map_get_many(mo_integrals_erf_map, hash, tmp_val, kk)
diff --git a/src/mo_two_e_ints/core_quantities.irp.f b/src/mo_two_e_ints/core_quantities.irp.f
index b764a1a6..3642365e 100644
--- a/src/mo_two_e_ints/core_quantities.irp.f
+++ b/src/mo_two_e_ints/core_quantities.irp.f
@@ -53,7 +53,7 @@ BEGIN_PROVIDER [ double precision, h_core_ri, (mo_num, mo_num) ]
    enddo
    do k=1,mo_num
      do i=1,mo_num
-       h_core_ri(i,j) = h_core_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j)
+       h_core_ri(i,j) = h_core_ri(i,j) - 0.5d0 * big_array_exchange_integrals(k,i,j)
      enddo
    enddo
  enddo
diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f
index d58932ce..ae299e9f 100644
--- a/src/mo_two_e_ints/mo_bi_integrals.irp.f
+++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f
@@ -53,7 +53,11 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
 !    call four_idx_novvvv
     call four_idx_novvvv_old
   else
-    call add_integrals_to_map(full_ijkl_bitmask_4)
+    if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then
+      call four_idx_dgemm
+    else
+      call add_integrals_to_map(full_ijkl_bitmask_4)
+    endif
   endif
 
   call wall_time(wall_2)
@@ -77,6 +81,94 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
 
 END_PROVIDER
 
+subroutine four_idx_dgemm
+  implicit none
+  integer :: p,q,r,s,i,j,k,l
+  double precision, allocatable :: a1(:,:,:,:)
+  double precision, allocatable :: a2(:,:,:,:)
+
+  allocate (a1(ao_num,ao_num,ao_num,ao_num))
+
+  print *, 'Getting AOs'
+  !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,r,s)
+  do s=1,ao_num
+    do r=1,ao_num
+      do q=1,ao_num
+        call get_ao_two_e_integrals(q,r,s,ao_num,a1(1,q,r,s))
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+  print *, '1st transformation'
+  ! 1st transformation
+  allocate (a2(ao_num,ao_num,ao_num,mo_num))
+  call dgemm('T','N', (ao_num*ao_num*ao_num), mo_num, ao_num, 1.d0, a1, ao_num, mo_coef, ao_num, 0.d0, a2, (ao_num*ao_num*ao_num))
+
+  ! 2nd transformation
+  print *, '2nd transformation'
+  deallocate (a1)
+  allocate (a1(ao_num,ao_num,mo_num,mo_num))
+  call dgemm('T','N', (ao_num*ao_num*mo_num), mo_num, ao_num, 1.d0, a2, ao_num, mo_coef, ao_num, 0.d0, a1, (ao_num*ao_num*mo_num))
+
+  ! 3rd transformation
+  print *, '3rd transformation'
+  deallocate (a2)
+  allocate (a2(ao_num,mo_num,mo_num,mo_num))
+  call dgemm('T','N', (ao_num*mo_num*mo_num), mo_num, ao_num, 1.d0, a1, ao_num, mo_coef, ao_num, 0.d0, a2, (ao_num*mo_num*mo_num))
+
+  ! 4th transformation
+  print *, '4th transformation'
+  deallocate (a1)
+  allocate (a1(mo_num,mo_num,mo_num,mo_num))
+  call dgemm('T','N', (mo_num*mo_num*mo_num), mo_num, ao_num, 1.d0, a2, ao_num, mo_coef, ao_num, 0.d0, a1, (mo_num*mo_num*mo_num))
+
+  deallocate (a2)
+
+  integer :: n_integrals, size_buffer
+  integer(key_kind)  , allocatable :: buffer_i(:)
+  real(integral_kind), allocatable :: buffer_value(:)
+  size_buffer = min(ao_num*ao_num*ao_num,16000000)
+
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,buffer_value,buffer_i,n_integrals)
+  allocate ( buffer_i(size_buffer), buffer_value(size_buffer) )
+
+  n_integrals = 0
+  !$OMP DO
+  do l=1,mo_num
+    do k=1,mo_num
+      do j=1,l
+        do i=1,k
+            if (abs(a1(i,j,k,l)) < mo_integrals_threshold) then
+              cycle
+            endif
+            n_integrals += 1
+            buffer_value(n_integrals) = a1(i,j,k,l)
+            !DIR$ FORCEINLINE
+            call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals))
+            if (n_integrals == size_buffer) then
+              call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
+              n_integrals = 0
+            endif
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+
+  call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
+
+  deallocate(buffer_i, buffer_value)
+  !$OMP END PARALLEL
+
+  deallocate (a1)
+
+  call map_unique(mo_integrals_map)
+
+  integer*8                      :: get_mo_map_size, mo_map_size
+  mo_map_size = get_mo_map_size()
+
+end subroutine
 
 subroutine add_integrals_to_map(mask_ijkl)
   use bitmasks
@@ -153,24 +245,26 @@ subroutine add_integrals_to_map(mask_ijkl)
     return
   endif
 
-  size_buffer = min(ao_num*ao_num*ao_num,16000000)
+  call wall_time(wall_1)
+
+  size_buffer = min(ao_num*ao_num*ao_num,8000000)
   print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
       ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
 
-  double precision               :: accu_bis
-  accu_bis = 0.d0
-  call wall_time(wall_1)
-
   !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax,   &
       !$OMP  two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
       !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
-      !$OMP  wall_0,thread_num,accu_bis)                             &
+      !$OMP  wall_0,thread_num)                             &
       !$OMP  DEFAULT(NONE)                                           &
       !$OMP  SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l,   &
       !$OMP  mo_coef_transp,                                         &
       !$OMP  mo_coef_transp_is_built, list_ijkl,                     &
       !$OMP  mo_coef_is_built, wall_1,                               &
       !$OMP  mo_coef,mo_integrals_threshold,mo_integrals_map)
+
+  thread_num = 0
+  !$  thread_num = omp_get_thread_num()
+
   n_integrals = 0
   wall_0 = wall_1
   allocate(two_e_tmp_3(mo_num, n_j, n_k),                 &
@@ -181,8 +275,6 @@ subroutine add_integrals_to_map(mask_ijkl)
       buffer_i(size_buffer),                                         &
       buffer_value(size_buffer) )
 
-  thread_num = 0
-  !$  thread_num = omp_get_thread_num()
   !$OMP DO SCHEDULE(guided)
   do l1 = 1,ao_num
     two_e_tmp_3 = 0.d0
@@ -340,10 +432,10 @@ subroutine add_integrals_to_map(mask_ijkl)
   !$OMP END DO NOWAIT
   deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
 
-  integer                        :: index_needed
-
-  call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
-      real(mo_integrals_threshold,integral_kind))
+  if (n_integrals > 0) then
+    call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
+        real(mo_integrals_threshold,integral_kind))
+  endif
   deallocate(buffer_i, buffer_value)
   !$OMP END PARALLEL
   call map_merge(mo_integrals_map)
@@ -433,12 +525,10 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
 
   call wall_time(wall_1)
   call cpu_time(cpu_1)
-  double precision               :: accu_bis
-  accu_bis = 0.d0
   !$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
       !$OMP  two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
       !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
-      !$OMP  wall_0,thread_num,accu_bis)                             &
+      !$OMP  wall_0,thread_num)                             &
       !$OMP  DEFAULT(NONE)                                           &
       !$OMP  SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,       &
       !$OMP  mo_coef_transp,                                         &
@@ -636,8 +726,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
   !$OMP END DO NOWAIT
   deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
 
-  integer                        :: index_needed
-
   call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
       real(mo_integrals_threshold,integral_kind))
   deallocate(buffer_i, buffer_value)
diff --git a/src/mol_properties/EZFIO.cfg b/src/mol_properties/EZFIO.cfg
new file mode 100644
index 00000000..35a095fb
--- /dev/null
+++ b/src/mol_properties/EZFIO.cfg
@@ -0,0 +1,23 @@
+[print_all_transitions]
+type: logical
+doc: If true, print the transition between all the states
+interface: ezfio,provider,ocaml
+default: false
+
+[calc_dipole_moment]
+type: logical
+doc: If true, the electric dipole moment will be computed
+interface: ezfio,provider,ocaml
+default: false
+
+[calc_tr_dipole_moment]
+type: logical
+doc: If true and N_states > 1, the transition electric dipole moment will be computed
+interface: ezfio,provider,ocaml
+default: false
+
+[calc_osc_str]
+type: logical
+doc: If true and N_states > 1, the oscillator strength will be computed
+interface: ezfio,provider,ocaml
+default: false
diff --git a/src/mol_properties/NEED b/src/mol_properties/NEED
new file mode 100644
index 00000000..8d89a452
--- /dev/null
+++ b/src/mol_properties/NEED
@@ -0,0 +1,2 @@
+determinants
+davidson_undressed
diff --git a/src/mol_properties/README.md b/src/mol_properties/README.md
new file mode 100644
index 00000000..637b76d7
--- /dev/null
+++ b/src/mol_properties/README.md
@@ -0,0 +1,25 @@
+# Molecular properties
+
+Available quantities:  
+- Electric dipole moment  
+- Electric transition dipole moment  
+- Oscillator strength  
+
+They are not computed by default. To compute them:
+```
+qp set mol_properties calc_dipole_moment true  
+qp set mol_properties calc_tr_dipole_moment true  
+qp set mol_properties calc_osc_str true  
+```
+If you are interested in transitions between two excited states:  
+```
+qp set mol_properties print_all_transitions true
+```
+They can be obtained by running
+```
+qp run properties
+```
+or at each step of a cipsi calculation with
+```
+qp run fci
+```
diff --git a/src/mol_properties/ci_energy_no_diag.irp.f b/src/mol_properties/ci_energy_no_diag.irp.f
new file mode 100644
index 00000000..a4407d3b
--- /dev/null
+++ b/src/mol_properties/ci_energy_no_diag.irp.f
@@ -0,0 +1,13 @@
+BEGIN_PROVIDER [double precision,  ci_energy_no_diag, (N_states) ]
+
+  implicit none
+
+  BEGIN_DOC
+  ! CI energy from density matrices and integrals
+  ! Avoid the rediagonalization for ci_energy
+  END_DOC
+
+  ci_energy_no_diag = psi_energy + nuclear_repulsion
+
+END_PROVIDER
+
diff --git a/src/mol_properties/mo_deriv_1.irp.f b/src/mol_properties/mo_deriv_1.irp.f
new file mode 100644
index 00000000..cfe6f789
--- /dev/null
+++ b/src/mol_properties/mo_deriv_1.irp.f
@@ -0,0 +1,30 @@
+ BEGIN_PROVIDER [double precision, mo_deriv_1_x , (mo_num,mo_num)]
+&BEGIN_PROVIDER [double precision, mo_deriv_1_y , (mo_num,mo_num)]
+&BEGIN_PROVIDER [double precision, mo_deriv_1_z , (mo_num,mo_num)]
+ BEGIN_DOC
+ ! array of the integrals of MO_i * d/dx MO_j
+ ! array of the integrals of MO_i * d/dy MO_j
+ ! array of the integrals of MO_i * d/dz MO_j
+ END_DOC
+ implicit none
+
+  call ao_to_mo(                                                     &
+      ao_deriv_1_x,                                                   &
+      size(ao_deriv_1_x,1),                                           &
+      mo_deriv_1_x,                                                   &
+      size(mo_deriv_1_x,1)                                            &
+      )
+  call ao_to_mo(                                                     &
+      ao_deriv_1_y,                                                   &
+      size(ao_deriv_1_y,1),                                           &
+      mo_deriv_1_y,                                                   &
+      size(mo_deriv_1_y,1)                                            &
+      )
+  call ao_to_mo(                                                     &
+      ao_deriv_1_z,                                                   &
+      size(ao_deriv_1_z,1),                                           &
+      mo_deriv_1_z,                                                   &
+      size(mo_deriv_1_z,1)                                            &
+      )
+
+END_PROVIDER
diff --git a/src/mol_properties/multi_s_deriv_1.irp.f b/src/mol_properties/multi_s_deriv_1.irp.f
new file mode 100644
index 00000000..84bfecc9
--- /dev/null
+++ b/src/mol_properties/multi_s_deriv_1.irp.f
@@ -0,0 +1,69 @@
+ BEGIN_PROVIDER [double precision, multi_s_deriv_1, (N_states, N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_x_deriv_1, (N_states, N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_y_deriv_1, (N_states, N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_z_deriv_1, (N_states, N_states)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! Providers for :
+  ! <Psi_m|v_x|Psi_n>
+  ! <Psi_m|v_y|Psi_n>
+  ! <Psi_m|v_z|Psi_n>
+  ! ||v|| = sqrt(v_x^2 + v_y^2 + v_z^2)
+  ! v_x = d/dx
+  ! Cf. multi_s_dipole_moment for the equations
+  END_DOC
+
+  integer          :: istate,jstate ! States
+  integer          :: i,j           ! general spatial MOs
+  double precision :: nuclei_part_x, nuclei_part_y, nuclei_part_z
+ 
+  multi_s_x_deriv_1 = 0.d0
+  multi_s_y_deriv_1 = 0.d0
+  multi_s_z_deriv_1 = 0.d0
+ 
+  do jstate = 1, N_states
+    do istate = 1, N_states
+ 
+      do i = 1, mo_num  
+        do j = 1, mo_num  
+          multi_s_x_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_x(j,i)  
+          multi_s_y_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_y(j,i) 
+          multi_s_z_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_z(j,i) 
+        enddo
+      enddo
+ 
+    enddo
+  enddo
+ 
+  ! Nuclei part
+  nuclei_part_x = 0.d0
+  nuclei_part_y = 0.d0
+  nuclei_part_z = 0.d0
+ 
+  do i = 1,nucl_num 
+    nuclei_part_x += nucl_charge(i) * nucl_coord(i,1) 
+    nuclei_part_y += nucl_charge(i) * nucl_coord(i,2) 
+    nuclei_part_z += nucl_charge(i) * nucl_coord(i,3) 
+  enddo
+ 
+  ! Only if istate = jstate, otherwise 0 by the orthogonality of the states
+  do istate = 1, N_states
+    multi_s_x_deriv_1(istate,istate) += nuclei_part_x
+    multi_s_y_deriv_1(istate,istate) += nuclei_part_y
+    multi_s_z_deriv_1(istate,istate) += nuclei_part_z
+  enddo
+ 
+  ! d = <Psi|r|Psi>
+  do jstate = 1, N_states
+    do istate = 1, N_states
+      multi_s_deriv_1(istate,jstate) = &
+        dsqrt(multi_s_x_deriv_1(istate,jstate)**2 & 
+            + multi_s_y_deriv_1(istate,jstate)**2 &
+            + multi_s_z_deriv_1(istate,jstate)**2) 
+    enddo
+  enddo
+
+END_PROVIDER
+
diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f
new file mode 100644
index 00000000..d5e62799
--- /dev/null
+++ b/src/mol_properties/multi_s_dipole_moment.irp.f
@@ -0,0 +1,93 @@
+! Providers for the dipole moments along x,y,z and the total dipole
+! moments.
+
+! The dipole moment along the x axis is:
+! \begin{align*}
+! \mu_x = < \Psi_m | \sum_i x_i + \sum_A Z_A R_A | \Psi_n >
+! \end{align*}
+! where $i$ is used for the electrons and $A$ for the nuclei.
+! $Z_A$ the charge of the nucleus $A$ and $R_A$ its position in the
+! space.
+
+! And it can be computed using the (transition, if n /= m) density
+! matrix as a expectation value
+! \begin{align*}
+! <\Psi_n|x| \Psi_m > = \sum_p \gamma_{pp}^{nm} < \phi_p | x | \phi_p >
+!      + \sum_{pq, p \neq q} \gamma_{pq}^{nm} < \phi_p | x | \phi_q > +  < \Psi_m | \sum_A Z_A R_A | \Psi_n >
+! \end{align*}
+
+
+
+BEGIN_PROVIDER [double precision, multi_s_dipole_moment, (N_states, N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment, (N_states, N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment, (N_states, N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment, (N_states, N_states)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! Providers for :
+  ! <\Psi_m|\mu_x|\Psi_n>
+  ! <\Psi_m|\mu_y|\Psi_n>
+  ! <\Psi_m|\mu_z|\Psi_n>
+  ! ||\mu|| = \sqrt{\mu_x^2 + \mu_y^2 + \mu_z^2}
+  !
+  ! <\Psi_n|x| \Psi_m > = \sum_p \gamma_{pp}^{nm} \bra{\phi_p} x \ket{\phi_p} 
+  !   + \sum_{pq, p \neq q} \gamma_{pq}^{nm} \bra{\phi_p} x \ket{\phi_q}
+  ! \Psi: wf
+  ! n,m indexes for the states
+  ! p,q: general spatial MOs 
+  ! gamma^{nm}: density matrix \bra{\Psi^n} a^{\dagger}_a a_i \ket{\Psi^m}
+  END_DOC
+
+  integer          :: istate,jstate ! States
+  integer          :: i,j           ! general spatial MOs
+  double precision :: nuclei_part_x, nuclei_part_y, nuclei_part_z
+ 
+  multi_s_x_dipole_moment = 0.d0
+  multi_s_y_dipole_moment = 0.d0
+  multi_s_z_dipole_moment = 0.d0
+ 
+  do jstate = 1, N_states
+    do istate = 1, N_states
+ 
+      do i = 1, mo_num  
+        do j = 1, mo_num  
+          multi_s_x_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_x(j,i)  
+          multi_s_y_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_y(j,i) 
+          multi_s_z_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_z(j,i) 
+        enddo
+      enddo
+ 
+    enddo
+  enddo
+ 
+  ! Nuclei part
+  nuclei_part_x = 0.d0
+  nuclei_part_y = 0.d0
+  nuclei_part_z = 0.d0
+ 
+  do i = 1,nucl_num 
+    nuclei_part_x += nucl_charge(i) * nucl_coord(i,1) 
+    nuclei_part_y += nucl_charge(i) * nucl_coord(i,2) 
+    nuclei_part_z += nucl_charge(i) * nucl_coord(i,3) 
+  enddo
+ 
+  ! Only if istate = jstate, otherwise 0 by the orthogonality of the states
+  do istate = 1, N_states
+    multi_s_x_dipole_moment(istate,istate) += nuclei_part_x
+    multi_s_y_dipole_moment(istate,istate) += nuclei_part_y
+    multi_s_z_dipole_moment(istate,istate) += nuclei_part_z
+  enddo
+ 
+  ! d = <Psi|r|Psi>
+  do jstate = 1, N_states
+    do istate = 1, N_states
+      multi_s_dipole_moment(istate,jstate) = &
+        dsqrt(multi_s_x_dipole_moment(istate,jstate)**2 & 
+            + multi_s_y_dipole_moment(istate,jstate)**2 &
+            + multi_s_z_dipole_moment(istate,jstate)**2) 
+    enddo
+  enddo
+
+END_PROVIDER
diff --git a/src/mol_properties/print_mol_properties.irp.f b/src/mol_properties/print_mol_properties.irp.f
new file mode 100644
index 00000000..3753a3dd
--- /dev/null
+++ b/src/mol_properties/print_mol_properties.irp.f
@@ -0,0 +1,24 @@
+subroutine print_mol_properties()
+
+  implicit none
+
+  BEGIN_DOC
+  ! Run the propertie calculations
+  END_DOC
+
+  ! Electric dipole moment
+  if (calc_dipole_moment) then
+    call print_dipole_moment
+  endif
+
+  ! Transition electric dipole moment
+  if (calc_tr_dipole_moment .and. N_states > 1) then
+    call print_transition_dipole_moment
+  endif
+
+  ! Oscillator strength
+  if (calc_osc_str .and. N_states > 1) then
+    call print_oscillator_strength 
+  endif
+
+end
diff --git a/src/mol_properties/print_properties.irp.f b/src/mol_properties/print_properties.irp.f
new file mode 100644
index 00000000..4c0a9f38
--- /dev/null
+++ b/src/mol_properties/print_properties.irp.f
@@ -0,0 +1,194 @@
+! Dipole moments
+
+! Provided
+! | N_states                                   | integer          | Number of states                         |
+! | multi_s_x_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along x axis |
+! | multi_s_y_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along y axis |
+! | multi_s_z_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along z axis |
+! | multi_s_dipole_moment(N_states,N_states)   | double precision | Total (transition) dipole moments        |
+
+
+subroutine print_dipole_moment
+
+  implicit none
+
+  BEGIN_DOC
+  ! To print the dipole moment ||<\Psi_i|µ|\Psi_i>|| and its x,y,z components
+  END_DOC
+
+  integer :: istate
+  double precision, allocatable :: d(:), d_x(:), d_y(:), d_z(:)
+
+  allocate(d(N_states),d_x(N_states),d_y(N_states),d_z(N_states))
+ 
+  do istate = 1, N_states 
+    d_x(istate) = multi_s_x_dipole_moment(istate,istate)
+    d_y(istate) = multi_s_y_dipole_moment(istate,istate)
+    d_z(istate) = multi_s_z_dipole_moment(istate,istate)
+    d(istate)   = multi_s_dipole_moment(istate,istate)
+  enddo
+
+  ! Atomic units
+  print*,''
+  print*,'# Dipoles:'
+  print*,'=============================================='
+  print*,' Dipole moments (au)'
+  print*,' State      X           Y           Z         ||µ||' 
+
+  do istate = 1, N_states 
+    write(*,'(I5,4(F12.6))') (istate-1), d_x(istate), d_y(istate), d_z(istate), d(istate)
+  enddo
+
+  ! Debye
+  print*,''
+  print*,' Dipole moments (D)'
+  print*,' State      X           Y           Z         ||µ||' 
+
+  do istate = 1, N_states 
+    write(*,'(I5,4(F12.6))') (istate-1), d_x(istate)*au_to_D, d_y(istate)*au_to_D, d_z(istate)*au_to_D, d(istate)*au_to_D
+  enddo
+
+  print*,'=============================================='
+  print*,''
+
+  deallocate(d,d_x,d_y,d_z)
+
+ end
+
+! Transition dipole moments
+
+! Provided
+! | N_states                                   | integer          | Number of states                         |
+! | multi_s_x_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along x axis |
+! | multi_s_y_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along y axis |
+! | multi_s_z_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along z axis |
+! | multi_s_dipole_moment(N_states,N_states)   | double precision | Total (transition) dipole moments        |
+
+
+subroutine print_transition_dipole_moment
+
+  implicit none
+
+  BEGIN_DOC
+  ! To print the transition dipole moment ||<\Psi_i|µ|\Psi_j>|| and its components along x, y and z
+  END_DOC
+
+  integer          :: istate,jstate, n_states_print
+  double precision :: f, d, d_x, d_y, d_z, dip_str
+
+  if (N_states == 1 .or. N_det == 1) then
+    return
+  endif
+
+  print*,''
+  print*,'# Transition dipoles:'
+  print*,'=============================================='
+  print*,' Transition dipole moments (au)'
+  write(*,'(A89)') '   #  Transition       X           Y           Z         ||µ||     Dip. str.   Osc. str.'
+ 
+  if (print_all_transitions) then
+    n_states_print = N_states
+  else
+   n_states_print = 1
+  endif
+
+  do jstate = 1, n_states_print !N_states
+    do istate = jstate + 1, N_states
+      d_x = multi_s_x_dipole_moment(istate,jstate)
+      d_y = multi_s_y_dipole_moment(istate,jstate)
+      d_z = multi_s_z_dipole_moment(istate,jstate)
+      dip_str = d_x**2 + d_y**2 + d_z**2
+      d = multi_s_dipole_moment(istate,jstate)
+      f = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate))
+      write(*,'(I4,I4,A4,I3,6(F12.6))') (istate-1), (jstate-1), '  ->', (istate-1), d_x, d_y, d_z, d, dip_str, f
+    enddo
+  enddo
+
+  print*,''
+  print*,' Transition dipole moments (D)'
+  write(*,'(A89)') '   #  Transition       X           Y           Z         ||µ||     Dip. str.   Osc. str.'
+  
+  do jstate = 1, n_states_print !N_states
+    do istate = jstate + 1, N_states
+      d_x = multi_s_x_dipole_moment(istate,jstate) * au_to_D
+      d_y = multi_s_y_dipole_moment(istate,jstate) * au_to_D
+      d_z = multi_s_z_dipole_moment(istate,jstate) * au_to_D
+      d = multi_s_dipole_moment(istate,jstate)
+      dip_str = d_x**2 + d_y**2 + d_z**2
+      f = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate))
+      d = multi_s_dipole_moment(istate,jstate) * au_to_D
+      write(*,'(I4,I4,A4,I3,6(F12.6))') (istate-1), (jstate-1), '  ->', (istate-1), d_x, d_y, d_z, d, dip_str, f
+    enddo
+  enddo
+  print*,'=============================================='
+  print*,''
+
+end
+
+! Oscillator strengths
+
+! Provided
+! | N_states                                 | integer          | Number of states                  |
+! | multi_s_dipole_moment(N_states,N_states) | double precision | Total (transition) dipole moments |
+! | multi_s_deriv1_moment(N_states,N_states) | double precision | Total (transition) ...            |
+! | ci_energy_no_diag(N_states)              | double precision | CI energy of each state |
+
+! Internal
+! | f_l            | double precision | Oscillator strength in length gauge   |
+! | f_v            | double precision | Oscillator strength in velocity gauge |
+! | f_m            | double precision | Oscillator strength in mixed gauge    |
+! | n_states_print | integer          | Number of printed states              |
+
+
+subroutine print_oscillator_strength
+
+  implicit none
+
+  BEGIN_DOC
+  ! https://doi.org/10.1016/j.cplett.2004.03.126
+  ! Oscillator strength in:
+  ! - length gauge, f^l_{ij} = 2/3 (E_i - E_j) <\Psi_i|r|\Psi_j> <\Psi_j|r|\Psi_i>
+  ! - velocity gauge, f^v_{ij} = 2/3 (E_i - E_j)^(-1) <\Psi_i|v|\Psi_j> <\Psi_j|v|\Psi_i>
+  ! - mixed gauge, f^m_{ij} = -2i/3 <\Psi_i|r|\Psi_j> <\Psi_j|v|\Psi_i> 
+  END_DOC
+  
+  integer :: istate,jstate,k, n_states_print
+  double precision :: f_l,f_v,f_m,d,v
+
+  if (N_states == 1 .or. N_det == 1) then
+    return
+  endif
+
+  print*,''
+  print*,'# Oscillator strength:'
+  print*,'=============================================='
+
+  if (print_all_transitions) then
+    n_states_print = N_states
+  else
+   n_states_print = 1
+  endif
+
+  write(*,'(A103)') '  Oscillator strength in length gauge (f_l), velocity gauge (f_v) and mixed length-velocity gauge (f_m)'
+  do jstate = 1, n_states_print !N_states
+    do istate = jstate + 1, N_states
+      d = multi_s_dipole_moment(istate,jstate)
+      v = multi_s_deriv_1(istate,jstate)
+      ! Length gauge
+      f_l = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate))
+      ! Velocity gauge
+      f_v = 2d0/3d0 * v * v * 1d0/dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate))
+      ! Mixed gauge
+      f_m = 2d0/3d0 * d * v
+
+      write(*,'(A19,I3,A9,F10.6,A5,F7.1,A10,F9.6,A6,F9.6,A6,F9.6,A8,F7.3)') '   #  Transition n.', (istate-1), ': Excit.=', dabs((ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)))*ha_to_ev, &
+      ' eV ( ',dabs((ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)))*Ha_to_nm,' nm), f_l=',f_l, ', f_v=', f_v, ', f_m=', f_m, ', <S^2>=', s2_values(istate)
+      !write(*,'(I4,I4,A4,I3,A6,F6.1,A6,F6.1)') (istate-1), (jstate-1), '  ->', (istate-1), ', %T1=', percent_exc(2,istate), ', %T2=',percent_exc(3,istate)
+  
+    enddo
+  enddo
+
+  print*,'=============================================='
+  print*,''
+
+end
diff --git a/src/mol_properties/properties.irp.f b/src/mol_properties/properties.irp.f
new file mode 100644
index 00000000..7ea6f9c3
--- /dev/null
+++ b/src/mol_properties/properties.irp.f
@@ -0,0 +1,14 @@
+program mol_properties
+
+  implicit none
+
+  BEGIN_DOC
+  ! Calculation of the properties
+  END_DOC
+
+  read_wf = .True.
+  touch read_wf
+
+  call print_mol_properties()
+
+end
diff --git a/src/mu_of_r/f_hf_utils.irp.f b/src/mu_of_r/f_hf_utils.irp.f
index 8480a288..102b40a0 100644
--- a/src/mu_of_r/f_hf_utils.irp.f
+++ b/src/mu_of_r/f_hf_utils.irp.f
@@ -86,6 +86,9 @@ subroutine f_HF_valence_ab(r1,r2,f_HF_val_ab,two_bod_dens)
    enddo
   enddo
  enddo
+ ! multiply by two to adapt to the N(N-1) normalization condition of the active two-rdm
+ f_HF_val_ab *= 2.d0 
+ two_bod_dens *= 2.d0
 end
 
 
@@ -136,4 +139,6 @@ subroutine integral_f_HF_valence_ab(r1,int_f_HF_val_ab)
    enddo
   enddo
  enddo
+ ! multiply by two to adapt to the N(N-1) normalization condition of the active two-rdm
+ int_f_HF_val_ab *= 2.d0
 end
diff --git a/src/mu_of_r/f_psi_i_a_v_utils.irp.f b/src/mu_of_r/f_psi_i_a_v_utils.irp.f
index 427da199..69fa16ff 100644
--- a/src/mu_of_r/f_psi_i_a_v_utils.irp.f
+++ b/src/mu_of_r/f_psi_i_a_v_utils.irp.f
@@ -49,6 +49,9 @@ subroutine give_f_ii_val_ab(r1,r2,f_ii_val_ab,two_bod_dens)
    enddo
   enddo
  enddo
+ ! multiply by two to adapt to the N(N-1) normalization condition of the active two-rdm
+ f_ii_val_ab  *= 2.d0 
+ two_bod_dens *= 2.d0
 end
 
 
@@ -142,6 +145,9 @@ subroutine give_f_ia_val_ab(r1,r2,f_ia_val_ab,two_bod_dens,istate)
    f_ia_val_ab += v_tilde(i,a) * rho_tilde(i,a)
   enddo
  enddo
+ ! multiply by two to adapt to the N(N-1) normalization condition of the active two-rdm
+ f_ia_val_ab *= 2.d0 
+ two_bod_dens *= 2.d0
 end
 
 
@@ -222,6 +228,8 @@ subroutine give_f_aa_val_ab(r1,r2,f_aa_val_ab,two_bod_dens,istate)
    f_aa_val_ab += v_tilde(b,a) * rho_tilde(b,a)
   enddo
  enddo
+
+ ! DO NOT multiply by two as in give_f_ii_val_ab and give_f_ia_val_ab because the N(N-1) normalization condition of the active two-rdm
 end
 
 BEGIN_PROVIDER [double precision, two_e_int_aa_f, (n_basis_orb,n_basis_orb,n_act_orb,n_act_orb)]
diff --git a/src/mu_of_r/f_psi_utils.irp.f b/src/mu_of_r/f_psi_utils.irp.f
index bdd76f18..95f10f36 100644
--- a/src/mu_of_r/f_psi_utils.irp.f
+++ b/src/mu_of_r/f_psi_utils.irp.f
@@ -74,7 +74,7 @@ BEGIN_PROVIDER [double precision, full_occ_2_rdm_cntrctd_trans, (n_points_final_
      do j = 1, n_core_inact_act_orb
       do i = 1, n_core_inact_act_orb
                                              !                                 1 2 1 2 
-       full_occ_2_rdm_cntrctd_trans(ipoint,k,l,istate) += full_occ_2_rdm_ab_mo(i,j,k,l,istate) * core_inact_act_mos_in_r_array(j,ipoint) * core_inact_act_mos_in_r_array(i,ipoint)
+       full_occ_2_rdm_cntrctd_trans(ipoint,k,l,istate) += 0.5d0 * full_occ_2_rdm_ab_mo(i,j,k,l,istate) * core_inact_act_mos_in_r_array(j,ipoint) * core_inact_act_mos_in_r_array(i,ipoint)
       enddo
      enddo
     enddo
diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f
index 5c41acdc..f9c3b3b3 100644
--- a/src/mu_of_r/mu_of_r_conditions.irp.f
+++ b/src/mu_of_r/mu_of_r_conditions.irp.f
@@ -110,7 +110,7 @@
  do istate = 1, N_states
   do ipoint = 1, n_points_final_grid
    f_psi  = f_psi_cas_ab(ipoint,istate) 
-   on_top = on_top_cas_mu_r(ipoint,istate)
+   on_top = on_top_cas_mu_r(ipoint,istate) 
    if(on_top.le.1.d-12.or.f_psi.le.0.d0.or.f_psi * on_top.lt.0.d0)then
      w_psi   = 1.d+10
    else 
diff --git a/src/non_h_ints_mu/NEED b/src/non_h_ints_mu/NEED
new file mode 100644
index 00000000..d09ab4a5
--- /dev/null
+++ b/src/non_h_ints_mu/NEED
@@ -0,0 +1,2 @@
+ao_tc_eff_map
+bi_ortho_mos
diff --git a/src/non_h_ints_mu/README.rst b/src/non_h_ints_mu/README.rst
new file mode 100644
index 00000000..6a36bb98
--- /dev/null
+++ b/src/non_h_ints_mu/README.rst
@@ -0,0 +1,11 @@
+=============
+non_h_ints_mu
+=============
+
+Computes the non hermitian potential of the mu-TC Hamiltonian on the AO and BI-ORTHO MO basis.
+The operator is defined in Eq. 33 of JCP 154, 084119 (2021)
+
+The two providers are :
++) ao_non_hermit_term_chemist which returns the non hermitian part of the two-electron TC Hamiltonian on the MO basis. 
++) mo_non_hermit_term_chemist which returns the non hermitian part of the two-electron TC Hamiltonian on the BI-ORTHO MO basis. 
+
diff --git a/src/non_h_ints_mu/debug_fit.irp.f b/src/non_h_ints_mu/debug_fit.irp.f
new file mode 100644
index 00000000..af441335
--- /dev/null
+++ b/src/non_h_ints_mu/debug_fit.irp.f
@@ -0,0 +1,512 @@
+
+! --
+
+program debug_fit
+
+  implicit none
+
+  my_grid_becke  = .True.
+
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  !my_n_pt_r_grid = 100
+  !my_n_pt_a_grid = 170
+  !my_n_pt_r_grid = 150
+  !my_n_pt_a_grid = 194
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  PROVIDE mu_erf j1b_pen
+
+  !call test_j1b_nucl()
+  call test_grad_j1b_nucl()
+  !call test_lapl_j1b_nucl()
+
+  !call test_list_b2()
+  !call test_list_b3()
+
+  call test_fit_u()
+  !call test_fit_u2()
+  !call test_fit_ugradu()
+
+end
+
+! ---
+
+subroutine test_j1b_nucl()
+
+  implicit none
+  integer                    :: ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision           :: r(3)
+  double precision, external :: j1b_nucl
+
+  print*, ' test_j1b_nucl ...'
+
+  PROVIDE v_1b
+
+  eps_ij  = 1d-7
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    i_exc  = v_1b(ipoint) 
+    i_num  = j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in v_1b on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_j1b_nucl
+
+! ---
+
+subroutine test_grad_j1b_nucl()
+
+  implicit none
+  integer                    :: ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision           :: r(3)
+  double precision, external :: grad_x_j1b_nucl
+  double precision, external :: grad_y_j1b_nucl
+  double precision, external :: grad_z_j1b_nucl
+
+  print*, ' test_grad_j1b_nucl ...'
+
+  PROVIDE v_1b_grad
+
+  eps_ij  = 1d-7
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    i_exc  = v_1b_grad(1,ipoint) 
+    i_num  = grad_x_j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in x of v_1b_grad on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+
+    i_exc  = v_1b_grad(2,ipoint) 
+    i_num  = grad_y_j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in y of v_1b_grad on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+
+    i_exc  = v_1b_grad(3,ipoint) 
+    i_num  = grad_z_j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in z of v_1b_grad on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_grad_j1b_nucl
+
+! ---
+
+subroutine test_lapl_j1b_nucl()
+
+  implicit none
+  integer                    :: ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision           :: r(3)
+  double precision, external :: lapl_j1b_nucl
+
+  print*, ' test_lapl_j1b_nucl ...'
+
+  PROVIDE v_1b_lapl
+
+  eps_ij  = 1d-5
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    i_exc  = v_1b_lapl(ipoint) 
+    i_num  = lapl_j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in v_1b_lapl on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_lapl_j1b_nucl
+
+! ---
+
+subroutine test_list_b2()
+
+  implicit none
+  integer                    :: ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision           :: r(3)
+  double precision, external :: j1b_nucl
+
+  print*, ' test_list_b2 ...'
+
+  PROVIDE v_1b_list_b2
+
+  eps_ij  = 1d-7
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    i_exc  = v_1b_list_b2(ipoint) 
+    i_num  = j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in list_b2 on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_list_b2
+
+! ---
+
+subroutine test_list_b3()
+
+  implicit none
+  integer                    :: ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_tmp, i_num, normalz
+  double precision           :: r(3)
+  double precision, external :: j1b_nucl
+
+  print*, ' test_list_b3 ...'
+
+  PROVIDE v_1b_list_b3
+
+  eps_ij  = 1d-7
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    i_exc  = v_1b_list_b3(ipoint) 
+    i_tmp  = j1b_nucl(r)
+    i_num  = i_tmp * i_tmp
+    acc_ij = dabs(i_exc - i_num)
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in list_b3 on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_list_b3
+
+! ---
+
+subroutine test_fit_ugradu()
+
+  implicit none
+
+  integer                    :: jpoint, ipoint, i
+  double precision           :: i_exc, i_fit, i_num, x2, tmp, dx, dy, dz
+  double precision           :: r1(3), r2(3), grad(3)
+  double precision           :: eps_ij, acc_tot, acc_ij, normalz, coef, expo
+
+  double precision, external :: j12_mu
+
+  print*, ' test_fit_ugradu ...'
+
+  eps_ij = 1d-3
+
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    acc_tot = 0.d0
+    normalz = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r1(1) = final_grid_points(1,ipoint)
+      r1(2) = final_grid_points(2,ipoint)
+      r1(3) = final_grid_points(3,ipoint)
+  
+      dx = r1(1) - r2(1)
+      dy = r1(2) - r2(2)
+      dz = r1(3) - r2(3)
+      x2 = dx * dx + dy * dy + dz * dz
+      if(x2 .lt. 1d-10) cycle
+  
+      i_fit = 0.d0
+      do i = 1, n_max_fit_slat
+        expo   = expo_gauss_j_mu_1_erf(i)
+        coef   = coef_gauss_j_mu_1_erf(i)
+        i_fit += coef * dexp(-expo*x2)
+      enddo
+      i_fit = i_fit / dsqrt(x2)
+  
+      tmp = j12_mu(r1, r2) 
+      call grad1_j12_mu_exc(r1, r2, grad)
+  
+      ! ---
+  
+      i_exc = tmp * grad(1)
+      i_num = i_fit * dx
+      acc_ij = dabs(i_exc - i_num)
+      if(acc_ij .gt. eps_ij) then
+        print *, ' problem on x in test_fit_ugradu on', ipoint
+        print *, ' analyt = ', i_exc
+        print *, ' numeri = ', i_num
+        print *, ' diff   = ', acc_ij
+      endif
+      acc_tot += acc_ij
+      normalz += dabs(i_exc)
+  
+      ! ---
+  
+      i_exc = tmp * grad(2)
+      i_num = i_fit * dy
+      acc_ij = dabs(i_exc - i_num)
+      if(acc_ij .gt. eps_ij) then
+        print *, ' problem on y in test_fit_ugradu on', ipoint
+        print *, ' analyt = ', i_exc
+        print *, ' numeri = ', i_num
+        print *, ' diff   = ', acc_ij
+      endif
+      acc_tot += acc_ij
+      normalz += dabs(i_exc)
+  
+      ! ---
+  
+      i_exc = tmp * grad(3)
+      i_num = i_fit * dz
+      acc_ij = dabs(i_exc - i_num)
+      if(acc_ij .gt. eps_ij) then
+        print *, ' problem on z in test_fit_ugradu on', ipoint
+        print *, ' analyt = ', i_exc
+        print *, ' numeri = ', i_num
+        print *, ' diff   = ', acc_ij
+      endif
+      acc_tot += acc_ij
+      normalz += dabs(i_exc)
+  
+      ! ---
+  
+    enddo
+
+    if( (acc_tot/normalz) .gt. 1d-3 ) then
+      print*, ' acc_tot = ', acc_tot
+      print*, ' normalz = ', normalz
+    endif
+  enddo
+
+  return
+end subroutine test_fit_ugradu
+
+! ---
+
+subroutine test_fit_u()
+
+  implicit none
+
+  integer                    :: jpoint, ipoint, i
+  double precision           :: i_exc, i_fit, i_num, x2
+  double precision           :: r1(3), r2(3), dx, dy, dz
+  double precision           :: eps_ij, acc_tot, acc_ij, normalz, coef, expo
+
+  double precision, external :: j12_mu
+
+  print*, ' test_fit_u ...'
+
+  eps_ij = 1d-3
+
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    acc_tot = 0.d0
+    normalz = 0.d0
+    do ipoint = 1, n_points_final_grid
+  
+      r1(1) = final_grid_points(1,ipoint)
+      r1(2) = final_grid_points(2,ipoint)
+      r1(3) = final_grid_points(3,ipoint)
+
+      dx = r1(1) - r2(1)
+      dy = r1(2) - r2(2)
+      dz = r1(3) - r2(3)
+      x2 = dx * dx + dy * dy + dz * dz
+      if(x2 .lt. 1d-10) cycle
+  
+      i_fit = 0.d0
+      do i = 1, n_max_fit_slat
+        expo   = expo_gauss_j_mu_x(i)
+        coef   = coef_gauss_j_mu_x(i)
+        i_fit += coef * dexp(-expo*x2)
+      enddo
+  
+      i_exc = j12_mu(r1, r2) 
+      i_num = i_fit 
+      acc_ij = dabs(i_exc - i_num)
+      if(acc_ij .gt. eps_ij) then
+        print *, ' problem in test_fit_u on', ipoint
+        print *, ' analyt = ', i_exc
+        print *, ' numeri = ', i_num
+        print *, ' diff   = ', acc_ij
+      endif
+
+      acc_tot += acc_ij
+      normalz += dabs(i_exc)
+    enddo
+  
+    if( (acc_tot/normalz) .gt. 1d-3 ) then
+      print*, ' acc_tot = ', acc_tot
+      print*, ' normalz = ', normalz
+    endif
+  enddo
+
+  return
+end subroutine test_fit_u
+
+! ---
+
+subroutine test_fit_u2()
+
+  implicit none
+
+  integer                    :: jpoint, ipoint, i
+  double precision           :: i_exc, i_fit, i_num, x2
+  double precision           :: r1(3), r2(3), dx, dy, dz, tmp
+  double precision           :: eps_ij, acc_tot, acc_ij, normalz, coef, expo
+
+  double precision, external :: j12_mu
+
+  print*, ' test_fit_u2 ...'
+
+  eps_ij = 1d-3
+
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    acc_tot = 0.d0
+    normalz = 0.d0
+    do ipoint = 1, n_points_final_grid
+  
+      r1(1) = final_grid_points(1,ipoint)
+      r1(2) = final_grid_points(2,ipoint)
+      r1(3) = final_grid_points(3,ipoint)
+
+      dx = r1(1) - r2(1)
+      dy = r1(2) - r2(2)
+      dz = r1(3) - r2(3)
+      x2 = dx * dx + dy * dy + dz * dz
+      if(x2 .lt. 1d-10) cycle
+
+      i_fit = 0.d0
+      do i = 1, n_max_fit_slat
+        expo   = expo_gauss_j_mu_x_2(i)
+        coef   = coef_gauss_j_mu_x_2(i)
+        i_fit += coef * dexp(-expo*x2)
+      enddo
+
+      tmp   = j12_mu(r1, r2) 
+      i_exc = tmp * tmp
+      i_num = i_fit 
+      acc_ij = dabs(i_exc - i_num)
+      if(acc_ij .gt. eps_ij) then
+        print *, ' problem in test_fit_u2 on', ipoint
+        print *, ' analyt = ', i_exc
+        print *, ' numeri = ', i_num
+        print *, ' diff   = ', acc_ij
+      endif
+
+      acc_tot += acc_ij
+      normalz += dabs(i_exc)
+    enddo
+
+    if( (acc_tot/normalz) .gt. 1d-3 ) then
+      print*, ' acc_tot = ', acc_tot
+      print*, ' normalz = ', normalz
+    endif
+  enddo
+
+  return
+end subroutine test_fit_u2
+
+! ---
+
+
diff --git a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
new file mode 100644
index 00000000..5e7ef7e9
--- /dev/null
+++ b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
@@ -0,0 +1,780 @@
+
+! --
+
+program debug_integ_jmu_modif
+
+  implicit none
+
+  my_grid_becke  = .True.
+
+  !my_n_pt_r_grid = 30
+  !my_n_pt_a_grid = 50
+  !my_n_pt_r_grid = 100
+  !my_n_pt_a_grid = 170
+  my_n_pt_r_grid = 150
+  my_n_pt_a_grid = 194
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  PROVIDE mu_erf j1b_pen
+
+!  call test_v_ij_u_cst_mu_j1b()
+!  call test_v_ij_erf_rk_cst_mu_j1b()
+!  call test_x_v_ij_erf_rk_cst_mu_j1b()
+!  call test_int2_u2_j1b2()
+!  call test_int2_grad1u2_grad2u2_j1b2()
+!  call test_int2_u_grad1u_total_j1b2()
+!
+!  call test_int2_grad1_u12_ao()
+!
+!  call test_grad12_j12()
+!  call test_u12sq_j1bsq()
+!  call test_u12_grad1_u12_j1b_grad1_j1b()
+!  !call test_gradu_squared_u_ij_mu()
+
+  !call test_vect_overlap_gauss_r12_ao()
+  call test_vect_overlap_gauss_r12_ao_with1s()
+
+end
+
+! ---
+
+subroutine test_v_ij_u_cst_mu_j1b()
+
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_v_ij_u_cst_mu_j1b
+
+  print*, ' test_v_ij_u_cst_mu_j1b ...'
+
+  PROVIDE v_ij_u_cst_mu_j1b
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     v_ij_u_cst_mu_j1b(i,j,ipoint) 
+        i_num  = num_v_ij_u_cst_mu_j1b(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in v_ij_u_cst_mu_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_v_ij_u_cst_mu_j1b
+
+! ---
+
+subroutine test_v_ij_erf_rk_cst_mu_j1b()
+
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_v_ij_erf_rk_cst_mu_j1b
+
+  print*, ' test_v_ij_erf_rk_cst_mu_j1b ...'
+
+  PROVIDE v_ij_erf_rk_cst_mu_j1b
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) 
+        i_num  = num_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in v_ij_erf_rk_cst_mu_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_v_ij_erf_rk_cst_mu_j1b
+
+! ---
+
+subroutine test_x_v_ij_erf_rk_cst_mu_j1b()
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision :: integ(3)
+
+  print*, ' test_x_v_ij_erf_rk_cst_mu_j1b ...'
+
+  PROVIDE x_v_ij_erf_rk_cst_mu_j1b
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        call num_x_v_ij_erf_rk_cst_mu_j1b(i, j, ipoint, integ)
+
+        i_exc  = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) 
+        i_num  = integ(1)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in x part of x_v_ij_erf_rk_cst_mu_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+        i_exc  = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) 
+        i_num  = integ(2)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in y part of x_v_ij_erf_rk_cst_mu_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+        i_exc  = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) 
+        i_num  = integ(3)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in z part of x_v_ij_erf_rk_cst_mu_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_x_v_ij_erf_rk_cst_mu_j1b
+
+! ---
+
+subroutine test_int2_u2_j1b2()
+
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_int2_u2_j1b2
+
+  print*, ' test_int2_u2_j1b2 ...'
+
+  PROVIDE int2_u2_j1b2
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     int2_u2_j1b2(i,j,ipoint) 
+        i_num  = num_int2_u2_j1b2(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in int2_u2_j1b2 on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  acc_tot = acc_tot / normalz
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_int2_u2_j1b2
+
+! ---
+
+subroutine test_int2_grad1u2_grad2u2_j1b2()
+
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_int2_grad1u2_grad2u2_j1b2
+
+  print*, ' test_int2_grad1u2_grad2u2_j1b2 ...'
+
+  PROVIDE int2_grad1u2_grad2u2_j1b2
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     int2_grad1u2_grad2u2_j1b2(i,j,ipoint) 
+        i_num  = num_int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in int2_grad1u2_grad2u2_j1b2 on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_int2_grad1u2_grad2u2_j1b2
+
+! ---
+
+subroutine test_int2_grad1_u12_ao()
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision :: integ(3)
+
+  print*, ' test_int2_grad1_u12_ao ...'
+
+  PROVIDE int2_grad1_u12_ao
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        call num_int2_grad1_u12_ao(i, j, ipoint, integ)
+
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,1) 
+        i_num  = integ(1)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in x part of int2_grad1_u12_ao on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,2) 
+        i_num  = integ(2)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in y part of int2_grad1_u12_ao on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,3) 
+        i_num  = integ(3)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in z part of int2_grad1_u12_ao on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_int2_grad1_u12_ao
+
+! ---
+
+subroutine test_int2_u_grad1u_total_j1b2()
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision :: x, y, z
+  double precision :: integ(3)
+
+  print*, ' test_int2_u_grad1u_total_j1b2 ...'
+
+  PROVIDE int2_u_grad1u_j1b2
+  PROVIDE int2_u_grad1u_x_j1b2 
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    x = final_grid_points(1,ipoint)
+    y = final_grid_points(2,ipoint)
+    z = final_grid_points(3,ipoint)
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        call num_int2_u_grad1u_total_j1b2(i, j, ipoint, integ)
+
+        i_exc  = x * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+        i_num  = integ(1)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in x part of int2_u_grad1u_total_j1b2 on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+        i_exc  = y * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,2) 
+        i_num  = integ(2)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in y part of int2_u_grad1u_total_j1b2 on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+        i_exc  = z * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,3) 
+        i_num  = integ(3)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in z part of int2_u_grad1u_total_j1b2 on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_int2_u_grad1u_total_j1b2
+
+! ---
+
+subroutine test_gradu_squared_u_ij_mu()
+
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_gradu_squared_u_ij_mu
+
+  print*, ' test_gradu_squared_u_ij_mu ...'
+
+  PROVIDE gradu_squared_u_ij_mu
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     gradu_squared_u_ij_mu(i,j,ipoint) 
+        i_num  = num_gradu_squared_u_ij_mu(i, j, ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in gradu_squared_u_ij_mu on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_gradu_squared_u_ij_mu 
+
+! ---
+
+subroutine test_grad12_j12()
+
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_grad12_j12
+
+  print*, ' test_grad12_j12 ...'
+
+  PROVIDE grad12_j12
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     grad12_j12(i,j,ipoint) 
+        i_num  = num_grad12_j12(i, j, ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in grad12_j12 on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_grad12_j12
+
+! ---
+
+subroutine test_u12sq_j1bsq()
+
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_u12sq_j1bsq
+
+  print*, ' test_u12sq_j1bsq ...'
+
+  PROVIDE u12sq_j1bsq
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     u12sq_j1bsq(i,j,ipoint) 
+        i_num  = num_u12sq_j1bsq(i, j, ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in u12sq_j1bsq on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_u12sq_j1bsq
+
+! ---
+
+subroutine test_u12_grad1_u12_j1b_grad1_j1b()
+
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_u12_grad1_u12_j1b_grad1_j1b
+
+  print*, ' test_u12_grad1_u12_j1b_grad1_j1b ...'
+
+  PROVIDE u12_grad1_u12_j1b_grad1_j1b
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) 
+        i_num  = num_u12_grad1_u12_j1b_grad1_j1b(i, j, ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in u12_grad1_u12_j1b_grad1_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_u12_grad1_u12_j1b_grad1_j1b
+
+! ---
+
+subroutine test_vect_overlap_gauss_r12_ao()
+
+  implicit none
+
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao
+
+  print *, ' test_vect_overlap_gauss_r12_ao ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+
+  expo_fit = expo_gauss_j_mu_x_2(1)
+
+  ! ---
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao(r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  = I_ref(i,j,ipoint) 
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao
+
+! ---
+
+subroutine test_vect_overlap_gauss_r12_ao_with1s()
+
+  implicit none
+
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3), beta, B_center(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print *, ' test_vect_overlap_gauss_r12_ao_with1s ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+
+  expo_fit    = expo_gauss_j_mu_x_2(1)
+  beta        = List_all_comb_b3_expo  (2)
+  B_center(1) = List_all_comb_b3_cent(1,2)
+  B_center(2) = List_all_comb_b3_cent(2,2)
+  B_center(3) = List_all_comb_b3_cent(3,2)
+
+  ! ---
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  = I_ref(i,j,ipoint) 
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao
+
diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f
new file mode 100644
index 00000000..7925fa7c
--- /dev/null
+++ b/src/non_h_ints_mu/grad_squared.irp.f
@@ -0,0 +1,466 @@
+
+! ---
+
+!       TODO : strong optmization : write the loops in a different way
+!            : for each couple of AO, the gaussian product are done once for all 
+
+BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_points_final_grid) ]
+
+  BEGIN_DOC
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! gradu_squared_u_ij_mu = -0.50 x \int r2 [ (grad_1 u12)^2 + (grad_2 u12^2)] \phi_i(2) \phi_j(2)
+  !                       = -0.25 x \int r2       (1 - erf(mu*r12))^2          \phi_i(2) \phi_j(2)
+  ! and
+  !   (1 - erf(mu*r12))^2 = \sum_i coef_gauss_1_erf_x_2(i) * exp(-expo_gauss_1_erf_x_2(i) * r12^2)
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! gradu_squared_u_ij_mu = -0.50 x \int r2 \phi_i(2) \phi_j(2) [ v1^2  v2^2 ((grad_1 u12)^2 + (grad_2 u12^2)]) + u12^2  v2^2 (grad_1 v1)^2 + 2 u12 v1 v2^2 (grad_1 u12) . (grad_1 v1) ]
+  !                       = -0.25 x        v1^2      \int r2 \phi_i(2) \phi_j(2) [1 - erf(mu r12)]^2 v2^2
+  !                       + -0.50 x    (grad_1 v1)^2 \int r2 \phi_i(2) \phi_j(2)      u12^2          v2^2
+  !                       + -1.00 x v1 (grad_1 v1)   \int r2 \phi_i(2) \phi_j(2)   (grad_1 u12)      v2^2 
+  !                       =                 v1^2        x   int2_grad1u2_grad2u2_j1b2
+  !                       + -0.5 x      (grad_1 v1)^2   x   int2_u2_j1b2
+  !                       + -1.0 X V1 x (grad_1 v1)   \cdot [ int2_u_grad1u_j1b2 x r - int2_u_grad1u_x_j1b ]
+  !
+  !
+  END_DOC
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: x, y, z, r(3), delta, coef
+  double precision           :: tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+
+  print*, ' providing gradu_squared_u_ij_mu ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+
+      x     = final_grid_points(1,ipoint)
+      y     = final_grid_points(2,ipoint)
+      z     = final_grid_points(3,ipoint)
+      tmp_v = v_1b       (ipoint)
+      tmp_x = v_1b_grad(1,ipoint)
+      tmp_y = v_1b_grad(2,ipoint)
+      tmp_z = v_1b_grad(3,ipoint)
+
+      tmp1 = tmp_v * tmp_v
+      tmp2 = -0.5d0 * (tmp_x * tmp_x + tmp_y * tmp_y + tmp_z * tmp_z)
+      tmp3 = tmp_v * tmp_x
+      tmp4 = tmp_v * tmp_y
+      tmp5 = tmp_v * tmp_z
+
+      tmp6 = -x * tmp3
+      tmp7 = -y * tmp4
+      tmp8 = -z * tmp5
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp9 = int2_u_grad1u_j1b2(i,j,ipoint)
+
+          gradu_squared_u_ij_mu(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint)            &
+                                            + tmp2 * int2_u2_j1b2             (i,j,ipoint)            &
+                                            + tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) &
+                                            + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) &
+                                            + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3)
+        enddo
+      enddo
+    enddo
+
+  else
+
+    gradu_squared_u_ij_mu = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            gradu_squared_u_ij_mu(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for gradu_squared_u_ij_mu = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+!BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
+!
+!  BEGIN_DOC
+!  !
+!  ! tc_grad_square_ao_loop(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+!  !
+!  END_DOC
+!
+!  implicit none
+!  integer                       :: ipoint, i, j, k, l
+!  double precision              :: weight1, ao_ik_r, ao_i_r
+!  double precision, allocatable :: ac_mat(:,:,:,:)
+!
+!  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
+!  ac_mat = 0.d0
+!
+!  do ipoint = 1, n_points_final_grid
+!    weight1 = final_weight_at_r_vector(ipoint)
+!
+!    do i = 1, ao_num
+!      ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+!
+!      do k = 1, ao_num
+!        ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
+!
+!        do j = 1, ao_num
+!          do l = 1, ao_num
+!            ac_mat(k,i,l,j) += ao_ik_r * gradu_squared_u_ij_mu(l,j,ipoint)
+!          enddo
+!        enddo
+!      enddo
+!    enddo
+!  enddo
+!
+!  do j = 1, ao_num
+!    do l = 1, ao_num
+!      do i = 1, ao_num
+!        do k = 1, ao_num
+!          tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+!          !write(11,*) tc_grad_square_ao_loop(k,i,l,j)
+!        enddo
+!      enddo
+!    enddo
+!  enddo
+!
+!  deallocate(ac_mat)
+!
+!END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao_loop(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, ao_ik_r, ao_i_r
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), bc_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_square_ao_loop ...'
+  call wall_time(time0)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
+  ac_mat = 0.d0
+  allocate(bc_mat(ao_num,ao_num,ao_num,ao_num))
+  bc_mat = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    weight1 = final_weight_at_r_vector(ipoint)
+
+    do i = 1, ao_num
+      !ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+      ao_i_r = weight1 * aos_in_r_array(i,ipoint)
+
+      do k = 1, ao_num
+        !ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
+        ao_ik_r = ao_i_r * aos_in_r_array(k,ipoint)
+
+        do j = 1, ao_num
+          do l = 1, ao_num
+            ac_mat(k,i,l,j) += ao_ik_r * ( u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) )
+            bc_mat(k,i,l,j) += ao_ik_r * grad12_j12(l,j,ipoint)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(ac_mat)
+  deallocate(bc_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_loop = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: r(3), delta, coef
+  double precision           :: tmp1
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+
+  print*, ' providing grad12_j12 ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      tmp1 = v_1b(ipoint)
+      tmp1 = tmp1 * tmp1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          grad12_j12(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+  else
+
+    grad12_j12 = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for grad12_j12 = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_grid) ]
+
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: tmp_x, tmp_y, tmp_z
+  double precision           :: tmp1
+  double precision           :: time0, time1
+
+  print*, ' providing u12sq_j1bsq ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+    tmp1  = -0.5d0 * (tmp_x * tmp_x + tmp_y * tmp_y + tmp_z * tmp_z)
+    do j = 1, ao_num
+      do i = 1, ao_num
+        u12sq_j1bsq(i,j,ipoint) = tmp1 * int2_u2_j1b2(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12sq_j1bsq = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: x, y, z
+  double precision           :: tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+
+  print*, ' providing u12_grad1_u12_j1b_grad1_j1b ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+
+    x     = final_grid_points(1,ipoint)
+    y     = final_grid_points(2,ipoint)
+    z     = final_grid_points(3,ipoint)
+    tmp_v = v_1b       (ipoint)
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+
+    tmp3 = tmp_v * tmp_x
+    tmp4 = tmp_v * tmp_y
+    tmp5 = tmp_v * tmp_z
+
+    tmp6 = -x * tmp3
+    tmp7 = -y * tmp4
+    tmp8 = -z * tmp5
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        tmp9 = int2_u_grad1u_j1b2(i,j,ipoint)
+
+        u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) &
+                                                + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) &
+                                                + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, ao_ik_r, ao_i_r
+  double precision              :: time0, time1
+  double precision, allocatable :: b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao ...'
+  call wall_time(time0)
+
+  if(read_tc_integ) then
+
+    open(unit=11, form="unformatted", file='tc_grad_square_ao', action="read")
+      do i = 1, ao_num
+        do j = 1, ao_num
+          do k = 1, ao_num
+            do l = 1, ao_num
+              read(11) tc_grad_square_ao(l,k,j,i)
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+
+  else
+
+    allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+  
+    b_mat = 0.d0
+   !$OMP PARALLEL               &
+   !$OMP DEFAULT (NONE)         &
+   !$OMP PRIVATE (i, k, ipoint) & 
+   !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+   !$OMP DO SCHEDULE (static)
+    do i = 1, ao_num
+      do k = 1, ao_num
+        do ipoint = 1, n_points_final_grid
+          b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+        enddo
+      enddo
+    enddo
+   !$OMP END DO
+   !$OMP END PARALLEL
+  
+    tmp = 0.d0
+   !$OMP PARALLEL               &
+   !$OMP DEFAULT (NONE)         &
+   !$OMP PRIVATE (j, l, ipoint) & 
+   !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12)
+   !$OMP DO SCHEDULE (static)
+    do ipoint = 1, n_points_final_grid
+      do j = 1, ao_num
+        do l = 1, ao_num
+          tmp(l,j,ipoint) = u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) + 0.5d0 * grad12_j12(l,j,ipoint)
+        enddo
+      enddo
+    enddo
+   !$OMP END DO
+   !$OMP END PARALLEL
+  
+    tc_grad_square_ao = 0.d0
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+              , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+              , 1.d0, tc_grad_square_ao, ao_num*ao_num)
+    deallocate(tmp, b_mat)
+  
+    call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num)
+  
+   !!$OMP PARALLEL             &
+   !!$OMP DEFAULT (NONE)       &
+   !!$OMP PRIVATE (i, j, k, l) & 
+   !!$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num)
+   !!$OMP DO SCHEDULE (static)
+   ! do j = 1, ao_num
+   !   do l = 1, ao_num
+   !     do i = 1, ao_num
+   !       do k = 1, ao_num
+   !         tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) 
+   !       enddo
+   !     enddo
+   !   enddo
+   ! enddo
+   !!$OMP END DO
+   !!$OMP END PARALLEL
+  endif
+
+  if(write_tc_integ) then
+    open(unit=11, form="unformatted", file='tc_grad_square_ao', action="write")
+      do i = 1, ao_num
+        do j = 1, ao_num
+          do k = 1, ao_num
+            do l = 1, ao_num
+              write(11) tc_grad_square_ao(l,k,j,i)
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
new file mode 100644
index 00000000..cb9e15c4
--- /dev/null
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -0,0 +1,387 @@
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao_test(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, ao_ik_r, ao_i_r,contrib,contrib2
+  double precision              :: time0, time1
+  double precision, allocatable :: b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao_test ...'
+  call wall_time(time0)
+
+  if(read_tc_integ) then
+  
+    open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="read")
+      do i = 1, ao_num
+        do j = 1, ao_num
+          do k = 1, ao_num
+            do l = 1, ao_num
+              read(11) tc_grad_square_ao_test(l,k,j,i)
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+
+  else
+
+    provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test
+  
+    allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+  
+    b_mat = 0.d0
+   !$OMP PARALLEL               &
+   !$OMP DEFAULT (NONE)         &
+   !$OMP PRIVATE (i, k, ipoint) & 
+   !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+   !$OMP DO SCHEDULE (static)
+    do i = 1, ao_num
+      do k = 1, ao_num
+        do ipoint = 1, n_points_final_grid
+          b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+        enddo
+      enddo
+    enddo
+   !$OMP END DO
+   !$OMP END PARALLEL
+  
+    tmp = 0.d0
+   !$OMP PARALLEL               &
+   !$OMP DEFAULT (NONE)         &
+   !$OMP PRIVATE (j, l, ipoint) & 
+   !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test)
+   !$OMP DO SCHEDULE (static)
+    do ipoint = 1, n_points_final_grid
+      do j = 1, ao_num
+        do l = 1, ao_num
+          tmp(l,j,ipoint) = u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) + 0.5d0 * grad12_j12_test(l,j,ipoint)
+        enddo
+      enddo
+    enddo
+   !$OMP END DO
+   !$OMP END PARALLEL
+  
+    tc_grad_square_ao_test = 0.d0
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+              , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+              , 1.d0, tc_grad_square_ao_test, ao_num*ao_num)
+    deallocate(tmp, b_mat)
+  
+    call sum_A_At(tc_grad_square_ao_test(1,1,1,1), ao_num*ao_num)
+    !do i = 1, ao_num
+    !  do j = 1, ao_num
+    !    do k = i, ao_num
+  
+    !      do l = max(j,k), ao_num
+    !        tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j))
+    !        tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l)
+    !      end do
+  
+    !      !if (j.eq.k) then
+    !      !  do l = j+1, ao_num
+    !      !    tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j))
+    !      !    tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l)
+    !      !  end do
+    !      !else
+    !      !  do l = j, ao_num
+    !      !    tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j))
+    !      !    tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l)
+    !      !  enddo
+    !      !endif
+  
+    !    enddo
+    !  enddo
+    !enddo
+    !tc_grad_square_ao_test = 2.d0 * tc_grad_square_ao_test
+  ! !$OMP PARALLEL             &
+  ! !$OMP DEFAULT (NONE)       &
+  ! !$OMP PRIVATE (i, j, k, l) & 
+  ! !$OMP SHARED (tc_grad_square_ao_test, ao_num)
+  ! !$OMP DO SCHEDULE (static)
+  !  integer :: ii
+  !  ii = 0
+  !  do j = 1, ao_num
+  !    do l = 1, ao_num
+  !      do i = 1, ao_num
+  !        do k = 1, ao_num
+  !          if((i.lt.j) .and. (k.lt.l)) cycle
+  !          ii = ii + 1
+  !          tc_grad_square_ao_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_square_ao_test(l,j,k,i)
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !  print *, ' ii =', ii
+  ! !$OMP END DO
+  ! !$OMP END PARALLEL
+  
+  ! !$OMP PARALLEL             &
+  ! !$OMP DEFAULT (NONE)       &
+  ! !$OMP PRIVATE (i, j, k, l) & 
+  ! !$OMP SHARED (tc_grad_square_ao_test, ao_num)
+  ! !$OMP DO SCHEDULE (static)
+  !  do j = 1, ao_num
+  !    do l = 1, ao_num
+  !      do i = 1, j-1
+  !        do k = 1, l-1
+  !          ii = ii + 1
+  !          tc_grad_square_ao_test(k,i,l,j) = tc_grad_square_ao_test(l,j,k,i)
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !  print *, ' ii =', ii
+  !  print *, ao_num * ao_num * ao_num * ao_num
+  ! !$OMP END DO
+  ! !$OMP END PARALLEL
+
+  endif
+
+  if(write_tc_integ) then
+    open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="write")
+      do i = 1, ao_num
+        do j = 1, ao_num
+          do k = 1, ao_num
+            do l = 1, ao_num
+              write(11) tc_grad_square_ao_test(l,k,j,i)
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao_test_ref(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, ao_ik_r, ao_i_r,contrib,contrib2
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao_test_ref ...'
+  call wall_time(time0)
+
+  provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+
+  b_mat = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (i, k, ipoint) & 
+ !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  tmp = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (j, l, ipoint) & 
+ !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test)
+ !$OMP DO SCHEDULE (static)
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do l = 1, ao_num
+        tmp(l,j,ipoint) = u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) + 0.5d0 * grad12_j12_test(l,j,ipoint)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ac_mat = 0.d0
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+            , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+            , 1.d0, ac_mat, ao_num*ao_num)
+  deallocate(tmp, b_mat)
+
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_square_ao_test_ref, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao_test_ref(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_test_ref = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_final_grid) ]
+
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: tmp_x, tmp_y, tmp_z
+  double precision           :: tmp1
+  double precision           :: time0, time1
+
+  print*, ' providing u12sq_j1bsq_test ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+    tmp1  = -0.5d0 * (tmp_x * tmp_x + tmp_y * tmp_y + tmp_z * tmp_z)
+    do j = 1, ao_num
+      do i = 1, ao_num
+        u12sq_j1bsq_test(i,j,ipoint) = tmp1 * int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12sq_j1bsq_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: x, y, z
+  double precision           :: tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+
+  print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...'
+
+  provide int2_u_grad1u_x_j1b2_test
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+
+    x     = final_grid_points(1,ipoint)
+    y     = final_grid_points(2,ipoint)
+    z     = final_grid_points(3,ipoint)
+    tmp_v = v_1b       (ipoint)
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+
+    tmp3 = tmp_v * tmp_x
+    tmp4 = tmp_v * tmp_y
+    tmp5 = tmp_v * tmp_z
+
+    tmp6 = -x * tmp3
+    tmp7 = -y * tmp4
+    tmp8 = -z * tmp5
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        tmp9 = int2_u_grad1u_j1b2_test(i,j,ipoint)
+
+        u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,1) &
+                                                     + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,2) &
+                                                     + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: r(3), delta, coef
+  double precision           :: tmp1
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+  provide int2_grad1u2_grad2u2_j1b2_test
+  print*, ' providing grad12_j12_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      tmp1 = v_1b(ipoint)
+      tmp1 = tmp1 * tmp1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          grad12_j12_test(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+  else
+
+    grad12_j12_test = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            grad12_j12_test(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for grad12_j12_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/grad_tc_int.irp.f b/src/non_h_ints_mu/grad_tc_int.irp.f
new file mode 100644
index 00000000..cb3b71a3
--- /dev/null
+++ b/src/non_h_ints_mu/grad_tc_int.irp.f
@@ -0,0 +1,217 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_non_hermit_term_chemist, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !                            1 1 2 2      1 2                                1 2 
+  !
+  ! ao_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the AO basis
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l, ipoint, m
+  double precision              :: weight1, r(3)
+  double precision              :: wall1, wall0
+  double precision, allocatable :: b_mat(:,:,:,:), ac_mat(:,:,:,:)
+
+  provide v_ij_erf_rk_cst_mu x_v_ij_erf_rk_cst_mu
+
+  call wall_time(wall0)
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+
+ !$OMP PARALLEL                         &
+ !$OMP DEFAULT (NONE)                   &
+ !$OMP PRIVATE (i,k,m,ipoint,r,weight1) & 
+ !$OMP SHARED (aos_in_r_array_transp,aos_grad_in_r_array_transp_bis,b_mat)& 
+ !$OMP SHARED (ao_num,n_points_final_grid,final_grid_points,final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do m = 1, 3
+    do i = 1, ao_num
+      do k = 1, ao_num
+        do ipoint = 1, n_points_final_grid
+          r(1) = final_grid_points(1,ipoint)
+          r(2) = final_grid_points(2,ipoint)
+          r(3) = final_grid_points(3,ipoint)
+          weight1 = final_weight_at_r_vector(ipoint)
+          b_mat(ipoint,k,i,m) = 0.5d0 * aos_in_r_array_transp(ipoint,k) * r(m) * weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,m) 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! (A)                b_mat(ipoint,k,i,m) X v_ij_erf_rk_cst_mu(j,l,r1)
+  ! 1/2 \int dr1 x1 phi_k(1) d/dx1 phi_i(1) \int dr2 (1 - erf(mu_r12))/r12  phi_j(2) phi_l(2)
+  ac_mat = 0.d0
+  do m = 1, 3
+    !           A   B^T  dim(A,1)       dim(B,2)       dim(A,2)        alpha * A                LDA 
+
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0             &
+              , v_ij_erf_rk_cst_mu(1,1,1), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num) 
+
+  enddo
+
+ !$OMP PARALLEL                       &
+ !$OMP DEFAULT (NONE)                 &
+ !$OMP PRIVATE (i,k,m,ipoint,weight1) & 
+ !$OMP SHARED (aos_in_r_array_transp,aos_grad_in_r_array_transp_bis,b_mat,ao_num,n_points_final_grid,final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do m = 1, 3
+    do i = 1, ao_num
+      do k = 1, ao_num
+        do ipoint = 1, n_points_final_grid
+          weight1 = final_weight_at_r_vector(ipoint)
+          b_mat(ipoint,k,i,m) = 0.5d0 * aos_in_r_array_transp(ipoint,k) * weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,m) 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ ! (B)                b_mat(ipoint,k,i,m) X x_v_ij_erf_rk_cst_mu(j,l,r1,m)
+ ! 1/2 \int dr1 phi_k(1) d/dx1 phi_i(1) \int dr2 x2(1 - erf(mu_r12))/r12  phi_j(2) phi_l(2)
+  do m = 1, 3
+   !           A   B^T  dim(A,1)       dim(B,2)       dim(A,2)        alpha * A                LDA 
+
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0                &
+              , x_v_ij_erf_rk_cst_mu(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num) 
+  enddo
+
+ !$OMP PARALLEL          &
+ !$OMP DEFAULT (NONE)    &
+ !$OMP PRIVATE (i,k,j,l) & 
+ !$OMP SHARED (ac_mat,ao_non_hermit_term_chemist,ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          !                          (ki|lj)           (ki|lj)           (lj|ki)
+          ao_non_hermit_term_chemist(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)    
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time dgemm ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+! TODO :: optimization :: transform into DGEM
+
+BEGIN_PROVIDER [double precision, mo_non_hermit_term_chemist, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !                            1 1 2 2      1 2                                1 2 
+  !
+  ! mo_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the MO basis
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l, m, n, p, q
+  double precision, allocatable :: mo_tmp_1(:,:,:,:), mo_tmp_2(:,:,:,:)
+ 
+  allocate(mo_tmp_1(mo_num,ao_num,ao_num,ao_num))
+  mo_tmp_1 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do q = 1, ao_num
+          do k = 1, mo_num
+            !       (k n|p m)    = sum_q c_qk * (q n|p m)
+            mo_tmp_1(k,n,p,m) += mo_coef_transp(k,q) * ao_non_hermit_term_chemist(q,n,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  free ao_non_hermit_term_chemist 
+
+  allocate(mo_tmp_2(mo_num,mo_num,ao_num,ao_num))
+  mo_tmp_2 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            !       (k i|p m) = sum_n c_ni * (k n|p m)
+            mo_tmp_2(k,i,p,m) += mo_coef_transp(i,n) * mo_tmp_1(k,n,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_1)
+
+  allocate(mo_tmp_1(mo_num,mo_num,mo_num,ao_num))
+  mo_tmp_1 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_tmp_1(k,i,l,m) += mo_coef_transp(l,p) * mo_tmp_2(k,i,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_2)
+
+  mo_non_hermit_term_chemist = 0.d0
+  do m = 1, ao_num
+    do j = 1, mo_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_non_hermit_term_chemist(k,i,l,j) += mo_coef_transp(j,m) * mo_tmp_1(k,i,l,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_1)
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, mo_non_hermit_term, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !                    1 2 1 2      1 2                                1 2 
+  !
+  ! mo_non_hermit_term(k,l,i,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the MO basis
+  END_DOC
+
+  implicit none
+  integer :: i, j, k, l
+
+  do j = 1, mo_num
+    do i = 1, mo_num
+      do l = 1, mo_num
+        do k = 1, mo_num
+          mo_non_hermit_term(k,l,i,j) = mo_non_hermit_term_chemist(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f
new file mode 100644
index 00000000..a515e0b8
--- /dev/null
+++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f
@@ -0,0 +1,640 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)]
+
+  implicit none
+  integer          :: ipoint, i, j, phase
+  double precision :: x, y, z, dx, dy, dz
+  double precision :: a, d, e, fact_r
+
+  do ipoint = 1, n_points_final_grid
+
+    x = final_grid_points(1,ipoint)
+    y = final_grid_points(2,ipoint)
+    z = final_grid_points(3,ipoint)
+
+    fact_r = 1.d0
+    do j = 1, nucl_num
+      a  = j1b_pen(j)
+      dx = x - nucl_coord(j,1)
+      dy = y - nucl_coord(j,2)
+      dz = z - nucl_coord(j,3)
+      d  = dx*dx + dy*dy + dz*dz
+      e  = 1.d0 - dexp(-a*d)
+
+      fact_r = fact_r * e
+    enddo
+
+    v_1b(ipoint) = fact_r
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_1b_grad, (3, n_points_final_grid)]
+
+  implicit none
+  integer          :: ipoint, i, j, phase
+  double precision :: x, y, z, dx, dy, dz
+  double precision :: a, d, e
+  double precision :: fact_x, fact_y, fact_z
+  double precision :: ax_der, ay_der, az_der, a_expo
+
+  do ipoint = 1, n_points_final_grid
+
+    x = final_grid_points(1,ipoint)
+    y = final_grid_points(2,ipoint)
+    z = final_grid_points(3,ipoint)
+
+    fact_x = 0.d0
+    fact_y = 0.d0
+    fact_z = 0.d0
+    do i = 1, List_all_comb_b2_size
+
+      phase  = 0
+      a_expo = 0.d0
+      ax_der = 0.d0
+      ay_der = 0.d0
+      az_der = 0.d0
+      do j = 1, nucl_num
+        a  = dble(List_all_comb_b2(j,i)) * j1b_pen(j)
+        dx = x - nucl_coord(j,1)
+        dy = y - nucl_coord(j,2)
+        dz = z - nucl_coord(j,3)
+      
+        phase  += List_all_comb_b2(j,i)
+        a_expo += a * (dx*dx + dy*dy + dz*dz)
+        ax_der += a * dx
+        ay_der += a * dy
+        az_der += a * dz
+      enddo
+      e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo)
+
+      fact_x += e * ax_der 
+      fact_y += e * ay_der 
+      fact_z += e * az_der 
+    enddo
+
+    v_1b_grad(1,ipoint) = fact_x
+    v_1b_grad(2,ipoint) = fact_y
+    v_1b_grad(3,ipoint) = fact_z
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_1b_lapl, (n_points_final_grid)]
+
+  implicit none
+  integer          :: ipoint, i, j, phase
+  double precision :: x, y, z, dx, dy, dz
+  double precision :: a, d, e, b
+  double precision :: fact_r
+  double precision :: ax_der, ay_der, az_der, a_expo
+
+  do ipoint = 1, n_points_final_grid
+
+    x = final_grid_points(1,ipoint)
+    y = final_grid_points(2,ipoint)
+    z = final_grid_points(3,ipoint)
+
+    fact_r = 0.d0
+    do i = 1, List_all_comb_b2_size
+
+      phase  = 0
+      b      = 0.d0
+      a_expo = 0.d0
+      ax_der = 0.d0
+      ay_der = 0.d0
+      az_der = 0.d0
+      do j = 1, nucl_num
+        a  = dble(List_all_comb_b2(j,i)) * j1b_pen(j)
+        dx = x - nucl_coord(j,1)
+        dy = y - nucl_coord(j,2)
+        dz = z - nucl_coord(j,3)
+      
+        phase  += List_all_comb_b2(j,i)
+        b      += a
+        a_expo += a * (dx*dx + dy*dy + dz*dz)
+        ax_der += a * dx
+        ay_der += a * dy
+        az_der += a * dz
+      enddo
+
+      fact_r += (-1.d0)**dble(phase) * (-6.d0*b + 4.d0*(ax_der*ax_der + ay_der*ay_der + az_der*az_der) ) * dexp(-a_expo)
+    enddo
+
+    v_1b_lapl(ipoint) = fact_r
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_1b_list_b2, (n_points_final_grid)]
+
+  implicit none
+  integer          :: i, ipoint
+  double precision :: x, y, z, coef, expo, dx, dy, dz
+  double precision :: fact_r
+
+  PROVIDE List_all_comb_b2_coef List_all_comb_b2_expo List_all_comb_b2_cent
+
+  do ipoint = 1, n_points_final_grid
+
+    x = final_grid_points(1,ipoint)
+    y = final_grid_points(2,ipoint)
+    z = final_grid_points(3,ipoint)
+
+    fact_r = 0.d0
+    do i = 1, List_all_comb_b2_size
+
+      coef = List_all_comb_b2_coef(i)
+      expo = List_all_comb_b2_expo(i) 
+
+      dx = x - List_all_comb_b2_cent(1,i)
+      dy = y - List_all_comb_b2_cent(2,i)
+      dz = z - List_all_comb_b2_cent(3,i)
+
+      fact_r += coef * dexp(-expo * (dx*dx + dy*dy + dz*dz))
+    enddo
+
+    v_1b_list_b2(ipoint) = fact_r
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_1b_list_b3, (n_points_final_grid)]
+
+  implicit none
+  integer          :: i, ipoint
+  double precision :: x, y, z, coef, expo, dx, dy, dz
+  double precision :: fact_r
+
+  PROVIDE List_all_comb_b3_coef List_all_comb_b3_expo List_all_comb_b3_cent
+
+  do ipoint = 1, n_points_final_grid
+
+    x = final_grid_points(1,ipoint)
+    y = final_grid_points(2,ipoint)
+    z = final_grid_points(3,ipoint)
+
+    fact_r = 0.d0
+    do i = 1, List_all_comb_b3_size
+
+      coef = List_all_comb_b3_coef(i)
+      expo = List_all_comb_b3_expo(i) 
+
+      dx = x - List_all_comb_b3_cent(1,i)
+      dy = y - List_all_comb_b3_cent(2,i)
+      dz = z - List_all_comb_b3_cent(3,i)
+
+      fact_r += coef * dexp(-expo * (dx*dx + dy*dy + dz*dz))
+    enddo
+
+    v_1b_list_b3(ipoint) = fact_r
+  enddo
+
+END_PROVIDER
+
+! ---
+
+double precision function jmu_modif(r1, r2)
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision, external   :: j12_mu, j12_nucl
+
+  jmu_modif = j12_mu(r1, r2) * j12_nucl(r1, r2)
+
+  return
+end function jmu_modif
+
+! ---
+
+double precision function j12_mu(r1, r2)
+
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision             :: mu_r12, r12
+
+  r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) &
+             + (r1(2) - r2(2)) * (r1(2) - r2(2)) &
+             + (r1(3) - r2(3)) * (r1(3) - r2(3)) )
+  mu_r12 = mu_erf * r12
+
+  j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_r12)) - inv_sq_pi_2 * dexp(-mu_r12*mu_r12) / mu_erf
+
+  return
+end function j12_mu
+
+! ---
+
+double precision function j12_mu_r12(r12)
+
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: r12
+  double precision             :: mu_r12
+
+  mu_r12 = mu_erf * r12
+
+  j12_mu_r12 = 0.5d0 * r12 * (1.d0 - derf(mu_r12)) - inv_sq_pi_2 * dexp(-mu_r12*mu_r12) / mu_erf
+
+  return
+end function j12_mu_r12
+
+! ---
+
+double precision function j12_mu_gauss(r1, r2)
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  integer                      :: i
+  double precision             :: r12, coef, expo
+
+  r12 = (r1(1) - r2(1)) * (r1(1) - r2(1)) &
+      + (r1(2) - r2(2)) * (r1(2) - r2(2)) &
+      + (r1(3) - r2(3)) * (r1(3) - r2(3)) 
+
+  j12_mu_gauss = 0.d0
+  do i = 1, n_max_fit_slat
+    expo = expo_gauss_j_mu_x(i)
+    coef = coef_gauss_j_mu_x(i)
+
+    j12_mu_gauss += coef * dexp(-expo*r12)
+  enddo
+
+  return
+end function j12_mu_gauss
+
+! ---
+
+double precision function j1b_nucl(r)
+
+  implicit none
+  double precision, intent(in) :: r(3)
+  integer                      :: i
+  double precision             :: a, d, e
+
+  j1b_nucl = 1.d0
+
+  do i = 1, nucl_num
+    a = j1b_pen(i)
+    d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) &
+        + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) &
+        + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) )
+    e = 1.d0 - exp(-a*d)
+
+    j1b_nucl = j1b_nucl * e
+  enddo
+
+  return
+end function j1b_nucl
+
+! ---
+
+double precision function j12_nucl(r1, r2)
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision, external   :: j1b_nucl
+
+  j12_nucl = j1b_nucl(r1) * j1b_nucl(r2)
+
+  return
+end function j12_nucl
+
+! ---
+
+! ---------------------------------------------------------------------------------------
+
+double precision function grad_x_j1b_nucl(r)
+
+  implicit none
+  double precision, intent(in) :: r(3)
+  double precision             :: r_eps(3), eps, fp, fm, delta
+  double precision, external   :: j1b_nucl
+
+  eps   = 1d-6
+  r_eps = r
+  delta = max(eps, dabs(eps*r(1)))
+
+  r_eps(1) = r_eps(1) + delta
+  fp       = j1b_nucl(r_eps)
+  r_eps(1) = r_eps(1) - 2.d0 * delta
+  fm       = j1b_nucl(r_eps)
+
+  grad_x_j1b_nucl = 0.5d0 * (fp - fm) / delta
+
+  return
+end function grad_x_j1b_nucl
+
+double precision function grad_y_j1b_nucl(r)
+
+  implicit none
+  double precision, intent(in) :: r(3)
+  double precision             :: r_eps(3), eps, fp, fm, delta
+  double precision, external   :: j1b_nucl
+
+  eps   = 1d-6
+  r_eps = r
+  delta = max(eps, dabs(eps*r(2)))
+
+  r_eps(2) = r_eps(2) + delta
+  fp       = j1b_nucl(r_eps)
+  r_eps(2) = r_eps(2) - 2.d0 * delta
+  fm       = j1b_nucl(r_eps)
+
+  grad_y_j1b_nucl = 0.5d0 * (fp - fm) / delta
+
+  return
+end function grad_y_j1b_nucl
+
+double precision function grad_z_j1b_nucl(r)
+
+  implicit none
+  double precision, intent(in) :: r(3)
+  double precision             :: r_eps(3), eps, fp, fm, delta
+  double precision, external   :: j1b_nucl
+
+  eps   = 1d-6
+  r_eps = r
+  delta = max(eps, dabs(eps*r(3)))
+
+  r_eps(3) = r_eps(3) + delta
+  fp       = j1b_nucl(r_eps)
+  r_eps(3) = r_eps(3) - 2.d0 * delta
+  fm       = j1b_nucl(r_eps)
+
+  grad_z_j1b_nucl = 0.5d0 * (fp - fm) / delta
+
+  return
+end function grad_z_j1b_nucl
+
+! ---------------------------------------------------------------------------------------
+
+! ---
+
+double precision function lapl_j1b_nucl(r)
+
+  implicit none
+  double precision, intent(in) :: r(3)
+  double precision             :: r_eps(3), eps, fp, fm, delta
+  double precision, external   :: grad_x_j1b_nucl
+  double precision, external   :: grad_y_j1b_nucl
+  double precision, external   :: grad_z_j1b_nucl
+
+  eps   = 1d-5
+  r_eps = r
+
+  lapl_j1b_nucl = 0.d0
+
+  ! ---
+
+  delta    = max(eps, dabs(eps*r(1)))
+  r_eps(1) = r_eps(1) + delta
+  fp       = grad_x_j1b_nucl(r_eps)
+  r_eps(1) = r_eps(1) - 2.d0 * delta
+  fm       = grad_x_j1b_nucl(r_eps)
+  r_eps(1) = r_eps(1) + delta
+
+  lapl_j1b_nucl += 0.5d0 * (fp - fm) / delta
+
+  ! ---
+
+  delta    = max(eps, dabs(eps*r(2)))
+  r_eps(2) = r_eps(2) + delta
+  fp       = grad_y_j1b_nucl(r_eps)
+  r_eps(2) = r_eps(2) - 2.d0 * delta
+  fm       = grad_y_j1b_nucl(r_eps)
+  r_eps(2) = r_eps(2) + delta
+
+  lapl_j1b_nucl += 0.5d0 * (fp - fm) / delta
+
+  ! ---
+
+  delta    = max(eps, dabs(eps*r(3)))
+  r_eps(3) = r_eps(3) + delta
+  fp       = grad_z_j1b_nucl(r_eps)
+  r_eps(3) = r_eps(3) - 2.d0 * delta
+  fm       = grad_z_j1b_nucl(r_eps)
+  r_eps(3) = r_eps(3) + delta
+
+  lapl_j1b_nucl += 0.5d0 * (fp - fm) / delta
+
+  ! ---
+
+  return
+end function lapl_j1b_nucl
+
+! ---
+
+! ---------------------------------------------------------------------------------------
+
+double precision function grad1_x_jmu_modif(r1, r2)
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision             :: r1_eps(3), eps, fp, fm, delta
+  double precision, external   :: jmu_modif
+
+  eps    = 1d-7
+  r1_eps = r1
+  delta  = max(eps, dabs(eps*r1(1)))
+
+  r1_eps(1) = r1_eps(1) + delta
+  fp        = jmu_modif(r1_eps, r2)
+  r1_eps(1) = r1_eps(1) - 2.d0 * delta
+  fm        = jmu_modif(r1_eps, r2)
+
+  grad1_x_jmu_modif = 0.5d0 * (fp - fm) / delta
+
+  return
+end function grad1_x_jmu_modif
+
+double precision function grad1_y_jmu_modif(r1, r2)
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision             :: r1_eps(3), eps, fp, fm, delta
+  double precision, external   :: jmu_modif
+
+  eps    = 1d-7
+  r1_eps = r1
+  delta  = max(eps, dabs(eps*r1(2)))
+
+  r1_eps(2) = r1_eps(2) + delta
+  fp        = jmu_modif(r1_eps, r2) 
+  r1_eps(2) = r1_eps(2) - 2.d0 * delta
+  fm        = jmu_modif(r1_eps, r2) 
+
+  grad1_y_jmu_modif = 0.5d0 * (fp - fm) / delta
+
+  return
+end function grad1_y_jmu_modif
+
+double precision function grad1_z_jmu_modif(r1, r2)
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision             :: r1_eps(3), eps, fp, fm, delta
+  double precision, external   :: jmu_modif
+
+  eps    = 1d-7
+  r1_eps = r1
+  delta  = max(eps, dabs(eps*r1(3)))
+
+  r1_eps(3) = r1_eps(3) + delta
+  fp        = jmu_modif(r1_eps, r2)
+  r1_eps(3) = r1_eps(3) - 2.d0 * delta
+  fm        = jmu_modif(r1_eps, r2)
+
+  grad1_z_jmu_modif = 0.5d0 * (fp - fm) / delta
+
+  return
+end function grad1_z_jmu_modif
+
+! ---------------------------------------------------------------------------------------
+
+! ---
+
+! ---------------------------------------------------------------------------------------
+
+double precision function grad1_x_j12_mu_num(r1, r2)
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision             :: r1_eps(3), eps, fp, fm, delta
+  double precision, external   :: j12_mu
+
+  eps    = 1d-7
+  r1_eps = r1
+  delta  = max(eps, dabs(eps*r1(1)))
+
+  r1_eps(1) = r1_eps(1) + delta
+  fp        = j12_mu(r1_eps, r2)
+  r1_eps(1) = r1_eps(1) - 2.d0 * delta
+  fm        = j12_mu(r1_eps, r2)
+
+  grad1_x_j12_mu_num = 0.5d0 * (fp - fm) / delta
+
+  return
+end function grad1_x_j12_mu_num
+
+double precision function grad1_y_j12_mu_num(r1, r2)
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision             :: r1_eps(3), eps, fp, fm, delta
+  double precision, external   :: j12_mu
+
+  eps    = 1d-7
+  r1_eps = r1
+  delta  = max(eps, dabs(eps*r1(2)))
+
+  r1_eps(2) = r1_eps(2) + delta
+  fp        = j12_mu(r1_eps, r2)
+  r1_eps(2) = r1_eps(2) - 2.d0 * delta
+  fm        = j12_mu(r1_eps, r2)
+
+  grad1_y_j12_mu_num = 0.5d0 * (fp - fm) / delta
+
+  return
+end function grad1_y_j12_mu_num
+
+double precision function grad1_z_j12_mu_num(r1, r2)
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision             :: r1_eps(3), eps, fp, fm, delta
+  double precision, external   :: j12_mu
+
+  eps    = 1d-7
+  r1_eps = r1
+  delta  = max(eps, dabs(eps*r1(3)))
+
+  r1_eps(3) = r1_eps(3) + delta
+  fp        = j12_mu(r1_eps, r2)
+  r1_eps(3) = r1_eps(3) - 2.d0 * delta
+  fm        = j12_mu(r1_eps, r2)
+
+  grad1_z_j12_mu_num = 0.5d0 * (fp - fm) / delta
+
+  return
+end function grad1_z_j12_mu_num
+
+! ---------------------------------------------------------------------------------------
+
+! ---
+
+subroutine grad1_j12_mu_exc(r1, r2, grad)
+
+  implicit none
+  double precision, intent(in)  :: r1(3), r2(3)
+  double precision, intent(out) :: grad(3)
+  double precision              :: dx, dy, dz, r12, tmp
+
+  grad = 0.d0
+
+  dx = r1(1) - r2(1)
+  dy = r1(2) - r2(2)
+  dz = r1(3) - r2(3)
+
+  r12 = dsqrt( dx * dx + dy * dy + dz * dz )
+  if(r12 .lt. 1d-10) return
+
+  tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12
+
+  grad(1) = tmp * dx 
+  grad(2) = tmp * dy 
+  grad(3) = tmp * dz 
+
+  return
+end subroutine grad1_j12_mu_exc
+
+! ---
+
+subroutine grad1_jmu_modif_num(r1, r2, grad)
+
+  implicit none
+
+  double precision, intent(in)  :: r1(3), r2(3)
+  double precision, intent(out) :: grad(3)
+
+  double precision              :: tmp0, tmp1, tmp2, tmp3, tmp4, grad_u12(3)
+
+  double precision, external    :: j12_mu
+  double precision, external    :: j1b_nucl
+  double precision, external    :: grad_x_j1b_nucl
+  double precision, external    :: grad_y_j1b_nucl
+  double precision, external    :: grad_z_j1b_nucl
+
+  call grad1_j12_mu_exc(r1, r2, grad_u12)
+
+  tmp0 = j1b_nucl(r1) 
+  tmp1 = j1b_nucl(r2)
+  tmp2 = j12_mu(r1, r2)
+  tmp3 = tmp0 * tmp1
+  tmp4 = tmp2 * tmp1
+
+  grad(1) = tmp3 * grad_u12(1) + tmp4 * grad_x_j1b_nucl(r1)
+  grad(2) = tmp3 * grad_u12(2) + tmp4 * grad_y_j1b_nucl(r1)
+  grad(3) = tmp3 * grad_u12(3) + tmp4 * grad_z_j1b_nucl(r1)
+
+  return
+end subroutine grad1_jmu_modif_num
+
+! ---
+
+
+
+
diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f
new file mode 100644
index 00000000..a15f690a
--- /dev/null
+++ b/src/non_h_ints_mu/new_grad_tc.irp.f
@@ -0,0 +1,412 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
+  !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int2_grad1_u12_ao(i,j,ipoint,:) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  !                                 - \grad_1 v1 x [       \int dr2                  u12              v2 \phi_i(r2) \phi_j(r2) ] 
+  !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j, m
+  double precision :: time0, time1
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  print*, ' providing int2_grad1_u12_ao ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(read_tc_integ) then
+
+    open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="read")
+      do m = 1, 3
+        do ipoint = 1, n_points_final_grid
+          do j = 1, ao_num
+            do i = 1, ao_num
+              read(11) int2_grad1_u12_ao(i,j,ipoint,m) 
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+
+  else
+
+    if(j1b_type .eq. 3) then
+      do ipoint = 1, n_points_final_grid
+        x = final_grid_points(1,ipoint)
+        y = final_grid_points(2,ipoint)
+        z = final_grid_points(3,ipoint)
+        tmp0  = 0.5d0 * v_1b(ipoint)
+        tmp_x =  v_1b_grad(1,ipoint)
+        tmp_y =  v_1b_grad(2,ipoint)
+        tmp_z =  v_1b_grad(3,ipoint)
+        do j = 1, ao_num
+          do i = 1, ao_num
+            tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+            tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
+            int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
+            int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
+            int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
+          enddo
+        enddo
+      enddo
+    else
+      do ipoint = 1, n_points_final_grid
+        x = final_grid_points(1,ipoint)
+        y = final_grid_points(2,ipoint)
+        z = final_grid_points(3,ipoint)
+        do j = 1, ao_num
+          do i = 1, ao_num
+            tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+
+            int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,1)
+            int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,2)
+            int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,3)
+          enddo
+        enddo
+      enddo
+      int2_grad1_u12_ao *= 0.5d0
+    endif
+
+  endif
+
+  if(write_tc_integ) then
+    open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="write")
+      do m = 1, 3
+        do ipoint = 1, n_points_final_grid
+          do j = 1, ao_num
+            do i = 1, ao_num
+              write(11) int2_grad1_u12_ao(i,j,ipoint,m) 
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for int2_grad1_u12_ao = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1)
+  !                                 = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !                                 = -int2_grad1_u12_ao(i,j,ipoint,:)
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) =      v2    x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] v1 \phi_i(r1) \phi_j(r1) ]
+  !                                 - \grad_2 v2 x [       \int dr1                   u12              v1 \phi_i(r1) \phi_j(r1) ] 
+  !                                 =   -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 +    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+  
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
+
+          int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
+          int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
+          int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    int1_grad2_u12_ao = -1.d0 * int2_grad1_u12_ao
+
+  endif
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao_loop(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
+  double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision              :: ao_j_r, ao_l_r, ao_l_dx, ao_l_dy, ao_l_dz
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao_loop ...'
+  call wall_time(time0)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
+  ac_mat = 0.d0
+
+  ! ---
+
+  do ipoint = 1, n_points_final_grid
+    weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+
+    do i = 1, ao_num
+      !ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
+      !ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
+      !ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
+      !ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+      ao_i_r  = weight1 * aos_in_r_array     (i,ipoint)
+      ao_i_dx = weight1 * aos_grad_in_r_array(i,ipoint,1)
+      ao_i_dy = weight1 * aos_grad_in_r_array(i,ipoint,2)
+      ao_i_dz = weight1 * aos_grad_in_r_array(i,ipoint,3)
+
+      do k = 1, ao_num
+        !ao_k_r = aos_in_r_array_transp(ipoint,k)
+        ao_k_r = aos_in_r_array(k,ipoint)
+
+        !tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
+        !tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
+        !tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
+        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array(k,ipoint,1) 
+        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array(k,ipoint,2) 
+        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array(k,ipoint,3) 
+
+        do j = 1, ao_num
+          do l = 1, ao_num
+
+            contrib_x = int2_grad1_u12_ao(l,j,ipoint,1) * tmp_x 
+            contrib_y = int2_grad1_u12_ao(l,j,ipoint,2) * tmp_y 
+            contrib_z = int2_grad1_u12_ao(l,j,ipoint,3) * tmp_z 
+
+            ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  !do ipoint = 1, n_points_final_grid
+  !  weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+
+  !  do l = 1, ao_num
+  !    ao_l_r  = weight1 * aos_in_r_array_transp         (ipoint,l)
+  !    ao_l_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,1)
+  !    ao_l_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,2)
+  !    ao_l_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,3)
+
+  !    do j = 1, ao_num
+  !      ao_j_r = aos_in_r_array_transp(ipoint,j)
+
+  !      tmp_x = ao_j_r * ao_l_dx - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,1) 
+  !      tmp_y = ao_j_r * ao_l_dy - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,2) 
+  !      tmp_z = ao_j_r * ao_l_dz - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,3) 
+
+  !      do i = 1, ao_num
+  !        do k = 1, ao_num
+
+  !          contrib_x = int2_grad1_u12_ao(k,i,ipoint,1) * tmp_x 
+  !          contrib_y = int2_grad1_u12_ao(k,i,ipoint,2) * tmp_y 
+  !          contrib_z = int2_grad1_u12_ao(k,i,ipoint,3) * tmp_z 
+
+  !          ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  ! ---
+ 
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+          !tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_loop = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l, m
+  double precision              :: weight1, ao_k_r, ao_i_r
+  double precision              :: time0, time1
+  double precision, allocatable :: b_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao ...'
+  call wall_time(time0)
+
+  if(read_tc_integ) then
+
+    open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao', action="read")
+      do i = 1, ao_num
+        do j = 1, ao_num
+          do k = 1, ao_num
+            do l = 1, ao_num
+              read(11) tc_grad_and_lapl_ao(l,k,j,i)
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+
+  else
+
+    allocate(b_mat(n_points_final_grid,ao_num,ao_num,3))
+  
+    b_mat = 0.d0
+   !$OMP PARALLEL                                                              &
+   !$OMP DEFAULT (NONE)                                                        &
+   !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+   !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+   !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+   !$OMP DO SCHEDULE (static)
+    do i = 1, ao_num
+      do k = 1, ao_num
+        do ipoint = 1, n_points_final_grid
+  
+          weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+          ao_i_r  = aos_in_r_array_transp(ipoint,i)
+          ao_k_r  = aos_in_r_array_transp(ipoint,k)
+  
+          b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) 
+          b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) 
+          b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) 
+        enddo
+      enddo
+    enddo
+   !$OMP END DO
+   !$OMP END PARALLEL
+  
+    tc_grad_and_lapl_ao = 0.d0
+    do m = 1, 3
+      call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0              &
+                , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+                , 1.d0, tc_grad_and_lapl_ao, ao_num*ao_num) 
+  
+    enddo
+    deallocate(b_mat)
+  
+    call sum_A_At(tc_grad_and_lapl_ao(1,1,1,1), ao_num*ao_num)
+  ! !$OMP PARALLEL             &
+  ! !$OMP DEFAULT (NONE)       &
+  ! !$OMP PRIVATE (i, j, k, l) & 
+  ! !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao, ao_num)
+  ! !$OMP DO SCHEDULE (static)
+  !  do j = 1, ao_num
+  !    do l = 1, ao_num
+  !      do i = 1, ao_num
+  !        do k = 1, ao_num
+  !          tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  ! !$OMP END DO
+  ! !$OMP END PARALLEL
+
+  endif
+
+  if(write_tc_integ) then
+    open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao', action="write")
+      do i = 1, ao_num
+        do j = 1, ao_num
+          do k = 1, ao_num
+            do l = 1, ao_num
+              write(11) tc_grad_and_lapl_ao(l,k,j,i)
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+
diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
new file mode 100644
index 00000000..47b05e52
--- /dev/null
+++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
@@ -0,0 +1,227 @@
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
+  !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  !                                 - \grad_1 v1 x [       \int dr2                  u12              v2 \phi_i(r2) \phi_j(r2) ] 
+  !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j, m
+  double precision :: time0, time1
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  print*, ' providing int2_grad1_u12_ao_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(read_tc_integ) then
+
+    open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="read")
+      do m = 1, 3
+        do ipoint = 1, n_points_final_grid
+          do j = 1, ao_num
+            do i = 1, ao_num
+              read(11) int2_grad1_u12_ao_test(i,j,ipoint,m)
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+
+  else
+  
+    if(j1b_type .eq. 3) then
+      do ipoint = 1, n_points_final_grid
+        x = final_grid_points(1,ipoint)
+        y = final_grid_points(2,ipoint)
+        z = final_grid_points(3,ipoint) 
+        tmp0  = 0.5d0 * v_1b(ipoint)
+        tmp_x =  v_1b_grad(1,ipoint)
+        tmp_y =  v_1b_grad(2,ipoint)
+        tmp_z =  v_1b_grad(3,ipoint)
+        do j = 1, ao_num
+          do i = 1, ao_num
+            tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+            tmp2 = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+            int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1) - tmp2 * tmp_x
+            int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2) - tmp2 * tmp_y
+            int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3) - tmp2 * tmp_z
+          enddo
+        enddo
+      enddo
+    else
+      do ipoint = 1, n_points_final_grid
+        x = final_grid_points(1,ipoint)
+        y = final_grid_points(2,ipoint)
+        z = final_grid_points(3,ipoint)
+        do j = 1, ao_num
+          do i = 1, ao_num
+            tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+            int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,1)
+            int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,2)
+            int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,3)
+          enddo
+        enddo
+      enddo
+      int2_grad1_u12_ao_test *= 0.5d0
+    endif
+
+  endif
+
+  if(write_tc_integ) then
+    open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="write")
+      do m = 1, 3
+        do ipoint = 1, n_points_final_grid
+          do j = 1, ao_num
+            do i = 1, ao_num
+              write(11) int2_grad1_u12_ao_test(i,j,ipoint,m)
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for int2_grad1_u12_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l, m
+  double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
+  double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao_test ...'
+  call wall_time(time0)
+
+  if(read_tc_integ) then
+  
+    open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="read")
+      do i = 1, ao_num
+        do j = 1, ao_num
+          do k = 1, ao_num
+            do l = 1, ao_num
+              read(11) tc_grad_and_lapl_ao_test(l,k,j,i)
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+
+  else
+
+    provide int2_grad1_u12_ao_test 
+   
+    allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+  
+    b_mat = 0.d0
+   !$OMP PARALLEL                                                              &
+   !$OMP DEFAULT (NONE)                                                        &
+   !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+   !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+   !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+   !$OMP DO SCHEDULE (static)
+    do i = 1, ao_num
+      do k = 1, ao_num
+        do ipoint = 1, n_points_final_grid
+  
+          weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+          ao_i_r  = aos_in_r_array_transp(ipoint,i)
+          ao_k_r  = aos_in_r_array_transp(ipoint,k)
+  
+          b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1))
+          b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2))
+          b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3))
+        enddo
+      enddo
+    enddo
+   !$OMP END DO
+   !$OMP END PARALLEL
+  
+    ac_mat = 0.d0
+    do m = 1, 3
+      call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0                   &
+                , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+                , 1.d0, ac_mat, ao_num*ao_num)
+  
+    enddo
+    deallocate(b_mat)
+  
+   !$OMP PARALLEL             &
+   !$OMP DEFAULT (NONE)       &
+   !$OMP PRIVATE (i, j, k, l) & 
+   !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num)
+   !$OMP DO SCHEDULE (static)
+    do j = 1, ao_num
+      do l = 1, ao_num
+        do i = 1, ao_num
+          do k = 1, ao_num
+            tc_grad_and_lapl_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+          enddo
+        enddo
+      enddo
+    enddo
+   !$OMP END DO
+   !$OMP END PARALLEL
+  
+    deallocate(ac_mat)
+
+  endif
+
+  if(write_tc_integ) then
+    open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="write")
+      do i = 1, ao_num
+        do j = 1, ao_num
+          do k = 1, ao_num
+            do l = 1, ao_num
+              write(11) tc_grad_and_lapl_ao_test(l,k,j,i)
+            enddo
+          enddo
+        enddo
+      enddo
+    close(11)
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/numerical_integ.irp.f b/src/non_h_ints_mu/numerical_integ.irp.f
new file mode 100644
index 00000000..dcd7a52a
--- /dev/null
+++ b/src/non_h_ints_mu/numerical_integ.irp.f
@@ -0,0 +1,623 @@
+
+! --- 
+
+double precision function num_v_ij_u_cst_mu_j1b(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 u12 \phi_i(r2) \phi_j(r2) x v_1b(r2)
+  !
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+
+  double precision, external :: ao_value
+  double precision, external :: j12_mu, j1b_nucl, j12_mu_gauss
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_v_ij_u_cst_mu_j1b = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    num_v_ij_u_cst_mu_j1b += ao_value(i, r2) * ao_value(j, r2) * j12_mu_gauss(r1, r2) * j1b_nucl(r2) * final_weight_at_r_vector(jpoint)
+  enddo
+
+  return
+end function num_v_ij_u_cst_mu_j1b
+
+! ---
+
+double precision function num_int2_u2_j1b2(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 u12^2 \phi_i(r2) \phi_j(r2) x v_1b(r2)^2
+  !
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint, i_fit
+  double precision           :: r1(3), r2(3)
+  double precision           :: dx, dy, dz, r12, x2, tmp1, tmp2, tmp3, coef, expo
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+  double precision, external :: j12_mu
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_int2_u2_j1b2 = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    dx    = r1(1) - r2(1)
+    dy    = r1(2) - r2(2)
+    dz    = r1(3) - r2(3)
+    x2    = dx * dx + dy * dy + dz * dz 
+    r12   = dsqrt(x2)
+
+    tmp1 = j1b_nucl(r2)
+    tmp2 = tmp1 * tmp1 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint)
+    
+    !tmp3 = 0.d0
+    !do i_fit = 1, n_max_fit_slat
+    !  expo = expo_gauss_j_mu_x_2(i_fit)
+    !  coef = coef_gauss_j_mu_x_2(i_fit)
+    !  tmp3 += coef * dexp(-expo*x2)
+    !enddo
+    tmp3 = j12_mu(r1, r2)
+    tmp3 = tmp3 * tmp3
+
+    num_int2_u2_j1b2 += tmp2 * tmp3
+  enddo
+
+  return
+end function num_int2_u2_j1b2
+
+! ---
+
+double precision function num_int2_grad1u2_grad2u2_j1b2(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 \frac{-[erf(mu r12) -1]^2}{4} \phi_i(r2) \phi_j(r2) x v_1b(r2)^2
+  !
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint, i_fit
+  double precision           :: r1(3), r2(3)
+  double precision           :: dx, dy, dz, r12, x2, tmp1, tmp2, tmp3, coef, expo
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_int2_grad1u2_grad2u2_j1b2 = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    dx    = r1(1) - r2(1)
+    dy    = r1(2) - r2(2)
+    dz    = r1(3) - r2(3)
+    x2    = dx * dx + dy * dy + dz * dz 
+    r12   = dsqrt(x2)
+
+    tmp1 = j1b_nucl(r2)
+    tmp2 = tmp1 * tmp1 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint)
+    
+    !tmp3 = 0.d0
+    !do i_fit = 1, n_max_fit_slat
+    !  expo = expo_gauss_1_erf_x_2(i_fit)
+    !  coef = coef_gauss_1_erf_x_2(i_fit)
+    !  tmp3 += coef * dexp(-expo*x2)
+    !enddo
+    tmp3 = derf(mu_erf*r12) - 1.d0
+    tmp3 = tmp3 * tmp3
+
+    tmp3 = -0.25d0 * tmp3
+
+    num_int2_grad1u2_grad2u2_j1b2 += tmp2 * tmp3
+  enddo
+
+  return
+end function num_int2_grad1u2_grad2u2_j1b2
+
+! ---
+
+double precision function num_v_ij_erf_rk_cst_mu_j1b(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 [erf(mu r12) -1]/r12 \phi_i(r2) \phi_j(r2) x v_1b(r2)
+  !
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+  double precision           :: dx, dy, dz, r12, tmp1, tmp2
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_v_ij_erf_rk_cst_mu_j1b = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    dx    = r1(1) - r2(1)
+    dy    = r1(2) - r2(2)
+    dz    = r1(3) - r2(3)
+    r12   = dsqrt( dx * dx + dy * dy + dz * dz )
+    if(r12 .lt. 1d-10) cycle
+
+    tmp1  = (derf(mu_erf * r12) - 1.d0) / r12
+    tmp2  = tmp1 * ao_value(i, r2) * ao_value(j, r2) * j1b_nucl(r2) * final_weight_at_r_vector(jpoint)
+
+    num_v_ij_erf_rk_cst_mu_j1b += tmp2
+  enddo
+
+  return
+end function num_v_ij_erf_rk_cst_mu_j1b
+
+! ---
+
+subroutine num_x_v_ij_erf_rk_cst_mu_j1b(i, j, ipoint, integ)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 [erf(mu r12) -1]/r12 \phi_i(r2) \phi_j(r2) x v_1b(r2) x r2
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: i, j, ipoint
+  double precision, intent(out) :: integ(3)
+
+  integer                       :: jpoint
+  double precision              :: r1(3), r2(3), grad(3)
+  double precision              :: dx, dy, dz, r12, tmp1, tmp2
+  double precision              :: tmp_x, tmp_y, tmp_z
+
+  double precision, external    :: ao_value
+  double precision, external    :: j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  tmp_x = 0.d0
+  tmp_y = 0.d0
+  tmp_z = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    dx    = r1(1) - r2(1)
+    dy    = r1(2) - r2(2)
+    dz    = r1(3) - r2(3)
+    r12   = dsqrt( dx * dx + dy * dy + dz * dz )
+    if(r12 .lt. 1d-10) cycle
+
+    tmp1  = (derf(mu_erf * r12) - 1.d0) / r12
+    tmp2  = tmp1 * ao_value(i, r2) * ao_value(j, r2) * j1b_nucl(r2) * final_weight_at_r_vector(jpoint)
+
+    tmp_x += tmp2 * r2(1)
+    tmp_y += tmp2 * r2(2)
+    tmp_z += tmp2 * r2(3)
+  enddo
+
+  integ(1) = tmp_x
+  integ(2) = tmp_y
+  integ(3) = tmp_z
+
+  return
+end subroutine num_x_v_ij_erf_rk_cst_mu_j1b
+
+! ---
+
+subroutine num_int2_grad1_u12_ao(i, j, ipoint, integ)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 [-grad_1 u12] \phi_i(r2) \phi_j(r2) x v12_1b(r1, r2)
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: i, j, ipoint
+  double precision, intent(out) :: integ(3)
+
+  integer                       :: jpoint
+  double precision              :: tmp, r1(3), r2(3), grad(3)
+  double precision              :: tmp_x, tmp_y, tmp_z
+
+  double precision, external    :: ao_value
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  tmp_x = 0.d0
+  tmp_y = 0.d0
+  tmp_z = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    tmp   = ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint)
+
+    call grad1_jmu_modif_num(r1, r2, grad)
+
+    tmp_x += tmp * (-1.d0 * grad(1))
+    tmp_y += tmp * (-1.d0 * grad(2))
+    tmp_z += tmp * (-1.d0 * grad(3))
+  enddo
+
+  integ(1) = tmp_x
+  integ(2) = tmp_y
+  integ(3) = tmp_z
+
+  return
+end subroutine num_int2_grad1_u12_ao
+
+! ---
+
+double precision function num_gradu_squared_u_ij_mu(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! -0.50 x \int r2 \phi_i(2) \phi_j(2) x v2^2
+  !      [  v1^2    ((grad_1 u12)^2 + (grad_2 u12^2)]) 
+  !      + u12^2         (grad_1 v1)^2 
+  !      + 2 u12 v1 (grad_1 u12) . (grad_1 v1) 
+  !
+  END_DOC
+
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+  double precision           :: tmp_x, tmp_y, tmp_z, r12
+  double precision           :: dx1_v1, dy1_v1, dz1_v1, grad_u12(3)
+  double precision           :: tmp1, v1_tmp, v2_tmp, u12_tmp
+  double precision           :: fst_term, scd_term, thd_term, tmp
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+  double precision, external :: j12_mu
+  double precision, external :: grad_x_j1b_nucl
+  double precision, external :: grad_y_j1b_nucl
+  double precision, external :: grad_z_j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_gradu_squared_u_ij_mu = 0.d0
+  do jpoint = 1, n_points_final_grid
+
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    tmp_x = r1(1) - r2(1)
+    tmp_y = r1(2) - r2(2)
+    tmp_z = r1(3) - r2(3)
+    r12   = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z)
+
+    dx1_v1 = grad_x_j1b_nucl(r1)
+    dy1_v1 = grad_y_j1b_nucl(r1)
+    dz1_v1 = grad_z_j1b_nucl(r1)
+
+    call grad1_j12_mu_exc(r1, r2, grad_u12)
+
+    tmp1    = 1.d0 - derf(mu_erf * r12)
+    v1_tmp  = j1b_nucl(r1)
+    v2_tmp  = j1b_nucl(r2)
+    u12_tmp = j12_mu(r1, r2)
+
+    fst_term = 0.5d0 * tmp1 * tmp1 * v1_tmp * v1_tmp
+    scd_term = u12_tmp * u12_tmp * (dx1_v1*dx1_v1 + dy1_v1*dy1_v1 + dz1_v1*dz1_v1)
+    thd_term = 2.d0 * v1_tmp * u12_tmp * (dx1_v1*grad_u12(1) + dy1_v1*grad_u12(2) + dz1_v1*grad_u12(3))
+
+    tmp = -0.5d0 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint) * (fst_term + scd_term + thd_term) * v2_tmp * v2_tmp
+
+    num_gradu_squared_u_ij_mu += tmp
+  enddo
+
+  return
+end function num_gradu_squared_u_ij_mu
+
+! ---
+
+double precision function num_grad12_j12(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! -0.50 x \int r2 \phi_i(2) \phi_j(2) x v2^2 [v1^2 ((grad_1 u12)^2 + (grad_2 u12^2)]) ]
+  !
+  END_DOC
+
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+  double precision           :: tmp_x, tmp_y, tmp_z, r12
+  double precision           :: dx1_v1, dy1_v1, dz1_v1, grad_u12(3)
+  double precision           :: tmp1, v1_tmp, v2_tmp, u12_tmp
+  double precision           :: fst_term, scd_term, thd_term, tmp
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+  double precision, external :: j12_mu
+  double precision, external :: grad_x_j1b_nucl
+  double precision, external :: grad_y_j1b_nucl
+  double precision, external :: grad_z_j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_grad12_j12 = 0.d0
+  do jpoint = 1, n_points_final_grid
+
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    tmp_x = r1(1) - r2(1)
+    tmp_y = r1(2) - r2(2)
+    tmp_z = r1(3) - r2(3)
+    r12   = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z)
+
+    dx1_v1 = grad_x_j1b_nucl(r1)
+    dy1_v1 = grad_y_j1b_nucl(r1)
+    dz1_v1 = grad_z_j1b_nucl(r1)
+
+    call grad1_j12_mu_exc(r1, r2, grad_u12)
+
+    tmp1    = 1.d0 - derf(mu_erf * r12)
+    v1_tmp  = j1b_nucl(r1)
+    v2_tmp  = j1b_nucl(r2)
+    u12_tmp = j12_mu(r1, r2)
+
+    fst_term = 0.5d0 * tmp1 * tmp1 * v1_tmp * v1_tmp
+
+    tmp = -0.5d0 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint) * fst_term * v2_tmp * v2_tmp
+
+    num_grad12_j12 += tmp
+  enddo
+
+  return
+end function num_grad12_j12
+
+! ---
+
+double precision function num_u12sq_j1bsq(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! -0.50 x \int r2 \phi_i(2) \phi_j(2) x v2^2 [ u12^2 (grad_1 v1)^2 ]
+  !
+  END_DOC
+
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+  double precision           :: tmp_x, tmp_y, tmp_z, r12
+  double precision           :: dx1_v1, dy1_v1, dz1_v1, grad_u12(3)
+  double precision           :: tmp1, v1_tmp, v2_tmp, u12_tmp
+  double precision           :: fst_term, scd_term, thd_term, tmp
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+  double precision, external :: j12_mu
+  double precision, external :: grad_x_j1b_nucl
+  double precision, external :: grad_y_j1b_nucl
+  double precision, external :: grad_z_j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_u12sq_j1bsq = 0.d0
+  do jpoint = 1, n_points_final_grid
+
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    tmp_x = r1(1) - r2(1)
+    tmp_y = r1(2) - r2(2)
+    tmp_z = r1(3) - r2(3)
+    r12   = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z)
+
+    dx1_v1 = grad_x_j1b_nucl(r1)
+    dy1_v1 = grad_y_j1b_nucl(r1)
+    dz1_v1 = grad_z_j1b_nucl(r1)
+
+    call grad1_j12_mu_exc(r1, r2, grad_u12)
+
+    tmp1    = 1.d0 - derf(mu_erf * r12)
+    v1_tmp  = j1b_nucl(r1)
+    v2_tmp  = j1b_nucl(r2)
+    u12_tmp = j12_mu(r1, r2)
+
+    scd_term = u12_tmp * u12_tmp * (dx1_v1*dx1_v1 + dy1_v1*dy1_v1 + dz1_v1*dz1_v1)
+
+    tmp = -0.5d0 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint) * scd_term * v2_tmp * v2_tmp
+
+    num_u12sq_j1bsq += tmp
+  enddo
+
+  return
+end function num_u12sq_j1bsq
+
+! ---
+
+double precision function num_u12_grad1_u12_j1b_grad1_j1b(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! -0.50 x \int r2 \phi_i(2) \phi_j(2) x v2^2 [ 2 u12 v1 (grad_1 u12) . (grad_1 v1) ]
+  !
+  END_DOC
+
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+  double precision           :: tmp_x, tmp_y, tmp_z, r12
+  double precision           :: dx1_v1, dy1_v1, dz1_v1, grad_u12(3)
+  double precision           :: tmp1, v1_tmp, v2_tmp, u12_tmp
+  double precision           :: fst_term, scd_term, thd_term, tmp
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+  double precision, external :: j12_mu
+  double precision, external :: grad_x_j1b_nucl
+  double precision, external :: grad_y_j1b_nucl
+  double precision, external :: grad_z_j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_u12_grad1_u12_j1b_grad1_j1b = 0.d0
+  do jpoint = 1, n_points_final_grid
+
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    tmp_x = r1(1) - r2(1)
+    tmp_y = r1(2) - r2(2)
+    tmp_z = r1(3) - r2(3)
+    r12   = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z)
+
+    dx1_v1 = grad_x_j1b_nucl(r1)
+    dy1_v1 = grad_y_j1b_nucl(r1)
+    dz1_v1 = grad_z_j1b_nucl(r1)
+
+    call grad1_j12_mu_exc(r1, r2, grad_u12)
+
+    tmp1    = 1.d0 - derf(mu_erf * r12)
+    v1_tmp  = j1b_nucl(r1)
+    v2_tmp  = j1b_nucl(r2)
+    u12_tmp = j12_mu(r1, r2)
+
+    thd_term = 2.d0 * v1_tmp * u12_tmp * (dx1_v1*grad_u12(1) + dy1_v1*grad_u12(2) + dz1_v1*grad_u12(3))
+
+    tmp = -0.5d0 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint) * thd_term * v2_tmp * v2_tmp
+
+    num_u12_grad1_u12_j1b_grad1_j1b += tmp
+  enddo
+
+  return
+end function num_u12_grad1_u12_j1b_grad1_j1b
+
+! ---
+
+subroutine num_int2_u_grad1u_total_j1b2(i, j, ipoint, integ)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 u12 (grad_1 u12) \phi_i(r2) \phi_j(r2) x v_1b(r2)^2
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: i, j, ipoint
+  double precision, intent(out) :: integ(3)
+
+  integer                       :: jpoint
+  double precision              :: r1(3), r2(3), grad(3)
+  double precision              :: dx, dy, dz, r12, tmp0, tmp1, tmp2
+  double precision              :: tmp_x, tmp_y, tmp_z
+
+  double precision, external    :: ao_value
+  double precision, external    :: j1b_nucl
+  double precision, external    :: j12_mu
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  tmp_x = 0.d0
+  tmp_y = 0.d0
+  tmp_z = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    dx    = r1(1) - r2(1)
+    dy    = r1(2) - r2(2)
+    dz    = r1(3) - r2(3)
+    r12   = dsqrt( dx * dx + dy * dy + dz * dz )
+    if(r12 .lt. 1d-10) cycle
+
+    tmp0 = j1b_nucl(r2)
+    tmp1 = 0.5d0 * j12_mu(r1, r2) * (1.d0 - derf(mu_erf * r12)) / r12
+    tmp2 = tmp0 * tmp0 * tmp1 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint)
+
+    tmp_x += tmp2 * dx 
+    tmp_y += tmp2 * dy 
+    tmp_z += tmp2 * dz 
+  enddo
+
+  integ(1) = tmp_x
+  integ(2) = tmp_y
+  integ(3) = tmp_z
+
+  return
+end subroutine num_int2_u_grad1u_total_j1b2
+
+! ---
diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f
new file mode 100644
index 00000000..c535d0c5
--- /dev/null
+++ b/src/non_h_ints_mu/test_non_h_ints.irp.f
@@ -0,0 +1,102 @@
+program test_non_h
+ implicit none
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 50
+  my_n_pt_a_grid = 74
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+!call routine_grad_squared
+ call routine_fit
+end
+
+subroutine routine_lapl_grad
+ implicit none
+ integer :: i,j,k,l
+ double precision :: grad_lapl, get_ao_tc_sym_two_e_pot,new,accu,contrib
+ double precision :: ao_two_e_integral_erf,get_ao_two_e_integral,count_n,accu_relat
+! !!!!!!!!!!!!!!!!!!!!! WARNING
+! THIS ROUTINE MAKES SENSE ONLY IF HAND MODIFIED coef_gauss_eff_pot(1:n_max_fit_slat) = 0. to cancel (1-erf(mu*r12))^2
+ accu = 0.d0
+ accu_relat = 0.d0
+ count_n = 0.d0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     grad_lapl  = get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map) ! pure gaussian part : comes from Lapl
+     grad_lapl += ao_two_e_integral_erf(i, k, j, l)                        ! erf(mu r12)/r12    : comes from Lapl
+     grad_lapl += ao_non_hermit_term_chemist(k,i,l,j)                      ! \grad u(r12) . grad
+     new        = tc_grad_and_lapl_ao(k,i,l,j)
+     new       += get_ao_two_e_integral(i,j,k,l,ao_integrals_map)
+     contrib    = dabs(new - grad_lapl)
+     if(dabs(grad_lapl).gt.1.d-12)then
+      count_n += 1.d0
+      accu_relat += 2.0d0 * contrib/dabs(grad_lapl+new)
+     endif
+     if(contrib.gt.1.d-10)then
+      print*,i,j,k,l
+      print*,grad_lapl,new,contrib
+      print*,2.0d0*contrib/dabs(grad_lapl+new+1.d-12)
+     endif 
+     accu += contrib
+    enddo
+   enddo
+  enddo
+ enddo
+ print*,'accu      = ',accu/count_n
+ print*,'accu/rel  = ',accu_relat/count_n
+
+end
+
+subroutine routine_grad_squared
+ implicit none
+ integer :: i,j,k,l
+ double precision :: grad_squared, get_ao_tc_sym_two_e_pot,new,accu,contrib
+ double precision :: count_n,accu_relat
+! !!!!!!!!!!!!!!!!!!!!! WARNING
+! THIS ROUTINE MAKES SENSE ONLY IF HAND MODIFIED coef_gauss_eff_pot(n_max_fit_slat:n_max_fit_slat+1) = 0. to cancel exp(-'mu*r12)^2)
+ accu = 0.d0
+ accu_relat = 0.d0
+ count_n = 0.d0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     grad_squared  = get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map) ! pure gaussian part : comes from Lapl
+     new        = tc_grad_square_ao(k,i,l,j)
+     contrib    = dabs(new - grad_squared)
+     if(dabs(grad_squared).gt.1.d-12)then
+      count_n += 1.d0
+      accu_relat += 2.0d0 * contrib/dabs(grad_squared+new)
+     endif
+     if(contrib.gt.1.d-10)then
+      print*,i,j,k,l
+      print*,grad_squared,new,contrib
+      print*,2.0d0*contrib/dabs(grad_squared+new+1.d-12)
+     endif 
+     accu += contrib
+    enddo
+   enddo
+  enddo
+ enddo
+ print*,'accu      = ',accu/count_n
+ print*,'accu/rel  = ',accu_relat/count_n
+
+end
+
+subroutine routine_fit
+ implicit none
+ integer :: i,nx
+ double precision :: dx,xmax,x,j_mu,j_mu_F_x_j,j_mu_fit_gauss
+ nx = 500
+ xmax = 5.d0
+ dx = xmax/dble(nx)
+ x = 0.d0
+ print*,'coucou',mu_erf
+ do i = 1, nx
+  write(33,'(100(F16.10,X))') x,j_mu(x),j_mu_F_x_j(x),j_mu_fit_gauss(x)
+  x += dx
+ enddo
+
+end
diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f
new file mode 100644
index 00000000..2fd2719c
--- /dev/null
+++ b/src/non_h_ints_mu/total_tc_int.irp.f
@@ -0,0 +1,131 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num, ao_num)]
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: wall1, wall0
+
+  print *, ' providing ao_vartc_int_chemist ...'
+  call wall_time(wall0)
+  
+  if(test_cycle_tc) then
+    do j = 1, ao_num
+      do l = 1, ao_num
+        do i = 1, ao_num
+          do k = 1, ao_num
+            ao_vartc_int_chemist(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+          enddo
+        enddo
+      enddo
+    enddo
+  else
+    do j = 1, ao_num
+      do l = 1, ao_num
+        do i = 1, ao_num
+          do k = 1, ao_num
+            ao_vartc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+          enddo
+        enddo
+      enddo
+    enddo
+  endif
+
+  call wall_time(wall1)
+  print *, ' wall time for ao_vartc_int_chemist ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao_num)]
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: wall1, wall0
+
+  print *, ' providing ao_tc_int_chemist ...'
+  call wall_time(wall0)
+  
+  if(test_cycle_tc)then
+    ao_tc_int_chemist = ao_tc_int_chemist_test
+  else
+    do j = 1, ao_num
+      do l = 1, ao_num
+        do i = 1, ao_num
+          do k = 1, ao_num
+            ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+!            ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j)
+          enddo
+        enddo
+      enddo
+    enddo
+  endif
+
+  call wall_time(wall1)
+  print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: wall1, wall0
+
+  print *, ' providing ao_tc_int_chemist_test ...'
+  call wall_time(wall0)
+
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_and_lapl_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+!           ao_tc_int_chemist_test(k,i,l,j) =  ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for ao_tc_int_chemist_test ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! ao_two_e_coul(k,i,l,j) = ( k i | 1/r12 | l j ) = < l k | 1/r12 | j i > 
+  !
+  END_DOC
+
+  integer                    :: i, j, k, l
+  double precision           :: integral
+  double precision, external :: get_ao_two_e_integral
+
+  PROVIDE ao_integrals_map
+
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+
+          !  < 1:k, 2:l | 1:i, 2:j > 
+          integral = get_ao_two_e_integral(i, j, k, l, ao_integrals_map)
+
+          ao_two_e_coul(k,i,l,j) = integral
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_hermit_dav/NEED b/src/non_hermit_dav/NEED
new file mode 100644
index 00000000..9487075c
--- /dev/null
+++ b/src/non_hermit_dav/NEED
@@ -0,0 +1 @@
+utils
diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f
new file mode 100644
index 00000000..78fddf54
--- /dev/null
+++ b/src/non_hermit_dav/biorthog.irp.f
@@ -0,0 +1,1156 @@
+subroutine non_hrmt_diag_split_degen(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, n_degen,k , iteration
+  integer                       :: n_good
+  double precision              :: shift,shift_current
+  double precision              :: r,thr
+  integer,          allocatable :: list_good(:), iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:),re_part(:)
+
+
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  shift = 1.d-15
+  shift_current = shift
+  iteration = 1 
+  logical :: good_ortho
+  good_ortho = .False.
+  do while(n_real_eigv.ne.n.or. .not.good_ortho)
+   if(shift.gt.1.d-3)then
+    print*,'shift > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   print*,'***** iteration = ',iteration
+   print*,'shift = ',shift
+   allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+   Aw = A_save
+   do i = 1, n
+    do j = 1, n
+     if(dabs(Aw(j,i)).lt.shift)then
+      Aw(j,i) = 0.d0
+     endif
+    enddo
+   enddo
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   allocate(im_part(n),iorder(n))
+   do i = 1, n
+    im_part(i) = -dabs(WI(i))
+    iorder(i) = i
+   enddo
+   call dsort(im_part, iorder, n)
+
+   shift_current = max(10.d0 * dabs(im_part(1)),shift)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+   Aw = A_save
+   call split_matrix_degen(Aw,n,shift_current)
+   deallocate( im_part, iorder )
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   ! You track the real eigenvalues 
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_good += 1
+     else
+       print*,'Found an imaginary component to eigenvalue'
+       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   allocate( list_good(n_good), iorder(n_good) )
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_good += 1
+       list_good(n_good) = i
+       eigval(n_good) = WR(i)
+     endif
+   enddo
+   deallocate( WR, WI )
+ 
+   n_real_eigv = n_good 
+   do i = 1, n_good
+     iorder(i) = i
+   enddo
+ 
+   ! You sort the real eigenvalues 
+   call dsort(eigval, iorder, n_good)
+ 
+   reigvec(:,:) = 0.d0 
+   leigvec(:,:) = 0.d0 
+   do i = 1, n_real_eigv
+     do j = 1, n
+       reigvec_tmp(j,i) = VR(j,list_good(iorder(i)))
+       leigvec_tmp(j,i) = Vl(j,list_good(iorder(i)))
+     enddo
+   enddo
+
+   if(n_real_eigv == n)then
+    allocate(S(n,n))
+    call check_bi_ortho(reigvec_tmp,leigvec_tmp,n,S,accu_nd)
+    print*,'accu_nd = ',accu_nd
+    double precision :: accu_nd
+    good_ortho = accu_nd .lt. 1.d-10
+    deallocate(S)
+   endif
+ 
+   deallocate( list_good, iorder )
+   deallocate( VL, VR, Aw)
+   shift *= 10.d0
+   iteration += 1
+  enddo
+  do i = 1, n
+   do j = 1, n
+    reigvec(iorder_origin(j),i) = reigvec_tmp(j,i)
+    leigvec(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+end subroutine non_hrmt_diag_split_degen
+
+! ---
+
+subroutine non_hrmt_real_diag_new(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: shift,shift_current
+  double precision              :: r,thr
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: im_part(:)
+
+
+  print*,'Computing the left/right eigenvectors ...'
+
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  shift = 1.d-10
+  do while(n_real_eigv.ne.n.or.shift.gt.1.d-3)
+   allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+   Aw = A
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   allocate(im_part(n), iorder(n))
+   do i = 1, n
+    im_part(i) = -dabs(WI(i))
+    iorder(i) = i
+   enddo
+   shift_current = max(10.d0 * dabs(im_part(1)),shift)
+   print*,'adding random number of magnitude ',shift_current
+   Aw = A
+   do i = 1, n
+     call RANDOM_NUMBER(r)
+     Aw(i,i) += shift_current * r
+   enddo
+   deallocate( im_part, iorder )
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+ 
+   ! You track the real eigenvalues 
+   thr = 1.d-10
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.thr)then
+       n_good += 1
+     else
+       print*,'Found an imaginary component to eigenvalue'
+       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+ 
+   allocate( list_good(n_good), iorder(n_good) )
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.thr)then
+       n_good += 1
+       list_good(n_good) = i
+       eigval(n_good) = WR(i)
+     endif
+   enddo
+ 
+   deallocate( WR, WI )
+ 
+   n_real_eigv = n_good 
+   do i = 1, n_good
+     iorder(i) = i
+   enddo
+ 
+   ! You sort the real eigenvalues 
+   call dsort(eigval, iorder, n_good)
+ 
+   reigvec(:,:) = 0.d0 
+   leigvec(:,:) = 0.d0 
+   do i = 1, n_real_eigv
+     do j = 1, n
+       reigvec(j,i) = VR(j,list_good(iorder(i)))
+       leigvec(j,i) = Vl(j,list_good(iorder(i)))
+     enddo
+   enddo
+ 
+   deallocate( list_good, iorder )
+   deallocate( VL, VR, Aw)
+   shift *= 10.d0
+  enddo
+  if(shift.gt.1.d-3)then
+   print*,'shift > 1.d-3 !!'
+   print*,'Your matrix intrinsically contains complex eigenvalues'
+  endif
+
+end subroutine non_hrmt_real_diag_new
+
+! ---
+
+subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(in)  :: thr_d, thr_nd
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: thr, thr_cut, thr_diag, thr_norm
+  double precision              :: accu_d, accu_nd
+
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+
+
+  ! -------------------------------------------------------------------------------------
+  !
+
+  !print *, ' '
+  !print *, ' Computing the left/right eigenvectors ...'
+  !print *, ' '
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n)) 
+  
+  !print *, ' fock matrix'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') A(i,:)
+  !enddo
+
+  !thr_cut = 1.d-15
+  !call cancel_small_elmts(A, n, thr_cut)
+
+  !call lapack_diag_non_sym_right(n, A, WR, WI, VR)
+  call lapack_diag_non_sym(n, A, WR, WI, VL, VR)
+  !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR)
+
+  !print *, ' '
+  !print *, ' eigenvalues'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') WR(i), WI(i)
+  !enddo
+  !print *, ' right eigenvect bef' 
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') VR(:,i)
+  !enddo
+  !print *, ' left eigenvect bef'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') VL(:,i)
+  !enddo
+
+  thr_diag = 1d-06
+  thr_norm = 1d+10
+  call check_EIGVEC(n, n, A, WR, VL, VR, thr_diag, thr_norm, .false.)
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  n_good = 0
+  !thr    = 100d0
+  thr    = Im_thresh_tcscf
+  do i = 1, n
+    !print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    if(dabs(WI(i)) .lt. thr) then
+      n_good += 1
+    else
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+
+  if(n_good.ne.n)then
+   print*,'there are some imaginary eigenvalues '
+   thr_diag = 1d-03
+   n_good = n
+  endif
+  allocate(list_good(n_good), iorder(n_good))
+
+  n_good = 0
+  do i = 1, n
+    n_good += 1
+    list_good(n_good) = i
+    eigval(n_good) = WR(i)
+  enddo
+
+  deallocate( WR, WI )
+
+  n_real_eigv = n_good 
+  do i = 1, n_good
+    iorder(i) = i
+  enddo
+  call dsort(eigval, iorder, n_good)
+      
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n_real_eigv
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = VL(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+  deallocate( list_good, iorder )
+  deallocate( VL, VR )
+
+  ASSERT(n==n_real_eigv)
+
+  !print *, ' eigenvalues'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') eigval(i)
+  !enddo
+  !print *, ' right eigenvect aft ord' 
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') reigvec(:,i)
+  !enddo
+  !print *, ' left eigenvect aft ord'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') leigvec(:,i)
+  !enddo
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  thr_diag = 10.d0
+  thr_norm = 1d+10
+
+  allocate( S(n_real_eigv,n_real_eigv) )
+  call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .false.)
+
+  if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .lt. thr_d) ) then
+
+    !print *, ' lapack vectors are normalized and bi-orthogonalized'
+    deallocate(S)
+    return
+
+  ! accu_nd is modified after adding the normalization
+  !elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then
+
+  !  print *, ' lapack vectors are not normalized but bi-orthogonalized'
+  !  call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
+
+  !  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
+
+  !  deallocate(S)
+  !  return
+
+  else
+
+    !print *, ' lapack vectors are not normalized neither bi-orthogonalized'
+
+    ! ---
+
+!   call impose_orthog_degen_eigvec(n, eigval, reigvec)
+!   call impose_orthog_degen_eigvec(n, eigval, leigvec)
+
+    call impose_biorthog_degen_eigvec(n, eigval, leigvec, reigvec)
+
+
+    !call impose_orthog_biorthog_degen_eigvec(n, thr_d, thr_nd, eigval, leigvec, reigvec)
+
+    !call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, ao_overlap, leigvec, reigvec)
+
+    ! ---
+
+    call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .false.)
+    if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .gt. thr_d) ) then
+      call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
+    endif
+    call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .true.)
+
+    !call impose_biorthog_qr(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec)
+    !call impose_biorthog_lu(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec)
+
+    ! ---
+
+    call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
+
+    deallocate(S)
+
+  endif
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  return
+
+end subroutine non_hrmt_bieig
+
+! ---
+
+subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: thr
+  double precision              :: accu_nd
+
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+  double precision :: r
+
+
+  ! -------------------------------------------------------------------------------------
+  !
+
+  print *, 'Computing the left/right eigenvectors ...'
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) )
+
+  Aw(:,:) = A(:,:)
+  call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR)
+
+  thr    = 1.d-12
+  double precision, allocatable :: im_part(:)
+  n_good = 0
+  do i = 1, n
+    if( dabs(WI(i)).lt.thr ) then
+      n_good += 1
+    else
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+  print*,'n_good = ',n_good
+  if(n_good .lt. n)then
+   print*,'Removing degeneracies to remove imaginary parts'
+   allocate(im_part(n),iorder(n))
+   r = 0.d0
+   do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+   enddo
+   call dsort(im_part,iorder,n) 
+   thr = 10.d0 * dabs(im_part(1))
+   print*,'adding random numbers on the diagonal of magnitude ',thr
+   Aw(:,:) = A(:,:)
+   do i = 1, n
+     call RANDOM_NUMBER(r)
+     print*,'r = ',r*thr
+     Aw(i,i) += thr * r
+   enddo
+   print*,'Rediagonalizing the matrix with random numbers'
+   call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR)
+   deallocate(im_part,iorder)
+  endif
+  deallocate( Aw )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  n_good = 0
+  thr    = 1.d-5
+  do i = 1, n
+    if( dabs(WI(i)).lt.thr ) then
+      n_good += 1
+    else
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+  print*,'n_good = ',n_good
+  allocate( list_good(n_good), iorder(n_good) )
+
+  n_good = 0
+  do i = 1, n
+    if( dabs(WI(i)).lt.thr ) then
+      n_good += 1
+      list_good(n_good) = i
+      eigval(n_good) = WR(i)
+    endif
+  enddo
+
+  deallocate( WR, WI )
+
+  n_real_eigv = n_good 
+  do i = 1, n_good
+    iorder(i) = i
+  enddo
+  call dsort(eigval, iorder, n_good)
+      
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n_real_eigv
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = VL(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+  deallocate( list_good, iorder )
+  deallocate( VL, VR )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  allocate( S(n_real_eigv,n_real_eigv) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n, 1.d0          &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, n_real_eigv
+    do j = 1, n_real_eigv
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    ! L x R is already bi-orthogonal
+
+    print *, ' L & T bi-orthogonality: ok'
+    deallocate( S )
+    return
+
+  else
+    ! impose bi-orthogonality 
+
+    print *, ' L & T bi-orthogonality: not imposed yet'
+    print *, ' accu_nd = ', accu_nd
+    call impose_biorthog_qr(n, n_real_eigv, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec)
+    deallocate( S )
+  
+  endif
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  return
+
+end subroutine non_hrmt_bieig_random_diag
+
+! ---
+
+subroutine non_hrmt_real_im(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the EIGENVALUES sorted the REAL part and corresponding LEFT/RIGHT eigenvetors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_bad
+  double precision              :: thr
+  double precision              :: accu_nd
+
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+  double precision :: r
+
+  ! -------------------------------------------------------------------------------------
+  !
+
+  print *, 'Computing the left/right eigenvectors ...'
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), iorder(n))
+
+  Aw(:,:) = A(:,:)
+   do i = 1, n
+     call RANDOM_NUMBER(r)
+     Aw(i,i) += 10.d-10* r
+   enddo
+  call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR)
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  i = 1
+  thr    = 1.d-15
+  n_real_eigv = 0
+  do while (i.le.n) 
+!    print*,i,dabs(WI(i))
+    if( dabs(WI(i)).gt.thr ) then
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) , Im(i)  ', WR(i), WI(i)
+      iorder(i) = i
+      eigval(i) = WR(i)
+      i+=1
+      print*, 'Re(i+1),Im(i+1)',WR(i), WI(i)
+      iorder(i) = i
+      eigval(i) = WR(i)
+      i+=1
+    else  
+      n_real_eigv += 1
+      iorder(i) = i
+      eigval(i) = WR(i)
+      i+=1
+    endif
+  enddo
+  call dsort(eigval, iorder, n)
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n
+    do j = 1, n
+      reigvec(j,i) = VR(j,iorder(i))
+      leigvec(j,i) = VL(j,iorder(i))
+    enddo
+  enddo
+
+  deallocate( iorder )
+  deallocate( VL, VR )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  allocate( S(n,n) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n, n, n, 1.d0          &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  deallocate( S )
+
+end subroutine non_hrmt_real_im
+
+! ---
+
+subroutine non_hrmt_generalized_real_im(n, A, B, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the EIGENVALUES sorted the REAL part and corresponding LEFT/RIGHT eigenvetors 
+  ! for A R = lambda B R and A^\dagger L = lambda B^\dagger L
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n),B(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_bad
+  double precision              :: thr
+  double precision              :: accu_nd
+
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: Aw(:,:),Bw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:), beta(:)
+  double precision, allocatable :: S(:,:)
+  double precision :: r
+
+  ! -------------------------------------------------------------------------------------
+  !
+
+  print *, 'Computing the left/right eigenvectors ...'
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), Bw(n,n),iorder(n),beta(n))
+
+  Aw(:,:) = A(:,:)
+  Bw(:,:) = B(:,:)
+  call lapack_diag_general_non_sym(n,Aw,Bw,WR,beta,WI,VL,VR)
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  i = 1
+  thr    = 1.d-10
+  n_real_eigv = 0
+  do while (i.le.n) 
+    if( dabs(WI(i)).gt.thr ) then
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) , Im(i)  ', WR(i), WI(i)
+      iorder(i) = i
+      eigval(i) = WR(i)/(beta(i) + 1.d-10)
+      i+=1
+      print*, 'Re(i+1),Im(i+1)',WR(i), WI(i)
+      iorder(i) = i
+      eigval(i) = WR(i)/(beta(i) + 1.d-10)
+      i+=1
+    else  
+      n_real_eigv += 1
+      iorder(i) = i
+      eigval(i) = WR(i)/(beta(i) + 1.d-10)
+      i+=1
+    endif
+  enddo
+  call dsort(eigval, iorder, n)
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n
+    do j = 1, n
+      reigvec(j,i) = VR(j,iorder(i))
+      leigvec(j,i) = VL(j,iorder(i))
+    enddo
+  enddo
+
+  deallocate( iorder )
+  deallocate( VL, VR )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  allocate( S(n,n) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n, n, n, 1.d0          &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  deallocate( S )
+
+end subroutine non_hrmt_generalized_real_im
+
+! ---
+
+subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: thr
+  double precision              :: accu_nd
+
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+  double precision, allocatable :: eigval_sorted(:)
+
+
+  ! -------------------------------------------------------------------------------------
+  !
+
+  print *, 'Computing the left/right eigenvectors ...'
+
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) )
+  Aw(:,:) = A(:,:)
+
+  call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR)
+
+  deallocate( Aw )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  allocate( eigval_sorted(n), iorder(n) )
+
+  n_good = 0
+  thr    = 1.d-10
+
+  do i = 1, n
+
+    iorder(i) = i
+    eigval_sorted(i) = WR(i)
+
+    if(dabs(WI(i)) .gt. thr) then
+      print*, ' Found an imaginary component to eigenvalue on i = ', i
+      print*, ' Re(i) + Im(i)', WR(i), WI(i)
+    else
+      n_good += 1
+    endif
+
+  enddo
+
+  n_real_eigv = n_good 
+
+  call dsort(eigval_sorted, iorder, n)
+      
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n
+    eigval(i) = WR(i)
+    do j = 1, n
+      reigvec(j,i) = VR(j,iorder(i))
+      leigvec(j,i) = VL(j,iorder(i))
+    enddo
+  enddo
+
+  deallocate( eigval_sorted, iorder )
+  deallocate( WR, WI )
+  deallocate( VL, VR )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  allocate( S(n,n) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n, n, n, 1.d0                              &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    ! L x R is already bi-orthogonal
+
+    !print *, ' L & T bi-orthogonality: ok'
+    deallocate( S )
+    return
+
+  else
+    ! impose bi-orthogonality 
+
+    !print *, ' L & T bi-orthogonality: not imposed yet'
+    !print *, ' accu_nd = ', accu_nd
+    call impose_biorthog_qr(n, n, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec)
+    deallocate( S )
+  
+  endif
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  return
+
+end subroutine non_hrmt_bieig_fullvect
+
+! ---
+
+
+subroutine split_matrix_degen(aw,n,shift)
+ implicit none
+ BEGIN_DOC
+ ! subroutines that splits the degeneracies of a matrix by adding a splitting of magnitude thr * n_degen/2
+ !
+ ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS
+ END_DOC
+ double precision,intent(inout) :: Aw(n,n)
+ double precision,intent(in)    :: shift
+ integer, intent(in) :: n
+ integer :: i,j,n_degen
+ logical :: keep_on
+ i=1
+ do while(i.lt.n)
+  if(dabs(Aw(i,i)-Aw(i+1,i+1)).lt.shift)then
+   j=1
+   keep_on = .True.
+   do while(keep_on)
+    if(i+j.gt.n)then
+     keep_on = .False.
+     exit
+    endif
+    if(dabs(Aw(i,i)-Aw(i+j,i+j)).lt.shift)then
+     j+=1
+    else
+     keep_on=.False.
+     exit
+    endif
+   enddo
+   n_degen = j
+   j=0
+   keep_on = .True.
+   do while(keep_on)
+    if(i+j+1.gt.n)then
+     keep_on = .False.
+     exit
+    endif
+    if(dabs(Aw(i+j,i+j)-Aw(i+j+1,i+j+1)).lt.shift)then
+     Aw(i+j,i+j) += (j-n_degen/2) * shift
+     j+=1
+    else 
+     keep_on = .False.
+     exit
+    endif
+   enddo
+   Aw(i+n_degen-1,i+n_degen-1) += (n_degen-1-n_degen/2) * shift
+   i+=n_degen
+  else 
+   i+=1
+  endif
+ enddo
+
+end
+
+subroutine give_degen(a,n,shift,list_degen,n_degen_list)
+ implicit none
+ BEGIN_DOC
+ ! returns n_degen_list :: the number of degenerated SET of elements (i.e. with |A(i)-A(i+1)| below shift)
+ !
+ ! for each of these sets, list_degen(1,i) = first degenerate element of the set i, 
+ !
+ !                         list_degen(2,i) = last degenerate element of the set i.
+ END_DOC
+ double precision,intent(in) :: A(n)
+ double precision,intent(in)    :: shift
+ integer, intent(in) :: n
+ integer, intent(out) :: list_degen(2,n),n_degen_list
+ integer :: i,j,n_degen,k
+ logical :: keep_on
+ double precision,allocatable :: Aw(:)
+ list_degen = -1
+ allocate(Aw(n))
+ Aw = A
+ i=1
+ k = 0
+ do while(i.lt.n)
+  if(dabs(Aw(i)-Aw(i+1)).lt.shift)then
+   k+=1
+   j=1
+   list_degen(1,k) = i
+   keep_on = .True.
+   do while(keep_on)
+    if(i+j.gt.n)then
+     keep_on = .False.
+     exit
+    endif
+    if(dabs(Aw(i)-Aw(i+j)).lt.shift)then
+     j+=1
+    else
+     keep_on=.False.
+     exit
+    endif
+   enddo
+   n_degen = j
+   list_degen(2,k) = list_degen(1,k)-1 + n_degen
+   j=0
+   keep_on = .True.
+   do while(keep_on)
+    if(i+j+1.gt.n)then
+     keep_on = .False.
+     exit
+    endif
+    if(dabs(Aw(i+j)-Aw(i+j+1)).lt.shift)then
+     Aw(i+j) += (j-n_degen/2) * shift
+     j+=1
+    else 
+     keep_on = .False.
+     exit
+    endif
+   enddo
+   Aw(i+n_degen-1) += (n_degen-1-n_degen/2) * shift
+   i+=n_degen
+  else 
+   i+=1
+  endif
+ enddo
+ n_degen_list = k
+
+end
+
+subroutine cancel_small_elmts(aw,n,shift)
+ implicit none
+ BEGIN_DOC
+ ! subroutines that splits the degeneracies of a matrix by adding a splitting of magnitude thr * n_degen/2
+ !
+ ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS
+ END_DOC
+ double precision,intent(inout) :: Aw(n,n)
+ double precision,intent(in)    :: shift
+ integer, intent(in) :: n
+ integer :: i,j
+ do i = 1, n
+  do j = 1, n
+   if(dabs(Aw(j,i)).lt.shift)then
+    Aw(j,i) = 0.d0
+   endif
+  enddo
+ enddo
+end
+
+subroutine check_bi_ortho(reigvec,leigvec,n,S,accu_nd)
+ implicit none
+ integer, intent(in) :: n
+ double precision,intent(in) :: reigvec(n,n),leigvec(n,n)
+ double precision, intent(out) :: S(n,n),accu_nd
+ BEGIN_DOC
+! retunrs the overlap matrix S = Leigvec^T Reigvec 
+!
+! and the square root of the sum of the squared off-diagonal elements of S
+ END_DOC
+ integer :: i,j
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n, n, n, 1.d0 &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1)  &
+            , 0.d0, S, size(S, 1) )
+  accu_nd = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i.ne.j) then
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+end
diff --git a/src/non_hermit_dav/gram_schmit.irp.f b/src/non_hermit_dav/gram_schmit.irp.f
new file mode 100644
index 00000000..520661b8
--- /dev/null
+++ b/src/non_hermit_dav/gram_schmit.irp.f
@@ -0,0 +1,56 @@
+subroutine bi_ortho_gram_schmidt(wi,vi,n,ni,wk,wk_schmidt)
+ implicit none
+ BEGIN_DOC
+! you enter with a set of "ni" BI-ORTHONORMAL vectors of length "n" 
+!
+! vi(j,i) = <j|vi>, wi(j,i) = <j|wi>, <vi|wj> = delta_{ij} S_ii, S_ii =<vi|wi>
+!
+! and a vector vk(j) = <j|vk>
+!
+! you go out with a vector vk_schmidt(j) = <j|vk_schmidt> 
+!
+! which is Gram-Schmidt orthonormalized with respect to the "vi"
+!
+! <vi|wk_schmidt> = 0 
+!
+! |wk_schmidt> = |wk> - \sum_{i=1}^ni (<vi|wk>/<vi|wi>) |wi> 
+!
+! according to Eq. (5), (6) of Computers Structures, Vol 56, No. 4, pp 605-613, 1995
+!
+! https://doi.org/10.1016/0045-7949(94)00565-K
+ END_DOC
+ integer, intent(in) :: n,ni
+ double precision, intent(in) :: wi(n,ni),vi(n,ni),wk(n)
+ double precision, intent(out):: wk_schmidt(n)
+ double precision :: vi_wk,u_dot_v,tmp,u_dot_u
+ double precision, allocatable :: sii(:)
+ integer :: i,j
+ allocate( sii(ni) )
+ wk_schmidt = wk
+ do i = 1, ni
+  sii(i) = u_dot_v(vi(1,i),wi(1,i),n)
+ enddo
+! do i = 1, n
+!  print*,i,'wk',wk(i)
+! enddo
+! print*,''
+! print*,''
+ do i = 1, ni
+!  print*,'i',i
+  ! Gram-Schmidt 
+  vi_wk = u_dot_v(vi(1,i),wk,n)
+  vi_wk = vi_wk / sii(i)
+!  print*,''
+  do j = 1, n
+!   print*,j,vi_wk,wi(j,i)
+   wk_schmidt(j) -= vi_wk * wi(j,i)
+  enddo
+ enddo
+ tmp = u_dot_u(wk_schmidt,n)
+ tmp = 1.d0/dsqrt(tmp)
+ wk_schmidt = tmp * wk_schmidt
+! do j = 1, n
+!  print*,j,'wk_scc',wk_schmidt(j)
+! enddo
+! pause
+end
diff --git a/src/non_hermit_dav/htilde_mat.irp.f b/src/non_hermit_dav/htilde_mat.irp.f
new file mode 100644
index 00000000..6d5101ac
--- /dev/null
+++ b/src/non_hermit_dav/htilde_mat.irp.f
@@ -0,0 +1,93 @@
+BEGIN_PROVIDER [ integer, n_mat]
+ implicit none
+ n_mat = 2
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, h_non_hermit, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, h_non_hermit_transp, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, reigvec_ht, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, leigvec_ht, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, eigval_ht, (n_mat)]
+&BEGIN_PROVIDER [ integer, n_real_ht, (n_mat)]
+ implicit none
+ integer :: i,j
+ do i = 1, n_mat
+  read(33,*)h_non_hermit(i,1:n_mat)
+ enddo
+ print*,''
+ print*,'H_mat '
+ print*,''
+ do i = 1, n_mat
+  write(*,'(1000(F16.10,X))')h_non_hermit(i,:)
+ enddo
+ do i = 1, n_mat
+  do j = 1, n_mat
+   h_non_hermit_transp(j,i) = h_non_hermit(i,j)
+  enddo
+ enddo
+ call non_hrmt_real_diag(n_mat,h_non_hermit,reigvec_ht,leigvec_ht,n_real_ht,eigval_ht)
+
+
+END_PROVIDER 
+
+
+subroutine hcalc_r_tmp(v,u,N_st,sze) ! v = H u
+  implicit none
+  BEGIN_DOC
+  ! Template of routine for the application of H
+  !
+  ! Here, it is done with the Hamiltonian matrix 
+  !
+  ! on the set of determinants of psi_det 
+  !
+  ! Computes $v = H | u \rangle$ 
+  !
+  END_DOC
+  integer, intent(in)              :: N_st,sze
+  double precision, intent(in)     :: u(sze,N_st)
+  double precision, intent(inout)  :: v(sze,N_st)
+  integer :: i,j,istate
+  v = 0.d0
+  do istate = 1, N_st
+   do j = 1, sze
+    do i = 1, sze
+      v(i,istate) += h_non_hermit(i,j) * u(j,istate)
+!      print*,i,j,h_non_hermit(i,j),u(j,istate)
+    enddo
+   enddo
+  enddo
+  print*,'HU'
+  do i = 1, sze
+   print*,v(i,1)
+  enddo
+end
+
+subroutine hcalc_l_tmp(v,u,N_st,sze) ! v = H^\dagger u
+  implicit none
+  BEGIN_DOC
+  ! Template of routine for the application of H
+  !
+  ! Here, it is done with the Hamiltonian matrix 
+  !
+  ! on the set of determinants of psi_det 
+  !
+  ! Computes $v = H | u \rangle$ 
+  !
+  END_DOC
+  integer, intent(in)              :: N_st,sze
+  double precision, intent(in)     :: u(sze,N_st)
+  double precision, intent(inout)  :: v(sze,N_st)
+  integer :: i,j,istate
+  v = 0.d0
+  do istate = 1, N_st
+   do j = 1, sze
+    do i = 1, sze
+      v(i,istate) += h_non_hermit_transp(i,j) * u(j,istate)
+    enddo
+   enddo
+  enddo
+  print*,'HU'
+  do i = 1, sze
+   print*,v(i,1)
+  enddo
+end
diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
new file mode 100644
index 00000000..0d652af4
--- /dev/null
+++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
@@ -0,0 +1,2907 @@
+subroutine lapack_diag_non_sym(n, A, WR, WI, VL, VR)
+
+  BEGIN_DOC
+  ! You enter with a general non hermitian matrix A(n,n) 
+  !
+  ! You get out with the real WR and imaginary part WI of the eigenvalues 
+  !
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  !
+  ! And the left VL and right VR eigenvectors 
+  !
+  ! VL(i,j) = <i|Psi_left(j)>  :: projection on the basis element |i> on the jth left  eigenvector 
+  !
+  ! VR(i,j) = <i|Psi_right(j)> :: projection on the basis element |i> on the jth right eigenvector 
+  !
+  ! The real part of the matrix A can be written as A = VR D VL^T
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n)
+
+  integer                       :: lda, ldvl, ldvr, LWORK, INFO
+  double precision, allocatable :: Atmp(:,:), WORK(:)
+
+  lda  = n
+  ldvl = n
+  ldvr = n
+
+  allocate( Atmp(n,n) )
+  Atmp(1:n,1:n) = A(1:n,1:n)
+
+  allocate(WORK(1))
+  LWORK = -1 ! to ask for the optimal size of WORK
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.gt.0)then
+    print*,'dgeev failed !!',INFO
+    stop
+  endif
+  LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK 
+  deallocate(WORK)
+
+  allocate(WORK(LWORK))
+
+  ! Actual diagonalization 
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dgeev failed !!', INFO
+    stop
+  endif
+
+  deallocate(Atmp, WORK)
+
+end subroutine lapack_diag_non_sym
+
+
+subroutine non_sym_diag_inv_right(n,A,leigvec,reigvec,n_real_eigv,eigval)
+ implicit none
+ BEGIN_DOC
+! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+!
+! of a non hermitian matrix A(n,n)
+!
+! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+ END_DOC
+ integer, intent(in) :: n
+ double precision, intent(in) :: A(n,n)
+ double precision, intent(out) :: reigvec(n,n),leigvec(n,n),eigval(n)
+ double precision, allocatable :: Aw(:,:)
+ integer, intent(out) :: n_real_eigv
+ print*,'Computing the left/right eigenvectors ...'
+ character*1 :: JOBVL,JOBVR
+ JOBVL = "V" ! computes the left  eigenvectors 
+ JOBVR = "V" ! computes the right eigenvectors 
+ double precision, allocatable :: WR(:),WI(:),Vl(:,:),VR(:,:),S(:,:),inv_reigvec(:,:)
+ integer :: i,j
+ integer :: n_good
+ integer, allocatable :: list_good(:), iorder(:)
+ double precision :: thr
+ thr = 1.d-10
+ ! Eigvalue(n) = WR(n) + i * WI(n)
+ allocate(WR(n),WI(n),VL(n,n),VR(n,n),Aw(n,n))
+ Aw = A
+ do i = 1, n
+  do j = i+1, n
+   if(dabs(Aw(j,j)-Aw(i,i)).lt.thr)then
+     Aw(j,j)+= thr
+     Aw(i,i)-= thr
+!    if(Aw(j,i) * A(i,j) .lt.0d0  )then
+!     if(dabs(Aw(j,i) * A(i,j)).lt.thr**(1.5d0))then
+!      print*,Aw(j,j),Aw(i,i)
+!      print*,Aw(j,i) , A(i,j)
+      Aw(j,i) = 0.d0
+      Aw(i,j) = Aw(j,i)
+!     endif
+!    endif
+   endif
+  enddo
+ enddo
+ call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+ ! You track the real eigenvalues 
+ n_good = 0
+! do i = 1, n
+!  write(*,'(100(F16.12,X))')A(:,i)
+! enddo
+ do i = 1, n
+  print*,'Im part of lambda = ',dabs(WI(i))
+  if(dabs(WI(i)).lt.thr)then
+   n_good += 1
+  else
+   print*,'Found an imaginary component to eigenvalue'
+   print*,'Re(i) + Im(i)',WR(i),WI(i)
+   write(*,'(100(F10.5,X))')VR(:,i)
+   write(*,'(100(F10.5,X))')VR(:,i+1)
+   write(*,'(100(F10.5,X))')VL(:,i)
+   write(*,'(100(F10.5,X))')VL(:,i+1)
+  endif
+ enddo
+ allocate(list_good(n_good),iorder(n_good))
+ n_good = 0
+ do i = 1, n
+  if(dabs(WI(i)).lt.thr)then
+   n_good += 1
+   list_good(n_good) = i
+   eigval(n_good) = WR(i)
+  endif
+ enddo
+ n_real_eigv = n_good 
+ do i = 1, n_good
+  iorder(i) = i
+ enddo
+ ! You sort the real eigenvalues 
+ call dsort(eigval,iorder,n_good)
+ do i = 1, n_real_eigv
+  do j = 1, n
+   reigvec(j,i) = VR(j,list_good(iorder(i)))
+   leigvec(j,i) = VL(j,list_good(iorder(i)))
+  enddo
+ enddo
+ allocate(inv_reigvec(n_real_eigv,n_real_eigv))
+! call get_pseudo_inverse(reigvec,n_real_eigv,n_real_eigv,n_real_eigv,inv_reigvec,n_real_eigv,thr)
+! do i = 1, n_real_eigv
+!  do j = 1, n
+!   leigvec(j,i) = inv_reigvec(i,j)
+!  enddo
+! enddo
+ allocate( S(n_real_eigv,n_real_eigv) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n_real_eigv, 1.d0                              &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+   do i = 1,n_real_eigv
+    write(*,'(100(F10.5,X))')S(:,i)
+   enddo
+! call lapack_diag_non_sym(n,S,WR,WI,VL,VR)
+! print*,'Eigenvalues of S'
+! do i = 1, n
+!  print*,WR(i),dabs(WI(i))
+! enddo
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n_real_eigv, 1.d0                              &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+! call get_inv_half_svd(S, n_real_eigv, inv_reigvec)
+
+  double precision :: accu_d,accu_nd
+  accu_nd = 0.d0
+  accu_d = 0.d0
+  do i = 1, n_real_eigv
+    do j = 1, n_real_eigv
+      if(i==j) then
+       accu_d += S(j,i) * S(j,i)
+      else
+       accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  print*,'accu_nd = ',accu_nd
+  if( accu_nd .lt. 1d-10 ) then
+    ! L x R is already bi-orthogonal
+    !print *, ' L & T bi-orthogonality: ok'
+    return
+  else
+   print*,'PB with bi-orthonormality!!'
+   stop
+  endif
+end
+
+subroutine lapack_diag_non_sym_new(n, A, WR, WI, VL, VR)
+
+  BEGIN_DOC
+  !
+  ! You enter with a general non hermitian matrix A(n,n) 
+  !
+  ! You get out with the real WR and imaginary part WI of the eigenvalues 
+  !
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  !
+  ! And the left VL and right VR eigenvectors 
+  !
+  ! VL(i,j) = <i|Psi_left(j)>  :: projection on the basis element |i> on the jth left  eigenvector 
+  !
+  ! VR(i,j) = <i|Psi_right(j)> :: projection on the basis element |i> on the jth right eigenvector 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n)
+
+  character*1                   :: JOBVL,JOBVR,BALANC,SENSE
+  integer                       :: ILO, IHI
+  integer                       :: lda, ldvl, ldvr, LWORK, INFO
+  double precision              :: ABNRM
+  integer,          allocatable :: IWORK(:)
+  double precision, allocatable :: WORK(:), SCALE_array(:), RCONDE(:), RCONDV(:)
+  double precision, allocatable :: Atmp(:,:)
+
+  allocate( Atmp(n,n) )
+  Atmp(1:n,1:n) = A(1:n,1:n)
+
+  JOBVL  = "V" ! computes the left  eigenvectors 
+  JOBVR  = "V" ! computes the right eigenvectors 
+  BALANC = "B" ! Diagonal scaling and Permutation for optimization
+  SENSE  = "B"
+  lda  = n
+  ldvl = n
+  ldvr = n
+  allocate(WORK(1),SCALE_array(n),RCONDE(n),RCONDV(n),IWORK(2*n-2))
+  LWORK = -1 ! to ask for the optimal size of WORK
+  call dgeevx(BALANC,JOBVL,JOBVR,SENSE,&  ! CHARACTERS 
+              n,Atmp,lda,              &  ! MATRIX TO DIAGONALIZE
+              WR,WI,                   &  ! REAL AND IMAGINARY PART OF EIGENVALUES 
+              VL,ldvl,VR,ldvr,         &  ! LEFT AND RIGHT EIGENVECTORS 
+              ILO,IHI,SCALE_array,ABNRM,RCONDE,RCONDV, & ! OUTPUTS OF OPTIMIZATION
+              WORK,LWORK,IWORK,INFO)
+
+  !if(INFO.gt.0)then
+  ! print*,'dgeev failed !!',INFO
+  if( INFO.ne.0 ) then
+    print *, 'dgeevx failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(int(work(1)), 1) ! this is the optimal size of WORK 
+  deallocate(WORK)
+  allocate(WORK(LWORK))
+  ! Actual dnon_hrmt_real_diag_newiagonalization 
+  call dgeevx(BALANC,JOBVL,JOBVR,SENSE,&  ! CHARACTERS 
+              n,Atmp,lda,              &  ! MATRIX TO DIAGONALIZE
+              WR,WI,                   &  ! REAL AND IMAGINARY PART OF EIGENVALUES 
+              VL,ldvl,VR,ldvr,         &  ! LEFT AND RIGHT EIGENVECTORS 
+              ILO,IHI,SCALE_array,ABNRM,RCONDE,RCONDV, & ! OUTPUTS OF OPTIMIZATION
+              WORK,LWORK,IWORK,INFO)
+
+  !if(INFO.ne.0)then
+  ! print*,'dgeev failed !!',INFO
+  if( INFO.ne.0 ) then
+    print *, 'dgeevx failed !!', INFO
+    stop
+  endif
+
+  deallocate( Atmp )
+  deallocate( WORK, SCALE_array, RCONDE, RCONDV, IWORK )
+
+end subroutine lapack_diag_non_sym_new
+
+! ---
+
+subroutine lapack_diag_non_sym_right(n, A, WR, WI, VR)
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VR(n,n)
+
+  integer                       :: i, lda, ldvl, ldvr, LWORK, INFO
+  double precision, allocatable :: Atmp(:,:), WORK(:), VL(:,:)
+
+  lda  = n
+  ldvl = 1
+  ldvr = n
+
+  allocate( Atmp(n,n), VL(1,1) )
+  Atmp(1:n,1:n) = A(1:n,1:n)
+
+  allocate(WORK(1))
+  LWORK = -1
+  call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.gt.0)then
+    print*,'dgeev failed !!',INFO
+    stop
+  endif
+
+  LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK 
+  deallocate(WORK)
+
+  allocate(WORK(LWORK))
+
+  ! Actual diagonalization 
+  call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dgeev failed !!', INFO
+    stop
+  endif
+
+  deallocate(Atmp, WORK, VL)
+
+! print *, ' JOBL = F'
+! print *, ' eigenvalues'
+! do i = 1, n
+!   write(*, '(1000(F16.10,X))') WR(i), WI(i)
+! enddo
+! print *, ' right eigenvect' 
+! do i = 1, n
+!   write(*, '(1000(F16.10,X))') VR(:,i)
+! enddo
+
+end subroutine lapack_diag_non_sym_right
+
+! ---
+
+subroutine non_hrmt_real_diag(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j, n_good
+  double precision              :: thr, threshold, accu_d, accu_nd
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:), S(:,:), S_inv_half_tmp(:,:)
+
+  print*, ' Computing the left/right eigenvectors with lapack ...'
+
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  Aw = A
+  !print *, ' matrix to diagonalize', Aw
+  call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR)
+
+  ! ---
+  ! You track the real eigenvalues 
+
+  thr = 1d-15
+
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.thr) then
+      n_good += 1
+    else
+      print*, ' Found an imaginary component to eigenvalue'
+      print*, ' Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+
+  allocate(list_good(n_good), iorder(n_good))
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.thr) then
+      n_good += 1
+      list_good(n_good) = i
+      eigval(n_good) = WR(i)
+    endif
+  enddo
+  n_real_eigv = n_good
+  do i = 1, n_good
+   iorder(i) = i
+  enddo
+
+  ! You sort the real eigenvalues 
+  call dsort(eigval, iorder, n_good)
+  do i = 1, n_real_eigv
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = Vl(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+! print *, ' ordered eigenvalues'
+! print *, ' right eigenvect' 
+! do i = 1, n
+!   print *, i, eigval(i)
+!   write(*, '(1000(F16.10,X))') reigvec(:,i)
+! enddo
+
+  ! ---
+
+  allocate( S(n_real_eigv,n_real_eigv), S_inv_half_tmp(n_real_eigv,n_real_eigv) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n_real_eigv, 1.d0 &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1)  &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n_real_eigv
+    do j = 1, n_real_eigv
+      if(i==j) then
+        accu_d += S(j,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  threshold = 1.d-15
+  if( (accu_nd .gt. threshold) .or. (dabs(accu_d-dble(n_real_eigv)) .gt. threshold) ) then
+
+    print*, ' sum of off-diag S elements = ', accu_nd
+    print*, ' Should be zero '
+    print*, ' sum of     diag S elements = ', accu_d
+    print*, ' Should be ',n
+    print*, ' Not bi-orthonormal !!'
+    print*, ' Notice that if you are interested in ground state it is not a problem :)'
+  endif
+
+end subroutine non_hrmt_real_diag
+
+! ---
+
+subroutine lapack_diag_general_non_sym(n, A, B, WR, WI, VL, VR)
+
+  BEGIN_DOC
+  ! You enter with a general non hermitian matrix A(n,n) and another B(n,n)
+  !
+  ! You get out with the real WR and imaginary part WI of the eigenvalues 
+  !
+  ! Eigvalue(n) = (WR(n) + i * WI(n))
+  !
+  ! And the left VL and right VR eigenvectors 
+  !
+  ! VL(i,j) = <i|Psi_left(j)>  :: projection on the basis element |i> on the jth left  eigenvector 
+  !
+  ! VR(i,j) = <i|Psi_right(j)> :: projection on the basis element |i> on the jth right eigenvector 
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n), B(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n)
+
+  integer                       :: lda, ldvl, ldvr, LWORK, INFO
+  integer                       :: n_good
+  double precision, allocatable :: WORK(:)
+  double precision, allocatable :: Atmp(:,:)
+
+  lda  = n
+  ldvl = n
+  ldvr = n
+
+  allocate( Atmp(n,n) )
+  Atmp(1:n,1:n) = A(1:n,1:n)
+
+  allocate(WORK(1))
+  LWORK = -1 
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.gt.0) then
+    print*,'dgeev failed !!',INFO
+    stop
+  endif
+
+  LWORK = max(int(WORK(1)), 1) 
+  deallocate(WORK)
+
+  allocate(WORK(LWORK))
+
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dgeev failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, Atmp )
+
+end subroutine lapack_diag_general_non_sym
+
+! ---
+
+subroutine non_hrmt_general_real_diag(n, A, B, reigvec, leigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n) and B(n,n) 
+  !
+  ! A reigvec = eigval * B * reigvec
+  !
+  ! (A)^\dagger leigvec = eigval * B * leigvec
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n), B(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  integer, allocatable          :: list_good(:), iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:)
+  double precision, allocatable :: Aw(:,:), Bw(:,:)
+
+  print*,'Computing the left/right eigenvectors ...'
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), Bw(n,n))
+  Aw = A
+  Bw = B
+
+  call lapack_diag_general_non_sym(n, A, B, WR, WI, VL, VR)
+
+  ! You track the real eigenvalues 
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)) .lt. 1.d-12) then
+      n_good += 1
+    else
+      print*,'Found an imaginary component to eigenvalue'
+      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+
+  allocate(list_good(n_good), iorder(n_good))
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-12)then
+      n_good += 1
+      list_good(n_good) = i
+      eigval(n_good) = WR(i)
+    endif
+  enddo
+  n_real_eigv = n_good 
+  do i = 1, n_good
+   iorder(i) = i
+  enddo
+
+  ! You sort the real eigenvalues 
+  call dsort(eigval, iorder, n_good)
+  print*,'n_real_eigv = ', n_real_eigv
+  print*,'n           = ', n
+  do i = 1, n_real_eigv
+    print*,i,'eigval(i) = ', eigval(i) 
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = Vl(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+end subroutine non_hrmt_general_real_diag
+
+! ---
+
+subroutine impose_biorthog_qr(m, n, thr_d, thr_nd, Vl, Vr)
+
+  implicit none 
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(inout) :: Vl(m,n), Vr(m,n)
+
+  integer                         :: i, j
+  integer                         :: LWORK, INFO
+  double precision                :: accu_nd, accu_d
+  double precision, allocatable   :: TAU(:), WORK(:)
+  double precision, allocatable   :: S(:,:), R(:,:), tmp(:,:)
+
+  ! ---
+
+  call check_biorthog_binormalize(m, n, Vl, Vr, thr_d, thr_nd, .false.)
+  
+  ! ---
+  
+  allocate(S(n,n))
+  call dgemm( 'T', 'N', n, n, m, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) then
+        accu_d += S(j,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n))/dble(n) .lt. thr_d)) then
+    print *, ' bi-orthogonal vectors without QR !'
+    deallocate(S)
+    return
+  endif
+
+  ! -------------------------------------------------------------------------------------
+  !                           QR factorization of S: S = Q x R
+
+
+  print *, ' apply QR decomposition ...'
+
+  allocate( TAU(n), WORK(1) )
+
+  LWORK = -1
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  ! save the upper triangular R
+  allocate( R(n,n) )
+  R(:,:) = S(:,:)
+
+  ! get Q
+  LWORK = -1
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, TAU )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               get bi-orhtog left & right vectors:
+  !                                           Vr' = Vr x inv(R) 
+  !                                           Vl' = inv(Q) x Vl =  Q.T   x Vl 
+
+  ! Q.T x Vl, where Q = S
+
+  allocate( tmp(n,m) )
+  call dgemm( 'T', 'T', n, m, n, 1.d0        &
+            , S, size(S, 1), Vl, size(Vl, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  do i = 1, n
+    do j = 1, m
+      Vl(j,i) = tmp(i,j)
+    enddo
+  enddo
+  deallocate(tmp)
+
+  ! ---
+
+  ! inv(R) 
+  !print *, ' inversing upper triangular matrix ...'
+  call dtrtri("U", "N", n, R, n, INFO)
+  if(INFO .ne. 0) then
+    print*,'dtrtri failed !!', INFO
+    stop
+  endif
+  !print *, ' inversing upper triangular matrix OK' 
+
+  do i = 1, n-1
+    do j = i+1, n
+      R(j,i) = 0.d0
+    enddo
+  enddo
+
+  !print *, ' inv(R):'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') R(i,:)
+  !enddo
+
+  ! Vr x inv(R) 
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'N', m, n, n, 1.d0        &
+            , Vr, size(Vr, 1), R, size(R, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate( R )
+
+  do i = 1, n
+    do j = 1, m
+      Vr(j,i) = tmp(j,i)
+    enddo
+  enddo
+  deallocate(tmp)
+
+  return
+end subroutine impose_biorthog_qr
+
+! ---
+
+subroutine impose_biorthog_lu(m, n, Vl, Vr, S)
+
+  implicit none 
+  integer, intent(in)             :: m, n
+  double precision, intent(inout) :: Vl(m,n), Vr(m,n), S(n,n)
+
+  integer                         :: i, j
+  integer                         :: INFO
+  double precision                :: nrm
+  integer,          allocatable   :: IPIV(:)
+  double precision, allocatable   :: L(:,:), tmp(:,:), vectmp(:)
+  !double precision, allocatable   :: T(:,:), ll(:,:), rr(:,:), tt(:,:)
+
+  !allocate( T(n,n) )
+  !T(:,:) = S(:,:)
+
+  print *, ' apply LU decomposition ...'
+
+  ! -------------------------------------------------------------------------------------
+  !                           LU factorization of S: S = P x L x U
+
+  allocate( IPIV(n) )
+
+  call dgetrf(n, n, S, n, IPIV, INFO)
+  if(INFO .ne. 0) then
+    print*, 'dgetrf failed !!', INFO
+    stop
+  endif
+
+  ! check | S - P x L x U |
+  !allocate( ll(n,n), rr(n,n), tmp(n,n) )
+  !ll = S
+  !rr = S
+  !do i = 1, n-1
+  !  ll(i,i) = 1.d0
+  !  do j = i+1, n
+  !    ll(i,j) = 0.d0
+  !    rr(j,i) = 0.d0
+  !  enddo
+  !enddo
+  !ll(n,n) = 1.d0
+  !call dgemm( 'N', 'N', n, n, n, 1.d0          &
+  !          , ll, size(ll, 1), rr, size(rr, 1) &
+  !          , 0.d0, tmp, size(tmp, 1) )
+  ! deallocate(ll, rr)
+  !allocate( vectmp(n) )
+  !do j = n-1, 1, -1
+  !  i = IPIV(j)
+  !  if(i.ne.j) then
+  !    print *, j, i
+  !    vectmp(:) = tmp(i,:)
+  !    tmp(i,:)  = tmp(j,:)
+  !    tmp(j,:)  = vectmp(:)
+  !  endif
+  !enddo
+  !deallocate( vectmp )
+  !nrm = 0.d0
+  !do i = 1, n
+  !  do j = 1, n
+  !    nrm += dabs(tmp(j,i) - T(j,i))
+  !  enddo
+  !enddo
+  !deallocate( tmp )
+  !print*, '|L.T x R - S| =', nrm
+  !stop
+
+  ! ------
+  ! inv(L) 
+  ! ------
+
+  allocate( L(n,n) )
+  L(:,:) = S(:,:)
+
+  call dtrtri("L", "U", n, L, n, INFO)
+  if(INFO .ne. 0) then
+    print*,  'dtrtri failed !!', INFO
+    stop
+  endif
+  do i = 1, n-1
+    L(i,i) = 1.d0
+    do j = i+1, n
+      L(i,j) = 0.d0
+    enddo
+  enddo
+  L(n,n) = 1.d0
+
+  ! ------
+  ! inv(U) 
+  ! ------
+  
+  call dtrtri("U", "N", n, S, n, INFO)
+  if(INFO .ne. 0) then
+    print*,  'dtrtri failed !!', INFO
+    stop
+  endif
+
+  do i = 1, n-1
+    do j = i+1, n
+      S(j,i) = 0.d0
+    enddo
+  enddo
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               get bi-orhtog left & right vectors:
+  !                                           Vr' = Vr x inv(U) 
+  !                                           Vl' = inv(L) x inv(P) x Vl
+
+  ! inv(P) x Vl
+  allocate( vectmp(n) )
+  do j = n-1, 1, -1
+    i = IPIV(j)
+    if(i.ne.j) then
+      vectmp(:) = L(:,j)
+      L(:,j)    = L(:,i)
+      L(:,i)    = vectmp(:)
+    endif
+  enddo
+  deallocate( vectmp )
+
+  ! Vl'
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'T', m, n, n, 1.d0        &
+            , Vl, size(Vl, 1), L, size(L, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(L)
+
+  Vl = tmp
+  deallocate(tmp)
+
+  ! ---
+
+  ! Vr x inv(U) 
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'N', m, n, n, 1.d0        &
+            , Vr, size(Vr, 1), S, size(S, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  Vr = tmp
+  deallocate(tmp)
+
+  !allocate( tmp(n,n) )
+  !call dgemm( 'T', 'N', n, n, m, 1.d0          &
+  !          , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+  !          , 0.d0, tmp, size(tmp, 1) )
+  !nrm = 0.d0
+  !do i = 1, n
+  !  do j = 1, n
+  !    nrm += dabs(tmp(j,i))
+  !  enddo
+  !enddo
+  !deallocate( tmp )
+  !print*, '|L.T x R| =', nrm
+  !stop
+
+  return
+end subroutine impose_biorthog_lu
+
+! ---
+
+subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, stop_ifnot)
+
+  implicit none
+  integer,          intent(in)  :: n, m
+  logical,          intent(in)  :: stop_ifnot
+  double precision, intent(in)  :: A(n,n), eigval(m), leigvec(n,m), reigvec(n,m), thr_diag, thr_norm
+ 
+  integer                       :: i, j
+  double precision              :: tmp, tmp_abs, tmp_nrm, tmp_rel, tmp_dif
+  double precision              :: V_nrm, U_nrm
+  double precision, allocatable :: Mtmp(:,:)
+
+  allocate( Mtmp(n,m) )
+  
+  ! ---
+
+  Mtmp = 0.d0
+  call dgemm( 'N', 'N', n, m, n, 1.d0                  &
+            , A, size(A, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, Mtmp, size(Mtmp, 1) )
+
+  V_nrm   = 0.d0
+  tmp_nrm = 0.d0
+  tmp_abs = 0.d0
+  do j = 1, m
+    
+    tmp   = 0.d0
+    U_nrm = 0.d0
+    do i = 1, n
+      tmp     = tmp     + dabs(Mtmp(i,j) - eigval(j) * reigvec(i,j))
+      tmp_nrm = tmp_nrm + dabs(Mtmp(i,j))
+      U_nrm   = U_nrm   + reigvec(i,j) * reigvec(i,j)
+    enddo
+
+    tmp_abs = tmp_abs + tmp
+    V_nrm   = V_nrm   + U_nrm 
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
+
+  enddo
+
+  if(tmp_abs.lt.10.d-10)then
+   tmp_rel = thr_diag/10.d0
+  else
+   tmp_rel = tmp_abs / tmp_nrm
+  endif
+  tmp_dif = dabs(V_nrm - dble(m))
+
+  if( stop_ifnot .and. ((tmp_rel .gt. thr_diag) .or. (tmp_dif .gt. thr_norm)) ) then
+    print *, ' error in right-eigenvectors'
+    print *, ' err tol   = ',thr_diag, thr_norm
+    print *, '(tmp_rel .gt. thr_diag) = ',(tmp_rel .gt. thr_diag)
+    print *, '(tmp_dif .gt. thr_norm) = ',(tmp_dif .gt. thr_norm)
+    print *, ' err estim = ', tmp_abs, tmp_rel
+    print *, ' CR norm   = ', V_nrm 
+    stop
+  endif
+
+  ! ---
+
+  Mtmp = 0.d0
+  call dgemm( 'T', 'N', n, m, n, 1.d0                  &
+            , A, size(A, 1), leigvec, size(leigvec, 1) &
+            , 0.d0, Mtmp, size(Mtmp, 1) )
+
+  V_nrm   = 0.d0
+  tmp_nrm = 0.d0
+  tmp_abs = 0.d0
+  do j = 1, m
+
+    tmp   = 0.d0
+    U_nrm = 0.d0
+    do i = 1, n
+      tmp     = tmp     + dabs(Mtmp(i,j) - eigval(j) * leigvec(i,j))
+      tmp_nrm = tmp_nrm + dabs(Mtmp(i,j))
+      U_nrm   = U_nrm   + leigvec(i,j) * leigvec(i,j)
+    enddo
+
+    tmp_abs = tmp_abs + tmp
+    V_nrm   = V_nrm   + U_nrm 
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
+
+  enddo
+
+  if(tmp_abs.lt.10.d-10)then
+   tmp_rel = thr_diag/10.d0
+  else
+   tmp_rel = tmp_abs / tmp_nrm
+  endif
+  if( stop_ifnot .and. ((tmp_rel .gt. thr_diag) .or. (tmp_dif .gt. thr_norm)) ) then
+    print *, ' error in left-eigenvectors'
+    print *, ' err tol   = ',thr_diag, thr_norm
+    print *, '(tmp_rel .gt. thr_diag) = ',(tmp_rel .gt. thr_diag)
+    print *, '(tmp_dif .gt. thr_norm) = ',(tmp_dif .gt. thr_norm)
+    print *, ' err estim = ', tmp_abs, tmp_rel
+    print *, ' CR norm   = ', V_nrm 
+    stop
+  endif
+
+  ! ---
+
+  deallocate( Mtmp )
+
+end subroutine check_EIGVEC
+
+! ---
+
+subroutine check_degen(n, m, eigval, leigvec, reigvec)
+
+  implicit none
+  integer,          intent(in)    :: n, m
+  double precision, intent(in)    :: eigval(m)
+  double precision, intent(inout) :: leigvec(n,m), reigvec(n,m)
+ 
+  integer                         :: i, j
+  double precision                :: ei, ej, de, de_thr, accu_nd
+  double precision, allocatable   :: S(:,:)
+
+  de_thr = 1d-6
+
+  do i = 1, m-1
+    ei = eigval(i)
+
+    do j = i+1, m
+      ej = eigval(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+
+        leigvec(:,i) = 0.d0
+        leigvec(:,j) = 0.d0
+        leigvec(i,i) = 1.d0
+        leigvec(j,j) = 1.d0
+
+        reigvec(:,i) = 0.d0
+        reigvec(:,j) = 0.d0
+        reigvec(i,i) = 1.d0
+        reigvec(j,j) = 1.d0
+
+      endif
+
+    enddo
+  enddo
+
+  ! ---
+
+  allocate( S(m,m) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', m, m, n, 1.d0                              &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  deallocate( S )
+
+  print *, ' check_degen: L & T bi-orthogonality: ok'
+  print *, ' accu_nd = ', accu_nd
+
+  if( accu_nd .lt. 1d-8 ) then
+    return
+  else
+    stop
+  endif
+
+end subroutine check_degen
+
+! ---
+
+subroutine impose_weighted_orthog_svd(n, m, W, C)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: C(n,m), W(n,n)
+
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+
+  !print *, ' apply SVD to orthogonalize & normalize weighted vectors'
+
+  ! ---
+
+  ! C.T x W x C
+  allocate(S(m,m))
+  allocate(tmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0      &
+            , C, size(C, 1), W, size(W, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0          &
+            , tmp, size(tmp, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(tmp)
+
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- C x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , C, size(C, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(U, Vt)
+
+  ! C <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      C(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  ! C.T x W x C
+  allocate(S(m,m))
+  allocate(tmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0      &
+            , C, size(C, 1), W, size(W, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0          &
+            , tmp, size(tmp, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(tmp)
+
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  deallocate(S)
+
+  ! ---
+
+end subroutine impose_weighted_orthog_svd
+
+! ---
+
+subroutine impose_orthog_svd(n, m, C)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: C(n,m)
+
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+
+  !print *, ' apply SVD to orthogonalize & normalize vectors'
+
+  ! ---
+
+  allocate(S(m,m))
+
+  ! S = C.T x C
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , C, size(C, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      write(*,*) ' D(i) = ', D(i)
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    write(*,*) ' try with Graham-Schmidt'
+    stop
+  endif
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- C x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , C, size(C, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(U, Vt)
+
+  ! C <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      C(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  allocate(S(m,m))
+
+  ! S = C.T x C
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , C, size(C, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  deallocate(S)
+
+  ! ---
+
+end subroutine impose_orthog_svd
+
+! ---
+
+subroutine impose_orthog_svd_overlap(n, m, C, overlap)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(in   ) :: overlap(n,n)
+  double precision, intent(inout) :: C(n,m)
+
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:), Stmp(:,:)
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+
+  print *, ' apply SVD to orthogonalize vectors'
+
+  ! ---
+
+  ! S = C.T x overlap x C
+  allocate(S(m,m), Stmp(n,m))
+  call dgemm( 'N', 'N', n, m, n, 1.d0                  &
+            , overlap, size(overlap, 1), C, size(C, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'T', 'N', m, m, n, 1.d0            &
+            , C, size(C, 1), Stmp, size(Stmp, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- C x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , C, size(C, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(U, Vt)
+
+  ! C <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      C(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  ! S = C.T x overlap x C
+  allocate(S(m,m), Stmp(n,m))
+  call dgemm( 'N', 'N', n, m, n, 1.d0                  &
+            , overlap, size(overlap, 1), C, size(C, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'T', 'N', m, m, n, 1.d0            &
+            , C, size(C, 1), Stmp, size(Stmp, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+  deallocate(S)
+
+end subroutine impose_orthog_svd_overlap
+
+! ---
+
+subroutine impose_orthog_GramSchmidt(n, m, C)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: C(n,m)
+
+  integer                         :: i, j, k
+  double precision                :: Ojk, Ojj, fact_ct
+  double precision, allocatable   :: S(:,:)
+
+  print *, ''
+  print *, ' apply Gram-Schmidt to orthogonalize & normalize vectors'
+  print *, ''
+
+  ! ---
+
+  allocate(S(m,m))
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , C, size(C, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  print *, ' eigenvec overlap bef Gram-Schmidt: '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+
+  ! ---
+
+  do k = 2, m
+    do j = 1, k-1
+
+      Ojk = 0.d0    
+      Ojj = 0.d0    
+      do i = 1, n
+        Ojk = Ojk + C(i,j) * C(i,k)
+        Ojj = Ojj + C(i,j) * C(i,j)
+      enddo
+      fact_ct = Ojk / Ojj
+
+      do i = 1, n
+        C(i,k) = C(i,k) - fact_ct * C(i,j)
+      enddo
+
+    enddo
+  enddo
+
+  do k = 1, m
+    fact_ct = 0.d0    
+    do i = 1, n
+      fact_ct = fact_ct + C(i,k) * C(i,k)
+    enddo
+    fact_ct = dsqrt(fact_ct)
+    do i = 1, n
+      C(i,k) = C(i,k) / fact_ct
+    enddo
+  enddo
+
+  ! ---
+
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , C, size(C, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  print *, ' eigenvec overlap aft Gram-Schmidt: '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+
+  deallocate(S)
+
+  ! ---
+
+end subroutine impose_orthog_GramSchmidt
+
+! ---
+
+subroutine impose_orthog_ones(n, deg_num, C)
+
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  integer,          intent(in)    :: deg_num(n)
+  double precision, intent(inout) :: C(n,n)
+
+  integer                         :: i, j, ii, di, dj
+
+  print *, ''
+  print *, ' orthogonalize vectors by hand'
+  print *, ''
+
+  do i = 1, n-1
+    di = deg_num(i)
+
+    if(di .gt. 1) then
+
+      do ii = 1, di
+        C(:     ,i+ii-1) = 0.d0
+        C(i+ii-1,i+ii-1) = 1.d0
+      enddo
+
+      do j = i+di+1, n
+        dj = deg_num(j)
+        if(dj .eq. di) then
+          do ii = 1, dj
+            C(:,     j+ii-1) = 0.d0
+            C(j+ii-1,j+ii-1) = 1.d0
+          enddo
+        endif
+      enddo
+
+    endif
+  enddo 
+
+end subroutine impose_orthog_ones
+
+! ---
+
+subroutine impose_orthog_degen_eigvec(n, e0, C0)
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: e0(n)
+  double precision, intent(inout) :: C0(n,n)
+
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: C(:,:)
+
+  ! ---
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+
+  de_thr = thr_degen_tc
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+
+  
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
+
+  ! ---
+
+!  call impose_orthog_ones(n, deg_num, C0)
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+    !if(m.eq.3) then
+  
+      allocate(C(n,m))
+      do j = 1, m
+        C(1:n,j) = C0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      ! C <= C U sigma^-0.5
+      call impose_orthog_svd(n, m, C)
+
+      ! ---
+
+      ! C = I
+      !C = 0.d0
+      !do j = 1, m
+      !  C(i+j-1,j) = 1.d0
+      !enddo
+
+      ! ---
+
+!      call impose_orthog_GramSchmidt(n, m, C)
+
+      ! ---
+
+      do j = 1, m
+        C0(1:n,i+j-1) = C(1:n,j)
+      enddo
+      deallocate(C)
+
+    endif
+  enddo
+
+end subroutine impose_orthog_degen_eigvec 
+
+! ---
+
+subroutine get_halfinv_svd(n, S)
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(inout) :: S(n,n)
+
+  integer                         :: num_linear_dependencies
+  integer                         :: i, j, k
+  double precision                :: accu_d, accu_nd, thresh
+  double precision, parameter     :: threshold = 1.d-6
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+  double precision, allocatable   :: S0(:,:), Stmp(:,:), Stmp2(:,:)
+
+  allocate( S0(n,n) )
+  S0(1:n,1:n) = S(1:n,1:n)
+
+  allocate(U(n,n), Vt(n,n), D(n))
+  call svd(S, n, U, n, D, Vt, n, n, n)
+
+  num_linear_dependencies = 0
+  do i = 1, n
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  write(*,*) ' linear dependencies', num_linear_dependencies
+
+  S(:,:) = 0.d0
+  do k = 1, n
+    if(D(k) /= 0.d0) then
+      do j = 1, n
+        do i = 1, n
+          S(i,j) = S(i,j) + U(i,k) * D(k) * Vt(k,j)
+        enddo
+      enddo
+    endif
+  enddo
+  deallocate(U, D, Vt)
+
+  allocate( Stmp(n,n), Stmp2(n,n) )
+  Stmp  = 0.d0
+  Stmp2 = 0.d0
+  ! S^-1/2 x S
+  call dgemm( 'N', 'N', n, n, n, 1.d0        &
+            , S, size(S, 1), S0, size(S0, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  ! ( S^-1/2 x S ) x S^-1/2
+  call dgemm( 'N', 'N', n, n, n, 1.d0            &
+            , Stmp, size(Stmp, 1), S, size(S, 1) &
+            , 0.d0, Stmp2, size(Stmp2, 1) )
+
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  thresh  = 1.d-10
+  do i = 1, n
+    do j = 1, n
+      if(i==j) then
+       accu_d += Stmp2(j,i)
+      else 
+       accu_nd = accu_nd + Stmp2(j,i) * Stmp2(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+  if( accu_nd.gt.thresh .or. dabs(accu_d-dble(n)).gt.thresh) then
+    print*, ' after S^-1/2: sum of off-diag S elements = ', accu_nd
+    print*, ' after S^-1/2: sum of     diag S elements = ', accu_d
+    do i = 1, n
+      write(*,'(1000(F16.10,X))') Stmp2(i,:)
+    enddo
+    stop
+  endif
+
+  deallocate(S0, Stmp, Stmp2)
+
+end subroutine get_halfinv_svd
+
+! ---
+
+subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
+
+  implicit none
+  
+  integer,          intent(in)    :: n, m
+  logical,          intent(in)    :: stop_ifnot
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(inout) :: Vl(n,m), Vr(n,m)
+
+  integer                         :: i, j
+  double precision                :: accu_d, accu_nd, s_tmp
+  double precision, allocatable   :: S(:,:)
+
+  !print *, ' check bi-orthonormality'
+
+  ! ---
+
+  allocate(S(m,m))
+  call dgemm( 'T', 'N', m, m, n, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  !print *, ' overlap matrix before:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! S(i,i) = -1
+  do i = 1, m
+    if(S(i,i) .lt. 0.d0) then
+    !if( (S(i,i) + 1.d0) .lt. thr_d ) then
+      do j = 1, n
+        Vl(j,i) = -1.d0 * Vl(j,i)
+      enddo
+      !S(i,i) = 1.d0
+      S(i,i) = -S(i,i)
+    endif
+  enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + S(i,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd) / dble(m)
+  !print*, '    diag acc bef = ', accu_d
+  !print*, ' nondiag acc bef = ', accu_nd
+
+  ! ---
+
+  if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+
+    do i = 1, m
+      if(S(i,i) <= 0.d0) then
+        print *, ' overap negative'
+        print *, i, S(i,i)
+        exit
+      endif
+      if(dabs(S(i,i) - 1.d0) .gt. thr_d) then
+        s_tmp = 1.d0 / dsqrt(S(i,i))
+        do j = 1, n
+          Vl(j,i) = Vl(j,i) * s_tmp 
+          Vr(j,i) = Vr(j,i) * s_tmp 
+        enddo
+      endif
+
+    enddo
+
+  endif
+
+  ! ---
+
+  call dgemm( 'T', 'N', m, m, n, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  !print *, ' overlap matrix after:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + S(i,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd) / dble(m)
+  !print *, '    diag acc aft = ', accu_d
+  !print *, ' nondiag acc aft = ', accu_nd
+
+  deallocate(S)
+
+  ! ---
+
+  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) ) then
+    print *, accu_nd, thr_nd 
+    print *, dabs(accu_d-dble(m))/dble(m), thr_d
+    print *, ' biorthog_binormalize failed !'
+    stop
+  endif
+
+end subroutine check_biorthog_binormalize
+
+! ---
+
+subroutine check_weighted_biorthog(n, m, W, Vl, Vr, thr_d, thr_nd, accu_d, accu_nd, S, stop_ifnot)
+
+  implicit none
+  
+  integer,          intent(in)  :: n, m
+  double precision, intent(in)  :: Vl(n,m), Vr(n,m), W(n,n)
+  double precision, intent(in)  :: thr_d, thr_nd
+  logical,          intent(in)  :: stop_ifnot
+  double precision, intent(out) :: accu_d, accu_nd, S(m,m)
+
+  integer                       :: i, j
+  double precision, allocatable :: SS(:,:), tmp(:,:)
+
+  print *, ' check weighted bi-orthogonality'
+
+  ! ---
+
+  allocate(tmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0        &
+            , Vl, size(Vl, 1), W, size(W, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0            &
+            , tmp, size(tmp, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(tmp)
+
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + dabs(S(i,i))
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  print *, ' accu_nd = ', accu_nd
+  print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+
+  ! ---
+
+  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+    print *, ' non bi-orthogonal vectors !'
+    print *, ' accu_nd = ', accu_nd
+    print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+    !print *, ' overlap matrix:'
+    !do i = 1, m
+    !  write(*,'(1000(F16.10,X))') S(i,:)
+    !enddo
+    stop
+  endif
+
+end subroutine check_weighted_biorthog
+
+! ---
+
+subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_ifnot)
+
+  implicit none
+  
+  integer,          intent(in)  :: n, m
+  double precision, intent(in)  :: Vl(n,m), Vr(n,m)
+  logical,          intent(in)  :: stop_ifnot
+  double precision, intent(in)  :: thr_d, thr_nd
+  double precision, intent(out) :: accu_d, accu_nd, S(m,m)
+
+  integer                       :: i, j
+  double precision, allocatable :: SS(:,:)
+
+  !print *, ' check bi-orthogonality'
+
+  ! ---
+
+  call dgemm( 'T', 'N', m, m, n, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + dabs(S(i,i))
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd) / dble(m)
+
+  !print *, ' accu_nd = ', accu_nd
+  !print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+
+  ! ---
+
+  if(stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) then
+    print *, ' non bi-orthogonal vectors !'
+    print *, ' accu_nd = ', accu_nd
+    print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+    !print *, ' overlap matrix:'
+    !do i = 1, m
+    !  write(*,'(1000(F16.10,X))') S(i,:)
+    !enddo
+    stop
+  endif
+
+end subroutine check_biorthog
+
+! ---
+
+subroutine check_orthog(n, m, V, accu_d, accu_nd, S)
+
+  implicit none
+  
+  integer,          intent(in)  :: n, m
+  double precision, intent(in)  :: V(n,m)
+  double precision, intent(out) :: accu_d, accu_nd, S(m,m)
+
+  integer                       :: i, j
+
+  S = 0.d0
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , V, size(V, 1), V, size(V, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  !print *, ''
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+  !print *, ''
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + dabs(S(i,i))
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  !print*, '    diag acc: ', accu_d
+  !print*, ' nondiag acc: ', accu_nd
+
+end subroutine check_orthog
+
+! ---
+
+subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0)
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: e0(n)
+  double precision, intent(inout) :: L0(n,n), R0(n,n)
+
+  logical                         :: complex_root
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  double precision                :: accu_d, accu_nd
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), S(:,:), S_inv_half(:,:)
+
+  ! ---
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+
+  de_thr = thr_degen_tc
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+  
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i), e0(i)
+  !  endif
+  !enddo
+
+  ! ---
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+  
+      allocate(L(n,m))
+      allocate(R(n,m))
+
+      do j = 1, m
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      call impose_orthog_svd(n, m, L)
+      call impose_orthog_svd(n, m, R)
+      !call impose_orthog_GramSchmidt(n, m, L)
+      !call impose_orthog_GramSchmidt(n, m, R)
+
+      ! ---
+
+      !allocate(S(m,m))
+      !call dgemm( 'T', 'N', m, m, n, 1.d0      &
+      !          , L, size(L, 1), R, size(R, 1) &
+      !          , 0.d0, S, size(S, 1) )
+      !allocate(S_inv_half(m,m))
+      !call get_inv_half_nonsymmat_diago(S, m, S_inv_half, complex_root)
+      !if(complex_root) then
+      !  print*, ' complex roots in inv_half !!! '
+      !  stop
+      !endif
+      !call bi_ortho_s_inv_half(m, L, R, S_inv_half)
+      !deallocate(S, S_inv_half)
+
+      call impose_biorthog_svd(n, m, L, R)
+
+      !call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R)
+
+      ! ---
+
+      do j = 1, m
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+
+      deallocate(L, R)
+
+    endif
+  enddo
+
+end subroutine impose_biorthog_degen_eigvec 
+
+! ---
+
+subroutine impose_orthog_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, L0, R0)
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(in)    :: e0(n)
+  double precision, intent(inout) :: L0(n,n), R0(n,n)
+
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  double precision                :: accu_d, accu_nd
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), S(:,:)
+
+  ! ---
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+
+  de_thr = thr_degen_tc
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+  
+  do i = 1, n
+    if(deg_num(i).gt.1) then
+      print *, ' degen on', i, deg_num(i)
+    endif
+  enddo
+
+  ! ---
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+  
+      allocate(L(n,m))
+      allocate(R(n,m))
+
+      do j = 1, m
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      call impose_orthog_svd(n, m, L)
+      call impose_orthog_svd(n, m, R)
+
+      ! ---
+  
+      call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R)
+
+      allocate(S(m,m))
+      call check_biorthog(n, m, L, R, accu_d, accu_nd, S, thr_d, thr_nd, .true.)
+      !call check_biorthog(n, m, L, L, accu_d, accu_nd, S, thr_d, thr_nd, .true.)
+      !call check_biorthog(n, m, R, R, accu_d, accu_nd, S, thr_d, thr_nd, .false.)
+      deallocate(S)
+
+      ! ---
+
+      do j = 1, m
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+
+      deallocate(L, R)
+
+    endif
+  enddo
+
+end subroutine impose_orthog_biorthog_degen_eigvec 
+
+! ---
+
+subroutine impose_unique_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, C0, W0, L0, R0)
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(in)    :: e0(n), W0(n,n), C0(n,n)
+  double precision, intent(inout) :: L0(n,n), R0(n,n)
+
+  logical                         :: complex_root
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), C(:,:)
+  double precision, allocatable   :: S(:,:), S_inv_half(:,:), tmp(:,:)
+
+  ! ---
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+
+  de_thr = thr_degen_tc
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+  
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
+
+  ! ---
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+  
+      allocate(L(n,m))
+      allocate(R(n,m))
+      allocate(C(n,m))
+
+      do j = 1, m
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+        C(1:n,j) = C0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      call impose_orthog_svd(n, m, L)
+      call impose_orthog_svd(n, m, R)
+
+      ! ---
+
+
+      ! TODO:
+      ! select C correctly via overlap
+      ! or via selecting degen in HF
+
+      !call max_overlap_qr(n, m, C, L)
+      !call max_overlap_qr(n, m, C, R)
+
+
+      allocate(tmp(m,n))
+      allocate(S(m,m))
+
+      call dgemm( 'T', 'N', m, n, n, 1.d0        &
+                , L, size(L, 1), W0, size(W0, 1) &
+                , 0.d0, tmp, size(tmp, 1) )
+      call dgemm( 'N', 'N', m, m, n, 1.d0          &
+                , tmp, size(tmp, 1), C, size(C, 1) &
+                , 0.d0, S, size(S, 1) )
+
+      call max_overlap_qr(n, m, S, L)
+      !call max_overlap_invprod(n, m, S, L)
+
+      call dgemm( 'T', 'N', m, n, n, 1.d0        &
+                , C, size(C, 1), W0, size(W0, 1) &
+                , 0.d0, tmp, size(tmp, 1) )
+      call dgemm( 'N', 'N', m, m, n, 1.d0          &
+                , tmp, size(tmp, 1), R, size(R, 1) &
+                , 0.d0, S, size(S, 1) )
+
+      call max_overlap_qr(n, m, S, R)
+      !call max_overlap_invprod(n, m, S, R)
+
+      deallocate(S, tmp)
+
+      ! ---
+  
+      allocate(S(m,m), S_inv_half(m,m))
+      call dgemm( 'T', 'N', m, m, n, 1.d0      &
+                , L, size(L, 1), R, size(R, 1) &
+                , 0.d0, S, size(S, 1) )
+      call get_inv_half_nonsymmat_diago(S, m, S_inv_half, complex_root)
+      if(complex_root)then
+        call impose_biorthog_svd(n, m, L, R)
+        !call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R)
+      else
+        call bi_ortho_s_inv_half(m, L, R, S_inv_half)
+      endif
+      deallocate(S, S_inv_half)
+
+      ! ---
+
+      do j = 1, m
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+
+      deallocate(L, R, C)
+
+    endif
+  enddo
+
+end subroutine impose_unique_biorthog_degen_eigvec 
+
+! ---
+
+subroutine max_overlap_qr(m, n, S0, V)
+
+  implicit none 
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: S0(n,n)
+  double precision, intent(inout) :: V(m,n)
+
+  integer                         :: i, j
+  integer                         :: LWORK, INFO
+  double precision, allocatable   :: TAU(:), WORK(:)
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+
+  allocate(S(n,n))
+  S = S0
+
+  ! ---
+
+  allocate( TAU(n), WORK(1) )
+
+  LWORK = -1
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  ! get Q in S matrix
+  LWORK = -1
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, TAU )
+
+  ! ---
+
+  ! V0.T <-- Q.T x V0.T, where Q = S
+
+  allocate( tmp(n,m) )
+
+  call dgemm( 'T', 'T', n, m, n, 1.d0      &
+            , S, size(S, 1), V, size(V, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(S)
+
+  do i = 1, n
+    do j = 1, m
+      V(j,i) = tmp(i,j)
+    enddo
+  enddo
+
+  deallocate(tmp)
+
+  ! ---
+
+  return
+end subroutine max_overlap_qr
+
+! ---
+
+subroutine max_overlap_invprod(n, m, S, V)
+
+  implicit none 
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: S(m,m)
+  double precision, intent(inout) :: V(n,m)
+
+  integer                         :: i
+  double precision, allocatable   :: invS(:,:), tmp(:,:)
+
+  allocate(invS(m,m))
+  call get_inverse(S, size(S, 1), m, invS, size(invS, 1))
+  print *, ' overlap '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+  print *, ' inv overlap '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') invS(i,:)
+  enddo
+
+  allocate(tmp(n,m))
+  tmp = V
+
+  call dgemm( 'N', 'N', n, m, m, 1.d0        &
+            , tmp, size(tmp, 1), invS, size(invS, 1) &
+            , 0.d0, V, size(V, 1) )
+
+  deallocate(tmp, invS)
+
+  return
+end subroutine max_overlap_invprod
+
+! ---
+
+subroutine impose_biorthog_svd(n, m, L, R)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: L(n,m), R(n,m)
+
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+  double precision, allocatable   :: U(:,:), V(:,:), Vt(:,:), D(:)
+
+  ! ---
+
+  allocate(S(m,m))
+
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , L, size(L, 1), R, size(R, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  allocate(V(m,m))
+  do i = 1, m
+    do j = 1, m
+      V(j,i) = Vt(i,j)
+    enddo
+  enddo
+  deallocate(Vt)
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- R x V
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , R, size(R, 1), V, size(V, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(V)
+  ! R <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      R(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  ! tmp <-- L x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , L, size(L, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(U)
+  ! L <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      L(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  allocate(S(m,m))
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , L, size(L, 1), R, size(R, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  deallocate(S)
+
+  ! ---
+
+end subroutine impose_biorthog_svd
+
+! ---
+
+subroutine impose_weighted_biorthog_qr(m, n, thr_d, thr_nd, Vl, W, Vr)
+
+  implicit none 
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(inout) :: Vl(m,n), W(m,m), Vr(m,n)
+
+  integer                         :: i, j
+  integer                         :: LWORK, INFO
+  double precision                :: accu_nd, accu_d
+  double precision, allocatable   :: TAU(:), WORK(:)
+  double precision, allocatable   :: S(:,:), R(:,:), tmp(:,:), Stmp(:,:)
+
+
+  call check_weighted_biorthog_binormalize(m, n, Vl, W, Vr, thr_d, thr_nd, .false.)
+  
+  ! ---
+  
+  allocate(Stmp(n,m), S(n,n))
+  call dgemm( 'T', 'N', n, m, m, 1.d0        &
+            , Vl, size(Vl, 1), W, size(W, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'N', 'N', n, n, m, 1.d0              &
+            , Stmp, size(Stmp, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) then
+        accu_d += S(j,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n))/dble(n) .lt. thr_d)) then
+    print *, ' bi-orthogonal vectors without QR !'
+    deallocate(S)
+    return
+  endif
+
+  ! -------------------------------------------------------------------------------------
+  !                           QR factorization of S: S = Q x R
+
+
+  print *, ' apply QR decomposition ...'
+
+  allocate( TAU(n), WORK(1) )
+
+  LWORK = -1
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  ! save the upper triangular R
+  allocate( R(n,n) )
+  R(:,:) = S(:,:)
+
+  ! get Q
+  LWORK = -1
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, TAU )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               get bi-orhtog left & right vectors:
+  !                                           Vr' = Vr x inv(R) 
+  !                                           Vl' = inv(Q) x Vl =  Q.T   x Vl 
+
+  ! Q.T x Vl, where Q = S
+
+  allocate( tmp(n,m) )
+  call dgemm( 'T', 'T', n, m, n, 1.d0        &
+            , S, size(S, 1), Vl, size(Vl, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  do i = 1, n
+    do j = 1, m
+      Vl(j,i) = tmp(i,j)
+    enddo
+  enddo
+  deallocate(tmp)
+
+  ! ---
+
+  ! inv(R) 
+  !print *, ' inversing upper triangular matrix ...'
+  call dtrtri("U", "N", n, R, n, INFO)
+  if(INFO .ne. 0) then
+    print*,'dtrtri failed !!', INFO
+    stop
+  endif
+  !print *, ' inversing upper triangular matrix OK' 
+
+  do i = 1, n-1
+    do j = i+1, n
+      R(j,i) = 0.d0
+    enddo
+  enddo
+
+  !print *, ' inv(R):'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') R(i,:)
+  !enddo
+
+  ! Vr x inv(R) 
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'N', m, n, n, 1.d0        &
+            , Vr, size(Vr, 1), R, size(R, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate( R )
+
+  do i = 1, n
+    do j = 1, m
+      Vr(j,i) = tmp(j,i)
+    enddo
+  enddo
+  deallocate(tmp)
+
+  call check_weighted_biorthog_binormalize(m, n, Vl, W, Vr, thr_d, thr_nd, .false.)
+
+  return
+end subroutine impose_weighted_biorthog_qr
+
+! ---
+
+subroutine check_weighted_biorthog_binormalize(n, m, Vl, W, Vr, thr_d, thr_nd, stop_ifnot)
+
+  implicit none
+  
+  integer,          intent(in)    :: n, m
+  logical,          intent(in)    :: stop_ifnot
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(inout) :: Vl(n,m), W(n,n), Vr(n,m)
+
+  integer                         :: i, j
+  double precision                :: accu_d, accu_nd, s_tmp
+  double precision, allocatable   :: S(:,:), Stmp(:,:)
+
+  print *, ' check weighted bi-orthonormality'
+
+  ! ---
+
+  allocate(Stmp(m,n), S(m,m))
+  call dgemm( 'T', 'N', m, n, n, 1.d0        &
+            , Vl, size(Vl, 1), W, size(W, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0              &
+            , Stmp, size(Stmp, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+  !print *, ' overlap matrix before:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! S(i,i) = -1
+  do i = 1, m
+    if( (S(i,i) + 1.d0) .lt. thr_d ) then
+      do j = 1, n
+        Vl(j,i) = -1.d0 * Vl(j,i)
+      enddo
+      S(i,i) = 1.d0
+    endif
+  enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + S(i,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd) / dble(m)
+  print*, '    diag acc: ', accu_d
+  print*, ' nondiag acc: ', accu_nd
+
+  ! ---
+
+  if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+
+    do i = 1, m
+      print *, i, S(i,i)
+      if(dabs(S(i,i) - 1.d0) .gt. thr_d) then
+        s_tmp = 1.d0 / dsqrt(S(i,i))
+        do j = 1, n
+          Vl(j,i) = Vl(j,i) * s_tmp 
+          Vr(j,i) = Vr(j,i) * s_tmp 
+        enddo
+      endif
+    enddo
+
+  endif
+
+  ! ---
+
+  allocate(Stmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0        &
+            , Vl, size(Vl, 1), W, size(W, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0              &
+            , Stmp, size(Stmp, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+  !print *, ' overlap matrix after:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + S(i,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd) / dble(m)
+  print *, '    diag acc: ', accu_d
+  print *, ' nondiag acc: ', accu_nd
+
+  deallocate(S)
+
+  ! ---
+
+  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) ) then
+    print *, accu_nd, thr_nd 
+    print *, dabs(accu_d-dble(m))/dble(m), thr_d
+    print *, ' weighted biorthog_binormalize failed !'
+    stop
+  endif
+
+end subroutine check_weighted_biorthog_binormalize
+
+! ---
+
+subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(in)    :: overlap(n,n)
+  double precision, intent(inout) :: L(n,m), R(n,m)
+
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:),Stmp(:,:)
+  double precision, allocatable   :: U(:,:), V(:,:), Vt(:,:), D(:)
+
+  ! ---
+
+  allocate(S(m,m),Stmp(n,m))
+
+  ! S = C.T x overlap x C
+  call dgemm( 'N', 'N', n, m, n, 1.d0      &
+            , overlap, size(overlap, 1), R, size(R, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , L, size(L, 1), Stmp, size(Stmp, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F25.16,X))') S(i,:)
+  !enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  allocate(V(m,m))
+  do i = 1, m
+    do j = 1, m
+      V(j,i) = Vt(i,j)
+    enddo
+  enddo
+  deallocate(Vt)
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- R x V
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , R, size(R, 1), V, size(V, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(V)
+  ! R <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      R(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  ! tmp <-- L x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , L, size(L, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(U)
+  ! L <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      L(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  allocate(S(m,m),Stmp(n,m))
+  ! S = C.T x overlap x C
+  call dgemm( 'N', 'N', n, m, n, 1.d0      &
+            , overlap, size(overlap, 1), R, size(R, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , L, size(L, 1), Stmp, size(Stmp, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+
+  !print *, ' overlap aft SVD with overlap: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  deallocate(S)
+
+  return
+end subroutine impose_weighted_biorthog_svd
+
+! ---
+
diff --git a/src/non_hermit_dav/new_routines.irp.f b/src/non_hermit_dav/new_routines.irp.f
new file mode 100644
index 00000000..8db044d3
--- /dev/null
+++ b/src/non_hermit_dav/new_routines.irp.f
@@ -0,0 +1,670 @@
+subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, n_degen,k , iteration
+  double precision              :: shift_current
+  double precision              :: r,thr,accu_d, accu_nd
+  integer,          allocatable :: iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:),re_part(:)
+  double precision :: accu,thr_cut, thr_norm=1d0
+
+
+  thr_cut = 1.d-15
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+ ! initialization
+  shift_current = 1.d-15
+  iteration = 0 
+  print*,'***** iteration = ',iteration
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+!  print*,'Aw'
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+!   write(*,'(100(F16.10,X))')A(:,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  allocate(im_part(n),iorder(n))
+  allocate( S(n,n) )
+
+
+  Aw = A_save
+  call cancel_small_elmts(aw,n,thr_cut)
+  call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+  do i = 1, n
+   im_part(i) = -dabs(WI(i))
+   iorder(i) = i
+  enddo
+  call dsort(im_part, iorder, n)
+  n_real_eigv = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-20)then
+      n_real_eigv += 1
+    else
+!      print*,'Found an imaginary component to eigenvalue'
+!      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+  if(n_real_eigv.ne.n)then
+   shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+  else
+   print*,'All eigenvalues are real !'
+  endif
+
+
+  do while(n_real_eigv.ne.n)
+   iteration += 1
+   print*,'***** iteration = ',iteration
+   if(shift_current.gt.1.d-3)then
+    print*,'shift_current > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   Aw = A_save
+   call cancel_small_elmts(Aw,n,thr_cut)
+   call split_matrix_degen(Aw,n,shift_current)
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   n_real_eigv = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_real_eigv+= 1
+     else
+!       print*,'Found an imaginary component to eigenvalue'
+!       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   if(n_real_eigv.ne.n)then
+    do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+    enddo
+    call dsort(im_part, iorder, n)
+    shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+    print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+    print*,'Splitting the degeneracies by ',shift_current
+   else
+    print*,'All eigenvalues are real !'
+   endif
+  enddo
+  !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES 
+  do i = 1, n
+   eigval(i) = WR(i)
+   iorder(i) = i
+  enddo
+  call dsort(eigval,iorder,n)
+  do i = 1, n
+!   print*,'eigval(i) = ',eigval(i)
+   reigvec_tmp(:,i) = VR(:,iorder(i))
+   leigvec_tmp(:,i) = Vl(:,iorder(i))
+  enddo
+
+!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
+  !                               check bi-orthogonality
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else
+    print *, ' '
+    print *, ' bi-orthogonality: not imposed yet'
+    print *, ' '
+    print *, ' '
+    print *, ' orthog between degen eigenvect' 
+    print *, ' '
+    double precision, allocatable :: S_nh_inv_half(:,:)
+    allocate(S_nh_inv_half(n,n))
+    logical :: complex_root
+    deallocate(S_nh_inv_half)
+    call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp)
+    call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp)
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S)
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+     if(accu_nd .lt. thresh_biorthog_nondiag) then
+       print *, ' bi-orthogonality: ok'
+     else 
+      print*,'New vectors not bi-orthonormals at ',accu_nd
+      print*,'Must be a deep problem ...'
+      stop
+     endif
+    endif
+  endif
+ 
+  !! EIGENVECTORS SORTED AND BI-ORTHONORMAL
+  do i = 1, n
+   do j = 1, n
+    VR(iorder_origin(j),i) = reigvec_tmp(j,i)
+    VL(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+  !! RECOMPUTING THE EIGENVALUES 
+  eigval = 0.d0
+  do i = 1, n
+   iorder(i) = i
+   accu = 0.d0
+   do j = 1, n
+    accu += VL(j,i) * VR(j,i) 
+    do k = 1, n
+     eigval(i) +=  VL(j,i) * A(j,k) * VR(k,i) 
+    enddo
+   enddo
+   eigval(i) *= 1.d0/accu
+!   print*,'eigval(i) = ',eigval(i)
+  enddo
+  !! RESORT JUST TO BE SURE
+  call dsort(eigval, iorder, n)
+  do i = 1, n
+   do j = 1, n
+    reigvec(j,i) = VR(j,iorder(i))
+    leigvec(j,i) = VL(j,iorder(i))
+   enddo
+  enddo
+  print*,'Checking for final reigvec/leigvec'
+  shift_current = max(1.d-10,shift_current)
+  print*,'Thr for eigenvectors = ',shift_current
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else 
+   print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
+   print*,'Eigenvectors are not bi orthonormal ..'
+   print*,'accu_nd = ',accu_nd
+   stop
+  endif
+
+end 
+
+
+
+subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, n_degen,k , iteration
+  double precision              :: shift_current
+  double precision              :: r,thr,accu_d, accu_nd
+  integer,          allocatable :: iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:),re_part(:)
+  double precision :: accu,thr_cut, thr_norm=1.d0
+  double precision, allocatable :: S_nh_inv_half(:,:)
+  logical :: complex_root
+
+
+  thr_cut = 1.d-15
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+ ! initialization
+  shift_current = 1.d-15
+  iteration = 0 
+  print*,'***** iteration = ',iteration
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+!  print*,'Aw'
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+!   write(*,'(100(F16.10,X))')A(:,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  allocate(im_part(n),iorder(n))
+  allocate( S(n,n) )
+  allocate(S_nh_inv_half(n,n))
+
+
+  Aw = A_save
+  call cancel_small_elmts(aw,n,thr_cut)
+  call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+  do i = 1, n
+   im_part(i) = -dabs(WI(i))
+   iorder(i) = i
+  enddo
+  call dsort(im_part, iorder, n)
+  n_real_eigv = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-20)then
+      n_real_eigv += 1
+    else
+!      print*,'Found an imaginary component to eigenvalue'
+!      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+  if(n_real_eigv.ne.n)then
+   shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+  else
+   print*,'All eigenvalues are real !'
+  endif
+
+
+  do while(n_real_eigv.ne.n)
+   iteration += 1
+   print*,'***** iteration = ',iteration
+   if(shift_current.gt.1.d-3)then
+    print*,'shift_current > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   Aw = A_save
+!   thr_cut = shift_current
+   call cancel_small_elmts(Aw,n,thr_cut)
+   call split_matrix_degen(Aw,n,shift_current)
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   n_real_eigv = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_real_eigv+= 1
+     else
+!       print*,'Found an imaginary component to eigenvalue'
+!       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   if(n_real_eigv.ne.n)then
+    do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+    enddo
+    call dsort(im_part, iorder, n)
+    shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+    print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+    print*,'Splitting the degeneracies by ',shift_current
+   else
+    print*,'All eigenvalues are real !'
+   endif
+  enddo
+  !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES 
+  do i = 1, n
+   eigval(i) = WR(i)
+   iorder(i) = i
+  enddo
+  call dsort(eigval,iorder,n)
+  do i = 1, n
+!   print*,'eigval(i) = ',eigval(i)
+   reigvec_tmp(:,i) = VR(:,iorder(i))
+   leigvec_tmp(:,i) = Vl(:,iorder(i))
+  enddo
+
+!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
+  !                               check bi-orthogonality
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else
+    print *, ' '
+    print *, ' bi-orthogonality: not imposed yet'
+    if(complex_root) then 
+     print *, ' '
+     print *, ' '
+     print *, ' orthog between degen eigenvect' 
+     print *, ' '
+     ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
+     call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) ! orthogonalization of reigvec
+     call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) ! orthogonalization of leigvec
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S,  thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) 
+
+     if(accu_nd .lt. thresh_biorthog_nondiag) then
+       print *, ' bi-orthogonality: ok'
+     else 
+      print*,'New vectors not bi-orthonormals at ', accu_nd
+      call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half, complex_root)
+      if(complex_root)then 
+       call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR
+      else
+       print*,'S^{-1/2} exists !!'
+       call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
+      endif
+     endif
+    else ! the matrix S^{-1/2} exists
+     print*,'S^{-1/2} exists !!'
+     call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
+    endif
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     print*,'Must be a deep problem ...'
+     stop
+    endif
+  endif
+ 
+  !! EIGENVECTORS SORTED AND BI-ORTHONORMAL
+  do i = 1, n
+   do j = 1, n
+    VR(iorder_origin(j),i) = reigvec_tmp(j,i)
+    VL(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+  !! RECOMPUTING THE EIGENVALUES 
+  eigval = 0.d0
+  do i = 1, n
+   iorder(i) = i
+   accu = 0.d0
+   do j = 1, n
+    accu += VL(j,i) * VR(j,i) 
+    do k = 1, n
+     eigval(i) +=  VL(j,i) * A(j,k) * VR(k,i) 
+    enddo
+   enddo
+   eigval(i) *= 1.d0/accu
+!   print*,'eigval(i) = ',eigval(i)
+  enddo
+  !! RESORT JUST TO BE SURE
+  call dsort(eigval, iorder, n)
+  do i = 1, n
+   do j = 1, n
+    reigvec(j,i) = VR(j,iorder(i))
+    leigvec(j,i) = VL(j,iorder(i))
+   enddo
+  enddo
+  print*,'Checking for final reigvec/leigvec'
+  shift_current = max(1.d-10,shift_current)
+  print*,'Thr for eigenvectors = ',shift_current
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else 
+   print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
+   print*,'Eigenvectors are not bi orthonormal ..'
+   print*,'accu_nd = ',accu_nd
+   stop
+  endif
+
+end 
+
+
+subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine returning the eigenvalues and left/right eigenvectors of the TC fock matrix 
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, n_degen,k , iteration
+  double precision              :: shift_current
+  double precision              :: r,thr,accu_d, accu_nd
+  integer,          allocatable :: iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:),re_part(:)
+  double precision :: accu,thr_cut
+  double precision, allocatable :: S_nh_inv_half(:,:)
+  logical :: complex_root
+  double precision :: thr_norm=1d0
+
+
+  thr_cut = 1.d-15
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+ ! initialization
+  shift_current = 1.d-15
+  iteration = 0 
+  print*,'***** iteration = ',iteration
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+!  print*,'Aw'
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+!   write(*,'(100(F16.10,X))')A(:,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  allocate(im_part(n),iorder(n))
+  allocate( S(n,n) )
+  allocate(S_nh_inv_half(n,n))
+
+
+  Aw = A_save
+  call cancel_small_elmts(aw,n,thr_cut)
+  call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+  do i = 1, n
+   im_part(i) = -dabs(WI(i))
+   iorder(i) = i
+  enddo
+  call dsort(im_part, iorder, n)
+  n_real_eigv = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-20)then
+      n_real_eigv += 1
+    else
+!      print*,'Found an imaginary component to eigenvalue'
+!      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+  if(n_real_eigv.ne.n)then
+   shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+  else
+   print*,'All eigenvalues are real !'
+  endif
+
+
+  do while(n_real_eigv.ne.n)
+   iteration += 1
+   print*,'***** iteration = ',iteration
+   if(shift_current.gt.1.d-3)then
+    print*,'shift_current > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   Aw = A_save
+!   thr_cut = shift_current
+   call cancel_small_elmts(Aw,n,thr_cut)
+   call split_matrix_degen(Aw,n,shift_current)
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   n_real_eigv = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_real_eigv+= 1
+     else
+!       print*,'Found an imaginary component to eigenvalue'
+!       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   if(n_real_eigv.ne.n)then
+    do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+    enddo
+    call dsort(im_part, iorder, n)
+    shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+    print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+    print*,'Splitting the degeneracies by ',shift_current
+   else
+    print*,'All eigenvalues are real !'
+   endif
+  enddo
+  !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES 
+  do i = 1, n
+   eigval(i) = WR(i)
+   iorder(i) = i
+  enddo
+  call dsort(eigval,iorder,n)
+  do i = 1, n
+!   print*,'eigval(i) = ',eigval(i)
+   reigvec_tmp(:,i) = VR(:,iorder(i))
+   leigvec_tmp(:,i) = Vl(:,iorder(i))
+  enddo
+
+!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
+  !                               check bi-orthogonality
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else
+    print *, ' '
+    print *, ' bi-orthogonality: not imposed yet'
+    print *, ' '
+    print *, ' '
+    print *, ' Using impose_unique_biorthog_degen_eigvec' 
+    print *, ' '
+    ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
+    call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, leigvec_tmp, reigvec_tmp)
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    print*,'accu_nd = ',accu_nd
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half,complex_root)
+     if(complex_root)then 
+      print*,'S^{-1/2} does not exits, using QR bi-orthogonalization'
+      call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR
+     else
+      print*,'S^{-1/2} exists !!'
+      call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
+     endif
+    endif
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     print*,'Must be a deep problem ...'
+     stop
+    endif
+  endif
+ 
+  !! EIGENVECTORS SORTED AND BI-ORTHONORMAL
+  do i = 1, n
+   do j = 1, n
+    VR(iorder_origin(j),i) = reigvec_tmp(j,i)
+    VL(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+  !! RECOMPUTING THE EIGENVALUES 
+  eigval = 0.d0
+  do i = 1, n
+   iorder(i) = i
+   accu = 0.d0
+   do j = 1, n
+    accu += VL(j,i) * VR(j,i) 
+    do k = 1, n
+     eigval(i) +=  VL(j,i) * A(j,k) * VR(k,i) 
+    enddo
+   enddo
+   eigval(i) *= 1.d0/accu
+!   print*,'eigval(i) = ',eigval(i)
+  enddo
+  !! RESORT JUST TO BE SURE
+  call dsort(eigval, iorder, n)
+  do i = 1, n
+   do j = 1, n
+    reigvec(j,i) = VR(j,iorder(i))
+    leigvec(j,i) = VL(j,iorder(i))
+   enddo
+  enddo
+  print*,'Checking for final reigvec/leigvec'
+  shift_current = max(1.d-10,shift_current)
+  print*,'Thr for eigenvectors = ',shift_current
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else 
+   print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
+   print*,'Eigenvectors are not bi orthonormal ..'
+   print*,'accu_nd = ',accu_nd
+   stop
+  endif
+
+end 
+
+
diff --git a/src/non_hermit_dav/project.irp.f b/src/non_hermit_dav/project.irp.f
new file mode 100644
index 00000000..c04719ac
--- /dev/null
+++ b/src/non_hermit_dav/project.irp.f
@@ -0,0 +1,53 @@
+subroutine h_non_hermite(v,u,Hmat,a,N_st,sze) 
+ implicit none
+ BEGIN_DOC
+ ! Template of routine for the application of H
+ !
+ ! Here, it is done with the Hamiltonian matrix 
+ !
+ ! on the set of determinants of psi_det 
+ !
+ ! Computes $v = a * H | u \rangle$ 
+ !
+ END_DOC
+ integer, intent(in)              :: N_st,sze
+ double precision, intent(in)     :: u(sze,N_st), Hmat(sze,sze), a
+ double precision, intent(inout)  :: v(sze,N_st)
+ integer :: i,j,k 
+ do k = 1, N_st
+  do j = 1, sze
+   do i = 1, sze
+    v(i,k) += a * u(j,k) * Hmat(i,j)
+   enddo
+  enddo
+ enddo
+end
+
+
+subroutine exp_tau_H(u,v,hmat,tau,et,N_st,sze)
+ implicit none
+ BEGIN_DOC
+! realises v = (1 - tau (H - et)) u
+ END_DOC
+ integer, intent(in) :: N_st,sze
+ double precision, intent(in) :: hmat(sze,sze), u(sze,N_st), tau, et
+ double precision, intent(out):: v(sze,N_st)
+ double precision :: a
+ integer :: i,j
+ v = (1.d0 + tau * et) * u 
+ a = -1.d0 * tau
+ call h_non_hermite(v,u,Hmat,a,N_st,sze)
+end
+
+double precision function project_phi0(u,Hmat0,N_st,sze)
+ implicit none
+ integer, intent(in)              :: N_st,sze
+ double precision, intent(in)     :: u(sze,N_st), Hmat0(sze)
+ integer :: j
+ project_phi0 = 0.d0
+ do j = 1, sze
+  project_phi0 += u(j,1) * Hmat0(j) 
+ enddo
+ project_phi0 *= 1.d0 / u(1,1)
+end
+
diff --git a/src/non_hermit_dav/utils.irp.f b/src/non_hermit_dav/utils.irp.f
new file mode 100644
index 00000000..7f331a6b
--- /dev/null
+++ b/src/non_hermit_dav/utils.irp.f
@@ -0,0 +1,325 @@
+
+subroutine get_inv_half_svd(matrix, n, matrix_inv_half)
+
+  BEGIN_DOC
+  !   :math:`X = S^{-1/2}` obtained by SVD
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: matrix(n,n)
+  double precision, intent(out) :: matrix_inv_half(n,n)
+
+  integer                       :: num_linear_dependencies
+  integer                       :: LDA, LDC
+  integer                       :: info, i, j, k
+  double precision, parameter   :: threshold = 1.d-6
+  double precision, allocatable :: U(:,:),Vt(:,:), D(:),matrix_half(:,:),D_half(:)
+
+  double precision :: accu_d,accu_nd
+
+  LDA = size(matrix, 1)
+  LDC = size(matrix_inv_half, 1)
+  if(LDA .ne. LDC) then
+    print*, ' LDA != LDC'
+    stop
+  endif
+
+  print*, ' n   = ', n
+  print*, ' LDA = ', LDA
+  print*, ' LDC = ', LDC
+
+  double precision,allocatable :: WR(:),WI(:),VL(:,:),VR(:,:)
+  allocate(WR(n),WI(n),VL(n,n),VR(n,n))
+  call lapack_diag_non_sym(n,matrix,WR,WI,VL,VR)
+  do i = 1, n
+    print*,'WR,WI',WR(i),WI(i)
+  enddo
+
+
+  allocate(U(LDC,n), Vt(LDA,n), D(n))
+
+  call svd(matrix, LDA, U, LDC, D, Vt, LDA, n, n)
+  double precision, allocatable :: tmp1(:,:),tmp2(:,:),D_mat(:,:)
+  allocate(tmp1(n,n),tmp2(n,n),D_mat(n,n),matrix_half(n,n),D_half(n))
+  D_mat = 0.d0
+  do i = 1,n
+   D_mat(i,i) = D(i)
+  enddo
+  ! matrix = U D Vt
+  ! tmp1 = U D
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            , U, size(U, 1), D_mat, size(D_mat, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  ! tmp2 = tmp1 X Vt = matrix
+  tmp2 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            , tmp1, size(tmp1, 1), Vt, size(Vt, 1) &
+            , 0.d0, tmp2, size(tmp2, 1) )
+  print*,'Checking the recomposition of the matrix'
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+   accu_d += dabs(tmp2(i,i) - matrix(i,i))
+   do j = 1, n
+    if(i==j)cycle 
+    accu_nd += dabs(tmp2(j,i) - matrix(j,i))
+   enddo
+  enddo
+  print*,'accu_d  =',accu_d
+  print*,'accu_nd =',accu_nd
+  print*,'passed the recomposition'
+
+  num_linear_dependencies = 0
+  do i = 1, n
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D_half(i) = dsqrt(D(i))
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  write(*,*) ' linear dependencies', num_linear_dependencies
+
+  matrix_inv_half = 0.d0
+  matrix_half = 0.d0
+  do k = 1, n
+    if(D(k) /= 0.d0) then
+      do j = 1, n
+        do i = 1, n
+!          matrix_inv_half(i,j) = matrix_inv_half(i,j) + U(i,k) * D(k) * Vt(k,j)
+           matrix_inv_half(i,j) = matrix_inv_half(i,j) + U(i,k) * D(k) * Vt(j,k)
+           matrix_half(i,j) = matrix_half(i,j) + U(i,k) * D_half(k) * Vt(j,k)
+        enddo
+      enddo
+    endif
+  enddo
+  print*,'testing S^1/2 * S^1/2= S'
+  ! tmp1 = S^1/2 X S^1/2
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            , matrix_half, size(matrix_half, 1), matrix_half, size(matrix_half, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+   accu_d += dabs(tmp1(i,i) - matrix(i,i))
+   do j = 1, n
+    if(i==j)cycle 
+    accu_nd += dabs(tmp1(j,i) - matrix(j,i))
+   enddo
+  enddo
+  print*,'accu_d  =',accu_d
+  print*,'accu_nd =',accu_nd
+
+!  print*,'S inv half'
+!  do i = 1, n
+!   write(*, '(1000(F16.10,X))') matrix_inv_half(i,:)
+!  enddo
+
+  double precision, allocatable :: pseudo_inverse(:,:),identity(:,:)
+  allocate( pseudo_inverse(n,n),identity(n,n))
+  call get_pseudo_inverse(matrix,n,n,n,pseudo_inverse,n,threshold)
+ 
+  ! S^-1 X S = 1
+!  identity = 0.d0
+!  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+!            , matrix, size(matrix, 1), pseudo_inverse, size(pseudo_inverse, 1) &
+!            , 0.d0, identity, size(identity, 1) )
+  print*,'Checking  S^-1/2 X S^-1/2 = S^-1 ?'
+  ! S^-1/2 X S^-1/2 = S^-1 ?
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            ,matrix_inv_half, size(matrix_inv_half, 1), matrix_inv_half, size(matrix_inv_half, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+   accu_d += dabs(1.d0 - pseudo_inverse(i,i))
+   do j = 1, n
+    if(i==j)cycle 
+    accu_nd += dabs(tmp1(j,i) - pseudo_inverse(j,i))
+   enddo
+  enddo
+  print*,'accu_d  =',accu_d
+  print*,'accu_nd =',accu_nd
+
+  stop
+!
+!  ! ( S^-1/2 x S ) x S^-1/2
+!  Stmp2 = 0.d0
+!  call dgemm( 'N', 'N', n, n, n, 1.d0                                        &
+!            , Stmp, size(Stmp, 1), matrix_inv_half, size(matrix_inv_half, 1) &
+!            , 0.d0, Stmp2, size(Stmp2, 1) )
+
+  ! S^-1/2 x ( S^-1/2 x S )
+!  Stmp2 = 0.d0
+!  call dgemm( 'N', 'N', n, n, n, 1.d0                                        &
+!            , matrix_inv_half, size(matrix_inv_half, 1), Stmp, size(Stmp, 1) &
+!            , 0.d0, Stmp2, size(Stmp2, 1) )
+ 
+!  do i = 1, n
+!    do j = 1, n
+!      if(i==j) then
+!       accu_d += Stmp2(j,i)
+!      else 
+!       accu_nd = accu_nd + Stmp2(j,i) * Stmp2(j,i)
+!      endif
+!    enddo
+!  enddo
+!  accu_nd = dsqrt(accu_nd)
+!  print*, ' after S^-1/2: sum of off-diag S elements = ', accu_nd
+!  print*, ' after S^-1/2: sum of     diag S elements = ', accu_d
+!  do i = 1, n
+!   write(*,'(1000(F16.10,X))') Stmp2(i,:)
+!  enddo
+
+  !double precision  :: thresh
+  !thresh = 1.d-10
+  !if( accu_nd.gt.thresh .or. dabs(accu_d-dble(n)).gt.thresh) then
+  !  stop
+  !endif
+
+end subroutine get_inv_half_svd
+
+! ---
+
+subroutine get_inv_half_nonsymmat_diago(matrix, n, matrix_inv_half, complex_root)
+
+  BEGIN_DOC
+  ! input:  S = matrix
+  ! output: S^{-1/2} = matrix_inv_half obtained by diagonalization
+  !
+  ! S = VR D VL^T
+  !   = VR D^{1/2} D^{1/2} VL^T
+  !   = VR D^{1/2} VL^T VR D^{1/2} VL^T
+  !   = S^{1/2} S^{1/2} with S = VR D^{1/2} VL^T 
+  !
+  ! == > S^{-1/2} = VR D^{-1/2} VL^T
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: matrix(n,n)
+  logical,          intent(out) :: complex_root
+  double precision, intent(out) :: matrix_inv_half(n,n)
+
+  integer                       :: i, j
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:), S(:,:), S_diag(:)
+  double precision, allocatable :: tmp1(:,:), D_mat(:,:)
+
+  complex_root = .False.
+
+  matrix_inv_half = 0.D0
+  print*,'Computing S^{-1/2}'
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n))
+  call lapack_diag_non_sym(n, matrix, WR, WI, VL, VR)
+
+  allocate(S(n,n))
+  call check_biorthog(n, n, VL, VR, accu_d, accu_nd, S)
+  print*,'accu_nd S^{-1/2}',accu_nd
+  if(accu_nd.gt.1.d-10) then
+    complex_root = .True. ! if vectors are not bi-orthogonal return 
+    print*,'Eigenvectors of S are not bi-orthonormal, skipping S^{-1/2}'
+    return
+  endif
+
+  allocate(S_diag(n))
+  do i = 1, n
+    S_diag(i) = 1.d0/dsqrt(S(i,i))
+    if(dabs(WI(i)).gt.1.d-20.or.WR(i).lt.0.d0)then ! check that eigenvalues are real and positive 
+     complex_root = .True.
+     print*,'Eigenvalues of S have imaginary part '
+     print*,'WR(i),WI(i)',WR(i), WR(i)
+     print*,'Skipping S^{-1/2}'
+     return
+    endif
+  enddo
+  deallocate(S)
+
+  if(complex_root) return
+
+  ! normalization of vectors 
+  do i = 1, n
+    if(S_diag(i).eq.1.d0) cycle
+    do j = 1,n
+      VL(j,i) *= S_diag(i)
+      VR(j,i) *= S_diag(i)
+    enddo
+  enddo
+  deallocate(S_diag)
+
+  allocate(tmp1(n,n), D_mat(n,n))
+
+  D_mat = 0.d0
+  do i = 1, n
+    D_mat(i,i) = 1.d0/dsqrt(WR(i))
+  enddo
+  deallocate(WR, WI)
+
+  ! tmp1 = VR D^{-1/2} 
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                &
+            , VR, size(VR, 1), D_mat, size(D_mat, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  deallocate(VR, D_mat)
+
+  ! S^{-1/2} = tmp1 X VL^T 
+  matrix_inv_half = 0.d0
+  call dgemm( 'N', 'T', n, n, n, 1.d0              &
+            , tmp1, size(tmp1, 1), VL, size(VL, 1) &
+            , 0.d0, matrix_inv_half, size(matrix_inv_half, 1) )
+  deallocate(tmp1, VL)
+
+end
+
+! ---
+
+subroutine bi_ortho_s_inv_half(n,leigvec,reigvec,S_nh_inv_half)
+ implicit  none
+ integer, intent(in) :: n
+ double precision, intent(in) :: S_nh_inv_half(n,n)
+ double precision, intent(inout) :: leigvec(n,n),reigvec(n,n)
+ BEGIN_DOC 
+ ! bi-orthonormalization of left and right vectors 
+ ! 
+ ! S = VL^T VR 
+ !
+ ! S^{-1/2} S S^{-1/2} = 1 = S^{-1/2} VL^T VR S^{-1/2} = VL_new^T VR_new
+ !
+ ! VL_new = VL (S^{-1/2})^T
+ ! 
+ ! VR_new = VR S^{^{-1/2}}
+ END_DOC
+ double precision,allocatable :: vl_tmp(:,:),vr_tmp(:,:)
+ print*,'Bi-orthonormalization using S^{-1/2}'
+ allocate(vl_tmp(n,n),vr_tmp(n,n))
+ vl_tmp = leigvec
+ vr_tmp = reigvec
+ ! VL_new = VL (S^{-1/2})^T
+ call dgemm( 'N', 'T', n, n, n, 1.d0                                            &
+             , vl_tmp, size(vl_tmp, 1), S_nh_inv_half, size(S_nh_inv_half, 1) &
+             , 0.d0, leigvec, size(leigvec, 1) )
+ ! VR_new = VR S^{^{-1/2}}
+ call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+             , vr_tmp, size(vr_tmp, 1), S_nh_inv_half, size(S_nh_inv_half, 1) &
+             , 0.d0, reigvec, size(reigvec, 1) )
+ double precision :: accu_d, accu_nd
+ double precision,allocatable :: S(:,:) 
+ allocate(S(n,n)) 
+ call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S)
+ if(dabs(accu_d - n).gt.1.d-10 .or. accu_nd .gt.1.d-8 )then
+  print*,'Pb in bi_ortho_s_inv_half !!'
+  print*,'accu_d =',accu_d
+  print*,'accu_nd =',accu_nd
+  stop
+ endif
+end
diff --git a/src/nuclei/EZFIO.cfg b/src/nuclei/EZFIO.cfg
index 34c27c46..060eede6 100644
--- a/src/nuclei/EZFIO.cfg
+++ b/src/nuclei/EZFIO.cfg
@@ -37,3 +37,27 @@ type: logical
 doc: If true, the calculation uses periodic boundary conditions
 interface: ezfio, provider, ocaml
 default: false
+[n_pts_charge]
+type: integer
+doc: Number of point charges to be added to the potential
+interface: ezfio
+default: 0                                                                                                                
+
+[pts_charge_z]
+type: double precision
+doc: Charge associated to each point charge
+interface: ezfio
+size: (nuclei.n_pts_charge)
+
+[pts_charge_coord]
+type: double precision
+doc: Coordinate of each point charge.
+interface: ezfio
+size: (nuclei.n_pts_charge,3)
+
+[point_charges]
+type: logical
+doc: If |true|, point charges (see nuclei/write_pt_charges.py) are added to the one-electron potential
+interface: ezfio,provider,ocaml
+default: False
+
diff --git a/src/nuclei/nuclei.irp.f b/src/nuclei/nuclei.irp.f
index c1b5f52f..3c04316f 100644
--- a/src/nuclei/nuclei.irp.f
+++ b/src/nuclei/nuclei.irp.f
@@ -205,6 +205,9 @@ BEGIN_PROVIDER [ double precision, nuclear_repulsion ]
        enddo
      enddo
      nuclear_repulsion *= 0.5d0
+     if(point_charges)then
+      nuclear_repulsion += pt_chrg_nuclei_interaction + pt_chrg_interaction
+     endif
    end if
 
    call write_time(6)
diff --git a/src/nuclei/point_charges.irp.f b/src/nuclei/point_charges.irp.f
new file mode 100644
index 00000000..b955537f
--- /dev/null
+++ b/src/nuclei/point_charges.irp.f
@@ -0,0 +1,209 @@
+! ---
+
+
+BEGIN_PROVIDER [ integer, n_pts_charge  ]
+  implicit none
+  BEGIN_DOC
+! Number of point charges to be added to the potential
+  END_DOC
+
+  logical                        :: has
+  PROVIDE ezfio_filename
+  if (mpi_master) then
+    
+    call ezfio_has_nuclei_n_pts_charge(has)
+    if (has) then
+      write(6,'(A)') '.. >>>>> [ IO READ: n_pts_charge ] <<<<< ..'
+      call ezfio_get_nuclei_n_pts_charge(n_pts_charge)
+    else
+      print *, 'nuclei/n_pts_charge not found in EZFIO file'
+      stop 1
+    endif
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST( n_pts_charge, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read n_pts_charge with MPI'
+    endif
+  IRP_ENDIF
+
+  call write_time(6)
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, pts_charge_z, (n_pts_charge) ]
+
+  BEGIN_DOC
+  ! Charge associated to each point charge. 
+  END_DOC
+
+  implicit none
+  logical :: exists
+
+  PROVIDE ezfio_filename
+
+  if (mpi_master) then
+    call ezfio_has_nuclei_pts_charge_z(exists)
+  endif
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read pts_charge_z with MPI'
+    endif
+  IRP_ENDIF
+
+  if (exists) then
+
+    if (mpi_master) then
+      write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_z ] <<<<< ..'
+      call ezfio_get_nuclei_pts_charge_z(pts_charge_z)
+      IRP_IF MPI
+        call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          stop 'Unable to read pts_charge_z with MPI'
+        endif
+      IRP_ENDIF
+    endif
+
+  else
+ 
+    integer :: i
+    do i = 1, n_pts_charge
+      pts_charge_z(i) = 0.d0
+    enddo
+
+  endif
+ print*,'Point charges '
+ do i = 1, n_pts_charge
+  print*,'i,pts_charge_z(i)',i,pts_charge_z(i)
+ enddo
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, pts_charge_coord, (n_pts_charge,3) ]
+
+  BEGIN_DOC
+  ! Coordinates of each point charge. 
+  END_DOC
+
+  implicit none
+  logical :: exists
+
+  PROVIDE ezfio_filename
+
+  if (mpi_master) then
+    call ezfio_has_nuclei_pts_charge_coord(exists)
+  endif
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(pts_charge_coord, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read pts_charge_coord with MPI'
+    endif
+  IRP_ENDIF
+
+  if (exists) then
+
+    if (mpi_master) then
+     double precision, allocatable  :: buffer(:,:)
+     allocate (buffer(n_pts_charge,3))
+      write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_coord ] <<<<< ..'
+      call ezfio_get_nuclei_pts_charge_coord(buffer)
+      integer :: i,j
+      do i=1,3
+        do j=1,n_pts_charge
+          pts_charge_coord(j,i) = buffer(j,i)
+        enddo
+      enddo
+     deallocate(buffer)
+      IRP_IF MPI
+        call MPI_BCAST(pts_charge_coord, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          stop 'Unable to read pts_charge_coord with MPI'
+        endif
+      IRP_ENDIF
+    endif
+
+  else
+ 
+    do i = 1, n_pts_charge
+      pts_charge_coord(i,:) = 0.d0
+    enddo
+
+  endif
+ print*,'Coordinates for the point charges '
+ do i = 1, n_pts_charge
+  write(*,'(I3,X,3(F16.8,X))') i,pts_charge_coord(i,1:3)
+ enddo
+
+END_PROVIDER
+
+! ---
+BEGIN_PROVIDER [ double precision, pt_chrg_interaction]
+ implicit none
+ BEGIN_DOC
+ ! Interaction between the point charges 
+ END_DOC
+ integer :: i,j
+ double precision               :: Z_A, z_B,A_center(3), B_center(3), dist
+ pt_chrg_interaction = 0.d0
+ do  i = 1, n_pts_charge
+   Z_A = pts_charge_z(i)
+   A_center(1:3) = pts_charge_coord(i,1:3)
+   do  j = i+1, n_pts_charge
+     Z_B = pts_charge_z(j)
+     B_center(1:3) = pts_charge_coord(j,1:3)
+     dist = (A_center(1)-B_center(1))**2 + (A_center(2)-B_center(2))**2 + (A_center(3)-B_center(3))**2
+     dist = dsqrt(dist)
+     pt_chrg_interaction += Z_A*Z_B/dist
+   enddo
+ enddo
+ print*,'Interaction between the point charges '
+ print*,'pt_chrg_interaction = ',pt_chrg_interaction
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, pt_chrg_nuclei_interaction]
+ implicit none
+ BEGIN_DOC
+ ! repulsion between the point charges and the nuclei
+ END_DOC
+ integer :: i,j
+ double precision               :: Z_A, z_B,A_center(3), B_center(3), dist
+ pt_chrg_nuclei_interaction = 0.d0
+ do  i = 1, n_pts_charge
+   Z_A = pts_charge_z(i)
+   A_center(1:3) = pts_charge_coord(i,1:3)
+   do  j = 1, nucl_num
+     Z_B = nucl_charge(j)
+     B_center(1:3) = nucl_coord(j,1:3)
+     dist = (A_center(1)-B_center(1))**2 + (A_center(2)-B_center(2))**2 + (A_center(3)-B_center(3))**2
+     dist = dsqrt(dist)
+     pt_chrg_nuclei_interaction += Z_A*Z_B/dist
+   enddo
+ enddo
+ print*,'Interaction between point charges and nuclei'
+ print*,'pt_chrg_nuclei_interaction = ',pt_chrg_nuclei_interaction
+END_PROVIDER 
+
diff --git a/src/nuclei/write_pt_charges.py b/src/nuclei/write_pt_charges.py
new file mode 100644
index 00000000..6dbcd5b8
--- /dev/null
+++ b/src/nuclei/write_pt_charges.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python  
+import os
+import sys
+
+# First argument is the EZFIO file
+# It reads a file EZFIO_point_charges.xyz written in this way:
+# charge x y z (Angstrom)
+# for all charges
+
+
+def zip_in_ezfio(ezfio,tmp):
+  tmpzip=tmp+".gz"
+  cmdzip="gzip -c "+tmp+" > "+tmpzip
+  os.system(cmdzip)
+  os.system("rm "+tmp)
+  cmdmv="mv "+tmpzip+" "+EZFIO+"/nuclei/"+tmpzip
+  os.system(cmdmv)
+
+def mv_in_ezfio(ezfio,tmp):
+  cmdmv="mv "+tmp+" "+EZFIO+"/nuclei/"+tmp
+  os.system(cmdmv)
+
+
+# Getting the EZFIO
+EZFIO=sys.argv[1]
+EZFIO=EZFIO.replace("/", "")
+print(EZFIO)
+
+# Reading the point charges and convert the Angstrom geometry in Bohr for QP
+f = open(EZFIO+'_point_charges.xyz','r')
+lines = f.readlines()
+convert_angs_to_bohr=1.8897259885789233
+
+n_charges=0
+coord_x=[]
+coord_y=[]
+coord_z=[]
+charges=[]
+for line in lines:
+  data = line.split()
+  if(len(data)>0):
+   n_charges += 1
+   charges.append(str(data[0]))
+   coord_x.append(str(convert_angs_to_bohr*float(data[1])))
+   coord_y.append(str(convert_angs_to_bohr*float(data[2])))
+   coord_z.append(str(convert_angs_to_bohr*float(data[3])))
+
+# Write the file containing the number of charges and set in EZFIO folder
+tmp="n_pts_charge"
+fncharges = open(tmp,'w')
+fncharges.write(" "+str(n_charges)+'\n')
+fncharges.close()
+mv_in_ezfio(EZFIO,tmp)
+
+# Write the file containing the charges and set in EZFIO folder 
+tmp="pts_charge_z"
+fcharges = open(tmp,'w')
+fcharges.write(" 1\n")
+fcharges.write(" "+str(n_charges)+'\n')
+for i in range(n_charges):
+ fcharges.write(charges[i]+'\n')
+fcharges.close()
+zip_in_ezfio(EZFIO,tmp)
+
+# Write the file containing the charge coordinates and set in EZFIO folder
+tmp="pts_charge_coord"
+fcoord = open(tmp,'w')
+fcoord.write("  2\n")
+fcoord.write("                   "+str(n_charges)+'                    3\n')
+#fcoord.write(" "+'   3 '+str(n_charges)+' \n')
+for i in range(n_charges):
+ fcoord.write('   '+coord_x[i]+'\n')
+for i in range(n_charges):
+ fcoord.write('   '+coord_y[i]+'\n')
+for i in range(n_charges):
+ fcoord.write('   '+coord_z[i]+'\n')
+fcoord.close()
+zip_in_ezfio(EZFIO,tmp)
+
+
diff --git a/src/ortho_three_e_ints/NEED b/src/ortho_three_e_ints/NEED
new file mode 100644
index 00000000..ad7b6bf8
--- /dev/null
+++ b/src/ortho_three_e_ints/NEED
@@ -0,0 +1 @@
+bi_ort_ints
diff --git a/src/ortho_three_e_ints/io_6_index_tensor.irp.f b/src/ortho_three_e_ints/io_6_index_tensor.irp.f
new file mode 100644
index 00000000..dd654f7e
--- /dev/null
+++ b/src/ortho_three_e_ints/io_6_index_tensor.irp.f
@@ -0,0 +1,63 @@
+
+subroutine write_array_6_index_tensor(n_orb,array_tmp,name_file)
+ implicit none
+ integer, intent(in) :: n_orb
+ character*(128),  intent(in) :: name_file 
+ double precision, intent(in) :: array_tmp(n_orb,n_orb,n_orb,n_orb,n_orb,n_orb)
+
+ character*(128)                :: output
+ integer                        :: i_unit_output,getUnitAndOpen
+ PROVIDE ezfio_filename                                                                                                  
+ output=trim(ezfio_filename)//'/work/'//trim(name_file)
+ i_unit_output = getUnitAndOpen(output,'W')
+ write(i_unit_output)array_tmp
+ close(unit=i_unit_output)
+end
+
+subroutine read_array_6_index_tensor(n_orb,array_tmp,name_file)
+ implicit none
+ character*(128)                :: output
+ integer                        :: i_unit_output,getUnitAndOpen
+ integer, intent(in) :: n_orb
+ character*(128),  intent(in)  :: name_file 
+ double precision, intent(out) :: array_tmp(n_orb,n_orb,n_orb,n_orb,n_orb,n_orb)
+ PROVIDE ezfio_filename                                                                                                  
+ output=trim(ezfio_filename)//'/work/'//trim(name_file)
+ i_unit_output = getUnitAndOpen(output,'R')
+ read(i_unit_output)array_tmp
+ close(unit=i_unit_output)
+end
+
+subroutine read_fcidump_3_tc(array)
+ implicit none
+ double precision, intent(out) :: array(mo_num, mo_num, mo_num, mo_num, mo_num, mo_num)
+ integer :: i,j,k,l,m,n,i_mo, Reason
+ double precision :: integral 
+ print*,'Reading the THREE-body integrals from a TC FCIDUMP'
+ open (unit=15, file="TCDUMP-nosym", status='old',    &
+              access='sequential', action='read' )
+ read(15,*)i_mo
+ if(i_mo.ne.mo_num)then
+  print*,'Something went wrong in the read_fcidump_3_tc !'
+  print*,'i_mo.ne.mo_num !'
+  print*,i_mo,mo_num
+  stop
+ endif
+ do 
+  read(15,*,IOSTAT=Reason)integral,i, j, m, k, l, n
+  if(Reason > 0)then
+   print*,'Something went wrong in the I/O of read_fcidump_3_tc'
+   stop
+  else if(Reason < 0)then
+   exit   
+  else
+        !                          1 2 3 1 2 3
+        !                         <i j m|k l n>
+        !                         (ik|jl|mn)
+!        integral = integral * 1.d0/3.d0 !!!! For NECI convention 
+          array(i,j,m,k,l,n) =  integral * 3.d0
+  
+   endif
+  enddo
+
+end
diff --git a/src/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f b/src/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f
new file mode 100644
index 00000000..a3f1b6ef
--- /dev/null
+++ b/src/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f
@@ -0,0 +1,193 @@
+subroutine give_integrals_3_body(i,j,m,k,l,n,integral)
+ implicit none
+ double precision, intent(out) :: integral
+ integer, intent(in) :: i,j,m,k,l,n
+ double precision :: weight
+ BEGIN_DOC
+! <ijm|L|kln>
+ END_DOC
+ integer :: ipoint,mm
+ integral = 0.d0
+ do mm = 1, 3
+  do ipoint = 1, n_points_final_grid
+   weight = final_weight_at_r_vector(ipoint)                                                                          
+   integral += weight * mos_in_r_array_transp(ipoint,i) * mos_in_r_array_transp(ipoint,k) * x_W_ij_erf_rk(ipoint,mm,m,n) * x_W_ij_erf_rk(ipoint,mm,j,l) 
+   integral += weight * mos_in_r_array_transp(ipoint,j) * mos_in_r_array_transp(ipoint,l) * x_W_ij_erf_rk(ipoint,mm,m,n) * x_W_ij_erf_rk(ipoint,mm,i,k) 
+   integral += weight * mos_in_r_array_transp(ipoint,m) * mos_in_r_array_transp(ipoint,n) * x_W_ij_erf_rk(ipoint,mm,j,l) * x_W_ij_erf_rk(ipoint,mm,i,k) 
+  enddo
+ enddo
+end
+
+BEGIN_PROVIDER [ double precision, mo_v_ij_erf_rk_cst_mu_naive, ( mo_num, mo_num,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1 )/(2|r - R|) on the MO basis
+!
+! WARNING: not on the BI-ORTHO MOs
+ END_DOC
+ integer :: i,j,k,l,ipoint
+ do ipoint = 1, n_points_final_grid
+  mo_v_ij_erf_rk_cst_mu_naive(:,:,ipoint) = 0.d0
+  do i = 1, mo_num
+   do j = 1, mo_num
+    do k = 1, ao_num
+     do l = 1, ao_num
+      mo_v_ij_erf_rk_cst_mu_naive(j,i,ipoint) += mo_coef(l,j) * 0.5d0 * v_ij_erf_rk_cst_mu(l,k,ipoint) * mo_coef(k,i)
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, mo_v_ij_erf_rk_cst_mu, ( mo_num, mo_num,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/(2|r - R|) on the MO basis
+!
+! WARNING: not on the BI-ORTHO MOs
+ END_DOC
+ integer :: ipoint
+ !$OMP PARALLEL                  &
+ !$OMP DEFAULT (NONE)            &
+ !$OMP PRIVATE (ipoint) & 
+ !$OMP SHARED (n_points_final_grid,v_ij_erf_rk_cst_mu,mo_v_ij_erf_rk_cst_mu)
+ !$OMP DO SCHEDULE (dynamic)
+ do ipoint = 1, n_points_final_grid
+   call ao_to_mo(v_ij_erf_rk_cst_mu(1,1,ipoint),size(v_ij_erf_rk_cst_mu,1),mo_v_ij_erf_rk_cst_mu(1,1,ipoint),size(mo_v_ij_erf_rk_cst_mu,1))
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+ mo_v_ij_erf_rk_cst_mu = mo_v_ij_erf_rk_cst_mu * 0.5d0
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, mo_v_ij_erf_rk_cst_mu_transp, ( n_points_final_grid,mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/(2|r - R|) on the MO basis
+!
+! WARNING: not on the BI-ORTHO MOs
+ END_DOC
+ integer :: ipoint,i,j
+ do i = 1, mo_num
+  do j = 1, mo_num
+   do ipoint = 1, n_points_final_grid
+    mo_v_ij_erf_rk_cst_mu_transp(ipoint,j,i) = mo_v_ij_erf_rk_cst_mu(j,i,ipoint)
+   enddo
+  enddo
+ enddo
+ FREE mo_v_ij_erf_rk_cst_mu
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, mo_x_v_ij_erf_rk_cst_mu_naive, ( mo_num, mo_num,3,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+! int dr  x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1 )/|r - R| on the MO basis
+!
+! WARNING: not on the BI-ORTHO MOs
+ END_DOC
+ integer :: i,j,k,l,ipoint,m
+ do ipoint = 1, n_points_final_grid
+  mo_x_v_ij_erf_rk_cst_mu_naive(:,:,:,ipoint) = 0.d0
+  do i = 1, mo_num
+   do j = 1, mo_num
+    do m = 1, 3
+     do k = 1, ao_num
+      do l = 1, ao_num
+       mo_x_v_ij_erf_rk_cst_mu_naive(j,i,m,ipoint) += mo_coef(l,j) * 0.5d0 * x_v_ij_erf_rk_cst_mu_transp(l,k,m,ipoint) * mo_coef(k,i)
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, mo_x_v_ij_erf_rk_cst_mu, ( mo_num, mo_num,3,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/2|r - R| on the MO basis
+!
+! WARNING: not on the BI-ORTHO MOs
+ END_DOC
+ integer :: ipoint,m
+ !$OMP PARALLEL                  &
+ !$OMP DEFAULT (NONE)            &
+ !$OMP PRIVATE (ipoint,m) & 
+ !$OMP SHARED (n_points_final_grid,x_v_ij_erf_rk_cst_mu_transp,mo_x_v_ij_erf_rk_cst_mu)
+ !$OMP DO SCHEDULE (dynamic)
+ do ipoint = 1, n_points_final_grid
+  do m = 1, 3
+   call ao_to_mo(x_v_ij_erf_rk_cst_mu_transp(1,1,m,ipoint),size(x_v_ij_erf_rk_cst_mu_transp,1),mo_x_v_ij_erf_rk_cst_mu(1,1,m,ipoint),size(mo_x_v_ij_erf_rk_cst_mu,1))
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+ mo_x_v_ij_erf_rk_cst_mu = 0.5d0 * mo_x_v_ij_erf_rk_cst_mu
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, mo_x_v_ij_erf_rk_cst_mu_transp, (n_points_final_grid,3, mo_num, mo_num)]
+ implicit none
+ integer :: i,j,m,ipoint
+ do i = 1, mo_num
+  do j = 1, mo_num
+   do m = 1, 3
+    do ipoint = 1, n_points_final_grid
+     mo_x_v_ij_erf_rk_cst_mu_transp(ipoint,m,j,i) = mo_x_v_ij_erf_rk_cst_mu(j,i,m,ipoint)
+    enddo
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, x_W_ij_erf_rk, ( n_points_final_grid,3,mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! W_mn^X(R) = \int dr phi_m(r) phi_n(r) (1 - erf(mu |r-R|)) (x-X)
+!
+! WARNING: not on the BI-ORTHO MOs
+ END_DOC
+ include 'constants.include.F'
+ integer :: ipoint,m,i,j
+ double precision :: xyz,cst
+ double precision :: wall0, wall1
+
+ cst = 0.5d0 * inv_sq_pi
+ print*,'providing x_W_ij_erf_rk ...'
+ call wall_time(wall0)
+ !$OMP PARALLEL                  &
+ !$OMP DEFAULT (NONE)            &
+ !$OMP PRIVATE (ipoint,m,i,j,xyz) & 
+ !$OMP SHARED (x_W_ij_erf_rk,n_points_final_grid,mo_x_v_ij_erf_rk_cst_mu_transp,mo_v_ij_erf_rk_cst_mu_transp,mo_num,final_grid_points) 
+ !$OMP DO SCHEDULE (dynamic)
+ do i = 1, mo_num
+  do j = 1, mo_num
+   do m = 1, 3
+    do ipoint = 1, n_points_final_grid
+     xyz = final_grid_points(m,ipoint)
+     x_W_ij_erf_rk(ipoint,m,j,i)  =  mo_x_v_ij_erf_rk_cst_mu_transp(ipoint,m,j,i) - xyz * mo_v_ij_erf_rk_cst_mu_transp(ipoint,j,i)
+    enddo
+   enddo
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+ FREE mo_v_ij_erf_rk_cst_mu_transp 
+ FREE mo_x_v_ij_erf_rk_cst_mu_transp
+ call wall_time(wall1)
+ print*,'time to provide x_W_ij_erf_rk = ',wall1 - wall0
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, sqrt_weight_at_r, (n_points_final_grid)]
+ implicit none
+ integer :: ipoint
+ do ipoint = 1, n_points_final_grid
+  sqrt_weight_at_r(ipoint) = dsqrt(final_weight_at_r_vector(ipoint))
+ enddo
+END_PROVIDER 
+
diff --git a/src/tc_bi_ortho/31.tc_bi_ortho.bats b/src/tc_bi_ortho/31.tc_bi_ortho.bats
new file mode 100644
index 00000000..f5b9d8c0
--- /dev/null
+++ b/src/tc_bi_ortho/31.tc_bi_ortho.bats
@@ -0,0 +1,49 @@
+#!/usr/bin/env bats
+
+source $QP_ROOT/tests/bats/common.bats.sh
+source $QP_ROOT/quantum_package.rc
+
+
+function run_Ne() {
+  qp set_file Ne_tc_scf 
+  qp run cisd 
+  qp run tc_bi_ortho | tee Ne_tc_scf.cisd_tc_bi_ortho.out  
+  eref=-128.77020441279302
+  energy="$(grep "eigval_right_tc_bi_orth =" Ne_tc_scf.cisd_tc_bi_ortho.out)"
+  eq $energy $eref 1e-6
+}
+
+
+@test "Ne" {
+ run_Ne 
+}
+
+
+function run_C() {
+  qp set_file C_tc_scf 
+  qp run cisd 
+  qp run tc_bi_ortho | tee C_tc_scf.cisd_tc_bi_ortho.out  
+  eref=-37.757536149952514
+  energy="$(grep "eigval_right_tc_bi_orth =" C_tc_scf.cisd_tc_bi_ortho.out)"
+  eq $energy $eref 1e-6
+}
+
+
+@test "C" {
+ run_C 
+}
+
+function run_O() {
+  qp set_file C_tc_scf 
+  qp run cisd 
+  qp run tc_bi_ortho | tee O_tc_scf.cisd_tc_bi_ortho.out  
+  eref=-74.908518517716161
+  energy="$(grep "eigval_right_tc_bi_orth =" O_tc_scf.cisd_tc_bi_ortho.out)"
+  eq $energy $eref 1e-6
+}
+
+
+@test "O" {
+ run_O 
+}
+
diff --git a/src/tc_bi_ortho/EZFIO.cfg b/src/tc_bi_ortho/EZFIO.cfg
new file mode 100644
index 00000000..a34d2134
--- /dev/null
+++ b/src/tc_bi_ortho/EZFIO.cfg
@@ -0,0 +1,11 @@
+[psi_l_coef_bi_ortho]
+interface: ezfio
+doc: Coefficients for the left wave function
+type: double precision
+size: (determinants.n_det,determinants.n_states)
+
+[psi_r_coef_bi_ortho]
+interface: ezfio
+doc: Coefficients for the right wave function
+type: double precision
+size: (determinants.n_det,determinants.n_states)
diff --git a/src/tc_bi_ortho/NEED b/src/tc_bi_ortho/NEED
new file mode 100644
index 00000000..9a0c20ef
--- /dev/null
+++ b/src/tc_bi_ortho/NEED
@@ -0,0 +1,6 @@
+bi_ort_ints
+bi_ortho_mos
+tc_keywords
+non_hermit_dav
+dav_general_mat
+tc_scf
diff --git a/src/tc_bi_ortho/compute_deltamu_right.irp.f b/src/tc_bi_ortho/compute_deltamu_right.irp.f
new file mode 100644
index 00000000..7ca2c890
--- /dev/null
+++ b/src/tc_bi_ortho/compute_deltamu_right.irp.f
@@ -0,0 +1,53 @@
+program compute_deltamu_right
+
+  implicit none
+
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  read_wf = .True.
+  touch read_wf
+
+  PROVIDE N_int 
+  call delta_right()
+
+end
+
+! ---
+
+subroutine delta_right()
+
+  implicit none
+  integer                       :: k
+  double precision, allocatable :: delta(:,:) 
+
+  print *, j1b_type
+  print *, j1b_pen
+  print *, mu_erf
+
+  allocate( delta(N_det,N_states) )
+  delta = 0.d0
+
+  do k = 1, N_states
+  !do k = 1, 1
+
+    ! get < I_left | H_mu - H | psi_right > 
+    !call get_h_bitc_right(psi_det, psi_r_coef_bi_ortho(:,k), N_det, N_int, delta(:,k))
+    call get_delta_bitc_right(psi_det, psi_r_coef_bi_ortho(:,k), N_det, N_int, delta(:,k))
+
+    ! order as QMCCHEM
+    call dset_order(delta(:,k), psi_bilinear_matrix_order, N_det)
+
+  enddo
+
+!  call ezfio_set_dmc_dress_dmc_delta_h(delta)
+
+  deallocate(delta)
+
+  return
+end subroutine delta_right
+
+! ---
+
diff --git a/src/tc_bi_ortho/dressing_vectors_lr.irp.f b/src/tc_bi_ortho/dressing_vectors_lr.irp.f
new file mode 100644
index 00000000..08913bab
--- /dev/null
+++ b/src/tc_bi_ortho/dressing_vectors_lr.irp.f
@@ -0,0 +1,155 @@
+
+! ---
+
+subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta)
+  
+  BEGIN_DOC
+  !
+  ! delta(I) = < I_left | H_TC - H | Psi_right >
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in)  :: ndet, Nint
+  double precision,  intent(in)  :: psicoef(ndet)
+  integer(bit_kind), intent(in)  :: psidet(Nint,2,ndet)
+  double precision, intent(out)  :: delta(ndet)
+
+  integer                        :: i, j
+  double precision               :: h_mono, h_twoe, h_tot
+  double precision               :: htc_mono, htc_twoe, htc_three, htc_tot
+  double precision               :: delta_mat
+
+  print *, ' get_delta_bitc_right ...'
+
+  i = 1
+  j = 1
+  call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+  call hmat_bi_ortho         (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot)
+
+  delta = 0.d0
+ !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8)   &
+ !$OMP SHARED(delta, ndet, psidet, psicoef, Nint)      &
+ !$OMP PRIVATE(i, j, delta_mat, h_mono, h_twoe, h_tot, &
+ !$OMP        htc_mono, htc_twoe, htc_three, htc_tot)
+  do i = 1, ndet
+    do j = 1, ndet
+
+      ! < I | Htilde | J >
+      call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+      ! < I | H | J >
+      call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot)
+
+      delta_mat = htc_tot - h_tot
+
+      delta(i) = delta(i) + psicoef(j) * delta_mat
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+end subroutine get_delta_bitc_right
+
+! ---
+
+subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta)
+  
+  BEGIN_DOC
+  !
+  ! delta(I) = < I_left | H_TC | Psi_right >
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in)  :: ndet, Nint
+  double precision,  intent(in)  :: psicoef(ndet)
+  integer(bit_kind), intent(in)  :: psidet(Nint,2,ndet)
+  double precision, intent(out)  :: delta(ndet)
+
+  integer                        :: i, j
+  double precision               :: htc_mono, htc_twoe, htc_three, htc_tot
+
+  print *, ' get_htc_bitc_right ...'
+
+  i = 1
+  j = 1
+  call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+
+  delta = 0.d0
+ !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8)   &
+ !$OMP SHARED(delta, ndet, psidet, psicoef, Nint)      &
+ !$OMP PRIVATE(i, j, htc_mono, htc_twoe, htc_three, htc_tot)
+  do i = 1, ndet
+    do j = 1, ndet
+
+      ! < I | Htilde | J >
+      call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+
+      delta(i) = delta(i) + psicoef(j) * htc_tot
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+end subroutine get_htc_bitc_right
+
+! ---
+
+subroutine get_h_bitc_right(psidet, psicoef, ndet, Nint, delta)
+  
+  BEGIN_DOC
+  !
+  ! delta(I) = < I_left | H | Psi_right >
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in)  :: ndet, Nint
+  double precision,  intent(in)  :: psicoef(ndet)
+  integer(bit_kind), intent(in)  :: psidet(Nint,2,ndet)
+  double precision, intent(out)  :: delta(ndet)
+
+  integer                        :: i, j
+  double precision               :: h_mono, h_twoe, h_tot
+
+  print *, ' get_h_bitc_right ...'
+
+  i = 1
+  j = 1
+  call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot)
+
+  !double precision :: norm
+  !norm = 0.d0
+  !do i = 1, ndet
+  !  norm += psicoef(i) * psicoef(i)
+  !enddo
+  !print*, ' norm = ', norm
+ 
+  call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot)
+
+  delta = 0.d0
+! !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) &
+! !$OMP SHARED(delta, ndet, psidet, psicoef, Nint)    &
+! !$OMP PRIVATE(i, j, h_mono, h_twoe, h_tot)
+  do i = 1, ndet
+    do j = 1, ndet
+
+      ! < I | H | J >
+      call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot)
+
+      delta(i) = delta(i) + psicoef(j) * h_tot
+    enddo
+  enddo
+! !$OMP END PARALLEL DO
+
+end subroutine get_h_bitc_right
+
+! ---
+
diff --git a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f
new file mode 100644
index 00000000..ec66a8b5
--- /dev/null
+++ b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f
@@ -0,0 +1,104 @@
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+ BEGIN_PROVIDER [ double precision, e_tilde_00]
+ implicit none
+ double precision :: hmono,htwoe,hthree,htot
+ call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot)
+ e_tilde_00 = htot
+ END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, e_pt2_tc_bi_orth]
+&BEGIN_PROVIDER [ double precision, e_pt2_tc_bi_orth_single]
+&BEGIN_PROVIDER [ double precision, e_pt2_tc_bi_orth_double]
+ implicit none 
+ integer :: i,degree
+ double precision :: hmono,htwoe,hthree,htilde_ij,coef_pt1,e_i0,delta_e
+ e_pt2_tc_bi_orth = 0.d0
+ e_pt2_tc_bi_orth_single = 0.d0
+ e_pt2_tc_bi_orth_double = 0.d0
+ do i = 1, N_det
+  call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+  if(degree == 1 .or. degree == 2)then
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0)
+   delta_e = e_tilde_00 - e_i0
+   coef_pt1 = htilde_ij / delta_e
+   call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
+   e_pt2_tc_bi_orth += coef_pt1 * htilde_ij
+   if(degree == 1)then
+    e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij
+   else 
+!    print*,'coef_pt1, e_pt2',coef_pt1,coef_pt1 * htilde_ij
+    e_pt2_tc_bi_orth_double += coef_pt1 * htilde_ij
+   endif
+  endif
+ enddo
+ END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, e_tilde_bi_orth_00]
+ implicit none
+ double precision :: hmono,htwoe,hthree,htilde_ij
+ call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00)
+ e_tilde_bi_orth_00 += nuclear_repulsion
+ END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, e_corr_bi_orth ]
+&BEGIN_PROVIDER [ double precision, e_corr_bi_orth_proj ]
+&BEGIN_PROVIDER [ double precision, e_corr_single_bi_orth ]
+&BEGIN_PROVIDER [ double precision, e_corr_double_bi_orth ]
+ implicit none 
+ integer :: i,degree
+ double precision :: hmono,htwoe,hthree,htilde_ij
+ 
+ e_corr_bi_orth = 0.d0
+ e_corr_single_bi_orth = 0.d0
+ e_corr_double_bi_orth = 0.d0
+ do i = 1, N_det
+  call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+  call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
+  if(degree == 1)then
+   e_corr_single_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)
+  else if(degree == 2)then
+   e_corr_double_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)
+!   print*,'coef_wf , e_cor',reigvec_tc_bi_orth(i,1)/reigvec_tc_bi_orth(1,1), reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)
+  endif
+ enddo
+ e_corr_bi_orth_proj = e_corr_single_bi_orth + e_corr_double_bi_orth
+ e_corr_bi_orth = eigval_right_tc_bi_orth(1) - e_tilde_bi_orth_00
+ END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, e_tc_left_right ]
+ implicit none
+ integer :: i,j
+ double precision :: hmono,htwoe,hthree,htilde_ij,accu
+ e_tc_left_right = 0.d0
+ accu = 0.d0
+ do i = 1, N_det
+  accu += reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(i,1)
+  do j = 1, N_det
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
+   e_tc_left_right += htilde_ij * reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(j,1)
+  enddo
+ enddo
+ e_tc_left_right *= 1.d0/accu 
+ e_tc_left_right += nuclear_repulsion
+
+ END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, coef_pt1_bi_ortho, (N_det)]
+ implicit none
+ integer :: i,degree
+ double precision :: hmono,htwoe,hthree,htilde_ij,coef_pt1,e_i0,delta_e
+ do i = 1, N_det
+  call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+  if(degree==0)then
+   coef_pt1_bi_ortho(i) = 1.d0
+  else
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0)
+   delta_e = e_tilde_00 - e_i0
+   coef_pt1 = htilde_ij / delta_e
+   coef_pt1_bi_ortho(i)= coef_pt1
+  endif
+ enddo
+END_PROVIDER
diff --git a/src/tc_bi_ortho/h_biortho.irp.f b/src/tc_bi_ortho/h_biortho.irp.f
new file mode 100644
index 00000000..492e1282
--- /dev/null
+++ b/src/tc_bi_ortho/h_biortho.irp.f
@@ -0,0 +1,243 @@
+
+! --
+
+subroutine hmat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, htot)
+
+  BEGIN_DOC
+  !
+  ! < key_j | H | key_i > where | key_j > is developed on the LEFT basis and | key_i > is developed on the RIGHT basis
+  ! 
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, htot
+
+  integer                       :: degree 
+
+  hmono = 0.d0
+  htwoe = 0.d0
+  htot  = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree .gt. 2) return
+
+  if(degree == 0) then
+
+    call diag_hmat_bi_ortho(Nint, key_i, hmono, htwoe)
+    htot = htot + nuclear_repulsion
+
+  else if (degree == 1) then
+
+    call single_hmat_bi_ortho(Nint, key_j, key_i, hmono, htwoe)
+
+  else if(degree == 2) then
+
+    call double_hmat_bi_ortho(Nint, key_j, key_i, hmono, htwoe)
+
+  endif
+
+  htot += hmono + htwoe
+
+  return
+end subroutine hmat_bi_ortho
+
+! ---
+
+subroutine diag_hmat_bi_ortho(Nint, key_i, hmono, htwoe)
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: Ne(2), i, j, ii, jj, ispin, jspin
+
+  hmono = 0.d0
+  htwoe = 0.d0
+
+  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+
+  do ispin = 1, 2 
+    do i = 1, Ne(ispin)
+      ii = occ(i,ispin) 
+      hmono += mo_bi_ortho_one_e(ii,ii)
+    enddo
+  enddo
+
+  ! alpha/beta two-body
+  ispin = 1
+  jspin = 2 
+  do i = 1, Ne(ispin) ! electron 1
+    ii = occ(i,ispin) 
+    do j = 1, Ne(jspin) ! electron 2 
+      jj = occ(j,jspin) 
+      htwoe += mo_bi_ortho_coul_e(jj,ii,jj,ii) 
+    enddo
+  enddo
+ 
+  ! alpha/alpha two-body
+  do i = 1, Ne(ispin)
+    ii = occ(i,ispin) 
+    do j = i+1, Ne(ispin)
+      jj = occ(j,ispin) 
+      htwoe += mo_bi_ortho_coul_e(ii,jj,ii,jj) - mo_bi_ortho_coul_e(ii,jj,jj,ii)
+    enddo
+  enddo
+ 
+  ! beta/beta two-body
+  do i = 1, Ne(jspin)
+    ii = occ(i,jspin) 
+    do j = i+1, Ne(jspin)
+      jj = occ(j,jspin) 
+      htwoe += mo_bi_ortho_coul_e(ii,jj,ii,jj) - mo_bi_ortho_coul_e(ii,jj,jj,ii)
+    enddo
+  enddo
+
+  return
+end subroutine diag_hmat_bi_ortho
+
+! ---
+
+subroutine single_hmat_bi_ortho(Nint, key_j, key_i, hmono, htwoe)
+
+  BEGIN_DOC
+  !
+  ! < key_j | H | key_i > for single excitation 
+  ! 
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe
+
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, ispin, jspin
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  integer                       :: other_spin(2)
+  double precision              :: phase
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  hmono = 0.d0
+  htwoe = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree .ne. 1) then
+    return
+  endif
+
+  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 1, h1, p1, h2, p2, s1, s2)
+
+  hmono = mo_bi_ortho_one_e(p1,h1) * phase
+
+  ! alpha/beta two-body 
+  ispin = other_spin(s1)
+  if(s1 == 1) then
+
+    ! single alpha 
+    do i = 1, Ne(ispin) ! electron 2 
+      ii = occ(i,ispin) 
+      htwoe += mo_bi_ortho_coul_e(ii,p1,ii,h1) 
+    enddo
+
+  else
+
+    ! single beta 
+    do i = 1, Ne(ispin) ! electron 1 
+      ii = occ(i,ispin) 
+      htwoe += mo_bi_ortho_coul_e(p1,ii,h1,ii) 
+    enddo
+
+  endif
+
+  ! same spin two-body 
+  do i = 1, Ne(s1)
+    ii = occ(i,s1) 
+    ! ( h1 p1 |ii ii ) - ( h1 ii | p1 ii )
+    htwoe += mo_bi_ortho_coul_e(ii,p1,ii,h1) - mo_bi_ortho_coul_e(p1,ii,ii,h1) 
+  enddo
+   
+  htwoe *= phase
+
+end subroutine single_hmat_bi_ortho
+
+! ---
+
+subroutine double_hmat_bi_ortho(Nint, key_j, key_i, hmono, htwoe)
+
+  BEGIN_DOC
+  !
+  ! < key_j | H | key_i> for double excitation
+  ! 
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in) :: Nint 
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe
+
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, ispin, jspin
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  integer                       :: other_spin(2)
+  double precision              :: phase
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+
+  hmono = 0.d0
+  htwoe = 0.d0
+
+  if(degree .ne. 2) then
+    return
+  endif
+
+  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  if(s1 .ne. s2) then
+
+    htwoe = mo_bi_ortho_coul_e(p2,p1,h2,h1) 
+
+  else
+
+    ! same spin two-body 
+
+    !                    direct terms                 exchange terms 
+    htwoe = mo_bi_ortho_coul_e(p2,p1,h2,h1) - mo_bi_ortho_coul_e(p1,p2,h2,h1) 
+
+  endif
+
+  htwoe *= phase
+
+end subroutine double_hmat_bi_ortho
+
+! ---
+
+
diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f
new file mode 100644
index 00000000..b7129d36
--- /dev/null
+++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f
@@ -0,0 +1,92 @@
+subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze)
+
+  use bitmasks
+
+  BEGIN_DOC
+    ! Application of H_TC on a vector 
+    !
+    ! v(i,istate) = \sum_j u(j,istate) H_TC(i,j), with: 
+    !   H_TC(i,j) = < Di | H_TC | Dj > 
+    !
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)             :: N_st, sze
+  double precision, intent(in)    :: u(sze,N_st)
+  double precision, intent(inout) :: v(sze,N_st)
+
+  integer                         :: i, j, istate
+  double precision                :: htot
+
+  PROVIDE N_int 
+  PROVIDE psi_det
+
+
+ ! TODO : transform it with the bi-linear representation in terms of alpha-beta. 
+
+  i = 1
+  j = 1
+  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
+
+  v = 0.d0
+ !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) &
+ !$OMP SHARED(N_st, sze, N_int, psi_det, u, v)       &
+ !$OMP PRIVATE(istate, i, j, htot)
+  do istate = 1, N_st
+    do i = 1, sze
+      do j = 1, sze
+        call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
+        v(i,istate) = v(i,istate) + htot * u(j,istate)
+      enddo
+    enddo 
+  enddo
+ !$OMP END PARALLEL DO
+
+end 
+
+subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze)
+
+  use bitmasks
+
+  BEGIN_DOC
+    ! Application of (H_TC)^dagger on a vector 
+    !
+    ! v(i,istate) = \sum_j u(j,istate) H_TC(j,i), with: 
+    !   H_TC(i,j) = < Di | H_TC | Dj > 
+    !
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)             :: N_st, sze
+  double precision, intent(in)    :: u(sze,N_st)
+  double precision, intent(inout) :: v(sze,N_st)
+
+  integer                         :: i, j, istate
+  double precision                :: htot
+
+  PROVIDE N_int
+  PROVIDE psi_det
+
+  i = 1
+  j = 1
+  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
+
+  v = 0.d0
+
+ !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) &
+ !$OMP SHARED(N_st, sze, N_int, psi_det, u, v)       &
+ !$OMP PRIVATE(istate, i, j, htot)
+  do istate = 1, N_st
+    do i = 1, sze
+      do j = 1, sze
+        call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot)
+        v(i,istate) = v(i,istate) + htot * u(j,istate)
+      enddo
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+end 
+
diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f
new file mode 100644
index 00000000..81f5fb2c
--- /dev/null
+++ b/src/tc_bi_ortho/normal_ordered.irp.f
@@ -0,0 +1,319 @@
+BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC 
+  ! Normal ordering of the three body interaction on the HF density
+  END_DOC 
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+
+  integer :: i,h1,p1,h2,p2
+  integer :: hh1,hh2,pp1,pp2
+  integer                        :: Ne(2)
+  integer,           allocatable :: occ(:,:)
+  integer(bit_kind), allocatable :: key_i_core(:,:)
+  double precision :: hthree_aba,hthree_aaa,hthree_aab
+  double precision :: wall0,wall1
+ 
+  PROVIDE N_int
+
+  allocate( occ(N_int*bit_kind_size,2) )
+  allocate( key_i_core(N_int,2) )
+
+  if(core_tc_op) then
+    do i = 1, N_int
+      key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1))
+      key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2))
+    enddo
+    call bitstring_to_list_ab(key_i_core,occ,Ne,N_int)
+  else
+    call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+  endif
+
+  normal_two_body_bi_orth = 0.d0
+  print*,'Providing normal_two_body_bi_orth ...'
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & 
+ !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth)
+ !$OMP DO SCHEDULE (static) 
+  do hh1 = 1, n_act_orb
+    h1 = list_act(hh1) 
+    do pp1 = 1, n_act_orb
+      p1 = list_act(pp1)
+      do hh2 = 1, n_act_orb
+        h2 = list_act(hh2) 
+        do pp2 = 1, n_act_orb
+          p2 = list_act(pp2)
+          ! opposite spin double excitations 
+          call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba)
+          ! same spin double excitations with opposite spin contributions 
+          if(h1<h2.and.p1.gt.p2)then
+           call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2
+           ! same spin double excitations with same spin contributions 
+           if(Ne(2).ge.3)then
+             call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2
+           else
+             hthree_aaa = 0.d0
+           endif
+          else
+           call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab)
+           if(Ne(2).ge.3)then
+             call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa)
+           else
+             hthree_aaa = 0.d0
+           endif
+          endif
+          normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print*,'Wall time for normal_two_body_bi_orth ',wall1-wall0
+
+  deallocate( occ )
+  deallocate( key_i_core )
+
+END_PROVIDER 
+
+
+
+subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree)
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer, intent(in)           :: Nint, h1, h2, p1, p2
+  integer, intent(in)           :: Ne(2), occ(Nint*bit_kind_size,2)
+  double precision, intent(out) :: hthree
+  integer                       :: ii, i
+  double precision              :: int_direct, int_exc_12, int_exc_13, integral
+
+  !!!! double alpha/beta
+  hthree = 0.d0
+  do ii = 1, Ne(2) ! purely closed shell part 
+    i = occ(ii,2)
+    call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral)
+    int_direct = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral)
+    int_exc_13 = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p2, i,p1,i,h2,h1,integral)
+    int_exc_12 = -1.d0 * integral
+    hthree += 2.d0 * int_direct - 1.d0 * ( int_exc_13 + int_exc_12)
+  enddo
+  do ii = Ne(2) + 1, Ne(1) ! purely open-shell part 
+   i = occ(ii,1)
+    call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral)
+    int_direct = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral)
+    int_exc_13 = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p2, i,p1,i,h2,h1,integral)
+    int_exc_12 = -1.d0 * integral
+    hthree += 1.d0 * int_direct - 0.5d0* ( int_exc_13 + int_exc_12)
+  enddo
+
+end subroutine give_aba_contraction
+
+
+
+BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations 
+  END_DOC
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: h1, p1, h2, p2, i
+  integer                        :: hh1, hh2, pp1, pp2
+  integer                        :: Ne(2)
+  integer,           allocatable :: occ(:,:)
+  integer(bit_kind), allocatable :: key_i_core(:,:)
+  double precision               :: hthree
+
+  PROVIDE N_int
+
+  allocate( key_i_core(N_int,2) )
+  allocate( occ(N_int*bit_kind_size,2) )
+
+  if(core_tc_op)then
+   do i = 1, N_int
+    key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1))
+    key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2))
+   enddo
+   call bitstring_to_list_ab(key_i_core,occ,Ne,N_int)
+  else
+   call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+  endif
+  normal_two_body_bi_orth_ab = 0.d0
+  do hh1 = 1, n_act_orb
+   h1 = list_act(hh1) 
+   do pp1 = 1, n_act_orb
+    p1 = list_act(pp1)
+    do hh2 = 1, n_act_orb
+     h2 = list_act(hh2) 
+     do pp2 = 1, n_act_orb
+      p2 = list_act(pp2)
+      call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree)
+      normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree    
+     enddo
+    enddo
+   enddo
+  enddo
+
+  deallocate( key_i_core )
+  deallocate( occ )
+
+END_PROVIDER 
+
+
+
+BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)]
+
+  BEGIN_DOC
+  ! Normal ordered two-body sector of the three-body terms for same spin double excitations 
+  END_DOC
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: i,ii,j,h1,p1,h2,p2
+  integer                        :: hh1,hh2,pp1,pp2
+  integer                        :: Ne(2)
+  integer,           allocatable :: occ(:,:)
+  integer(bit_kind), allocatable :: key_i_core(:,:)
+  double precision               :: hthree_aab, hthree_aaa
+
+  PROVIDE N_int
+
+  allocate( key_i_core(N_int,2) )
+  allocate( occ(N_int*bit_kind_size,2) )
+
+  if(core_tc_op)then
+    do i = 1, N_int
+      key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1))
+      key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2))
+    enddo
+    call bitstring_to_list_ab(key_i_core, occ, Ne, N_int)
+  else
+    call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int)
+  endif
+
+  normal_two_body_bi_orth_aa_bb = 0.d0
+  do hh1 = 1, n_act_orb
+    h1 = list_act(hh1) 
+    do pp1 = 1 , n_act_orb
+      p1 = list_act(pp1)
+      do hh2 = 1, n_act_orb
+        h2 = list_act(hh2) 
+        do pp2 = 1 , n_act_orb
+          p2 = list_act(pp2)
+          if(h1<h2.and.p1.gt.p2)then
+           call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2
+           if(Ne(2).ge.3)then
+             call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2
+           else
+             hthree_aaa = 0.d0
+           endif
+          else
+           call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab)
+           if(Ne(2).ge.3)then
+             call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa)
+           else
+             hthree_aaa = 0.d0
+           endif
+          endif
+          normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate( key_i_core )
+  deallocate( occ )
+
+END_PROVIDER 
+
+
+
+subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree)
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer, intent(in)           :: Nint, h1, h2, p1, p2
+  integer, intent(in)           :: Ne(2), occ(Nint*bit_kind_size,2)
+  double precision, intent(out) :: hthree
+  integer                       :: ii,i
+  double precision              :: int_direct,int_exc_12,int_exc_13,int_exc_23
+  double precision              :: integral,int_exc_l,int_exc_ll
+
+  hthree = 0.d0
+  do ii = 1, Ne(2) ! purely closed shell part 
+    i = occ(ii,2)
+    call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral)
+    int_direct = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral)
+    int_exc_l = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral)
+    int_exc_ll= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral)
+    int_exc_12= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral)
+    int_exc_13= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral)
+    int_exc_23= -1.d0 * integral
+
+    hthree +=  1.d0 * int_direct + int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23  )
+  enddo
+  do ii = Ne(2)+1,Ne(1) ! purely open-shell part 
+    i = occ(ii,1)
+    call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral)
+    int_direct = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral)
+    int_exc_l = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral)
+    int_exc_ll= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral)
+    int_exc_12= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral)
+    int_exc_13= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral)
+    int_exc_23= -1.d0 * integral
+
+    hthree +=  1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23  ))
+  enddo
+
+end subroutine give_aaa_contraction
+
+
+
+subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree)
+  implicit none
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+  integer, intent(in)           :: Nint, h1, h2, p1, p2
+  integer, intent(in)           :: Ne(2), occ(Nint*bit_kind_size,2)
+  double precision, intent(out) :: hthree
+  integer                       :: ii, i
+  double precision              :: int_direct, int_exc_12, int_exc_13, int_exc_23
+  double precision              :: integral, int_exc_l, int_exc_ll
+
+  hthree = 0.d0
+  do ii = 1, Ne(2) ! purely closed shell part 
+    i = occ(ii,2)
+    call give_integrals_3_body_bi_ort(p2,p1,i,h2,h1,i,integral)
+    int_direct = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,p2,i,h2,h1,i,integral)
+    int_exc_23= -1.d0 * integral
+    hthree  +=  1.d0 * int_direct - int_exc_23
+  enddo
+
+end subroutine give_aab_contraction
diff --git a/src/tc_bi_ortho/print_he_tc_energy.irp.f b/src/tc_bi_ortho/print_he_tc_energy.irp.f
new file mode 100644
index 00000000..84d34bcb
--- /dev/null
+++ b/src/tc_bi_ortho/print_he_tc_energy.irp.f
@@ -0,0 +1,142 @@
+
+! ---
+
+program print_he_tc_energy
+
+  implicit none
+
+  call print_overlap()
+
+  call print_energy1()
+
+end 
+
+! ---
+
+subroutine print_overlap()
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: S_ij
+
+  print *, ' ao_overlap:'
+  do i = 1, ao_num
+    do j = 1, ao_num
+      print *, j, i, ao_overlap(j,i)
+    enddo
+  enddo
+
+  print *, ' mo_overlap:'
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      S_ij = 0.d0
+      do k = 1, ao_num
+        do l = 1, ao_num
+          S_ij += mo_l_coef(k,i) * ao_overlap(k,l) * mo_r_coef(l,j)
+        enddo
+      enddo
+      
+      print *, i, j, S_ij
+    enddo
+  enddo
+
+end subroutine print_overlap
+
+! ---
+
+subroutine print_energy1()
+
+  implicit none
+  integer                    :: i, j, k, l
+  double precision           :: e, n, e_tmp, n_tmp, e_ns
+  double precision, external :: ao_two_e_integral
+
+  e = 0.d0
+  n = 0.d0
+
+  ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
+
+  ! < phi_1 phi_1 | h1 | phi_1 phi_1 > 
+
+  e_tmp = 0.d0
+  n_tmp = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      e_tmp += mo_l_coef(i,1) * ao_one_e_integrals(i,j) * mo_r_coef(j,1)
+      n_tmp += mo_l_coef(i,1) * ao_overlap(i,j)         * mo_r_coef(j,1)
+    enddo
+  enddo
+
+  e += e_tmp * n_tmp
+
+  ! ---
+
+  ! < phi_1 phi_1 | h2 | phi_1 phi_1 > 
+
+  e_tmp = 0.d0
+  n_tmp = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      n_tmp += mo_l_coef(i,1) * ao_overlap(i,j)         * mo_r_coef(j,1)
+      e_tmp += mo_l_coef(i,1) * ao_one_e_integrals(i,j) * mo_r_coef(j,1)
+    enddo
+  enddo
+
+  e += e_tmp * n_tmp
+
+  ! ---
+
+  ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
+
+  ! ---
+
+  e_ns = 0.d0
+
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          ! ao_two_e_tc_tot(i,j,k,l) = <k i| V^TC(r_12) |l j>
+          e += mo_l_coef(i,1) * mo_l_coef(k,1) * ao_two_e_tc_tot(i,j,k,l) * mo_r_coef(j,1) * mo_r_coef(l,1)
+
+          e_ns += mo_l_coef(i,1) * mo_l_coef(k,1) * ao_non_hermit_term_chemist(i,j,k,l) * mo_r_coef(j,1) * mo_r_coef(l,1)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
+
+  ! ---
+
+  ! < phi_1 phi_1 | phi_1 phi_1 >
+  e_tmp = 0.d0
+  n_tmp = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      e_tmp += mo_l_coef(i,1) * ao_overlap(i,j) * mo_r_coef(j,1)
+      n_tmp += mo_l_coef(i,1) * ao_overlap(i,j) * mo_r_coef(j,1)
+    enddo
+  enddo
+
+  n += e_tmp * n_tmp
+
+  ! ---
+
+  ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
+
+  e    = e    / n 
+  e_ns = e_ns / n
+
+  print *, ' tc      energy = ', e
+  print *, ' non-sym energy = ', e_ns
+
+end subroutine print_energy1
+
+! ---
+
+
diff --git a/src/tc_bi_ortho/print_tc_spin_dens.irp.f b/src/tc_bi_ortho/print_tc_spin_dens.irp.f
new file mode 100644
index 00000000..8308140d
--- /dev/null
+++ b/src/tc_bi_ortho/print_tc_spin_dens.irp.f
@@ -0,0 +1,16 @@
+program test_spin_dens
+  implicit none
+  BEGIN_DOC
+! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. 
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+ call tc_print_mulliken_sd
+! call test
+
+end
diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f
new file mode 100644
index 00000000..58a733a7
--- /dev/null
+++ b/src/tc_bi_ortho/print_tc_wf.irp.f
@@ -0,0 +1,104 @@
+program print_tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+!  if(three_body_h_tc)then
+!   call provide_all_three_ints_bi_ortho
+!  endif
+!  call routine
+ call write_l_r_wf
+end
+
+subroutine write_l_r_wf
+ implicit none
+ character*(128) :: output
+ integer :: i_unit_output,getUnitAndOpen                                                                                 
+ output=trim(ezfio_filename)//'.tc_wf'
+ i_unit_output = getUnitAndOpen(output,'w')
+ integer :: i
+ print*,'Writing the left-right wf'
+ do i = 1, N_det
+  write(i_unit_output,*)i,psi_l_coef_sorted_bi_ortho_left(i),psi_r_coef_sorted_bi_ortho_right(i)
+ enddo
+
+
+end
+
+subroutine routine
+ implicit none
+ integer :: i,degree
+ integer          :: exc(0:2,2,2),h1,p1,s1,h2,p2,s2
+ double precision :: hmono,htwoe,hthree,htilde_ij,coef_pt1,e_i0,delta_e,e_pt2
+ double precision :: contrib_pt,e_corr,coef,contrib,phase
+ double precision :: accu_positive,accu_positive_pt, accu_positive_core,accu_positive_core_pt
+ e_pt2 = 0.d0
+ accu_positive = 0.D0
+ accu_positive_pt = 0.D0
+ accu_positive_core = 0.d0
+ accu_positive_core_pt = 0.d0
+ 
+ do i = 1, N_det
+  call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+   if(degree == 1 .or. degree == 2)then
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0)
+    delta_e = e_tilde_00 - e_i0
+    coef_pt1 = htilde_ij / delta_e
+ 
+    call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
+    contrib_pt = coef_pt1 * htilde_ij
+    e_pt2 += contrib_pt
+ 
+    coef = psi_r_coef_bi_ortho(i,1)/psi_r_coef_bi_ortho(1,1)
+    contrib = coef * htilde_ij
+    e_corr += contrib
+    call get_excitation(HF_bitmask,psi_det(1,1,i),exc,degree,phase,N_int)
+    call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
+    print*,'*********'
+    if(degree==1)then
+     print*,'s1',s1
+     print*,'h1,p1 = ',h1,p1
+    else if(degree ==2)then
+     print*,'s1',s1
+     print*,'h1,p1 = ',h1,p1
+     print*,'s2',s2
+     print*,'h2,p2 = ',h2,p2
+    endif
+    print*,'coef_pt1 = ',coef_pt1
+    print*,'coef     = ',coef
+    print*,'contrib_pt ',contrib_pt
+    print*,'contrib  = ',contrib
+    if(contrib.gt.0.d0)then
+     accu_positive    += contrib
+     if(h1==1.or.h2==1)then
+      accu_positive_core += contrib
+     endif
+     if(dabs(contrib).gt.1.d-5)then
+      print*,'Found a positive contribution to correlation energy !!'
+     endif
+    endif
+    if(contrib_pt.gt.0.d0)then
+     accu_positive_pt += contrib_pt
+     if(h2==1.or.h1==1)then
+      accu_positive_core_pt += contrib_pt
+     endif
+    endif
+   endif
+ enddo
+ print*,''
+ print*,''
+ print*,'Total correlation energy            = ',e_corr
+ print*,'Total correlation energy PT         = ',e_pt2
+ print*,'Positive contribution to ecorr      = ',accu_positive
+ print*,'Positive contribution to ecorr PT   = ',accu_positive_pt
+ print*,'Pure core contribution              = ',accu_positive_core
+ print*,'Pure core contribution PT           = ',accu_positive_core_pt
+end
diff --git a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f
new file mode 100644
index 00000000..5dad91ca
--- /dev/null
+++ b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f
@@ -0,0 +1,151 @@
+use bitmasks
+
+BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_tc, (psi_det_size) ]                                                                
+  implicit none
+  BEGIN_DOC
+  ! Contribution of determinants to the state-averaged density.
+  END_DOC
+  integer                        :: i,j,k
+  double precision               :: f
+    
+  psi_average_norm_contrib_tc(:) = 0.d0
+  do k=1,N_states
+    do i=1,N_det
+      psi_average_norm_contrib_tc(i) = & 
+          dabs(psi_l_coef_bi_ortho(i,k)*psi_r_coef_bi_ortho(i,k))*state_average_weight(k)
+    enddo
+  enddo
+  f = 1.d0/sum(psi_average_norm_contrib_tc(1:N_det))
+  do i=1,N_det
+    psi_average_norm_contrib_tc(i) = psi_average_norm_contrib_tc(i)*f
+  enddo
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc, (psi_det_size,N_states) ]
+&BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_sorted_tc, (psi_det_size) ]
+&BEGIN_PROVIDER [ integer, psi_det_sorted_tc_order, (psi_det_size) ]
+&BEGIN_PROVIDER [double precision, psi_r_coef_sorted_bi_ortho, (psi_det_size, N_states)]
+&BEGIN_PROVIDER [double precision, psi_l_coef_sorted_bi_ortho, (psi_det_size, N_states)]
+   implicit none
+   BEGIN_DOC
+   ! Wave function sorted by determinants contribution to the norm (state-averaged)
+   !
+   ! psi_det_sorted_tc_order(i) -> k : index in psi_det
+   !
+   ! psi_r_coef_sorted_bi_ortho : right coefficients corresponding to psi_det_sorted_tc
+   !
+   ! psi_l_coef_sorted_bi_ortho : left  coefficients corresponding to psi_det_sorted_tc
+   END_DOC
+   integer                        :: i,j,k
+   integer, allocatable           :: iorder(:)
+   allocate ( iorder(N_det) )
+   do i=1,N_det
+     iorder(i) = i
+     psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_tc(i)
+   enddo
+   call dsort(psi_average_norm_contrib_sorted_tc,iorder,N_det)
+   do i=1,N_det
+     psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_sorted_tc(i)
+     do j=1,N_int
+       psi_det_sorted_tc(j,1,i) = psi_det(j,1,iorder(i))
+       psi_det_sorted_tc(j,2,i) = psi_det(j,2,iorder(i))
+     enddo
+     psi_det_sorted_tc_order(iorder(i)) = i
+   enddo
+   double precision :: accu
+   do k=1,N_states
+    accu = 0.d0
+    do i=1,N_det
+     psi_coef_sorted_tc(i,k) = dsqrt(psi_average_norm_contrib_sorted_tc(i))
+     accu += psi_coef_sorted_tc(i,k)**2
+    enddo
+    accu = 1.d0/dsqrt(accu)
+    do i=1,N_det
+     psi_coef_sorted_tc(i,k) *= accu
+    enddo
+   enddo
+
+   psi_det_sorted_tc(:,:,N_det+1:psi_det_size) = 0_bit_kind
+   psi_coef_sorted_tc(N_det+1:psi_det_size,:) = 0.d0
+   psi_average_norm_contrib_sorted_tc(N_det+1:psi_det_size) = 0.d0
+   psi_det_sorted_tc_order(N_det+1:psi_det_size) = 0
+
+   psi_r_coef_sorted_bi_ortho = 0.d0
+   psi_l_coef_sorted_bi_ortho = 0.d0
+   do i = 1, N_det
+    psi_r_coef_sorted_bi_ortho(i,1:N_states) = psi_r_coef_bi_ortho(iorder(i),1:N_states)
+    psi_l_coef_sorted_bi_ortho(i,1:N_states) = psi_l_coef_bi_ortho(iorder(i),1:N_states)
+   enddo
+   deallocate(iorder)
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc_bit, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc_bit, (psi_det_size,N_states) ]
+   implicit none
+   BEGIN_DOC
+   ! Determinants on which we apply $\langle i|H|psi \rangle$ for perturbation.
+   ! They are sorted by determinants interpreted as integers. Useful
+   ! to accelerate the search of a random determinant in the wave
+   ! function.
+   END_DOC
+
+   call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, size(psi_coef,1),       &
+       psi_det_sorted_tc_bit, psi_coef_sorted_tc_bit, N_states)
+
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc_right, (N_int,2,N_det) ]
+&BEGIN_PROVIDER [double precision, psi_r_coef_sorted_bi_ortho_right, (N_det)]
+ implicit none
+ BEGIN_DOC
+ ! psi_det_sorted_tc_right : Slater determinants sorted by decreasing value of |right- coefficients|
+ ! 
+ ! psi_r_coef_sorted_bi_ortho_right : right wave function according to psi_det_sorted_tc_right
+ END_DOC
+ integer, allocatable           :: iorder(:)
+ double precision, allocatable :: coef(:)
+ integer :: i,j
+ allocate ( iorder(N_det) , coef(N_det))
+ do i=1,N_det
+   coef(i) = -dabs(psi_r_coef_bi_ortho(i,1)/psi_r_coef_bi_ortho(1,1))
+   iorder(i) = i
+ enddo
+ call dsort(coef,iorder,N_det)
+ do i=1,N_det
+   do j=1,N_int
+     psi_det_sorted_tc_right(j,1,i) = psi_det(j,1,iorder(i))
+     psi_det_sorted_tc_right(j,2,i) = psi_det(j,2,iorder(i))
+   enddo
+  psi_r_coef_sorted_bi_ortho_right(i) = psi_r_coef_bi_ortho(iorder(i),1)/psi_r_coef_bi_ortho(iorder(1),1)
+ enddo
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc_left, (N_int,2,N_det) ]
+&BEGIN_PROVIDER [double precision, psi_l_coef_sorted_bi_ortho_left, (N_det)]
+ implicit none
+ BEGIN_DOC
+ ! psi_det_sorted_tc_left : Slater determinants sorted by decreasing value of |LEFTt- coefficients|
+ ! 
+ ! psi_r_coef_sorted_bi_ortho_left : LEFT wave function according to psi_det_sorted_tc_left
+ END_DOC
+ integer, allocatable           :: iorder(:)
+ double precision, allocatable :: coef(:)
+ integer :: i,j
+ allocate ( iorder(N_det) , coef(N_det))
+ do i=1,N_det
+   coef(i) = -dabs(psi_l_coef_bi_ortho(i,1)/psi_r_coef_bi_ortho(1,1))
+   iorder(i) = i
+ enddo
+ call dsort(coef,iorder,N_det)
+ do i=1,N_det
+   do j=1,N_int
+     psi_det_sorted_tc_left(j,1,i) = psi_det(j,1,iorder(i))
+     psi_det_sorted_tc_left(j,2,i) = psi_det(j,2,iorder(i))
+   enddo
+  psi_l_coef_sorted_bi_ortho_left(i) = psi_l_coef_bi_ortho(iorder(i),1)/psi_l_coef_bi_ortho(iorder(1),1)
+ enddo
+END_PROVIDER 
diff --git a/src/tc_bi_ortho/psi_left_qmc.irp.f b/src/tc_bi_ortho/psi_left_qmc.irp.f
new file mode 100644
index 00000000..25048f82
--- /dev/null
+++ b/src/tc_bi_ortho/psi_left_qmc.irp.f
@@ -0,0 +1,44 @@
+
+! ---
+
+BEGIN_PROVIDER  [ double precision, psi_bitcleft_bilinear_matrix_values, (N_det,N_states) ]
+
+  BEGIN_DOC
+  ! Sparse coefficient matrix if the wave function is expressed in a bilinear form :
+  !  $D_\alpha^\dagger.C.D_\beta$
+  !
+  ! Rows are $\alpha$ determinants and columns are $\beta$.
+  !
+  ! Order refers to psi_det
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer :: k, l
+
+  if(N_det .eq. 1) then
+
+    do l = 1, N_states
+      psi_bitcleft_bilinear_matrix_values(1,l) = 1.d0
+    enddo
+
+  else
+
+    do l = 1, N_states
+      do k = 1, N_det
+        psi_bitcleft_bilinear_matrix_values(k,l) = psi_l_coef_bi_ortho(k,l)
+      enddo
+    enddo
+
+    PROVIDE psi_bilinear_matrix_order
+    do l = 1, N_states
+      call dset_order(psi_bitcleft_bilinear_matrix_values(1,l), psi_bilinear_matrix_order, N_det)
+    enddo
+
+  endif
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/tc_bi_ortho/psi_r_l_prov.irp.f b/src/tc_bi_ortho/psi_r_l_prov.irp.f
new file mode 100644
index 00000000..521acff5
--- /dev/null
+++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f
@@ -0,0 +1,228 @@
+use bitmasks
+
+BEGIN_PROVIDER [ double precision, psi_l_coef_bi_ortho, (psi_det_size,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! The wave function coefficients. Initialized with Hartree-Fock if the |EZFIO| file
+  ! is empty.
+  END_DOC
+
+  integer                        :: i,k, N_int2
+  logical                        :: exists
+  character*(64)                 :: label
+
+  PROVIDE read_wf N_det mo_label ezfio_filename nproc
+  psi_l_coef_bi_ortho = 0.d0
+  do i=1,min(N_states,N_det)
+    psi_l_coef_bi_ortho(i,i) = 1.d0
+  enddo
+
+  if (mpi_master) then
+    if (read_wf) then
+      call ezfio_has_tc_bi_ortho_psi_l_coef_bi_ortho(exists)
+!      if (exists) then
+!        call ezfio_has_tc_bi_ortho_mo_label(exists)
+!        if (exists) then
+!          call ezfio_get_tc_bi_ortho_mo_label(label)
+!          exists = (label == mo_label)
+!        endif
+!      endif
+
+      if (exists) then
+
+        double precision, allocatable  :: psi_l_coef_bi_ortho_read(:,:)
+        allocate (psi_l_coef_bi_ortho_read(N_det,N_states))
+        print *,  'Read psi_l_coef_bi_ortho', N_det, N_states
+        call ezfio_get_tc_bi_ortho_psi_l_coef_bi_ortho(psi_l_coef_bi_ortho_read)
+        do k=1,N_states
+          do i=1,N_det
+            psi_l_coef_bi_ortho(i,k) = psi_l_coef_bi_ortho_read(i,k)
+          enddo
+        enddo
+        deallocate(psi_l_coef_bi_ortho_read)
+
+      else
+
+        print*, 'psi_l_coef_bi_ortho are psi_coef'
+        do k=1,N_states
+          do i=1,N_det
+            psi_l_coef_bi_ortho(i,k) = psi_coef(i,k)
+          enddo
+        enddo
+
+      endif
+    endif
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer                        :: ierr
+    call     MPI_BCAST( psi_l_coef_bi_ortho, size(psi_l_coef_bi_ortho), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read psi_l_coef_bi_ortho with MPI'
+    endif
+  IRP_ENDIF
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! The wave function coefficients. Initialized with Hartree-Fock if the |EZFIO| file
+  ! is empty.
+  END_DOC
+
+  integer                        :: i,k, N_int2
+  logical                        :: exists
+  character*(64)                 :: label
+
+  PROVIDE read_wf N_det mo_label ezfio_filename nproc
+  psi_r_coef_bi_ortho = 0.d0
+  do i=1,min(N_states,N_det)
+    psi_r_coef_bi_ortho(i,i) = 1.d0
+  enddo
+
+  if (mpi_master) then
+    if (read_wf) then
+      call ezfio_has_tc_bi_ortho_psi_r_coef_bi_ortho(exists)
+!      if (exists) then
+!        call ezfio_has_tc_bi_ortho_mo_label(exists)
+!        if (exists) then
+!          call ezfio_get_tc_bi_ortho_mo_label(label)
+!          exists = (label == mo_label)
+!        endif
+!      endif
+
+      if (exists) then
+
+        double precision, allocatable  :: psi_r_coef_bi_ortho_read(:,:)
+        allocate (psi_r_coef_bi_ortho_read(N_det,N_states))
+        print *,  'Read psi_r_coef_bi_ortho', N_det, N_states
+        call ezfio_get_tc_bi_ortho_psi_r_coef_bi_ortho(psi_r_coef_bi_ortho_read)
+        do k=1,N_states
+          do i=1,N_det
+            psi_r_coef_bi_ortho(i,k) = psi_r_coef_bi_ortho_read(i,k)
+          enddo
+        enddo
+        deallocate(psi_r_coef_bi_ortho_read)
+
+      else
+
+        print*, 'psi_r_coef_bi_ortho are psi_coef'
+        do k=1,N_states
+          do i=1,N_det
+            psi_r_coef_bi_ortho(i,k) = psi_coef(i,k)
+          enddo
+        enddo
+
+      endif
+    endif
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer                        :: ierr
+    call     MPI_BCAST( psi_r_coef_bi_ortho, size(psi_r_coef_bi_ortho), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read psi_r_coef_bi_ortho with MPI'
+    endif
+  IRP_ENDIF
+END_PROVIDER
+
+
+subroutine save_tc_wavefunction_general(ndet,nstates,psidet,sze,dim_psicoef,psilcoef,psircoef)
+  implicit none
+  BEGIN_DOC
+  !  Save the wave function into the |EZFIO| file
+  END_DOC
+  use bitmasks
+  include 'constants.include.F'
+  integer, intent(in)            :: ndet,nstates,dim_psicoef,sze
+  integer(bit_kind), intent(in)  :: psidet(N_int,2,sze)
+  double precision, intent(in)   :: psilcoef(dim_psicoef,nstates)
+  double precision, intent(in)   :: psircoef(dim_psicoef,nstates)
+  integer*8, allocatable         :: psi_det_save(:,:,:)
+  double precision, allocatable  :: psil_coef_save(:,:)
+  double precision, allocatable  :: psir_coef_save(:,:)
+
+  double precision               :: accu_norm
+  integer                        :: i,j,k, ndet_qp_edit
+
+  if (mpi_master) then
+    ndet_qp_edit = min(ndet,N_det_qp_edit)
+
+    call ezfio_set_determinants_N_int(N_int)
+    call ezfio_set_determinants_bit_kind(bit_kind)
+    call ezfio_set_determinants_N_det(ndet)
+    call ezfio_set_determinants_N_det_qp_edit(ndet_qp_edit)
+    call ezfio_set_determinants_n_states(nstates)
+    call ezfio_set_determinants_mo_label(mo_label)
+
+    allocate (psi_det_save(N_int,2,ndet))
+    do i=1,ndet
+      do j=1,2
+        do k=1,N_int
+          psi_det_save(k,j,i) = transfer(psidet(k,j,i),1_8)
+        enddo
+      enddo
+    enddo
+    call ezfio_set_determinants_psi_det(psi_det_save)
+    call ezfio_set_determinants_psi_det_qp_edit(psi_det_save)
+    deallocate (psi_det_save)
+
+    allocate (psil_coef_save(ndet,nstates),psir_coef_save(ndet,nstates))
+    do k=1,nstates
+      do i=1,ndet
+        psil_coef_save(i,k) = psilcoef(i,k)
+        psir_coef_save(i,k) = psircoef(i,k)
+      enddo
+    enddo
+
+    call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(psil_coef_save)
+    call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(psir_coef_save)
+    deallocate (psil_coef_save,psir_coef_save)
+
+    call write_int(6,ndet,'Saved determinantsi and psi_r/psi_l coef')
+  endif
+end
+
+subroutine save_tc_bi_ortho_wavefunction
+ implicit none
+ if(save_sorted_tc_wf)then
+  call save_tc_wavefunction_general(N_det,N_states,psi_det_sorted_tc,size(psi_det_sorted_tc, 3),size(psi_l_coef_sorted_bi_ortho, 1),psi_l_coef_sorted_bi_ortho,psi_r_coef_sorted_bi_ortho)
+ else
+  call save_tc_wavefunction_general(N_det,N_states,psi_det,size(psi_det, 3), size(psi_l_coef_bi_ortho, 1),psi_l_coef_bi_ortho,psi_r_coef_bi_ortho)
+ endif
+ call routine_save_right_bi_ortho
+end
+
+subroutine routine_save_right_bi_ortho
+ implicit none
+ double precision, allocatable :: coef_tmp(:,:)
+ integer :: i
+ allocate(coef_tmp(N_det, N_states))
+ do i = 1, N_det
+  coef_tmp(i,1:N_states) = psi_r_coef_sorted_bi_ortho(i,1:N_states)
+ enddo
+ call save_wavefunction_general_unormalized(N_det,N_states,psi_det_sorted_tc,size(coef_tmp,1),coef_tmp(1,1))
+end                     
+
+subroutine routine_save_left_right_bi_ortho
+ implicit none
+ double precision, allocatable :: coef_tmp(:,:)
+ integer :: i,n_states_tmp
+ n_states_tmp = 2
+ allocate(coef_tmp(N_det, n_states_tmp))
+ do i = 1, N_det
+  coef_tmp(i,1) = psi_r_coef_bi_ortho(i,1)
+  coef_tmp(i,2) = psi_l_coef_bi_ortho(i,1)
+ enddo
+ call save_wavefunction_general_unormalized(N_det,n_states_tmp,psi_det,size(coef_tmp,1),coef_tmp(1,1))
+end
+
diff --git a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet
new file mode 100644
index 00000000..eb812401
--- /dev/null
+++ b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet
@@ -0,0 +1,76 @@
+program save_bitcpsileft_for_qmcchem
+
+  integer          :: iunit
+  logical          :: exists
+  double precision :: e_ref
+
+  print *, ' '
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
+  print *, '  call save_for_qmcchem before  '
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
+  print *, ' '
+
+  call write_lr_spindeterminants()
+
+  e_ref = 0.d0
+  iunit = 13
+  open(unit=iunit, file=trim(ezfio_filename)//'/simulation/e_ref', action='write')
+
+    call ezfio_has_fci_energy_pt2(exists)
+    if(.not.exists) then
+
+      call ezfio_has_fci_energy(exists)
+      if(.not.exists) then
+
+        call ezfio_has_cisd_energy(exists)
+        if(.not.exists) then
+
+          call ezfio_has_tc_scf_bitc_energy(exists)
+          if(exists) then
+            call ezfio_get_tc_scf_bitc_energy(e_ref)
+          endif
+
+        else
+          call ezfio_get_cisd_energy(e_ref)
+        endif
+
+      else
+        call ezfio_get_fci_energy(e_ref)
+      endif
+
+      else
+        call ezfio_get_fci_energy_pt2(e_ref)
+    endif
+
+    write(iunit,*) e_ref
+
+  close(iunit)
+
+end
+
+! --
+
+subroutine write_lr_spindeterminants()
+
+  use bitmasks
+
+  implicit none
+
+  integer                       :: k, l
+  double precision, allocatable :: buffer(:,:)
+
+  PROVIDE psi_bitcleft_bilinear_matrix_values
+
+  allocate(buffer(N_det,N_states))
+  do l = 1, N_states
+    do k = 1, N_det
+      buffer(k,l) = psi_bitcleft_bilinear_matrix_values(k,l)
+    enddo
+  enddo
+  call ezfio_set_spindeterminants_psi_left_coef_matrix_values(buffer)
+  deallocate(buffer)
+
+end subroutine write_lr_spindeterminants
+
+! ---
+
diff --git a/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f b/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f
new file mode 100644
index 00000000..5eb3c069
--- /dev/null
+++ b/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f
@@ -0,0 +1,15 @@
+program tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+  call routine_save_left_right_bi_ortho
+! call test
+end
diff --git a/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f b/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f
new file mode 100644
index 00000000..8b6eb1d1
--- /dev/null
+++ b/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f
@@ -0,0 +1,35 @@
+ program tc_natorb_bi_ortho
+   implicit none
+   BEGIN_DOC
+ ! TODO : Put the documentation of the program here
+   END_DOC
+   print *, 'Hello world'
+   my_grid_becke = .True.
+   my_n_pt_r_grid = 30
+   my_n_pt_a_grid = 50
+   read_wf = .True.
+   touch read_wf
+   touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+   call print_energy_and_mos
+   call save_tc_natorb
+!   call minimize_tc_orb_angles
+ end
+ 
+ subroutine save_tc_natorb 
+  implicit none
+  print*,'Saving the natorbs '
+  provide natorb_tc_leigvec_ao natorb_tc_reigvec_ao
+  call ezfio_set_bi_ortho_mos_mo_l_coef(natorb_tc_leigvec_ao)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(natorb_tc_reigvec_ao)
+  call save_ref_determinant_nstates_1
+  call ezfio_set_determinants_read_wf(.False.)
+ end
+ 
+ subroutine save_ref_determinant_nstates_1
+   implicit none
+   use bitmasks
+   double precision               :: buffer(1,N_states)
+   buffer = 0.d0
+   buffer(1,1) = 1.d0
+   call save_wavefunction_general(1,1,ref_bitmask,1,buffer)
+ end
diff --git a/src/tc_bi_ortho/select_dets_bi_ortho.irp.f b/src/tc_bi_ortho/select_dets_bi_ortho.irp.f
new file mode 100644
index 00000000..e6bf3d6e
--- /dev/null
+++ b/src/tc_bi_ortho/select_dets_bi_ortho.irp.f
@@ -0,0 +1,61 @@
+program tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  !!!!!!!!!!!!!!! WARNING NO 3-BODY
+  !!!!!!!!!!!!!!! WARNING NO 3-BODY
+  three_body_h_tc = .False.
+  touch three_body_h_tc 
+  !!!!!!!!!!!!!!! WARNING NO 3-BODY
+  !!!!!!!!!!!!!!! WARNING NO 3-BODY
+
+  call routine_test
+! call test
+end
+
+subroutine routine_test
+ implicit none
+ use bitmasks ! you need to include the bitmasks_module.f90 features
+ integer :: i,n_good,degree
+ integer(bit_kind), allocatable :: dets(:,:,:)
+ integer, allocatable :: iorder(:)
+ double precision, allocatable :: coef(:),coef_new(:,:)
+ double precision :: thr
+ allocate(coef(N_det), iorder(N_det))
+ do i = 1, N_det
+  iorder(i) = i
+  call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+  if(degree==1)then
+   coef(i) = -0.5d0
+  else
+   coef(i) = -dabs(coef_pt1_bi_ortho(i))
+  endif
+ enddo
+ call dsort(coef,iorder,N_det)
+ !thr = save_threshold
+ thr = 1d-15
+ n_good = 0
+ do i = 1, N_det
+  if(dabs(coef(i)).gt.thr)then
+   n_good += 1
+  endif
+ enddo
+ print*,'n_good = ',n_good
+ allocate(dets(N_int,2,n_good),coef_new(n_good,n_states))
+ do i = 1, n_good
+  dets(:,:,i) = psi_det(:,:,iorder(i))
+  coef_new(i,:) = psi_coef(iorder(i),:)
+ enddo
+ call save_wavefunction_general(n_good,n_states,dets,n_good,coef_new)
+
+
+end
diff --git a/src/tc_bi_ortho/slater_tc.irp.f b/src/tc_bi_ortho/slater_tc.irp.f
new file mode 100644
index 00000000..2c0ae2ca
--- /dev/null
+++ b/src/tc_bi_ortho/slater_tc.irp.f
@@ -0,0 +1,376 @@
+
+! ---
+
+subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer, intent(in)           :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2)
+  double precision, intent(out) :: htot
+  double precision              :: hmono, htwoe, hthree
+  integer :: degree
+
+  call get_excitation_degree(key_j, key_i, degree, Nint)
+  if(degree.gt.2)then
+    htot = 0.d0
+  else
+    call htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+  endif
+
+end subroutine htilde_mu_mat_bi_ortho_tot
+
+! --
+
+subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+
+  BEGIN_DOC
+  !
+  ! <key_j | H_tilde | key_i> where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis
+  !!
+  ! Returns the detail of the matrix element in terms of single, two and three electron contribution. 
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, hthree, htot
+  integer                       :: degree 
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  htot   = 0.d0
+  hthree = 0.D0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.gt.2) return
+
+  if(degree == 0)then
+    call diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot)
+  else if (degree == 1)then
+    call single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+  else if(degree == 2)then
+    call double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+  endif
+
+  if(three_body_h_tc) then
+    if(degree == 2) then
+      if(.not.double_normal_ord) then
+        call double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+      endif
+    else if(degree == 1) then
+      call single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+    else if(degree == 0) then
+      call diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
+    endif
+  endif
+
+  htot = hmono + htwoe + hthree
+  if(degree==0) then
+    htot += nuclear_repulsion
+  endif
+ 
+end
+
+! ---
+
+subroutine diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot)
+
+  BEGIN_DOC
+  !  diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS 
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in)  :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint,2)
+  double precision, intent(out)  :: hmono,htwoe,htot
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  double precision               :: get_mo_two_e_integral_tc_int
+  integer(bit_kind)              :: key_i_core(Nint,2)
+
+!  PROVIDE mo_two_e_integrals_tc_int_in_map mo_bi_ortho_tc_two_e
+!
+!  PROVIDE mo_integrals_erf_map core_energy nuclear_repulsion core_bitmask
+!  PROVIDE core_fock_operator
+!
+!  PROVIDE j1b_gauss
+
+!  if(core_tc_op)then
+!   print*,'core_tc_op not already taken into account for bi ortho'
+!   print*,'stopping ...'
+!   stop
+!   do i = 1, Nint
+!    key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1))
+!    key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2))
+!   enddo
+!   call bitstring_to_list_ab(key_i_core, occ, Ne, Nint)
+!   hmono = core_energy - nuclear_repulsion
+!  else
+   call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+   hmono = 0.d0
+!  endif
+  htwoe= 0.d0
+  htot = 0.d0
+
+  do ispin = 1, 2 
+   do i = 1, Ne(ispin) ! 
+    ii = occ(i,ispin) 
+    hmono += mo_bi_ortho_tc_one_e(ii,ii)
+
+!    if(j1b_gauss .eq. 1) then
+!      print*,'j1b not implemented for bi ortho TC'
+!      print*,'stopping  ....'
+!      stop
+!      !hmono += mo_j1b_gauss_hermI  (ii,ii) &
+!      !       + mo_j1b_gauss_hermII (ii,ii) &
+!      !       + mo_j1b_gauss_nonherm(ii,ii)
+!    endif
+
+!    if(core_tc_op)then
+!   print*,'core_tc_op not already taken into account for bi ortho'
+!   print*,'stopping ...'
+!   stop
+!     hmono += core_fock_operator(ii,ii) ! add the usual Coulomb - Exchange from the core 
+!    endif
+   enddo
+  enddo
+
+
+   ! alpha/beta two-body
+   ispin = 1
+   jspin = 2 
+   do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1))
+    ii = occ(i,ispin) 
+    do j = 1, Ne(jspin) ! electron 2 
+     jj = occ(j,jspin) 
+     htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) 
+    enddo
+   enddo
+ 
+   ! alpha/alpha two-body
+   do i = 1, Ne(ispin)
+    ii = occ(i,ispin) 
+    do j = i+1, Ne(ispin)
+     jj = occ(j,ispin) 
+     htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii)
+    enddo
+   enddo
+ 
+   ! beta/beta two-body
+   do i = 1, Ne(jspin)
+    ii = occ(i,jspin) 
+    do j = i+1, Ne(jspin)
+     jj = occ(j,jspin) 
+     htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii)
+    enddo
+   enddo
+  htot = hmono + htwoe 
+
+end
+
+
+
+subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for double excitation  ONLY FOR ONE- AND TWO-BODY TERMS 
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint 
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, htot
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  integer                       :: other_spin(2)
+  integer(bit_kind)             :: key_i_core(Nint,2)
+  double precision              :: get_mo_two_e_integral_tc_int,phase
+
+!  PROVIDE mo_two_e_integrals_tc_int_in_map mo_bi_ortho_tc_two_e
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+
+  hmono = 0.d0
+  htwoe= 0.d0
+  htot = 0.d0
+
+  if(degree.ne.2)then
+   return
+  endif
+
+!  if(core_tc_op)then
+!   print*,'core_tc_op not already taken into account for bi ortho'
+!   print*,'stopping ...'
+!   stop
+!   do i = 1, Nint
+!    key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1))
+!    key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2))
+!   enddo
+!   call bitstring_to_list_ab(key_i_core, occ, Ne, Nint)
+!  else
+   call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+!  endif
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  if(s1.ne.s2)then
+   ! opposite spin two-body 
+!   key_j, key_i
+    htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) 
+    if(double_normal_ord.and.+Ne(1).gt.2)then
+     htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ???
+    endif
+  else
+   ! same spin two-body 
+   ! direct terms 
+   htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1)  
+   ! exchange terms 
+   htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) 
+   if(double_normal_ord.and.+Ne(1).gt.2)then
+    htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ???
+    htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ???
+   endif
+  endif
+  htwoe *= phase
+  htot =  htwoe 
+
+end
+
+
+subroutine single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for single excitation ONLY FOR ONE- AND TWO-BODY TERMS 
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, htot
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: get_mo_two_e_integral_tc_int, phase
+  double precision              :: direct_int, exchange_int_12, exchange_int_23, exchange_int_13
+  integer                       :: other_spin(2)
+  integer(bit_kind)             :: key_j_core(Nint,2), key_i_core(Nint,2)
+
+!  PROVIDE mo_two_e_integrals_tc_int_in_map mo_bi_ortho_tc_two_e
+!
+!  PROVIDE core_bitmask core_fock_operator mo_integrals_erf_map
+
+!  PROVIDE j1b_gauss
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  hmono = 0.d0
+  htwoe= 0.d0
+  htot = 0.d0
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.1)then
+   return
+  endif
+!  if(core_tc_op)then
+!   print*,'core_tc_op not already taken into account for bi ortho'
+!   print*,'stopping ...'
+!   stop
+!   do i = 1, Nint
+!    key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1))
+!    key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2))
+!    key_j_core(i,1) = xor(key_j(i,1),core_bitmask(i,1))
+!    key_j_core(i,2) = xor(key_j(i,2),core_bitmask(i,2))
+!   enddo
+!   call bitstring_to_list_ab(key_i_core, occ, Ne, Nint)
+!  else
+   call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+!  endif
+
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc,1,h1,p1,h2,p2,s1,s2)
+!  if(h1==14.and.p1==2)then
+!   print*,'h1,p1 old = ',h1,p1
+!  endif
+
+  hmono = mo_bi_ortho_tc_one_e(p1,h1) * phase
+
+!  if(j1b_gauss .eq. 1) then
+!     print*,'j1b not implemented for bi ortho TC'
+!     print*,'stopping  ....'
+!     stop
+!    !hmono += ( mo_j1b_gauss_hermI  (h1,p1) &
+!    !         + mo_j1b_gauss_hermII (h1,p1) &
+!    !         + mo_j1b_gauss_nonherm(h1,p1) ) * phase
+!  endif
+
+!  if(core_tc_op)then
+!   print*,'core_tc_op not already taken into account for bi ortho'
+!   print*,'stopping ...'
+!   stop
+!   hmono += phase * core_fock_operator(h1,p1)
+!  endif
+  
+   ! alpha/beta two-body 
+   ispin = other_spin(s1)
+   if(s1==1)then
+    ! single alpha 
+    do i = 1, Ne(ispin) ! electron 2 
+     ii = occ(i,ispin) 
+     htwoe += mo_bi_ortho_tc_two_e(ii,p1,ii,h1) 
+    enddo
+   else
+    ! single beta 
+    do i = 1, Ne(ispin) ! electron 1 
+     ii = occ(i,ispin) 
+     htwoe += mo_bi_ortho_tc_two_e(p1,ii,h1,ii) 
+    enddo
+   endif
+!   ! same spin two-body 
+   do i = 1, Ne(s1)
+    ii = occ(i,s1) 
+    ! (h1p1|ii ii) - (h1 ii| p1 ii)
+    htwoe += mo_bi_ortho_tc_two_e(ii,p1,ii,h1) - mo_bi_ortho_tc_two_e(p1,ii,ii,h1) 
+   enddo
+   
+  htwoe  *= phase
+  htot = hmono + htwoe 
+
+end
+
+
diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f
new file mode 100644
index 00000000..7b73d5f2
--- /dev/null
+++ b/src/tc_bi_ortho/slater_tc_3e.irp.f
@@ -0,0 +1,289 @@
+subroutine provide_all_three_ints_bi_ortho
+ implicit none
+ BEGIN_DOC
+! routine that provides all necessary three-electron integrals 
+ END_DOC
+ if(three_body_h_tc)then
+  PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort
+  PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort
+  PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort
+  PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort
+ endif
+if(.not.double_normal_ord)then
+  PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort
+  PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort
+else
+ PROVIDE normal_two_body_bi_orth
+endif
+end
+
+subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
+
+  BEGIN_DOC
+  !  diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2)
+  double precision, intent(out) :: hthree
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2),i,j,ii,jj,ispin,jspin,m,mm
+  integer(bit_kind)             :: key_i_core(Nint,2)
+  double precision              :: direct_int, exchange_int
+  double precision              :: sym_3_e_int_from_6_idx_tensor
+  double precision              :: three_e_diag_parrallel_spin
+
+  if(core_tc_op)then
+   do i = 1, Nint
+    key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1))
+    key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2))
+   enddo
+   call bitstring_to_list_ab(key_i_core,occ,Ne,Nint)
+  else
+   call bitstring_to_list_ab(key_i,occ,Ne,Nint)
+  endif
+  hthree = 0.d0
+
+  if(Ne(1)+Ne(2).ge.3)then
+!!  ! alpha/alpha/beta three-body
+   do i = 1, Ne(1)
+    ii = occ(i,1) 
+    do j = i+1, Ne(1)
+     jj = occ(j,1) 
+     do m = 1, Ne(2)
+      mm = occ(m,2) 
+!      direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) USES THE 6-IDX TENSOR 
+!      exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) USES THE 6-IDX TENSOR 
+      direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR 
+      exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR 
+      hthree += direct_int - exchange_int
+     enddo
+    enddo
+   enddo
+  
+   ! beta/beta/alpha three-body
+   do i = 1, Ne(2)
+    ii = occ(i,2) 
+    do j = i+1, Ne(2)
+     jj = occ(j,2) 
+     do m = 1, Ne(1)
+      mm = occ(m,1) 
+      direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) 
+      exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii)
+      hthree += direct_int - exchange_int
+     enddo
+    enddo
+   enddo
+
+   ! alpha/alpha/alpha three-body
+   do i = 1, Ne(1)
+    ii = occ(i,1) ! 1
+    do j = i+1, Ne(1)
+     jj = occ(j,1) ! 2 
+     do m = j+1, Ne(1)
+      mm = occ(m,1) ! 3 
+!      ref =  sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR 
+      hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS
+     enddo
+    enddo
+   enddo
+
+   ! beta/beta/beta three-body
+   do i = 1, Ne(2)
+    ii = occ(i,2) ! 1
+    do j = i+1, Ne(2)
+     jj = occ(j,2) ! 2
+     do m = j+1, Ne(2)
+      mm = occ(m,2) ! 3
+!      ref =  sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR 
+      hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS
+     enddo
+    enddo
+   enddo
+  endif
+
+end
+
+
+subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2)
+  double precision, intent(out) :: hthree
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2),i,j,ii,jj,ispin,jspin,k,kk
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: direct_int,phase,exchange_int,three_e_single_parrallel_spin 
+  double precision              :: sym_3_e_int_from_6_idx_tensor
+  integer                       :: other_spin(2)
+  integer(bit_kind)             :: key_j_core(Nint,2),key_i_core(Nint,2)
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+
+  hthree = 0.d0
+  call get_excitation_degree(key_i,key_j,degree,Nint)
+  if(degree.ne.1)then
+   return
+  endif
+  if(core_tc_op)then
+   do i = 1, Nint
+    key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1))
+    key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2))
+    key_j_core(i,1) = xor(key_j(i,1),core_bitmask(i,1))
+    key_j_core(i,2) = xor(key_j(i,2),core_bitmask(i,2))
+   enddo
+   call bitstring_to_list_ab(key_i_core, occ, Ne, Nint)
+  else
+   call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+  endif
+
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 1, h1, p1, h2, p2, s1, s2)
+
+   ! alpha/alpha/beta three-body
+!   print*,'IN SLAT RULES'
+   if(Ne(1)+Ne(2).ge.3)then
+     ! hole of spin s1 :: contribution from purely other spin 
+     ispin = other_spin(s1) ! ispin is the other spin than s1
+     do i = 1, Ne(ispin)  ! i is the orbitals of the other spin than s1  
+      ii = occ(i,ispin)  
+      do j = i+1, Ne(ispin) ! j has the same spin than s1 
+       jj = occ(j,ispin) 
+       !   is == ispin  in :::   s1 is is  s1 is is      s1 is is s1 is is
+       !                       < h1 j  i | p1 j  i > - < h1 j  i | p1 i j >
+       !                                                   
+       direct_int   = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1)  
+       exchange_int = three_e_4_idx_exch23_bi_ort(jj,ii,p1,h1) 
+       hthree += direct_int - exchange_int
+      enddo
+     enddo
+  
+     ! hole of spin s1 :: contribution from mixed other spin / same spin
+     do i = 1, Ne(ispin) ! other spin 
+      ii = occ(i,ispin)  ! other spin 
+      do j = 1, Ne(s1)   ! same spin 
+       jj = occ(j,s1)    ! same spin 
+       direct_int   = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1)
+       exchange_int = three_e_4_idx_exch13_bi_ort(jj,ii,p1,h1)
+       !              < h1 j  i | p1 j i > - < h1 j i | j p1 i >
+       hthree += direct_int - exchange_int
+      enddo
+     enddo
+!
+     ! hole of spin s1 :: PURE SAME SPIN CONTRIBUTIONS !!!
+     do i = 1, Ne(s1)
+      ii = occ(i,s1)
+      do j = i+1, Ne(s1)
+       jj = occ(j,s1)
+!       ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) 
+       hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR 
+      enddo
+     enddo
+   endif
+  hthree  *= phase
+
+end
+
+! ---
+
+subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for double excitation ONLY FOR THREE-BODY TERMS  WITH BI ORTHONORMAL ORBITALS
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2)
+  double precision, intent(out) :: hthree
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2),i,j,ii,jj,ispin,jspin,m,mm
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: phase
+  integer                       :: other_spin(2)
+  integer(bit_kind)             :: key_i_core(Nint,2)
+  double precision              :: direct_int,exchange_int,sym_3_e_int_from_6_idx_tensor
+  double precision              :: three_e_double_parrallel_spin
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+
+  hthree = 0.d0
+
+  if(degree.ne.2)then
+   return
+  endif
+
+  if(core_tc_op) then
+   do i = 1, Nint
+    key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1))
+    key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2))
+   enddo
+   call bitstring_to_list_ab(key_i_core, occ, Ne, Nint)
+  else
+   call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+  endif
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+    
+    if(Ne(1)+Ne(2).ge.3)then
+     if(s1==s2)then ! same spin excitation 
+      ispin = other_spin(s1)
+      do m = 1, Ne(ispin) ! direct(other_spin) - exchange(s1)
+       mm = occ(m,ispin)
+       direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
+       exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1)
+       hthree += direct_int - exchange_int
+      enddo
+      do m = 1, Ne(s1) ! pure contribution from s1 
+       mm = occ(m,s1)
+       hthree += three_e_double_parrallel_spin(mm,p2,h2,p1,h1)
+      enddo 
+     else ! different spin excitation 
+       do m = 1, Ne(s1)
+        mm = occ(m,s1) ! 
+        direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
+        exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1)
+        hthree += direct_int - exchange_int
+       enddo
+       do m = 1, Ne(s2)
+        mm = occ(m,s2) ! 
+        direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1)
+        exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1)
+        hthree += direct_int - exchange_int
+       enddo
+     endif
+    endif
+  hthree  *= phase
+ end
+
+! ---
+
diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f
new file mode 100644
index 00000000..a19d4688
--- /dev/null
+++ b/src/tc_bi_ortho/slater_tc_opt.irp.f
@@ -0,0 +1,105 @@
+subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot)
+ implicit none
+  BEGIN_DOC
+  !
+  ! <key_j | H_tilde | key_i> where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis
+  !!
+  ! Returns the total matrix element 
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  !
+  END_DOC
+
+  use bitmasks
+ integer,           intent(in) :: Nint
+ integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
+ double precision, intent(out) :: htot
+ double precision :: hmono, htwoe, hthree
+ call htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+end
+subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+  BEGIN_DOC
+  !
+  ! <key_j | H_tilde | key_i> where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis
+  !!
+  ! Returns the detail of the matrix element in terms of single, two and three electron contribution. 
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, hthree, htot
+  integer                       :: degree 
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  htot   = 0.d0
+  hthree = 0.D0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.gt.2) return
+
+  if(degree == 0)then
+    call diag_htilde_mu_mat_fock_bi_ortho  (Nint, key_i, hmono, htwoe, hthree, htot)
+  else if (degree == 1)then
+    call single_htilde_mu_mat_fock_bi_ortho(Nint,key_j, key_i , hmono, htwoe, hthree, htot)
+  else if(degree == 2)then
+    call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot)
+  endif
+
+  if(degree==0) then
+    htot += nuclear_repulsion
+  endif
+ 
+end
+
+! ---
+
+subroutine htilde_mu_mat_opt_bi_ortho_no_3e(key_j, key_i, Nint, htot)
+
+  BEGIN_DOC
+  !
+  ! <key_j | H_tilde | key_i> where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis
+  !!
+  ! Returns the detail of the matrix element WITHOUT ANY CONTRIBUTION FROM THE THREE ELECTRON TERMS 
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
+  double precision, intent(out) :: htot
+  integer                       :: degree 
+
+  htot   = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.gt.2) return
+
+  if(degree == 0)then
+    call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i,htot)
+  else if (degree == 1)then
+    call single_htilde_mu_mat_fock_bi_ortho_no_3e(Nint,key_j, key_i , htot)
+  else if(degree == 2)then
+    call double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot)
+  endif
+
+  if(degree==0) then
+    htot += nuclear_repulsion
+  endif
+ 
+end
+
+! ---
diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
new file mode 100644
index 00000000..00cebf3a
--- /dev/null
+++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
@@ -0,0 +1,473 @@
+ BEGIN_PROVIDER [ double precision, ref_tc_energy_tot]
+&BEGIN_PROVIDER [ double precision, ref_tc_energy_1e]
+&BEGIN_PROVIDER [ double precision, ref_tc_energy_2e]
+&BEGIN_PROVIDER [ double precision, ref_tc_energy_3e]
+ implicit none
+ BEGIN_DOC
+! Various component of the TC energy for the reference "HF" Slater determinant
+ END_DOC 
+ double precision :: hmono, htwoe, htot, hthree
+ call diag_htilde_mu_mat_bi_ortho(N_int,HF_bitmask , hmono, htwoe, htot)
+ ref_tc_energy_1e = hmono
+ ref_tc_energy_2e = htwoe 
+ if(three_body_h_tc)then
+  call diag_htilde_three_body_ints_bi_ort(N_int, HF_bitmask, hthree)
+  ref_tc_energy_3e = hthree
+ else
+  ref_tc_energy_3e = 0.d0
+ endif
+ ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e + nuclear_repulsion
+ END_PROVIDER 
+
+subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot)
+  implicit none
+  BEGIN_DOC
+  ! Computes $\langle i|H|i \rangle$.
+  END_DOC
+  integer,intent(in)             :: Nint
+  integer(bit_kind),intent(in)   :: det_in(Nint,2)
+  double precision, intent(out)  :: hmono,htwoe,htot,hthree
+
+  integer(bit_kind)              :: hole(Nint,2)
+  integer(bit_kind)              :: particle(Nint,2)
+  integer                        :: i, nexc(2), ispin
+  integer                        :: occ_particle(Nint*bit_kind_size,2)
+  integer                        :: occ_hole(Nint*bit_kind_size,2)
+  integer(bit_kind)              :: det_tmp(Nint,2)
+  integer                        :: na, nb
+
+  ASSERT (Nint > 0)
+  ASSERT (sum(popcnt(det_in(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(det_in(:,2))) == elec_beta_num)
+
+
+  nexc(1) = 0
+  nexc(2) = 0
+  do i=1,Nint
+    hole(i,1)     = xor(det_in(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(det_in(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),det_in(i,1))
+    particle(i,2) = iand(hole(i,2),det_in(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+
+  if (nexc(1)+nexc(2) == 0) then
+    hmono = ref_tc_energy_1e
+    htwoe = ref_tc_energy_2e
+    hthree= ref_tc_energy_3e
+    htot = ref_tc_energy_tot
+    return
+  endif
+
+  !call debug_det(det_in,Nint)
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+
+  
+  det_tmp = ref_bitmask
+  hmono = ref_tc_energy_1e
+  htwoe = ref_tc_energy_2e 
+  hthree= ref_tc_energy_3e
+  do ispin=1,2
+    na = elec_num_tab(ispin)
+    nb = elec_num_tab(iand(ispin,1)+1)
+    do i=1,nexc(ispin)
+      !DIR$ FORCEINLINE
+      call ac_tc_operator( occ_particle(i,ispin), ispin, det_tmp, hmono,htwoe,hthree, Nint,na,nb)
+      !DIR$ FORCEINLINE
+      call a_tc_operator ( occ_hole    (i,ispin), ispin, det_tmp, hmono,htwoe,hthree, Nint,na,nb)
+    enddo
+  enddo
+  htot = hmono+htwoe+hthree+nuclear_repulsion
+end
+
+subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computes one- and two-body energy corresponding 
+  ! 
+  ! to the ADDITION of an electron in an orbital 'iorb' of spin 'ispin' 
+  ! 
+  ! onto a determinant 'key'.
+  !
+  ! in output, the determinant key is changed by the ADDITION of that electron 
+  !
+  ! and the quantities hmono,htwoe,hthree are INCREMENTED 
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe,hthree
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i,jj,mm,j,m
+  double precision ::  direct_int, exchange_int
+  
+
+  if (iorb < 1) then
+    print *,  irp_here, ': iorb < 1'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+  if (iorb > mo_num) then
+    print *,  irp_here, ': iorb > mo_num'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  ASSERT (tmp(1) == elec_alpha_num)
+  ASSERT (tmp(2) == elec_beta_num)
+
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  key(k,ispin) = ibset(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1
+
+  hmono = hmono + mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! Same spin
+  do i=1,na
+    htwoe = htwoe + mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb)
+  enddo
+
+  ! Opposite spin
+  do i=1,nb
+    htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
+  enddo
+
+  if(three_body_h_tc)then
+   !!!!! 3-e part 
+   !! same-spin/same-spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    do m = j+1, na
+     mm = occ(m,ispin)
+     hthree += three_e_diag_parrallel_spin_prov(mm,jj,iorb)
+    enddo
+   enddo
+   !! same-spin/oposite-spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    do m = 1, nb
+     mm = occ(m,other_spin)
+     direct_int   = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+     exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+     hthree += direct_int - exchange_int
+    enddo
+   enddo
+   !! oposite-spin/opposite-spin
+    do j = 1, nb
+     jj = occ(j,other_spin) 
+     do m = j+1, nb 
+      mm = occ(m,other_spin) 
+      direct_int = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+      exchange_int = three_e_3_idx_exch23_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+      hthree += direct_int - exchange_int
+     enddo
+    enddo
+  endif
+
+  na = na+1
+end
+
+subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computes one- and two-body energy corresponding 
+  ! 
+  ! to the REMOVAL of an electron in an orbital 'iorb' of spin 'ispin' 
+  ! 
+  ! onto a determinant 'key'.
+  !
+  ! in output, the determinant key is changed by the REMOVAL of that electron 
+  !
+  ! and the quantities hmono,htwoe,hthree are INCREMENTED 
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe,hthree
+  
+  double precision  :: direct_int, exchange_int
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i,jj,mm,j,m
+  integer                        :: tmp(2)
+
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k>0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  key(k,ispin) = ibclr(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1
+
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  na = na-1
+
+  hmono = hmono - mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! Same spin
+  do i=1,na
+    htwoe= htwoe- mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb)
+  enddo
+
+  ! Opposite spin
+  do i=1,nb
+    htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
+  enddo
+
+  if(three_body_h_tc)then
+   !!!!! 3-e part 
+   !! same-spin/same-spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    do m = j+1, na
+     mm = occ(m,ispin)
+     hthree -= three_e_diag_parrallel_spin_prov(mm,jj,iorb)
+    enddo
+   enddo
+   !! same-spin/oposite-spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    do m = 1, nb
+     mm = occ(m,other_spin)
+     direct_int   = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+     exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+     hthree -= (direct_int - exchange_int)
+    enddo
+   enddo
+   !! oposite-spin/opposite-spin
+    do j = 1, nb
+     jj = occ(j,other_spin) 
+     do m = j+1, nb 
+      mm = occ(m,other_spin) 
+      direct_int   = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+      exchange_int = three_e_3_idx_exch23_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+      hthree -= (direct_int - exchange_int)
+     enddo
+    enddo
+  endif
+
+end
+
+
+subroutine diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, det_in,htot)
+  implicit none
+  BEGIN_DOC
+  ! Computes $\langle i|H|i \rangle$. WITHOUT ANY CONTRIBUTIONS FROM 3E TERMS
+  END_DOC
+  integer,intent(in)             :: Nint
+  integer(bit_kind),intent(in)   :: det_in(Nint,2)
+  double precision, intent(out)  :: htot
+  double precision :: hmono,htwoe
+
+  integer(bit_kind)              :: hole(Nint,2)
+  integer(bit_kind)              :: particle(Nint,2)
+  integer                        :: i, nexc(2), ispin
+  integer                        :: occ_particle(Nint*bit_kind_size,2)
+  integer                        :: occ_hole(Nint*bit_kind_size,2)
+  integer(bit_kind)              :: det_tmp(Nint,2)
+  integer                        :: na, nb
+
+  ASSERT (Nint > 0)
+  ASSERT (sum(popcnt(det_in(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(det_in(:,2))) == elec_beta_num)
+
+
+  nexc(1) = 0
+  nexc(2) = 0
+  do i=1,Nint
+    hole(i,1)     = xor(det_in(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(det_in(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),det_in(i,1))
+    particle(i,2) = iand(hole(i,2),det_in(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+
+  if (nexc(1)+nexc(2) == 0) then
+    hmono = ref_tc_energy_1e
+    htwoe = ref_tc_energy_2e
+    htot = ref_tc_energy_tot
+    return
+  endif
+
+  !call debug_det(det_in,Nint)
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+
+  
+  det_tmp = ref_bitmask
+  hmono = ref_tc_energy_1e
+  htwoe = ref_tc_energy_2e 
+  do ispin=1,2
+    na = elec_num_tab(ispin)
+    nb = elec_num_tab(iand(ispin,1)+1)
+    do i=1,nexc(ispin)
+      !DIR$ FORCEINLINE
+      call ac_tc_operator_no_3e( occ_particle(i,ispin), ispin, det_tmp, hmono,htwoe, Nint,na,nb)
+      !DIR$ FORCEINLINE
+      call a_tc_operator_no_3e ( occ_hole    (i,ispin), ispin, det_tmp, hmono,htwoe, Nint,na,nb)
+    enddo
+  enddo
+  htot = hmono+htwoe
+end
+
+subroutine ac_tc_operator_no_3e(iorb,ispin,key,hmono,htwoe,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computes one- and two-body energy corresponding 
+  ! 
+  ! to the ADDITION of an electron in an orbital 'iorb' of spin 'ispin' 
+  ! 
+  ! onto a determinant 'key'.
+  !
+  ! in output, the determinant key is changed by the ADDITION of that electron 
+  !
+  ! and the quantities hmono,htwoe are INCREMENTED 
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i,jj,mm,j,m
+  double precision ::  direct_int, exchange_int
+  
+
+  if (iorb < 1) then
+    print *,  irp_here, ': iorb < 1'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+  if (iorb > mo_num) then
+    print *,  irp_here, ': iorb > mo_num'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  ASSERT (tmp(1) == elec_alpha_num)
+  ASSERT (tmp(2) == elec_beta_num)
+
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  key(k,ispin) = ibset(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1
+
+  hmono = hmono + mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! Same spin
+  do i=1,na
+    htwoe = htwoe + mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb)
+  enddo
+
+  ! Opposite spin
+  do i=1,nb
+    htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
+  enddo
+
+  na = na+1
+end
+
+subroutine a_tc_operator_no_3e(iorb,ispin,key,hmono,htwoe,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computes one- and two-body energy corresponding 
+  ! 
+  ! to the REMOVAL of an electron in an orbital 'iorb' of spin 'ispin' 
+  ! 
+  ! onto a determinant 'key'.
+  !
+  ! in output, the determinant key is changed by the REMOVAL of that electron 
+  !
+  ! and the quantities hmono,htwoe are INCREMENTED 
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe
+  
+  double precision  :: direct_int, exchange_int
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i,jj,mm,j,m
+  integer                        :: tmp(2)
+
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k>0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  key(k,ispin) = ibclr(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1
+
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  na = na-1
+
+  hmono = hmono - mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! Same spin
+  do i=1,na
+    htwoe= htwoe- mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb)
+  enddo
+
+  ! Opposite spin
+  do i=1,nb
+    htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
+  enddo
+
+end
+
diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
new file mode 100644
index 00000000..baca498c
--- /dev/null
+++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
@@ -0,0 +1,477 @@
+
+subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for double excitation  ONLY FOR ONE- AND TWO-BODY TERMS 
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint 
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, hthree, htot
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: get_mo_two_e_integral_tc_int,phase
+
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  hthree = 0.d0
+  htot   = 0.d0
+
+  if(degree.ne.2)then
+   return
+  endif
+  integer :: degree_i,degree_j
+  call get_excitation_degree(ref_bitmask,key_i,degree_i,N_int)
+  call get_excitation_degree(ref_bitmask,key_j,degree_j,N_int)
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  if(s1.ne.s2)then
+   ! opposite spin two-body 
+    htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) 
+    if(three_body_h_tc)then
+     if(.not.double_normal_ord)then
+      if(degree_i>degree_j)then
+       call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree)
+      else
+       call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
+      endif
+     elseif(double_normal_ord.and.elec_num+elec_num.gt.2)then
+      htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ???
+     endif
+    endif
+  else
+   ! same spin two-body 
+   ! direct terms 
+   htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1)  
+   ! exchange terms 
+   htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) 
+   if(three_body_h_tc)then
+    if(.not.double_normal_ord)then
+     if(degree_i>degree_j)then
+      call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree)
+     else
+      call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
+     endif
+    elseif(double_normal_ord.and.elec_num+elec_num.gt.2)then
+      htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ???
+      htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ???
+    endif
+   endif
+  endif
+  hthree *= phase
+  htwoe  *= phase
+  htot    =  htwoe + hthree
+
+end
+
+
+
+subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
+ implicit none
+ integer(bit_kind), intent(in) :: key_i(N_int,2)
+ integer, intent(in) :: h1,h2,p1,p2,s1,s2
+ double precision, intent(out) :: hthree
+ integer :: nexc(2),i,ispin,na,nb
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: particle(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_particle(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_particle(2)
+ integer(bit_kind)              :: det_tmp(N_int,2)
+ integer :: ipart, ihole
+ double precision :: direct_int, exchange_int
+
+
+  nexc(1) = 0
+  nexc(2) = 0
+  !! Get all the holes and particles of key_i with respect to the ROHF determinant
+  do i=1,N_int
+    hole(i,1)     = xor(key_i(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(key_i(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),key_i(i,1))
+    particle(i,2) = iand(hole(i,2),key_i(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+  if(s1==s2.and.s1==1)then
+   !!!!!!!!!!!!!!!!!!!!!!!!!! alpha/alpha double exc
+   hthree = eff_2_e_from_3_e_aa(p2,p1,h2,h1) 
+   if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant 
+    !!!!!!!! the matrix element is already exact 
+    !!!!!!!! else you need to take care of holes and particles 
+    !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!!
+    ispin = 1 ! i==alpha ==> pure same spin terms
+    do i = 1, nexc(ispin) ! number of couple of holes/particles 
+     ipart=occ_particle(i,ispin)
+     hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1)
+     ihole=occ_hole(i,ispin)
+     hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1)
+    enddo
+    ispin = 2 ! i==beta ==> alpha/alpha/beta terms
+    do i = 1, nexc(ispin) ! number of couple of holes/particles 
+     ! exchange between (h1,p1) and (h2,p2)
+     ipart=occ_particle(i,ispin)
+     direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+     exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1)
+     hthree += direct_int - exchange_int
+     ihole=occ_hole(i,ispin)
+     direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+     exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1)
+     hthree -= direct_int - exchange_int
+    enddo
+   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  elseif(s1==s2.and.s1==2)then 
+   !!!!!!!!!!!!!!!!!!!!!!!!!! beta/beta double exc
+   hthree = eff_2_e_from_3_e_bb(p2,p1,h2,h1)
+   if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant 
+   !!!!!!!! the matrix element is already exact 
+   !!!!!!!! else you need to take care of holes and particles 
+   !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!!
+   ispin = 2 ! i==beta  ==> pure same spin terms
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ipart=occ_particle(i,ispin)
+    hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1)
+    ihole=occ_hole(i,ispin)
+    hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1)
+   enddo
+   ispin = 1 ! i==alpha==> beta/beta/alpha terms
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ! exchange between (h1,p1) and (h2,p2)
+    ipart=occ_particle(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1)
+    hthree += direct_int - exchange_int
+    ihole=occ_hole(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1)
+    hthree -= direct_int - exchange_int
+   enddo
+  else                         ! (h1,p1) == alpha/(h2,p2) == beta 
+   hthree = eff_2_e_from_3_e_ab(p2,p1,h2,h1)
+   if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant 
+   !!!!!!!! the matrix element is already exact 
+   !!!!!!!! else you need to take care of holes and particles 
+   !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!!
+   ispin = 1 ! i==alpha ==> alpha/beta/alpha terms 
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ! exchange between (h1,p1) and i
+    ipart=occ_particle(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch13_bi_ort(ipart,p2,h2,p1,h1)
+    hthree += direct_int - exchange_int
+    ihole=occ_hole(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch13_bi_ort(ihole,p2,h2,p1,h1)
+    hthree -= direct_int - exchange_int
+   enddo
+   ispin = 2 ! i==beta  ==> alpha/beta/beta  terms 
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ! exchange between (h2,p2) and i
+    ipart=occ_particle(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch23_bi_ort(ipart,p2,h2,p1,h1)
+    hthree += direct_int - exchange_int
+    ihole=occ_hole(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch23_bi_ort(ihole,p2,h2,p1,h1)
+    hthree -= direct_int - exchange_int
+   enddo
+  endif
+end
+
+
+BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/beta double excitations 
+!
+! from contraction with HF density = a^{dagger}_p1_alpha a^{dagger}_p2_beta a_h2_beta a_h1_alpha
+ END_DOC
+ integer :: i,h1,p1,h2,p2
+ integer :: hh1,hh2,pp1,pp2,m,mm
+ integer                        :: Ne(2)
+ integer,           allocatable :: occ(:,:)
+ double precision :: contrib
+ allocate( occ(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+ call give_contrib_for_abab(1,1,1,1,occ,Ne,contrib)
+ eff_2_e_from_3_e_ab = 0.d0
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & 
+ !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_ab)
+ !$OMP DO SCHEDULE (static) 
+  do hh1 = 1, n_act_orb !! alpha 
+    h1 = list_act(hh1) 
+    do hh2 = 1, n_act_orb !! beta 
+      h2 = list_act(hh2) 
+      do pp1 = 1, n_act_orb !! alpha
+        p1 = list_act(pp1)
+        do pp2 = 1, n_act_orb !! beta 
+          p2 = list_act(pp2)
+          call give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib)
+          eff_2_e_from_3_e_ab(p2,p1,h2,h1) = contrib
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+END_PROVIDER 
+
+subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib)
+ implicit none
+ BEGIN_DOC 
+! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_beta
+!
+! on top of a determinant whose occupied orbitals is in (occ, Ne)
+ END_DOC
+ integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2)
+ double precision, intent(out) :: contrib
+ integer :: mm,m 
+ double precision :: direct_int, exchange_int
+ !! h1,p1 == alpha 
+ !! h2,p2 == beta
+ contrib = 0.d0
+ do mm = 1, Ne(1) !! alpha 
+   m = occ(mm,1)
+   direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
+   ! exchange between (h1,p1) and m
+   exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+
+ do mm = 1, Ne(2) !! beta
+   m = occ(mm,2)
+   direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
+   ! exchange between (h2,p2) and m
+   exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+end
+
+BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/alpha double excitations 
+!
+! from contractionelec_alpha_num with HF density = a^{dagger}_p1_alpha a^{dagger}_p2_alpha a_h2_alpha a_h1_alpha
+!
+! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill 
+!
+! ||||    h2>h1, p2>p1   ||||
+ END_DOC
+ integer :: i,h1,p1,h2,p2
+ integer :: hh1,hh2,pp1,pp2,m,mm
+ integer                        :: Ne(2)
+ integer,           allocatable :: occ(:,:)
+ double precision :: contrib
+ allocate( occ(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+ call give_contrib_for_aaaa(1 ,1 ,1 ,1 ,occ,Ne,contrib)
+ eff_2_e_from_3_e_aa = 100000000.d0
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & 
+ !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_aa)
+ !$OMP DO SCHEDULE (static) 
+  do hh1 = 1, n_act_orb !! alpha 
+    h1 = list_act(hh1) 
+    do hh2 = hh1+1, n_act_orb !! alpha
+      h2 = list_act(hh2) 
+      do pp1 = 1, n_act_orb !! alpha
+        p1 = list_act(pp1)
+        do pp2 = pp1+1, n_act_orb !! alpha
+          p2 = list_act(pp2)
+          call give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib)
+          eff_2_e_from_3_e_aa(p2,p1,h2,h1) = contrib
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+END_PROVIDER 
+
+subroutine give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib)
+ implicit none
+ BEGIN_DOC 
+! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_alpha
+!
+! on top of a determinant whose occupied orbitals is in (occ, Ne)
+ END_DOC
+ integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2)
+ double precision, intent(out) :: contrib
+ integer :: mm,m 
+ double precision :: direct_int, exchange_int
+ !! h1,p1 == alpha 
+ !! h2,p2 == alpha
+ contrib = 0.d0
+ do mm = 1, Ne(1) !! alpha ==> pure parallele spin contribution
+   m = occ(mm,1)
+   contrib += three_e_double_parrallel_spin_prov(m,p2,h2,p1,h1)
+ enddo
+
+ do mm = 1, Ne(2) !! beta
+   m = occ(mm,2)
+   direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
+   ! exchange between (h1,p1) and (h2,p2)
+   exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+end
+
+
+BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for beta/beta double excitations 
+!
+! from contractionelec_beta_num with HF density = a^{dagger}_p1_beta a^{dagger}_p2_beta a_h2_beta a_h1_beta
+!
+! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill 
+!
+! ||||    h2>h1, p2>p1   ||||
+ END_DOC
+ integer :: i,h1,p1,h2,p2
+ integer :: hh1,hh2,pp1,pp2,m,mm
+ integer                        :: Ne(2)
+ integer,           allocatable :: occ(:,:)
+ double precision :: contrib
+ allocate( occ(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+ call give_contrib_for_bbbb(1,1 ,1 ,1 ,occ,Ne,contrib)
+ eff_2_e_from_3_e_bb = 100000000.d0
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & 
+ !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_bb)
+ !$OMP DO SCHEDULE (static) 
+  do hh1 = 1, n_act_orb !! beta 
+    h1 = list_act(hh1) 
+    do hh2 = hh1+1, n_act_orb !! beta
+      h2 = list_act(hh2) 
+      do pp1 = 1, n_act_orb !! beta
+        p1 = list_act(pp1)
+        do pp2 = pp1+1, n_act_orb !! beta
+          p2 = list_act(pp2)
+          call give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib)
+          eff_2_e_from_3_e_bb(p2,p1,h2,h1) = contrib
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+END_PROVIDER 
+
+subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib)
+ implicit none
+ BEGIN_DOC 
+! gives the contribution for a double excitation (h1,p1)_beta (h2,p2)_beta
+!
+! on top of a determinant whose occupied orbitals is in (occ, Ne)
+ END_DOC
+ integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2)
+ double precision, intent(out) :: contrib
+ integer :: mm,m 
+ double precision :: direct_int, exchange_int
+ !! h1,p1 == beta
+ !! h2,p2 == beta
+ contrib = 0.d0
+ do mm = 1, Ne(2) !! beta ==> pure parallele spin contribution
+   m = occ(mm,1)
+   contrib += three_e_double_parrallel_spin_prov(m,p2,h2,p1,h1)
+ enddo
+
+ do mm = 1, Ne(1) !! alpha
+   m = occ(mm,1)
+   direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
+   ! exchange between (h1,p1) and (h2,p2)
+   exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+end
+
+
+subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for double excitation  ONLY FOR ONE- AND TWO-BODY TERMS 
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint 
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: htot
+  double precision :: hmono, htwoe
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: get_mo_two_e_integral_tc_int,phase
+
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  htot   = 0.d0
+
+  if(degree.ne.2)then
+   return
+  endif
+  integer :: degree_i,degree_j
+  call get_excitation_degree(ref_bitmask,key_i,degree_i,N_int)
+  call get_excitation_degree(ref_bitmask,key_j,degree_j,N_int)
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  if(s1.ne.s2)then
+   ! opposite spin two-body 
+    htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) 
+  else
+   ! same spin two-body 
+   ! direct terms 
+   htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1)  
+   ! exchange terms 
+   htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) 
+  endif
+  htwoe  *= phase
+  htot    =  htwoe 
+
+end
+
diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
new file mode 100644
index 00000000..7cff3c73
--- /dev/null
+++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
@@ -0,0 +1,572 @@
+
+
+subroutine  single_htilde_mu_mat_fock_bi_ortho (Nint, key_j, key_i, hmono, htwoe, hthree, htot)
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for single excitation ONLY FOR ONE- AND TWO-BODY TERMS 
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, hthree, htot
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: get_mo_two_e_integral_tc_int, phase
+  double precision              :: direct_int, exchange_int_12, exchange_int_23, exchange_int_13
+  integer                       :: other_spin(2)
+  integer(bit_kind)             :: key_j_core(Nint,2), key_i_core(Nint,2)
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  hthree = 0.d0
+  htot   = 0.d0
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.1)then
+   return
+  endif
+  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc,1,h1,p1,h2,p2,s1,s2)
+  call get_single_excitation_from_fock_tc(key_i,key_j,h1,p1,s1,phase,hmono,htwoe,hthree,htot)
+end
+
+
+subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,htwoe,hthree,htot)
+ use bitmasks
+ implicit none
+ integer,intent(in) :: h,p,spin
+ double precision, intent(in)  :: phase
+ integer(bit_kind), intent(in) :: key_i(N_int,2), key_j(N_int,2)
+ double precision, intent(out) :: hmono,htwoe,hthree,htot
+ integer(bit_kind) :: differences(N_int,2)
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: partcl(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_partcl(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
+ integer :: i0,i
+ double precision :: buffer_c(mo_num),buffer_x(mo_num)
+ do i=1, mo_num
+   buffer_c(i) = tc_2e_3idx_coulomb_integrals(i,p,h)
+   buffer_x(i) = tc_2e_3idx_exchange_integrals(i,p,h)
+ enddo
+ do i = 1, N_int
+  differences(i,1) = xor(key_i(i,1),ref_closed_shell_bitmask(i,1))
+  differences(i,2) = xor(key_i(i,2),ref_closed_shell_bitmask(i,2))
+  hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1))
+  hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2))
+  partcl(i,1) = iand(differences(i,1),key_i(i,1))
+  partcl(i,2) = iand(differences(i,2),key_i(i,2))
+ enddo
+ call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int)
+ call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int)
+ hmono = mo_bi_ortho_tc_one_e(p,h)
+ htwoe = fock_op_2_e_tc_closed_shell(p,h)
+ ! holes :: direct terms
+ do i0 = 1, n_occ_ab_hole(1)
+  i = occ_hole(i0,1)
+  htwoe -= buffer_c(i)
+ enddo
+ do i0 = 1, n_occ_ab_hole(2)
+  i = occ_hole(i0,2)
+  htwoe -= buffer_c(i)
+ enddo
+
+ ! holes :: exchange terms
+ do i0 = 1, n_occ_ab_hole(spin)
+  i = occ_hole(i0,spin)
+  htwoe += buffer_x(i)
+ enddo
+
+ ! particles :: direct terms
+ do i0 = 1, n_occ_ab_partcl(1)
+  i = occ_partcl(i0,1)
+  htwoe += buffer_c(i)
+ enddo
+ do i0 = 1, n_occ_ab_partcl(2)
+  i = occ_partcl(i0,2)
+  htwoe += buffer_c(i)
+ enddo
+
+ ! particles :: exchange terms
+ do i0 = 1, n_occ_ab_partcl(spin)
+  i = occ_partcl(i0,spin)
+  htwoe -= buffer_x(i)
+ enddo
+ hthree = 0.d0
+ if (three_body_h_tc)then
+  call three_comp_fock_elem(key_i,h,p,spin,hthree)
+ endif
+
+
+ htwoe = htwoe * phase
+ hmono = hmono * phase
+ hthree = hthree * phase
+ htot  = htwoe + hmono + hthree
+
+end
+
+subroutine three_comp_fock_elem(key_i,h_fock,p_fock,ispin_fock,hthree)
+ implicit none
+ integer,intent(in) :: h_fock,p_fock,ispin_fock
+ integer(bit_kind), intent(in) :: key_i(N_int,2)
+ double precision, intent(out) :: hthree
+ integer :: nexc(2),i,ispin,na,nb
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: particle(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_particle(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_particle(2)
+ integer(bit_kind)              :: det_tmp(N_int,2)
+
+
+  nexc(1) = 0
+  nexc(2) = 0
+  !! Get all the holes and particles of key_i with respect to the ROHF determinant
+  do i=1,N_int
+    hole(i,1)     = xor(key_i(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(key_i(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),key_i(i,1))
+    particle(i,2) = iand(hole(i,2),key_i(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+
+  !! Initialize the matrix element with the reference ROHF Slater determinant Fock element
+  if(ispin_fock==1)then
+   hthree = fock_a_tot_3e_bi_orth(p_fock,h_fock) 
+  else 
+   hthree = fock_b_tot_3e_bi_orth(p_fock,h_fock) 
+  endif
+  det_tmp = ref_bitmask
+  do ispin=1,2
+    na = elec_num_tab(ispin)
+    nb = elec_num_tab(iand(ispin,1)+1)
+    do i=1,nexc(ispin)
+      !DIR$ FORCEINLINE
+      call fock_ac_tc_operator( occ_particle(i,ispin), ispin, det_tmp, h_fock,p_fock, ispin_fock, hthree, N_int,na,nb)
+      !DIR$ FORCEINLINE
+      call fock_a_tc_operator ( occ_hole    (i,ispin), ispin, det_tmp, h_fock,p_fock, ispin_fock, hthree, N_int,na,nb)
+    enddo
+  enddo
+end
+
+subroutine fock_ac_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computes the contribution to the three-electron part of the Fock operator 
+  !
+  ! a^dagger_{p_fock} a_{h_fock} of spin ispin_fock
+  ! 
+  ! on top of a determinant 'key' on which you ADD an electron of spin ispin in orbital iorb
+  ! 
+  ! in output, the determinant key is changed by the ADDITION of that electron 
+  !
+  ! the output hthree is INCREMENTED
+  END_DOC
+  integer, intent(in)              :: iorb, ispin, Nint, h_fock,p_fock, ispin_fock
+  integer, intent(inout)           :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout)  :: hthree
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i,jj,j
+  double precision :: direct_int, exchange_int
+  
+
+  if (iorb < 1) then
+    print *,  irp_here, ': iorb < 1'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+  if (iorb > mo_num) then
+    print *,  irp_here, ': iorb > mo_num'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  ASSERT (tmp(1) == elec_alpha_num)
+  ASSERT (tmp(2) == elec_beta_num)
+
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  key(k,ispin) = ibset(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1
+
+
+  !! spin of other electrons == ispin 
+  if(ispin == ispin_fock)then
+   !! in what follows :: jj == other electrons in the determinant 
+   !!                 :: iorb == electron that has been added of spin ispin
+   !!                 :: p_fock, h_fock == hole particle of spin ispin_fock
+   !! jj = ispin = ispin_fock >> pure parallel spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    hthree += three_e_single_parrallel_spin_prov(jj,iorb,p_fock,h_fock)
+   enddo
+   !! spin of jj == other spin than ispin AND ispin_fock
+   !! exchange between the iorb and (h_fock, p_fock)
+   do j = 1, nb
+    jj = occ(j,other_spin) 
+    direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree += direct_int - exchange_int
+   enddo
+  else !! ispin NE to ispin_fock
+   !! jj = ispin BUT NON EQUAL TO ispin_fock 
+   !! exchange between the jj and iorb
+   do j = 1, na
+    jj = occ(j,ispin)
+    direct_int   = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch23_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree += direct_int - exchange_int
+   enddo
+   !! jj = other_spin than ispin BUT jj == ispin_fock
+   !! exchange between jj and (h_fock,p_fock)
+   do j = 1, nb
+    jj = occ(j,other_spin) 
+    direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch13_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree += direct_int - exchange_int
+   enddo
+  endif
+
+  na = na+1
+end
+
+subroutine fock_a_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computes the contribution to the three-electron part of the Fock operator 
+  !
+  ! a^dagger_{p_fock} a_{h_fock} of spin ispin_fock
+  ! 
+  ! on top of a determinant 'key' on which you REMOVE an electron of spin ispin in orbital iorb
+  ! 
+  ! in output, the determinant key is changed by the REMOVAL of that electron 
+  !
+  ! the output hthree is INCREMENTED
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint, h_fock,p_fock, ispin_fock
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hthree
+  
+  double precision  :: direct_int, exchange_int
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i,jj,mm,j,m
+  integer                        :: tmp(2)
+
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k>0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  key(k,ispin) = ibclr(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1
+
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  na = na-1
+  !! spin of other electrons == ispin 
+  if(ispin == ispin_fock)then
+   !! in what follows :: jj == other electrons in the determinant 
+   !!                 :: iorb == electron that has been added of spin ispin
+   !!                 :: p_fock, h_fock == hole particle of spin ispin_fock
+   !! jj = ispin = ispin_fock >> pure parallel spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    hthree -= three_e_single_parrallel_spin_prov(jj,iorb,p_fock,h_fock)
+   enddo
+   !! spin of jj == other spin than ispin AND ispin_fock
+   !! exchange between the iorb and (h_fock, p_fock)
+   do j = 1, nb
+    jj = occ(j,other_spin) 
+    direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree -= direct_int - exchange_int
+   enddo
+  else !! ispin NE to ispin_fock
+   !! jj = ispin BUT NON EQUAL TO ispin_fock 
+   !! exchange between the jj and iorb
+   do j = 1, na
+    jj = occ(j,ispin)
+    direct_int   = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch23_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree -= direct_int - exchange_int
+   enddo
+   !! jj = other_spin than ispin BUT jj == ispin_fock
+   !! exchange between jj and (h_fock,p_fock)
+   do j = 1, nb
+    jj = occ(j,other_spin) 
+    direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch13_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree -= direct_int - exchange_int
+   enddo
+  endif
+
+end
+
+
+BEGIN_PROVIDER [double precision, fock_op_2_e_tc_closed_shell, (mo_num, mo_num) ]
+ implicit none
+ BEGIN_DOC
+! Closed-shell part of the Fock operator for the TC operator
+ END_DOC
+ integer :: h0,p0,h,p,k0,k,i
+ integer :: n_occ_ab(2)
+ integer :: occ(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_virt(2)
+ integer :: occ_virt(N_int*bit_kind_size,2)
+ integer(bit_kind) :: key_test(N_int)
+ integer(bit_kind) :: key_virt(N_int,2)
+ double precision :: accu
+
+ fock_op_2_e_tc_closed_shell = -1000.d0
+ call bitstring_to_list_ab(ref_closed_shell_bitmask, occ, n_occ_ab, N_int)
+ do i = 1, N_int
+  key_virt(i,1) = full_ijkl_bitmask(i)
+  key_virt(i,2) = full_ijkl_bitmask(i)
+  key_virt(i,1) = xor(key_virt(i,1),ref_closed_shell_bitmask(i,1))
+  key_virt(i,2) = xor(key_virt(i,2),ref_closed_shell_bitmask(i,2))
+ enddo
+ call bitstring_to_list_ab(key_virt, occ_virt, n_occ_ab_virt, N_int)
+ ! docc ---> virt single excitations
+ do h0 = 1,  n_occ_ab(1)
+  h=occ(h0,1)
+  do p0 = 1, n_occ_ab_virt(1)
+   p = occ_virt(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu 
+  enddo
+ enddo
+
+ do h0 = 1, n_occ_ab_virt(1)
+  h = occ_virt(h0,1)
+  do p0 = 1,  n_occ_ab(1)
+   p=occ(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu 
+  enddo
+ enddo
+
+ ! virt ---> virt single excitations
+ do h0 = 1,  n_occ_ab_virt(1)
+  h=occ_virt(h0,1)
+  do p0 = 1, n_occ_ab_virt(1)
+   p = occ_virt(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu 
+  enddo
+ enddo
+
+ do h0 = 1, n_occ_ab_virt(1)
+  h = occ_virt(h0,1)
+  do p0 = 1,  n_occ_ab_virt(1)
+   p=occ_virt(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu 
+  enddo
+ enddo
+
+
+ ! docc ---> docc single excitations
+ do h0 = 1,  n_occ_ab(1)
+  h=occ(h0,1)
+  do p0 = 1, n_occ_ab(1)
+   p = occ(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu 
+  enddo
+ enddo
+
+ do h0 = 1, n_occ_ab(1)
+  h = occ(h0,1)
+  do p0 = 1,  n_occ_ab(1)
+   p=occ(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu 
+  enddo
+ enddo
+
+! do i = 1, mo_num
+!  write(*,'(100(F10.5,X))')fock_op_2_e_tc_closed_shell(:,i)
+! enddo
+
+END_PROVIDER
+
+
+subroutine  single_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot)
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for single excitation ONLY FOR ONE- AND TWO-BODY TERMS 
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) ::  htot
+  double precision :: hmono, htwoe
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: get_mo_two_e_integral_tc_int, phase
+  double precision              :: direct_int, exchange_int_12, exchange_int_23, exchange_int_13
+  integer                       :: other_spin(2)
+  integer(bit_kind)             :: key_j_core(Nint,2), key_i_core(Nint,2)
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  htot   = 0.d0
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.1)then
+   return
+  endif
+  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc,1,h1,p1,h2,p2,s1,s2)
+  call get_single_excitation_from_fock_tc_no_3e(key_i,key_j,h1,p1,s1,phase,hmono,htwoe,htot)
+end
+
+
+subroutine get_single_excitation_from_fock_tc_no_3e(key_i,key_j,h,p,spin,phase,hmono,htwoe,htot)
+ use bitmasks
+ implicit none
+ integer,intent(in) :: h,p,spin
+ double precision, intent(in)  :: phase
+ integer(bit_kind), intent(in) :: key_i(N_int,2), key_j(N_int,2)
+ double precision, intent(out) :: hmono,htwoe,htot
+ integer(bit_kind) :: differences(N_int,2)
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: partcl(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_partcl(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
+ integer :: i0,i
+ double precision :: buffer_c(mo_num),buffer_x(mo_num)
+ do i=1, mo_num
+   buffer_c(i) = tc_2e_3idx_coulomb_integrals(i,p,h)
+   buffer_x(i) = tc_2e_3idx_exchange_integrals(i,p,h)
+ enddo
+ do i = 1, N_int
+  differences(i,1) = xor(key_i(i,1),ref_closed_shell_bitmask(i,1))
+  differences(i,2) = xor(key_i(i,2),ref_closed_shell_bitmask(i,2))
+  hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1))
+  hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2))
+  partcl(i,1) = iand(differences(i,1),key_i(i,1))
+  partcl(i,2) = iand(differences(i,2),key_i(i,2))
+ enddo
+ call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int)
+ call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int)
+ hmono = mo_bi_ortho_tc_one_e(p,h)
+ htwoe = fock_op_2_e_tc_closed_shell(p,h)
+ ! holes :: direct terms
+ do i0 = 1, n_occ_ab_hole(1)
+  i = occ_hole(i0,1)
+  htwoe -= buffer_c(i)
+ enddo
+ do i0 = 1, n_occ_ab_hole(2)
+  i = occ_hole(i0,2)
+  htwoe -= buffer_c(i)
+ enddo
+
+ ! holes :: exchange terms
+ do i0 = 1, n_occ_ab_hole(spin)
+  i = occ_hole(i0,spin)
+  htwoe += buffer_x(i)
+ enddo
+
+ ! particles :: direct terms
+ do i0 = 1, n_occ_ab_partcl(1)
+  i = occ_partcl(i0,1)
+  htwoe += buffer_c(i)
+ enddo
+ do i0 = 1, n_occ_ab_partcl(2)
+  i = occ_partcl(i0,2)
+  htwoe += buffer_c(i)
+ enddo
+
+ ! particles :: exchange terms
+ do i0 = 1, n_occ_ab_partcl(spin)
+  i = occ_partcl(i0,spin)
+  htwoe -= buffer_x(i)
+ enddo
+ htwoe = htwoe * phase
+ hmono = hmono * phase
+ htot  = htwoe + hmono 
+
+end
+
diff --git a/src/tc_bi_ortho/spin_mulliken.irp.f b/src/tc_bi_ortho/spin_mulliken.irp.f
new file mode 100644
index 00000000..e225d299
--- /dev/null
+++ b/src/tc_bi_ortho/spin_mulliken.irp.f
@@ -0,0 +1,114 @@
+
+BEGIN_PROVIDER [double precision, tc_spin_population, (ao_num,ao_num,N_states)]
+ implicit none
+ integer :: i,j,istate
+ BEGIN_DOC
+! spin population on the ao basis :
+! tc_spin_population(i,j) = rho_AO(alpha)(i,j) - rho_AO(beta)(i,j) * <AO_i|AO_j>
+ END_DOC
+ tc_spin_population = 0.d0
+ if(only_spin_tc_right)then
+  do i = 1, ao_num
+   do j = 1, ao_num
+    tc_spin_population(j,i,1) = tc_spin_dens_right_only(j,i) * ao_overlap(j,i)
+   enddo
+  enddo
+ else
+  do istate = 1, N_states
+   do i = 1, ao_num
+    do j = 1, ao_num
+     tc_spin_population(j,i,istate) = tc_spin_transition_matrix_ao(j,i,istate,istate) * ao_overlap(j,i)
+    enddo
+   enddo
+  enddo
+ endif
+END_PROVIDER
+
+ BEGIN_PROVIDER [double precision, tc_spin_population_angular_momentum, (0:ao_l_max,N_states)]
+&BEGIN_PROVIDER [double precision, tc_spin_population_angular_momentum_per_atom, (0:ao_l_max,nucl_num,N_states)]
+ implicit none
+ integer :: i,istate
+ double precision :: accu
+ tc_spin_population_angular_momentum = 0.d0
+ tc_spin_population_angular_momentum_per_atom = 0.d0
+ do istate = 1, N_states
+  do i = 1,  ao_num
+   tc_spin_population_angular_momentum(ao_l(i),istate) += tc_spin_gross_orbital_product(i,istate)
+   tc_spin_population_angular_momentum_per_atom(ao_l(i),ao_nucl(i),istate) += tc_spin_gross_orbital_product(i,istate)
+  enddo
+ enddo
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [double precision, tc_spin_gross_orbital_product, (ao_num,N_states)]
+ implicit none
+ tc_spin_gross_orbital_product = 0.d0
+ integer :: i,j,istate
+ BEGIN_DOC
+! gross orbital product for the spin population
+ END_DOC
+ do istate = 1, N_states
+  do i = 1, ao_num
+   do j = 1, ao_num
+    tc_spin_gross_orbital_product(i,istate) += tc_spin_population(j,i,istate)
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER
+
+BEGIN_PROVIDER [double precision, tc_mulliken_spin_densities, (nucl_num,N_states)]
+ implicit none
+ integer :: i,j,istate
+ BEGIN_DOC
+!ATOMIC SPIN POPULATION (ALPHA MINUS BETA)
+ END_DOC
+ tc_mulliken_spin_densities = 0.d0
+ do istate = 1, N_states
+  do i = 1, ao_num
+   tc_mulliken_spin_densities(ao_nucl(i),istate) += tc_spin_gross_orbital_product(i,istate)
+  enddo
+ enddo
+
+END_PROVIDER
+
+subroutine tc_print_mulliken_sd
+ implicit none
+ double precision :: accu
+ integer :: i
+ integer :: j
+ print*,'Mulliken spin densities'
+ accu= 0.d0
+ do i = 1, nucl_num
+  print*,i,nucl_charge(i),tc_mulliken_spin_densities(i,1)
+  accu += tc_mulliken_spin_densities(i,1)
+ enddo
+ print*,'Sum of Mulliken SD = ',accu
+ print*,'AO SPIN POPULATIONS'
+ accu = 0.d0
+ do i = 1, ao_num
+  accu += tc_spin_gross_orbital_product(i,1)
+  write(*,'(1X,I3,1X,A4,1X,I2,1X,A4,1X,F10.7)')i,trim(element_name(int(nucl_charge(ao_nucl(i))))),ao_nucl(i),trim(l_to_character(ao_l(i))),tc_spin_gross_orbital_product(i,1)
+ enddo
+ print*,'sum = ',accu
+ accu = 0.d0
+ print*,'Angular momentum analysis'
+ do i = 0,  ao_l_max
+  accu += tc_spin_population_angular_momentum(i,1)
+  print*,' ',trim(l_to_character(i)),tc_spin_population_angular_momentum(i,1)
+ print*,'sum = ',accu
+ enddo
+ print*,'Angular momentum analysis per atom'
+ print*,'Angular momentum analysis'
+ do j = 1,nucl_num
+  accu = 0.d0
+  do i = 0,  ao_l_max
+   accu += tc_spin_population_angular_momentum_per_atom(i,j,1)
+   write(*,'(1X,I3,1X,A4,1X,A4,1X,F10.7)')j,trim(element_name(int(nucl_charge(j)))),trim(l_to_character(i)),tc_spin_population_angular_momentum_per_atom(i,j,1)
+   print*,'sum = ',accu
+  enddo
+ enddo
+
+end
+
diff --git a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f
new file mode 100644
index 00000000..e4f7ca93
--- /dev/null
+++ b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f
@@ -0,0 +1,111 @@
+subroutine give_all_perm_for_three_e(n,l,k,m,j,i,idx_list,phase)
+ implicit none
+ BEGIN_DOC
+ ! returns all the list of permutting indices for the antimmetrization of 
+ !
+ ! (k^dagger l^dagger n^dagger m j i)  <nlk|L|mji>   when all indices have the same spins
+ !
+ ! idx_list(:,i) == list of the 6 indices corresponding the permutation "i"
+ !
+ ! phase(i) == phase of the permutation "i" 
+ !
+ ! there are in total 6 permutations with different indices
+ END_DOC
+ integer, intent(in)  :: n,l,k,m,j,i
+ integer, intent(out) :: idx_list(6,6) 
+ double precision :: phase(6)
+ integer :: list(6)
+ !!! CYCLIC PERMUTATIONS 
+ phase(1:3) = 1.d0
+ !!! IDENTITY PERMUTATION 
+ list = (/n,l,k,m,j,i/)
+ idx_list(:,1) = list(:)
+ !!! FIRST CYCLIC PERMUTATION 
+ list = (/n,l,k,j,i,m/)
+ idx_list(:,2) = list(:)
+ !!! FIRST CYCLIC PERMUTATION 
+ list = (/n,l,k,i,m,j/)
+ idx_list(:,3) = list(:)
+
+ !!! NON CYCLIC PERMUTATIONS 
+ phase(1:3) = -1.d0
+ !!! PARTICLE 1 is FIXED
+ list = (/n,l,k,j,m,i/)
+ idx_list(:,4) = list(:)
+ !!! PARTICLE 2 is FIXED
+ list = (/n,l,k,i,j,m/)
+ idx_list(:,5) = list(:)
+ !!! PARTICLE 3 is FIXED
+ list = (/n,l,k,m,i,j/)
+ idx_list(:,6) = list(:)
+
+end
+
+double precision function sym_3_e_int_from_6_idx_tensor(n,l,k,m,j,i)
+ implicit none
+ BEGIN_DOC
+ ! returns all good combinations of permutations of integrals with the good signs 
+ !
+ ! for a given (k^dagger l^dagger n^dagger m j i)  <nlk|L|mji> when all indices have the same spins
+ END_DOC
+ integer, intent(in)  :: n,l,k,m,j,i
+ sym_3_e_int_from_6_idx_tensor = three_body_ints_bi_ort(n,l,k,m,j,i) & ! direct 
+                               + three_body_ints_bi_ort(n,l,k,j,i,m) & ! 1st cyclic permutation  
+                               + three_body_ints_bi_ort(n,l,k,i,m,j) & ! 2nd cyclic permutation  
+                               - three_body_ints_bi_ort(n,l,k,j,m,i) & ! elec 1 is kept fixed 
+                               - three_body_ints_bi_ort(n,l,k,i,j,m) & ! elec 2 is kept fixed
+                               - three_body_ints_bi_ort(n,l,k,m,i,j)   ! elec 3 is kept fixed
+
+end
+
+double precision function direct_sym_3_e_int(n,l,k,m,j,i)
+ implicit none
+ BEGIN_DOC
+ ! returns all good combinations of permutations of integrals with the good signs 
+ !
+ ! for a given (k^dagger l^dagger n^dagger m j i)  <nlk|L|mji> when all indices have the same spins
+ END_DOC
+ integer, intent(in)  :: n,l,k,m,j,i
+ double precision :: integral
+ direct_sym_3_e_int = 0.d0
+ call give_integrals_3_body_bi_ort(n,l,k,m,j,i,integral)   ! direct 
+ direct_sym_3_e_int += integral
+ call give_integrals_3_body_bi_ort(n,l,k,j,i,m,integral)   ! 1st cyclic permutation  
+ direct_sym_3_e_int += integral
+ call give_integrals_3_body_bi_ort(n,l,k,i,m,j,integral)   ! 2nd cyclic permutation  
+ direct_sym_3_e_int += integral
+ call give_integrals_3_body_bi_ort(n,l,k,j,m,i,integral)   ! elec 1 is kept fixed 
+ direct_sym_3_e_int += -integral
+ call give_integrals_3_body_bi_ort(n,l,k,i,j,m,integral)   ! elec 2 is kept fixed
+ direct_sym_3_e_int += -integral
+ call give_integrals_3_body_bi_ort(n,l,k,m,i,j,integral)   ! elec 3 is kept fixed
+ direct_sym_3_e_int += -integral
+
+end
+
+double precision function three_e_diag_parrallel_spin(m,j,i)
+ implicit none
+ integer, intent(in) :: i,j,m
+  three_e_diag_parrallel_spin = three_e_3_idx_direct_bi_ort(m,j,i)  ! direct
+  three_e_diag_parrallel_spin += three_e_3_idx_cycle_1_bi_ort(m,j,i) + three_e_3_idx_cycle_2_bi_ort(m,j,i) & ! two cyclic permutations 
+  - three_e_3_idx_exch23_bi_ort(m,j,i) - three_e_3_idx_exch13_bi_ort(m,j,i)  & ! two first exchange 
+  - three_e_3_idx_exch12_bi_ort(m,j,i) ! last exchange 
+end
+
+double precision function three_e_single_parrallel_spin(m,j,k,i)
+ implicit none
+ integer, intent(in) :: i,k,j,m
+  three_e_single_parrallel_spin = three_e_4_idx_direct_bi_ort(m,j,k,i)  ! direct
+  three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_2_bi_ort(m,j,k,i) & ! two cyclic permutations 
+  - three_e_4_idx_exch23_bi_ort(m,j,k,i) - three_e_4_idx_exch13_bi_ort(m,j,k,i)  & ! two first exchange 
+  - three_e_4_idx_exch12_bi_ort(m,j,k,i) ! last exchange 
+end
+
+double precision function three_e_double_parrallel_spin(m,l,j,k,i)
+ implicit none
+ integer, intent(in) :: i,k,j,m,l
+  three_e_double_parrallel_spin = three_e_5_idx_direct_bi_ort(m,l,j,k,i)  ! direct
+  three_e_double_parrallel_spin += three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) & ! two cyclic permutations 
+  - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i)  & ! two first exchange 
+  - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ! last exchange 
+end
diff --git a/src/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f
new file mode 100644
index 00000000..e8277a74
--- /dev/null
+++ b/src/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f
@@ -0,0 +1,140 @@
+
+BEGIN_PROVIDER [ double precision, three_e_diag_parrallel_spin_prov, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator ON A BI ORTHONORMAL BASIS 
+  !
+  ! three_e_diag_parrallel_spin_prov(m,j,i) = All combinations of the form <mji|-L|mji> for same spin matrix elements  
+  ! 
+  ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0, three_e_diag_parrallel_spin
+
+  three_e_diag_parrallel_spin_prov = 0.d0
+  print *, ' Providing the three_e_diag_parrallel_spin_prov ...'
+
+ integral = three_e_diag_parrallel_spin(1,1,1) ! to provide all stuffs
+  call wall_time(wall0)
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        three_e_diag_parrallel_spin_prov(m,j,i) =  three_e_diag_parrallel_spin(m,j,i)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin_prov(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_diag_parrallel_spin_prov', wall1 - wall0
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of <mjk|-L|mji> for same spin matrix elements 
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+ implicit none
+ integer          :: i, j, k, m
+ double precision :: integral, wall1, wall0, three_e_single_parrallel_spin
+
+  three_e_single_parrallel_spin_prov = 0.d0
+  print *, ' Providing the three_e_single_parrallel_spin_prov ...'
+
+  integral = three_e_single_parrallel_spin(1,1,1,1)
+  call wall_time(wall0)
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          three_e_single_parrallel_spin_prov(m,j,k,i) = three_e_single_parrallel_spin(m,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_single_parrallel_spin_prov', wall1 - wall0
+
+END_PROVIDER 
+
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = <mlk|-L|mji> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0, three_e_double_parrallel_spin
+
+  three_e_double_parrallel_spin_prov = 0.d0
+  print *, ' Providing the three_e_double_parrallel_spin_prov ...'
+  call wall_time(wall0)
+
+ integral = three_e_double_parrallel_spin(1,1,1,1,1)
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            three_e_double_parrallel_spin_prov(m,l,j,k,i) = three_e_double_parrallel_spin(m,l,j,k,i)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0
+
+END_PROVIDER 
+
diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f
new file mode 100644
index 00000000..2d51f6f0
--- /dev/null
+++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f
@@ -0,0 +1,53 @@
+program tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. 
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+  call routine_diag
+  call save_tc_bi_ortho_wavefunction
+end
+
+subroutine test
+ implicit none
+ integer :: i,j 
+ double precision :: hmono,htwoe,hthree,htot
+ use bitmasks
+ print*,'reading the wave function '
+ do i = 1, N_det
+  call debug_det(psi_det(1,1,i),N_int)
+  print*,i,psi_l_coef_bi_ortho(i,1)*psi_r_coef_bi_ortho(i,1)
+  print*,i,psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1)
+ enddo
+
+end
+
+subroutine routine_diag
+ implicit none
+! provide eigval_right_tc_bi_orth
+!  provide overlap_bi_ortho
+!  provide htilde_matrix_elmt_bi_ortho
+ integer ::i,j
+ print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1)
+ print*,'e_tc_left_right         = ',e_tc_left_right
+ print*,'e_tilde_bi_orth_00      = ',e_tilde_bi_orth_00
+ print*,'e_pt2_tc_bi_orth        = ',e_pt2_tc_bi_orth
+ print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single
+ print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double
+ print*,'***'
+ print*,'e_corr_bi_orth          = ',e_corr_bi_orth
+ print*,'e_corr_bi_orth_proj     = ',e_corr_bi_orth_proj
+ print*,'e_corr_single_bi_orth   = ',e_corr_single_bi_orth
+ print*,'e_corr_double_bi_orth   = ',e_corr_double_bi_orth
+ print*,'Left/right eigenvectors'
+ do i = 1,N_det
+  write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1)
+ enddo
+end
+
diff --git a/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f b/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f
new file mode 100644
index 00000000..28f122ee
--- /dev/null
+++ b/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f
@@ -0,0 +1,24 @@
+program tc_bi_ortho_prop
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+!  call routine_diag
+ call test
+end
+
+subroutine test
+ implicit none
+ integer :: i
+ print*,'TC Dipole components'
+ do i= 1, 3
+  print*,tc_bi_ortho_dipole(i,1)
+ enddo
+end
diff --git a/src/tc_bi_ortho/tc_cisd_sc2.irp.f b/src/tc_bi_ortho/tc_cisd_sc2.irp.f
new file mode 100644
index 00000000..0fb9f524
--- /dev/null
+++ b/src/tc_bi_ortho/tc_cisd_sc2.irp.f
@@ -0,0 +1,24 @@
+program tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  call test
+end
+
+subroutine test
+ implicit none
+! double precision, allocatable :: dressing_dets(:),e_corr_dets(:)
+! allocate(dressing_dets(N_det),e_corr_dets(N_det))
+! e_corr_dets = 0.d0
+! call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets)
+  provide eigval_tc_cisd_sc2_bi_ortho
+end
diff --git a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f
new file mode 100644
index 00000000..4ae44148
--- /dev/null
+++ b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f
@@ -0,0 +1,145 @@
+ BEGIN_PROVIDER [ double precision, reigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)]
+&BEGIN_PROVIDER [ double precision, leigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)]
+&BEGIN_PROVIDER [ double precision, eigval_tc_cisd_sc2_bi_ortho, (N_states)]
+ implicit none
+ integer :: it,n_real,degree,i,istate
+ double precision :: e_before, e_current,thr, hmono,htwoe,hthree,accu
+ double precision, allocatable :: e_corr_dets(:),h0j(:), h_sc2(:,:), dressing_dets(:)
+ double precision, allocatable :: leigvec_tc_bi_orth_tmp(:,:),reigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:)
+ allocate(leigvec_tc_bi_orth_tmp(N_det,N_det),reigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det))
+ allocate(e_corr_dets(N_det),h0j(N_det),h_sc2(N_det,N_det),dressing_dets(N_det))
+ allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states))
+ dressing_dets = 0.d0
+ do i = 1, N_det
+  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
+  call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+  if(degree == 1 .or. degree == 2)then
+   call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i))
+  endif
+ enddo
+ reigvec_tc_bi_orth_tmp = 0.d0
+ do i = 1, N_det 
+  reigvec_tc_bi_orth_tmp(i,1) = psi_r_coef_bi_ortho(i,1) 
+ enddo
+ vec_tmp = 0.d0
+ do istate = 1, N_states
+  vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate)
+ enddo
+ do istate = N_states+1, n_states_diag
+  vec_tmp(istate,istate) = 1.d0
+ enddo
+ print*,'Diagonalizing the TC CISD '
+ call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
+ do i = 1, N_det 
+  e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1)
+ enddo
+ E_before = eigval_tmp(1)
+ print*,'Starting from ',E_before
+
+ e_current = 10.d0
+ thr = 1.d-5
+ it = 0
+ dressing_dets = 0.d0
+  double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:)
+  external                         htc_bi_ortho_calc_tdav
+  external                         htcdag_bi_ortho_calc_tdav
+  logical                       :: converged
+ do while (dabs(E_before-E_current).gt.thr)
+  it += 1
+  E_before = E_current
+!  h_sc2 = htilde_matrix_elmt_bi_ortho
+  call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets)
+  do i = 1, N_det
+!   print*,'dressing_dets(i) = ',dressing_dets(i)
+   h_sc2(i,i) += dressing_dets(i)
+  enddo
+  print*,'********************'
+  print*,'iteration       ',it
+!  call non_hrmt_real_diag(N_det,h_sc2,& 
+!       leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& 
+!       n_real,eigval_right_tmp)
+!  print*,'eigval_right_tmp(1)',eigval_right_tmp(1)
+  vec_tmp = 0.d0
+  do istate = 1, N_states
+   vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate)
+  enddo
+  do istate = N_states+1, n_states_diag
+   vec_tmp(istate,istate) = 1.d0
+  enddo
+  call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
+  print*,'outside Davidson'
+  print*,'eigval_tmp(1) = ',eigval_tmp(1)
+  do i = 1, N_det 
+   reigvec_tc_bi_orth_tmp(i,1) = vec_tmp(i,1)
+   e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1)
+  enddo
+!  E_current = eigval_right_tmp(1)
+  E_current = eigval_tmp(1)
+  print*,'it, E(SC)^2 = ',it,E_current
+ enddo
+ eigval_tc_cisd_sc2_bi_ortho(1:N_states) = eigval_right_tmp(1:N_states)
+ reigvec_tc_cisd_sc2_bi_ortho(1:N_det,1:N_states) = reigvec_tc_bi_orth_tmp(1:N_det,1:N_states)
+ leigvec_tc_cisd_sc2_bi_ortho(1:N_det,1:N_states) = leigvec_tc_bi_orth_tmp(1:N_det,1:N_states)
+ 
+END_PROVIDER 
+
+subroutine get_cisd_sc2_dressing(dets,e_corr_dets,ndet,dressing_dets)
+ implicit none
+  use bitmasks
+ integer, intent(in) :: ndet
+ integer(bit_kind), intent(in)  :: dets(N_int,2,ndet)
+ double precision, intent(in)   :: e_corr_dets(ndet)
+ double precision, intent(out) :: dressing_dets(ndet)
+ integer, allocatable  :: degree(:),hole(:,:),part(:,:),spin(:,:)
+ integer(bit_kind), allocatable :: hole_part(:,:,:)
+ integer :: i,j,k, exc(0:2,2,2),h1,p1,h2,p2,s1,s2
+ integer(bit_kind) :: xorvec(2,N_int)
+
+ double precision :: phase
+ dressing_dets = 0.d0
+ allocate(degree(ndet),hole(2,ndet),part(2,ndet), spin(2,ndet),hole_part(N_int,2,ndet))
+ do i = 2, ndet
+  call get_excitation_degree(HF_bitmask,dets(1,1,i),degree(i),N_int)
+  do j = 1, N_int
+   hole_part(j,1,i) = xor( HF_bitmask(j,1), dets(j,1,i))
+   hole_part(j,2,i) = xor( HF_bitmask(j,2), dets(j,2,i))
+  enddo
+  if(degree(i) == 1)then
+   call get_single_excitation(HF_bitmask,psi_det(1,1,i),exc,phase,N_int)
+  else if(degree(i) == 2)then
+   call get_double_excitation(HF_bitmask,psi_det(1,1,i),exc,phase,N_int)
+  endif
+  call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
+  hole(1,i) = h1
+  hole(2,i) = h2
+  part(1,i) = p1
+  part(2,i) = p2
+  spin(1,i) = s1
+  spin(2,i) = s2
+ enddo
+ 
+ integer :: same
+ if(elec_alpha_num+elec_beta_num<3)return
+ do i = 2, ndet
+  do j = i+1, ndet
+   same = 0
+   if(degree(i) == degree(j) .and. degree(i)==1)cycle
+   do k = 1, N_int
+    xorvec(k,1) = iand(hole_part(k,1,i),hole_part(k,1,j))
+    xorvec(k,2) = iand(hole_part(k,2,i),hole_part(k,2,j))
+    same += popcnt(xorvec(k,1)) + popcnt(xorvec(k,2)) 
+   enddo
+!   print*,'i,j',i,j
+!   call debug_det(dets(1,1,i),N_int) 
+!   call debug_det(hole_part(1,1,i),N_int) 
+!   call debug_det(dets(1,1,j),N_int) 
+!   call debug_det(hole_part(1,1,j),N_int) 
+!   print*,'same = ',same
+   if(same.eq.0)then
+    dressing_dets(i) += e_corr_dets(j)  
+    dressing_dets(j) += e_corr_dets(i)  
+   endif
+  enddo
+ enddo
+ 
+end
diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f
new file mode 100644
index 00000000..f2cbb637
--- /dev/null
+++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f
@@ -0,0 +1,202 @@
+  use bitmasks
+
+ BEGIN_PROVIDER [ integer, index_HF_psi_det]                                                                                                            
+ implicit none
+ integer :: i,degree
+ do i = 1, N_det
+   call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+   if(degree == 0)then
+    index_HF_psi_det = i
+    exit
+   endif
+ enddo
+ END_PROVIDER
+
+subroutine diagonalize_CI_tc
+  implicit none
+  BEGIN_DOC
+!  Replace the coefficients of the |CI| states by the coefficients of the
+!  eigenstates of the |CI| matrix.
+  END_DOC
+  integer                        :: i,j
+  do j=1,N_states
+    do i=1,N_det
+      psi_l_coef_bi_ortho(i,j) = leigvec_tc_bi_orth(i,j)
+      psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j)
+    enddo
+  enddo
+!  psi_energy(1:N_states) = CI_electronic_energy(1:N_states)
+!  psi_s2(1:N_states) = CI_s2(1:N_states)
+
+  SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho
+end
+
+
+
+ BEGIN_PROVIDER [double precision, eigval_right_tc_bi_orth, (N_states)]
+&BEGIN_PROVIDER [double precision, eigval_left_tc_bi_orth, (N_states)]
+&BEGIN_PROVIDER [double precision, reigvec_tc_bi_orth, (N_det,N_states)]
+&BEGIN_PROVIDER [double precision, leigvec_tc_bi_orth, (N_det,N_states)]
+&BEGIN_PROVIDER [double precision, norm_ground_left_right_bi_orth ]
+
+  BEGIN_DOC
+  ! eigenvalues, right and left eigenvectors of the transcorrelated Hamiltonian on the BI-ORTHO basis 
+  END_DOC
+
+  implicit none
+  integer                       :: i, idx_dress, j, istate
+  logical                       :: converged, dagger
+  integer                       :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l
+  double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:)
+
+  PROVIDE N_det N_int
+
+   if(n_det.le.N_det_max_full)then
+    allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det))
+    call non_hrmt_real_diag(N_det,htilde_matrix_elmt_bi_ortho,& 
+         leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& 
+         n_real_tc_bi_orth_eigval_right,eigval_right_tmp)
+    double precision, allocatable :: coef_hf_r(:),coef_hf_l(:)
+    integer, allocatable :: iorder(:)
+    allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det))
+    do i = 1,N_det
+     iorder(i) = i
+     coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i))
+    enddo
+    call dsort(coef_hf_r,iorder,N_det)
+    igood_r = iorder(1)
+    print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1)
+    do i = 1,N_det
+     iorder(i) = i
+     coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i))
+    enddo
+    call dsort(coef_hf_l,iorder,N_det)
+    igood_l = iorder(1)
+    print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1)
+
+    if(igood_r.ne.igood_l.and.igood_r.ne.1)then
+     print *,''
+     print *,'Warning, the left and right eigenvectors are "not the same" '
+     print *,'Warning, the ground state is not dominated by HF...'
+     print *,'State with largest RIGHT coefficient of HF ',igood_r
+     print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r)
+     print *,'State with largest LEFT  coefficient of HF ',igood_l
+     print *,'coef of HF in LEFT  eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l)
+    endif
+    if(state_following_tc)then
+     print *,'Following the states with the largest coef on HF'
+     print *,'igood_r,igood_l',igood_r,igood_l
+     i= igood_r
+     eigval_right_tc_bi_orth(1) = eigval_right_tmp(i)
+     do j = 1, N_det
+       reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i)
+!       print*,reigvec_tc_bi_orth(j,1)
+     enddo
+     i= igood_l
+     eigval_left_tc_bi_orth(1)  = eigval_right_tmp(i)
+     do j = 1, N_det
+       leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i)
+     enddo
+    else 
+     do i = 1, N_states
+       eigval_right_tc_bi_orth(i) = eigval_right_tmp(i)
+       eigval_left_tc_bi_orth(i)  = eigval_right_tmp(i)
+       do j = 1, N_det
+         reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i)
+         leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i)
+       enddo
+     enddo
+    endif
+   else 
+    double precision, allocatable :: H_jj(:),vec_tmp(:,:)
+    external                         htc_bi_ortho_calc_tdav
+    external                         htcdag_bi_ortho_calc_tdav
+    external                         H_tc_u_0_opt
+    external                         H_tc_dagger_u_0_opt
+    allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag))
+    do i = 1, N_det
+      call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
+    enddo
+ !!!! Preparing the left-eigenvector
+    print*,'Computing the left-eigenvector '
+    vec_tmp = 0.d0
+    do istate = 1, N_states
+     vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate)
+    enddo
+    do istate = N_states+1, n_states_diag
+     vec_tmp(istate,istate) = 1.d0
+    enddo
+!    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav)
+    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt)
+    do istate = 1, N_states
+     leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate)
+    enddo
+
+    print*,'Computing the right-eigenvector '
+ !!!! Preparing the right-eigenvector
+    vec_tmp = 0.d0
+    do istate = 1, N_states
+     vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate)
+    enddo
+    do istate = N_states+1, n_states_diag
+     vec_tmp(istate,istate) = 1.d0
+    enddo
+!    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
+    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt)
+    do istate = 1, N_states
+     reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate)
+    enddo
+
+    deallocate(H_jj)
+   endif
+  call bi_normalize(leigvec_tc_bi_orth,reigvec_tc_bi_orth,size(reigvec_tc_bi_orth,1),N_det,N_states)
+   print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ',leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1)
+   norm_ground_left_right_bi_orth = 0.d0
+   do j = 1, N_det
+    norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1)
+   enddo
+   print*,'norm l/r = ',norm_ground_left_right_bi_orth
+
+END_PROVIDER 
+
+
+
+subroutine bi_normalize(u_l,u_r,n,ld,nstates)
+  !!!! Normalization of the scalar product of the left/right eigenvectors
+  double precision, intent(inout) :: u_l(ld,nstates), u_r(ld,nstates)
+  integer, intent(in) :: n,ld,nstates
+  integer :: i
+  double precision  :: accu, tmp 
+  do i = 1, nstates
+   !!!! Normalization of right eigenvectors |Phi>
+   accu = 0.d0
+   do j = 1, n
+    accu += u_r(j,i) * u_r(j,i)
+   enddo
+   accu = 1.d0/dsqrt(accu)
+   print*,'accu_r = ',accu
+   do j = 1, n
+    u_r(j,i) *= accu 
+   enddo
+   tmp = u_r(1,i) / dabs(u_r(1,i))
+   do j = 1, n
+    u_r(j,i) *= tmp
+   enddo
+   !!!! Adaptation of the norm of the left eigenvector such that <chi|Phi> = 1
+   accu = 0.d0
+   do j = 1, n
+    accu += u_l(j,i) * u_r(j,i)
+!    print*,j, u_l(j,i) , u_r(j,i)
+   enddo
+   if(accu.gt.0.d0)then
+    accu = 1.d0/dsqrt(accu)
+   else
+    accu = 1.d0/dsqrt(-accu)
+   endif
+   tmp = (u_l(1,i) * u_r(1,i) )/dabs(u_l(1,i) * u_r(1,i))
+   do j = 1, n
+    u_l(j,i) *= accu * tmp
+    u_r(j,i) *= accu 
+   enddo
+  enddo
+end
diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f
new file mode 100644
index 00000000..44e27e7c
--- /dev/null
+++ b/src/tc_bi_ortho/tc_hmat.irp.f
@@ -0,0 +1,45 @@
+
+ BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho, (N_det,N_det)]
+
+  BEGIN_DOC
+  ! htilde_matrix_elmt_bi_ortho(j,i) = <J| H^tilde |I> 
+  !
+  ! WARNING !!!!!!!!! IT IS NOT HERMITIAN !!!!!!!!!
+  END_DOC
+ 
+  implicit none
+  integer          :: i, j
+  double precision :: hmono,htwoe,hthree,htot
+
+  PROVIDE N_int
+ !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j,hmono, htwoe, hthree, htot) &
+ !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho)
+    do i = 1, N_det
+      do j = 1, N_det
+        ! < J | Htilde | I >
+        call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+
+        !print *, ' hmono  = ', hmono
+        !print *, ' htwoe  = ', htwoe
+        !print *, ' hthree = ', hthree
+        htilde_matrix_elmt_bi_ortho(j,i) = htot
+      enddo
+    enddo
+ !$OMP END PARALLEL DO
+! print*,'htilde_matrix_elmt_bi_ortho = '
+! do i = 1, min(100,N_det)
+!  write(*,'(100(F16.10,X))')htilde_matrix_elmt_bi_ortho(1:min(100,N_det),i)
+! enddo
+
+
+END_PROVIDER 
+
+ BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho_tranp, (N_det,N_det)]
+ implicit none
+ integer ::i,j
+  do i = 1, N_det
+    do j = 1, N_det
+      htilde_matrix_elmt_bi_ortho_tranp(j,i) = htilde_matrix_elmt_bi_ortho(i,j)
+    enddo
+  enddo
+END_PROVIDER 
diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f
new file mode 100644
index 00000000..b7e5ae81
--- /dev/null
+++ b/src/tc_bi_ortho/tc_natorb.irp.f
@@ -0,0 +1,218 @@
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, natorb_tc_eigval, (mo_num)]
+
+  BEGIN_DOC
+  !
+  ! natorb_tc_reigvec_mo : RIGHT eigenvectors of the ground state transition matrix (equivalent of natural orbitals)
+  ! natorb_tc_leigvec_mo : LEFT  eigenvectors of the ground state transition matrix (equivalent of natural orbitals)
+  ! natorb_tc_eigval     : eigenvalues of the ground state transition matrix (equivalent of the occupation numbers). WARNINING :: can be negative !!
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k
+  double precision              :: thr_d, thr_nd, thr_deg, accu
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: dm_tmp(:,:), fock_diag(:)
+
+  allocate(dm_tmp(mo_num,mo_num), fock_diag(mo_num))
+
+  dm_tmp(1:mo_num,1:mo_num) = -tc_transition_matrix_mo(1:mo_num,1:mo_num,1,1)
+
+  print *, ' dm_tmp'
+  do i = 1, mo_num
+    fock_diag(i) = fock_matrix_tc_mo_tot(i,i)
+    write(*, '(100(F16.10,X))') -dm_tmp(:,i)
+  enddo
+
+  thr_d   = 1.d-6
+  thr_nd  = 1.d-6
+  thr_deg = 1.d-3
+  call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & 
+                              , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval)
+!   call non_hrmt_bieig( mo_num, dm_tmp&
+!                      , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& 
+!                      , mo_num, natorb_tc_eigval )
+
+  accu = 0.d0
+  do i = 1, mo_num
+    print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i)
+    accu += -natorb_tc_eigval(i)
+  enddo
+  print *, ' accu = ', accu
+
+  dm_tmp = 0.d0
+  do i = 1, mo_num
+    accu = 0.d0
+    do k = 1, mo_num
+      accu += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,i)
+    enddo
+    accu = 1.d0/dsqrt(dabs(accu))
+    natorb_tc_reigvec_mo(:,i) *= accu
+    natorb_tc_leigvec_mo(:,i) *= accu
+    do j = 1, mo_num
+      do k = 1, mo_num
+        dm_tmp(j,i) += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,j)
+      enddo
+    enddo
+  enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, mo_num
+    accu_d += dm_tmp(i,i)
+    !write(*,'(100(F16.10,X))')dm_tmp(:,i)
+    do j = 1, mo_num
+      if(i==j)cycle
+      accu_nd += dabs(dm_tmp(j,i))
+    enddo
+  enddo
+  print *, ' Trace of the overlap between TC natural orbitals     ', accu_d
+  print *, ' L1 norm of extra diagonal elements of overlap matrix ', accu_nd
+
+  deallocate(dm_tmp, fock_diag)
+ 
+END_PROVIDER 
+
+! ---
+ 
+ BEGIN_PROVIDER [ double precision, fock_diag_sorted_r_natorb, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, fock_diag_sorted_l_natorb, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, fock_diag_sorted_v_natorb, (mo_num)]
+
+  implicit none
+  integer                       :: i,j,k
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: fock_diag(:)
+
+  print *, ' Diagonal elements of the Fock matrix before '
+
+  do i = 1, mo_num
+   write(*,*) i, Fock_matrix_tc_mo_tot(i,i)
+  enddo
+
+  allocate(fock_diag(mo_num))
+  fock_diag = 0.d0
+  do i = 1, mo_num
+    fock_diag(i) = 0.d0
+    do j = 1, mo_num
+      do k = 1, mo_num
+        fock_diag(i) += natorb_tc_leigvec_mo(k,i) * Fock_matrix_tc_mo_tot(k,j) * natorb_tc_reigvec_mo(j,i) 
+      enddo
+    enddo
+  enddo
+
+  allocate(iorder(mo_num))
+  do i = 1, mo_num
+   iorder(i) = i
+  enddo 
+  call dsort(fock_diag, iorder, mo_num)
+
+  print *, ' Diagonal elements of the Fock matrix after '
+  do i = 1, mo_num
+   write(*,*) i, fock_diag(i)
+  enddo
+  deallocate(fock_diag)
+
+  do i = 1, mo_num 
+    fock_diag_sorted_v_natorb(i) = natorb_tc_eigval(iorder(i))
+    do j = 1, mo_num
+      fock_diag_sorted_r_natorb(j,i) = natorb_tc_reigvec_mo(j,iorder(i))
+      fock_diag_sorted_l_natorb(j,i) = natorb_tc_leigvec_mo(j,iorder(i))
+    enddo
+  enddo
+  deallocate(iorder)
+ 
+END_PROVIDER 
+ 
+! --- 
+ 
+ BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_ao, (ao_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_ao, (ao_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_natorb_tc_eigvec_ao, (mo_num, mo_num) ]
+ 
+  BEGIN_DOC
+  ! EIGENVECTORS OF FOCK MATRIX ON THE AO BASIS and their OVERLAP
+  !
+  ! THE OVERLAP SHOULD BE THE SAME AS overlap_natorb_tc_eigvec_mo
+  END_DOC
+ 
+  implicit none
+  integer                       :: i, j, k, q, p
+  double precision              :: accu, accu_d
+  double precision, allocatable :: tmp(:,:)
+ 
+ 
+ !  ! MO_R x R
+   call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
+             , mo_r_coef, size(mo_r_coef, 1)                   &
+             , fock_diag_sorted_r_natorb, size(fock_diag_sorted_r_natorb, 1) &
+             , 0.d0, natorb_tc_reigvec_ao, size(natorb_tc_reigvec_ao, 1) )
+ !
+   ! MO_L x L
+   call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
+             , mo_l_coef, size(mo_l_coef, 1)                   &
+             , fock_diag_sorted_l_natorb, size(fock_diag_sorted_l_natorb, 1) &
+             , 0.d0, natorb_tc_leigvec_ao, size(natorb_tc_leigvec_ao, 1) )
+ 
+ 
+   allocate( tmp(mo_num,ao_num) )
+ 
+   ! tmp <-- L.T x S_ao
+   call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                           &
+             , natorb_tc_leigvec_ao, size(natorb_tc_leigvec_ao, 1), ao_overlap, size(ao_overlap, 1) &
+             , 0.d0, tmp, size(tmp, 1) )
+ 
+   ! S <-- tmp x R
+   call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                             &
+             , tmp, size(tmp, 1), natorb_tc_reigvec_ao, size(natorb_tc_reigvec_ao, 1) &
+             , 0.d0, overlap_natorb_tc_eigvec_ao, size(overlap_natorb_tc_eigvec_ao, 1) )
+ 
+   deallocate( tmp )
+ 
+   ! ---
+   double precision :: norm
+   do i = 1, mo_num
+    norm = 1.d0/dsqrt(dabs(overlap_natorb_tc_eigvec_ao(i,i)))
+    do j = 1, mo_num
+     natorb_tc_reigvec_ao(j,i) *= norm
+     natorb_tc_leigvec_ao(j,i) *= norm
+    enddo
+   enddo
+ 
+   allocate( tmp(mo_num,ao_num) )
+ 
+   ! tmp <-- L.T x S_ao
+   call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                           &
+             , natorb_tc_leigvec_ao, size(natorb_tc_leigvec_ao, 1), ao_overlap, size(ao_overlap, 1) &
+             , 0.d0, tmp, size(tmp, 1) )
+ 
+   ! S <-- tmp x R
+   call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                             &
+             , tmp, size(tmp, 1), natorb_tc_reigvec_ao, size(natorb_tc_reigvec_ao, 1) &
+             , 0.d0, overlap_natorb_tc_eigvec_ao, size(overlap_natorb_tc_eigvec_ao, 1) )
+ 
+ 
+ 
+   deallocate( tmp )
+ 
+   accu_d = 0.d0
+   accu = 0.d0
+   do i = 1, mo_num
+     accu_d += overlap_natorb_tc_eigvec_ao(i,i)
+     do j = 1, mo_num
+       if(i==j)cycle
+       accu += dabs(overlap_natorb_tc_eigvec_ao(j,i))
+     enddo
+   enddo
+   print*,'Trace of the overlap_natorb_tc_eigvec_ao           = ',accu_d
+   print*,'mo_num                                             = ',mo_num
+   print*,'L1 norm of extra diagonal elements of overlap matrix ',accu
+   accu = accu / dble(mo_num**2)
+ 
+ END_PROVIDER
+
diff --git a/src/tc_bi_ortho/tc_prop.irp.f b/src/tc_bi_ortho/tc_prop.irp.f
new file mode 100644
index 00000000..5bb0e2c0
--- /dev/null
+++ b/src/tc_bi_ortho/tc_prop.irp.f
@@ -0,0 +1,157 @@
+
+ BEGIN_PROVIDER [ double precision, tc_transition_matrix_mo_beta, (mo_num, mo_num,N_states,N_states) ]
+&BEGIN_PROVIDER [ double precision, tc_transition_matrix_mo_alpha, (mo_num, mo_num,N_states,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! tc_transition_matrix_mo_alpha(p,h,istate,jstate) = <Chi_istate| a^\dagger_p,alpha a_h,alpha |Phi_jstate>
+ !
+ ! tc_transition_matrix_mo_beta(p,h,istate,jstate) = <Chi_istate| a^\dagger_p,beta a_h,beta |Phi_jstate>
+ !
+ ! where <Chi_istate| and |Phi_jstate> are the left/right eigenvectors on a bi-ortho basis
+ END_DOC
+ integer :: i,j,istate,jstate,m,n,p,h
+ double precision :: phase
+ integer, allocatable           :: occ(:,:)
+ integer                        :: n_occ_ab(2),degree,exc(0:2,2,2)
+ allocate(occ(N_int*bit_kind_size,2))
+ tc_transition_matrix_mo_alpha = 0.d0
+ tc_transition_matrix_mo_beta  = 0.d0
+   do i = 1, N_det
+    do j = 1, N_det
+     call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int)
+     if(degree.gt.1)cycle
+     do istate = 1, N_states
+      do jstate = 1, N_states
+       if (degree == 0)then
+        call bitstring_to_list_ab(psi_det(1,1,i), occ, n_occ_ab, N_int)
+        do p = 1, n_occ_ab(1) ! browsing the alpha electrons
+         m = occ(p,1)
+         tc_transition_matrix_mo_alpha(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate)
+        enddo
+        do p = 1, n_occ_ab(2) ! browsing the beta electrons
+         m = occ(p,1)
+         tc_transition_matrix_mo_beta(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate)
+        enddo
+       else
+        call get_single_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,phase,N_int)
+        if (exc(0,1,1) == 1) then
+          ! Single alpha
+          h = exc(1,1,1) ! hole in psi_det(1,1,j) 
+          p = exc(1,2,1) ! particle in psi_det(1,1,j) 
+          tc_transition_matrix_mo_alpha(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate)
+        else
+          ! Single beta
+          h = exc(1,1,2) ! hole in psi_det(1,1,j) 
+          p = exc(1,2,2) ! particle in psi_det(1,1,j) 
+          tc_transition_matrix_mo_beta(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate)
+        endif
+       endif
+     enddo
+    enddo
+  enddo
+ enddo
+ END_PROVIDER
+
+ BEGIN_PROVIDER [double precision, tc_transition_matrix_mo, (mo_num, mo_num,N_states,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! tc_transition_matrix_mo(p,h,istate,jstate) = \sum_{sigma=alpha,beta} <Chi_istate| a^\dagger_p,sigma a_h,sigma |Phi_jstate>
+ !
+ ! where <Chi_istate| and |Phi_jstate> are the left/right eigenvectors on a bi-ortho basis
+ END_DOC
+ tc_transition_matrix_mo = tc_transition_matrix_mo_beta + tc_transition_matrix_mo_alpha
+ END_PROVIDER 
+ 
+
+ BEGIN_PROVIDER [double precision, tc_spin_transition_matrix_mo, (mo_num, mo_num,N_states,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! tc_spin_transition_matrix_mo = tc_transition_matrix_mo_alpha - tc_transition_matrix_mo_beta
+ !
+ ! where <Chi_istate| and |Phi_jstate> are the left/right eigenvectors on a bi-ortho basis
+ END_DOC
+ tc_spin_transition_matrix_mo = tc_transition_matrix_mo_alpha - tc_transition_matrix_mo_beta 
+ END_PROVIDER 
+
+
+ BEGIN_PROVIDER [double precision, tc_bi_ortho_dipole, (3,N_states)]
+ implicit none
+ integer :: i,j,istate,m
+ double precision :: nuclei_part(3)
+ tc_bi_ortho_dipole = 0.d0
+ do istate = 1, N_states
+  do i = 1, mo_num
+   do j = 1, mo_num
+    tc_bi_ortho_dipole(1,istate) += -(tc_transition_matrix_mo(j,i,istate,istate)) *  mo_bi_orth_bipole_x(j,i)
+    tc_bi_ortho_dipole(2,istate) += -(tc_transition_matrix_mo(j,i,istate,istate)) *  mo_bi_orth_bipole_y(j,i)
+    tc_bi_ortho_dipole(3,istate) += -(tc_transition_matrix_mo(j,i,istate,istate)) *  mo_bi_orth_bipole_z(j,i)
+   enddo
+  enddo
+ enddo
+
+ nuclei_part = 0.d0
+ do m = 1, 3
+  do i = 1,nucl_num
+   nuclei_part(m) += nucl_charge(i) * nucl_coord(i,m)
+  enddo
+ enddo
+!
+ do istate = 1, N_states
+  do m = 1, 3
+    tc_bi_ortho_dipole(m,istate) += nuclei_part(m)
+  enddo
+ enddo
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER [ double precision, tc_transition_matrix_ao, (ao_num, ao_num,N_states,N_states) ]
+ implicit none
+ BEGIN_DOC 
+! tc_transition_matrix(p,h,istate,jstate) in the AO basis  
+ END_DOC 
+ integer :: i,j,k,l
+ double precision :: dm_mo
+ tc_transition_matrix_ao = 0.d0
+ integer :: istate,jstate
+ do istate = 1, N_states
+  do jstate = 1, N_states
+    do i = 1, mo_num
+      do j = 1, mo_num
+        dm_mo = tc_transition_matrix_mo(j,i,jstate,istate)
+        do k = 1, ao_num
+          do l = 1, ao_num
+            tc_transition_matrix_ao(l,k,jstate,istate) += mo_l_coef(l,j) * mo_r_coef(k,i) * dm_mo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ enddo
+ 
+ END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, tc_spin_transition_matrix_ao, (ao_num, ao_num,N_states,N_states) ]
+ implicit none
+ BEGIN_DOC 
+! tc_spin_transition_matrix_ao(p,h,istate,jstate) in the AO basis  
+ END_DOC 
+ integer :: i,j,k,l
+ double precision :: dm_mo
+ tc_spin_transition_matrix_ao = 0.d0
+ integer :: istate,jstate
+ do istate = 1, N_states
+  do jstate = 1, N_states
+    do i = 1, mo_num
+      do j = 1, mo_num
+        dm_mo = tc_spin_transition_matrix_mo(j,i,jstate,istate)
+        do k = 1, ao_num
+          do l = 1, ao_num
+            tc_spin_transition_matrix_ao(l,k,jstate,istate) += mo_l_coef(l,j) * mo_r_coef(k,i) * dm_mo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ enddo
+ 
+ END_PROVIDER 
diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f
new file mode 100644
index 00000000..291c52ef
--- /dev/null
+++ b/src/tc_bi_ortho/tc_som.irp.f
@@ -0,0 +1,70 @@
+! ---
+
+program tc_som
+
+  BEGIN_DOC
+  ! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  print *, ' starting ...'
+  print *, ' do not forget to do tc-scf first'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  PROVIDE mu_erf 
+  print *, ' mu = ', mu_erf
+  PROVIDE j1b_type
+  print *, ' j1b_type = ', j1b_type
+  print *, j1b_pen
+
+  read_wf = .true.
+  touch read_wf
+
+  call main()
+
+end
+
+! ---
+
+subroutine main()
+
+  implicit none
+  integer          :: i, i_HF, degree
+  double precision :: hmono_1, htwoe_1, hthree_1, htot_1
+  double precision :: hmono_2, htwoe_2, hthree_2, htot_2
+  double precision :: U_SOM
+
+  PROVIDE N_int N_det
+
+  do i = 1, N_det
+    call get_excitation_degree(HF_bitmask, psi_det(1,1,i), degree, N_int)
+    if(degree == 0) then
+      i_HF = i
+      exit
+    endif
+  enddo
+  print *, ' HF determinants:', i_HF
+  print *, '          N_det :', N_det
+
+  U_SOM = 0.d0 
+  do i = 1, N_det
+    if(i == i_HF) cycle
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1)
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2)
+    U_SOM += htot_1 * htot_2
+  enddo
+  U_SOM = 0.5d0 * U_SOM
+  print *, ' U_SOM = ', U_SOM
+  
+  return
+end subroutine main
+
+! ---
+
diff --git a/src/tc_bi_ortho/test_natorb.irp.f b/src/tc_bi_ortho/test_natorb.irp.f
new file mode 100644
index 00000000..54c9a827
--- /dev/null
+++ b/src/tc_bi_ortho/test_natorb.irp.f
@@ -0,0 +1,51 @@
+program test_natorb
+  implicit none
+  BEGIN_DOC
+! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. 
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+ call routine
+! call test
+
+end
+
+subroutine routine
+ implicit none
+ double precision, allocatable :: fock_diag(:),eigval(:),leigvec(:,:),reigvec(:,:),mat_ref(:,:)
+ allocate(eigval(mo_num),leigvec(mo_num,mo_num),reigvec(mo_num,mo_num),fock_diag(mo_num),mat_ref(mo_num, mo_num))
+ double precision, allocatable :: eigval_ref(:),leigvec_ref(:,:),reigvec_ref(:,:)
+ allocate(eigval_ref(mo_num),leigvec_ref(mo_num,mo_num),reigvec_ref(mo_num,mo_num))
+ 
+ double precision :: thr_deg
+ integer :: i,n_real,j
+ print*,'fock_matrix'
+ do i = 1, mo_num
+  fock_diag(i) = Fock_matrix_mo(i,i)
+  print*,i,fock_diag(i)
+ enddo
+ thr_deg = 1.d-6
+ mat_ref = -one_e_dm_mo
+ print*,'diagonalization by block'
+ call diag_mat_per_fock_degen(fock_diag,mat_ref,mo_num,thr_deg,leigvec,reigvec,eigval)
+ call non_hrmt_bieig( mo_num, mat_ref&
+                     , leigvec_ref, reigvec_ref& 
+                     , n_real, eigval_ref)
+ print*,'TEST ***********************************'
+ double precision :: accu_l, accu_r
+ do i = 1, mo_num
+  accu_l = 0.d0
+  accu_r = 0.d0
+  do j = 1, mo_num
+   accu_r += reigvec_ref(j,i) * reigvec(j,i)
+   accu_l += leigvec_ref(j,i) * leigvec(j,i)
+  enddo
+  print*,i
+  write(*,'(I3,X,100(F16.10,X))')i,eigval(i),eigval_ref(i),accu_l,accu_r
+ enddo
+end
diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f
new file mode 100644
index 00000000..118e481a
--- /dev/null
+++ b/src/tc_bi_ortho/test_normal_order.irp.f
@@ -0,0 +1,131 @@
+program test_normal_order
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+  call provide_all_three_ints_bi_ortho
+  call test
+end
+
+subroutine test
+ implicit none
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+ integer :: h1,h2,p1,p2,s1,s2,i_ok,degree,Ne(2)
+ integer                        :: exc(0:2,2,2)
+ integer(bit_kind), allocatable :: det_i(:,:)
+ double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal
+ integer,           allocatable :: occ(:,:)
+ allocate( occ(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int)
+ allocate(det_i(N_int,2))
+ s1 = 1 
+ s2 = 2
+ accu = 0.d0
+ do h1 = 1, elec_beta_num
+  do p1 = elec_alpha_num+1, mo_num
+   do h2 = 1, elec_beta_num
+    do p2 = elec_beta_num+1, mo_num
+     det_i = ref_bitmask
+     call do_single_excitation(det_i,h1,p1,s1,i_ok)
+     call do_single_excitation(det_i,h2,p2,s2,i_ok)
+     call htilde_mu_mat_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+     call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
+     call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
+     hthree *= phase
+!    !normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1)
+     call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal)
+!     normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1)
+     accu += dabs(hthree-normal)
+    enddo
+   enddo
+  enddo
+ enddo
+print*,'accu opposite spin = ',accu
+stop
+
+!    p2=6
+!    p1=5
+!    h2=2
+!    h1=1
+
+s1 = 1 
+s2 = 1
+accu = 0.d0
+do h1 = 1, elec_alpha_num
+ do p1 = elec_alpha_num+1, mo_num
+  do p2 = p1+1, mo_num
+   do h2 = h1+1, elec_alpha_num
+    det_i = ref_bitmask
+    call do_single_excitation(det_i,h1,p1,s1,i_ok)
+    if(i_ok.ne.1)cycle
+    call do_single_excitation(det_i,h2,p2,s2,i_ok)
+    if(i_ok.ne.1)cycle
+    call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
+    call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
+    integer :: hh1, pp1, hh2, pp2, ss1, ss2
+    call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2)
+    hthree *= phase
+!    normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1)
+     normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1)
+    if(dabs(hthree).lt.1.d-10)cycle
+    if(dabs(hthree-normal).gt.1.d-10)then
+     print*,pp2,pp1,hh2,hh1
+     print*,p2,p1,h2,h1
+     print*,hthree,normal,dabs(hthree-normal)
+     stop
+    endif
+!     print*,hthree,normal,dabs(hthree-normal)
+    accu += dabs(hthree-normal)
+   enddo
+  enddo
+ enddo
+enddo
+print*,'accu same spin alpha = ',accu
+
+
+s1 = 2 
+s2 = 2
+accu = 0.d0
+do h1 = 1, elec_beta_num
+ do p1 = elec_beta_num+1, mo_num
+  do p2 = p1+1, mo_num
+   do h2 = h1+1, elec_beta_num
+    det_i = ref_bitmask
+    call do_single_excitation(det_i,h1,p1,s1,i_ok)
+    if(i_ok.ne.1)cycle
+    call do_single_excitation(det_i,h2,p2,s2,i_ok)
+    if(i_ok.ne.1)cycle
+    call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
+    call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
+    call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2)
+    hthree *= phase
+!    normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1)
+     normal = eff_2_e_from_3_e_bb(p2,p1,h2,h1)
+    if(dabs(hthree).lt.1.d-10)cycle
+    if(dabs(hthree-normal).gt.1.d-10)then
+     print*,pp2,pp1,hh2,hh1
+     print*,p2,p1,h2,h1
+     print*,hthree,normal,dabs(hthree-normal)
+     stop
+    endif
+!     print*,hthree,normal,dabs(hthree-normal)
+    accu += dabs(hthree-normal)
+   enddo
+  enddo
+ enddo
+enddo
+print*,'accu same spin beta  = ',accu
+
+
+end
+
+
diff --git a/src/tc_bi_ortho/test_spin_dens.irp.f b/src/tc_bi_ortho/test_spin_dens.irp.f
new file mode 100644
index 00000000..2c2f6e7c
--- /dev/null
+++ b/src/tc_bi_ortho/test_spin_dens.irp.f
@@ -0,0 +1,30 @@
+BEGIN_PROVIDER [ double precision, mo_r_coef_normalized, (ao_num,mo_num) ]
+ implicit none
+ integer :: i,j,k
+ double precision :: norm
+ do i = 1, mo_num
+  norm = 0.d0
+  do j = 1, ao_num
+   do k = 1, ao_num
+    norm += mo_r_coef(k,i) * mo_r_coef(j,i) * ao_overlap(k,j)
+   enddo
+  enddo
+  norm = 1.d0/dsqrt(norm)
+  do j = 1, ao_num
+   mo_r_coef_normalized(j,i) = mo_r_coef(j,i) * norm
+  enddo
+ enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, tc_spin_dens_right_only, (ao_num, ao_num)]
+ implicit none
+ integer :: i,j,k
+ tc_spin_dens_right_only = 0.d0
+ do i = elec_beta_num+1, elec_alpha_num
+  do j = 1, ao_num
+   do k = 1, ao_num
+    tc_spin_dens_right_only(k,j) += mo_r_coef_normalized(k,i) * mo_r_coef_normalized(j,i)
+   enddo
+  enddo
+ enddo
+END_PROVIDER
diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
new file mode 100644
index 00000000..6721c285
--- /dev/null
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -0,0 +1,254 @@
+program tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+ call test_h_u0
+! call test_slater_tc_opt
+! call timing_tot
+! call timing_diag
+! call timing_single
+! call timing_double
+end
+
+subroutine test_h_u0
+ implicit none
+ double precision, allocatable :: v_0_ref(:),v_0_new(:),u_0(:), v_0_ref_dagger(:)
+ double precision :: accu 
+ logical :: do_right
+ integer :: i
+ allocate(v_0_new(N_det),v_0_ref(N_det),u_0(N_det),v_0_ref_dagger(N_det))
+ do_right = .True.
+ do i = 1, N_det
+  u_0(i) = psi_r_coef_bi_ortho(i,1)
+ enddo
+ call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right)
+ call htc_bi_ortho_calc_tdav (v_0_ref,u_0,N_states,N_det)
+ print*,'difference right '
+ accu = 0.d0
+ do i = 1, N_det
+  print*,dabs(v_0_new(i) - v_0_ref(i)),v_0_new(i) , v_0_ref(i)
+  accu += dabs(v_0_new(i) - v_0_ref(i))
+ enddo
+ print*,'accu = ',accu
+ do_right = .False.
+ v_0_new = 0.d0
+ call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right)
+ call htcdag_bi_ortho_calc_tdav(v_0_ref_dagger,u_0,N_states,N_det, do_right)
+ print*,'difference left'
+ accu = 0.d0
+ do i = 1, N_det
+  print*,dabs(v_0_new(i) - v_0_ref_dagger(i)),v_0_new(i) , v_0_ref_dagger(i)
+  accu += dabs(v_0_new(i) - v_0_ref_dagger(i))
+ enddo
+ print*,'accu = ',accu
+end
+
+subroutine test_slater_tc_opt
+ implicit none
+ integer :: i,j,degree
+ double precision :: hmono, htwoe, htot, hthree 
+ double precision :: hnewmono, hnewtwoe, hnewthree, hnewtot
+ double precision :: accu_d ,i_count, accu
+ accu = 0.d0
+ accu_d = 0.d0
+ i_count = 0.d0
+ do i = 1, N_det
+  do j = 1,N_det
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot)
+   if(dabs(htot).gt.1.d-15)then
+     i_count += 1.D0
+     accu += dabs(htot-hnewtot) 
+     if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
+      call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+      print*,j,i,degree
+      call debug_det(psi_det(1,1,i),N_int)
+      call debug_det(psi_det(1,1,j),N_int)
+      print*,htot,hnewtot,dabs(htot-hnewtot)
+      print*,hthree,hnewthree,dabs(hthree-hnewthree)
+      stop
+     endif
+   endif
+  enddo
+ enddo
+ print*,'accu   = ',accu/i_count
+
+end
+
+subroutine timing_tot
+ implicit none
+ integer :: i,j
+ double precision :: wall0, wall1
+ double precision, allocatable :: mat_old(:,:),mat_new(:,:)
+ double precision :: hmono, htwoe, hthree, htot, i_count
+ integer :: degree 
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,2), N_int, hmono, htwoe, hthree, htot)
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,2), N_int, hmono, htwoe, hthree, htot)
+ call wall_time(wall0)
+ i_count = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+!   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   i_count += 1.d0
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'i_count = ',i_count
+ print*,'time for old hij for total   = ',wall1 - wall0
+
+ call wall_time(wall0)
+ i_count = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+!   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   i_count += 1.d0
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'i_count = ',i_count
+ print*,'time for new hij for total   = ',wall1 - wall0
+ call i_H_j(psi_det(1,1,1), psi_det(1,1,2),N_int,htot)
+ call wall_time(wall0)
+ i_count = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+   call i_H_j(psi_det(1,1,j), psi_det(1,1,i),N_int,htot)
+   i_count += 1.d0
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'i_count = ',i_count
+ print*,'time for new hij STANDARD    = ',wall1 - wall0
+
+end
+
+subroutine timing_diag
+ implicit none
+ integer :: i,j
+ double precision :: wall0, wall1
+ double precision, allocatable :: mat_old(:,:),mat_new(:,:)
+ double precision :: hmono, htwoe, hthree, htot, i_count
+ integer :: degree 
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot)
+ call wall_time(wall0)
+ i_count = 0.d0
+ do i = 1, N_det
+  do j = i,i 
+   i_count += 1.d0
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'i_count = ',i_count
+ print*,'time for old hij for diagonal= ',wall1 - wall0
+
+ call wall_time(wall0)
+ i_count = 0.d0
+ do i = 1, N_det
+  do j = i,i
+   i_count += 1.d0
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'i_count = ',i_count
+ print*,'time for new hij for diagonal= ',wall1 - wall0
+
+end
+
+subroutine timing_single
+ implicit none
+ integer :: i,j
+ double precision :: wall0, wall1,accu
+ double precision, allocatable :: mat_old(:,:),mat_new(:,:)
+ double precision :: hmono, htwoe, hthree, htot, i_count
+ integer :: degree 
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot)
+ i_count = 0.d0
+ accu = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   if(degree.ne.1)cycle
+   i_count += 1.d0
+   call wall_time(wall0)
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(wall1)
+   accu += wall1 - wall0
+  enddo
+ enddo
+ print*,'i_count = ',i_count
+ print*,'time for old hij for singles = ',accu
+
+ i_count = 0.d0
+ accu = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   if(degree.ne.1)cycle
+   i_count += 1.d0
+   call wall_time(wall0)
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(wall1)
+   accu += wall1 - wall0
+  enddo
+ enddo
+ print*,'i_count = ',i_count
+ print*,'time for new hij for singles = ',accu
+
+end
+
+subroutine timing_double
+ implicit none
+ integer :: i,j
+ double precision :: wall0, wall1,accu
+ double precision, allocatable :: mat_old(:,:),mat_new(:,:)
+ double precision :: hmono, htwoe, hthree, htot, i_count
+ integer :: degree 
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot)
+ i_count = 0.d0
+ accu = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   if(degree.ne.2)cycle
+   i_count += 1.d0
+   call wall_time(wall0)
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(wall1)
+   accu += wall1 - wall0
+  enddo
+ enddo
+ print*,'i_count = ',i_count
+ print*,'time for old hij for doubles = ',accu
+
+ i_count = 0.d0
+ accu = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   if(degree.ne.2)cycle
+   i_count += 1.d0
+   call wall_time(wall0)
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(wall1)
+   accu += wall1 - wall0
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'i_count = ',i_count
+ print*,'time for new hij for doubles = ',accu
+
+end
+
diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f
new file mode 100644
index 00000000..ebd43a7a
--- /dev/null
+++ b/src/tc_bi_ortho/test_tc_fock.irp.f
@@ -0,0 +1,194 @@
+program test_tc_fock
+ implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  !call routine_1
+  !call routine_2
+!  call routine_3()
+
+! call test_3e
+ call routine_tot
+end
+
+! ---
+
+subroutine test_3e
+ implicit none
+ double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu
+ double precision ::  hmono, htwoe, hthree, htot
+ call htilde_mu_mat_bi_ortho(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot)
+! call diag_htilde_three_body_ints_bi_ort(N_int, ref_bitmask, hthree)
+ print*,'hmono = ',hmono
+ print*,'htwoe = ',htwoe
+ print*,'hthree= ',hthree
+ print*,'htot  = ',htot
+ print*,''
+ print*,''
+ print*,'TC_one= ',tc_hf_one_e_energy
+ print*,'TC_two= ',TC_HF_two_e_energy
+ print*,'TC_3e = ',diag_three_elem_hf
+ print*,'TC_tot= ',TC_HF_energy
+ print*,''
+ print*,''
+ call give_aaa_contrib(integral_aaa)
+ print*,'integral_aaa = ',integral_aaa
+ call give_aab_contrib(integral_aab)
+ print*,'integral_aab = ',integral_aab
+ call give_abb_contrib(integral_abb)
+ print*,'integral_abb = ',integral_abb
+ call give_bbb_contrib(integral_bbb)
+ print*,'integral_bbb = ',integral_bbb
+ accu = integral_aaa + integral_aab + integral_abb + integral_bbb
+ print*,'accu = ',accu
+ print*,'delta = ',hthree - accu
+
+end
+
+subroutine routine_3()
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: i, a, i_ok, s1
+  double precision               :: hmono, htwoe, hthree, htilde_ij
+  double precision               :: err_ai, err_tot, ref, new
+  integer(bit_kind), allocatable :: det_i(:,:)
+
+  allocate(det_i(N_int,2))
+
+  err_tot = 0.d0
+ 
+  do s1 = 1, 2
+
+   det_i = ref_bitmask
+   call debug_det(det_i, N_int)
+   print*, ' HF det'
+   call debug_det(det_i, N_int)
+ 
+   do i = 1, elec_num_tab(s1)
+     do a = elec_num_tab(s1)+1, mo_num ! virtual 
+ 
+ 
+       det_i = ref_bitmask
+       call do_single_excitation(det_i, i, a, s1, i_ok)
+       if(i_ok == -1) then
+        print*, 'PB !!'
+        print*, i, a
+        stop
+       endif
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
+ 
+       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       if(dabs(hthree).lt.1.d-10)cycle
+       ref = hthree 
+       if(s1 == 1)then
+        new = fock_a_tot_3e_bi_orth(a,i)
+       else if(s1 == 2)then
+        new = fock_b_tot_3e_bi_orth(a,i)
+       endif
+       err_ai = dabs(dabs(ref) - dabs(new))
+       if(err_ai .gt. 1d-7) then
+         print*,'s1 = ',s1
+         print*, ' warning on', i, a
+         print*, ref,new,err_ai
+       endif
+       print*, ref,new,err_ai
+       err_tot += err_ai
+ 
+       write(22, *) htilde_ij
+     enddo
+   enddo
+  enddo
+
+  print *, ' err_tot = ', err_tot
+
+  deallocate(det_i)
+
+end subroutine routine_3
+
+! ---
+subroutine routine_tot()
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: i, a, i_ok, s1,other_spin(2)
+  double precision               :: hmono, htwoe, hthree, htilde_ij
+  double precision               :: err_ai, err_tot, ref, new
+  integer(bit_kind), allocatable :: det_i(:,:)
+
+  allocate(det_i(N_int,2))
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  err_tot = 0.d0
+ 
+!  do s1 = 1, 2
+   s1 = 2
+   det_i = ref_bitmask
+   call debug_det(det_i, N_int)
+   print*, ' HF det'
+   call debug_det(det_i, N_int)
+ 
+!   do i = 1, elec_num_tab(s1)
+!     do a = elec_num_tab(s1)+1, mo_num ! virtual 
+   do i = 1, elec_beta_num
+     do a = elec_beta_num+1, elec_alpha_num! virtual 
+!   do i = elec_beta_num+1, elec_alpha_num
+!     do a = elec_alpha_num+1, mo_num! virtual 
+       print*,i,a 
+ 
+       det_i = ref_bitmask
+       call do_single_excitation(det_i, i, a, s1, i_ok)
+       if(i_ok == -1) then
+        print*, 'PB !!'
+        print*, i, a
+        stop
+       endif
+ 
+       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       print*,htilde_ij
+       if(dabs(htilde_ij).lt.1.d-10)cycle
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
+
+       if(s1 == 1)then
+        new = Fock_matrix_tc_mo_alpha(a,i)
+       else
+        new = Fock_matrix_tc_mo_beta(a,i)
+       endif
+       ref = htilde_ij
+!       if(s1 == 1)then
+!        new = fock_a_tot_3e_bi_orth(a,i)
+!       else if(s1 == 2)then
+!        new = fock_b_tot_3e_bi_orth(a,i)
+!       endif
+       err_ai = dabs(dabs(ref) - dabs(new))
+       if(err_ai .gt. 1d-7) then
+         print*,'s1 = ',s1
+         print*, ' warning on', i, a
+         print*, ref,new,err_ai
+       endif
+       print*, ref,new,err_ai
+       err_tot += err_ai
+ 
+       write(22, *) htilde_ij
+     enddo
+   enddo
+!  enddo
+
+  print *, ' err_tot = ', err_tot
+
+  deallocate(det_i)
+
+end subroutine routine_3
diff --git a/src/tc_bi_ortho/u0_h_u0.irp.f b/src/tc_bi_ortho/u0_h_u0.irp.f
new file mode 100644
index 00000000..e107ad88
--- /dev/null
+++ b/src/tc_bi_ortho/u0_h_u0.irp.f
@@ -0,0 +1,770 @@
+subroutine u_0_H_tc_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze, do_right)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $E_0 = \frac{\langle u_0 | H_TC | u_0 \rangle}{\langle u_0 | u_0 \rangle}$
+  !
+  ! n : number of determinants
+  !
+  ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi>
+  END_DOC
+  integer, intent(in)             :: n,Nint, N_st, sze
+  logical, intent(in)             :: do_right 
+  double precision, intent(out)   :: e_0(N_st)
+  double precision, intent(inout) :: u_0(sze,N_st)
+  integer(bit_kind),intent(in)    :: keys_tmp(Nint,2,n)
+
+  double precision, allocatable   :: v_0(:,:), u_1(:,:)
+  double precision                :: u_dot_u,u_dot_v,diag_H_mat_elem
+  integer                         :: i,j, istate
+
+  allocate (v_0(n,N_st),u_1(n,N_st))
+  u_1(:,:) = 0.d0
+  u_1(1:n,1:N_st) = u_0(1:n,1:N_st)
+  call H_tc_u_0_nstates_openmp(v_0,u_1,N_st,n, do_right)
+  u_0(1:n,1:N_st) = u_1(1:n,1:N_st)
+  deallocate(u_1)
+  double precision :: norm
+  !$OMP PARALLEL DO PRIVATE(i,norm) DEFAULT(SHARED)
+  do i=1,N_st
+    norm = u_dot_u(u_0(1,i),n)
+    if (norm /= 0.d0) then
+      e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n) / dsqrt(norm)
+    else
+      e_0(i) = 0.d0
+    endif
+  enddo
+  !$OMP END PARALLEL DO
+  deallocate (v_0)
+end
+
+
+subroutine H_tc_u_0_opt(v_0,u_0,N_st,sze)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $v_0 = H | u_0\rangle$.
+  !
+  ! Assumes that the determinants are in psi_det
+  !
+  ! istart, iend, ishift, istep are used in ZMQ parallelization.
+  END_DOC
+  integer, intent(in)            :: N_st,sze
+  double precision, intent(inout)  :: v_0(sze,N_st), u_0(sze,N_st)
+  logical           :: do_right 
+  do_right = .True.
+  call H_tc_u_0_nstates_openmp(v_0,u_0,N_st,sze, do_right)
+end
+
+subroutine H_tc_dagger_u_0_opt(v_0,u_0,N_st,sze)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $v_0 = H | u_0\rangle$.
+  !
+  ! Assumes that the determinants are in psi_det
+  !
+  ! istart, iend, ishift, istep are used in ZMQ parallelization.
+  END_DOC
+  integer, intent(in)            :: N_st,sze
+  double precision, intent(inout)  :: v_0(sze,N_st), u_0(sze,N_st)
+  logical           :: do_right 
+  do_right = .False.
+  call H_tc_u_0_nstates_openmp(v_0,u_0,N_st,sze, do_right)
+end
+
+
+subroutine H_tc_u_0_nstates_openmp(v_0,u_0,N_st,sze, do_right)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $v_0 = H | u_0\rangle$.
+  !
+  ! Assumes that the determinants are in psi_det
+  !
+  ! istart, iend, ishift, istep are used in ZMQ parallelization.
+  !
+  ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi>
+  END_DOC
+  integer, intent(in)            :: N_st,sze
+  double precision, intent(inout)  :: v_0(sze,N_st), u_0(sze,N_st)
+  logical, intent(in)             :: do_right 
+  integer :: k
+  double precision, allocatable  :: u_t(:,:), v_t(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
+  allocate(u_t(N_st,N_det),v_t(N_st,N_det))
+!  provide mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e 
+!  provide ref_tc_energy_tot fock_op_2_e_tc_closed_shell 
+!  provide eff_2_e_from_3_e_ab eff_2_e_from_3_e_aa eff_2_e_from_3_e_bb
+  do k=1,N_st
+    call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
+  enddo
+  v_t = 0.d0
+  call dtranspose(                                                   &
+      u_0,                                                           &
+      size(u_0, 1),                                                  &
+      u_t,                                                           &
+      size(u_t, 1),                                                  &
+      N_det, N_st)
+
+  call H_tc_u_0_nstates_openmp_work(v_t,u_t,N_st,sze,1,N_det,0,1, do_right)
+  deallocate(u_t)
+
+  call dtranspose(                                                   &
+      v_t,                                                           &
+      size(v_t, 1),                                                  &
+      v_0,                                                           &
+      size(v_0, 1),                                                  &
+      N_st, N_det)
+  deallocate(v_t)
+
+  do k=1,N_st
+    call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
+    call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
+  enddo
+
+end
+
+
+subroutine H_tc_u_0_nstates_openmp_work(v_t,u_t,N_st,sze,istart,iend,ishift,istep, do_right)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $v_t = H | u_t\rangle$ 
+  !
+  ! Default should be 1,N_det,0,1
+  !
+  ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi>
+  END_DOC
+  integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
+  double precision, intent(in)   :: u_t(N_st,N_det)
+  logical, intent(in)             :: do_right 
+  double precision, intent(out)  :: v_t(N_st,sze)
+
+
+  PROVIDE ref_bitmask_energy N_int
+
+  select case (N_int)
+    case (1)
+      call H_tc_u_0_nstates_openmp_work_1(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+    case (2)
+      call H_tc_u_0_nstates_openmp_work_2(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+    case (3)
+      call H_tc_u_0_nstates_openmp_work_3(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+    case (4)
+      call H_tc_u_0_nstates_openmp_work_4(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+    case default
+      call H_tc_u_0_nstates_openmp_work_N_int(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+  end select
+end
+BEGIN_TEMPLATE
+
+subroutine H_tc_u_0_nstates_openmp_work_$N_int(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $v_t = H | u_t \\rangle$ 
+  !
+  ! Default should be 1,N_det,0,1
+  !
+  ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi>
+  END_DOC
+  integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
+  double precision, intent(in)   :: u_t(N_st,N_det)
+  logical, intent(in)             :: do_right 
+  double precision, intent(out)  :: v_t(N_st,sze)
+
+  double precision               :: hij
+  integer                        :: i,j,k,l,kk
+  integer                        :: k_a, k_b, l_a, l_b, m_a, m_b
+  integer                        :: istate
+  integer                        :: krow, kcol, krow_b, kcol_b
+  integer                        :: lrow, lcol
+  integer                        :: mrow, mcol
+  integer(bit_kind)              :: spindet($N_int)
+  integer(bit_kind)              :: tmp_det($N_int,2)
+  integer(bit_kind)              :: tmp_det2($N_int,2)
+  integer(bit_kind)              :: tmp_det3($N_int,2)
+  integer(bit_kind), allocatable :: buffer(:,:)
+  integer                        :: n_doubles
+  integer, allocatable           :: doubles(:)
+  integer, allocatable           :: singles_a(:)
+  integer, allocatable           :: singles_b(:)
+  integer, allocatable           :: idx(:), idx0(:)
+  integer                        :: maxab, n_singles_a, n_singles_b, kcol_prev
+  integer*8                      :: k8
+  logical                        :: compute_singles
+  integer*8                      :: last_found, left, right, right_max
+  double precision               :: rss, mem, ratio
+  double precision, allocatable  :: utl(:,:)
+  integer, parameter             :: block_size=128
+  logical                        :: u_is_sparse
+
+!  call resident_memory(rss)
+!  mem = dble(singles_beta_csc_size) / 1024.d0**3
+!
+!  compute_singles = (mem+rss > qp_max_mem)
+!
+!  if (.not.compute_singles) then
+!    provide singles_beta_csc
+!  endif
+compute_singles=.True.
+
+
+  maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
+  allocate(idx0(maxab))
+
+  do i=1,maxab
+    idx0(i) = i
+  enddo
+
+  ! Prepare the array of all alpha single excitations
+  ! -------------------------------------------------
+
+  PROVIDE N_int nthreads_davidson
+  !$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson)        &
+      !$OMP   SHARED(psi_bilinear_matrix_rows, N_det,                &
+      !$OMP          psi_bilinear_matrix_columns,                    &
+      !$OMP          psi_det_alpha_unique, psi_det_beta_unique,      &
+      !$OMP          n_det_alpha_unique, n_det_beta_unique, N_int,   &
+      !$OMP          psi_bilinear_matrix_transp_rows,                &
+      !$OMP          psi_bilinear_matrix_transp_columns,             &
+      !$OMP          psi_bilinear_matrix_transp_order, N_st,         &
+      !$OMP          psi_bilinear_matrix_order_transp_reverse,       &
+      !$OMP          psi_bilinear_matrix_columns_loc,                &
+      !$OMP          psi_bilinear_matrix_transp_rows_loc,            &
+      !$OMP          istart, iend, istep, irp_here, v_t,             &
+      !$OMP          ishift, idx0, u_t, maxab, compute_singles,      &
+      !$OMP          singles_alpha_csc,singles_alpha_csc_idx,        &
+      !$OMP          singles_beta_csc,singles_beta_csc_idx)          &
+      !$OMP   PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i,     &
+      !$OMP          lcol, lrow, l_a, l_b, utl, kk, u_is_sparse,     &
+      !$OMP          buffer, doubles, n_doubles, umax,               &
+      !$OMP          tmp_det2, hij, idx, l, kcol_prev,hmono, htwoe, hthree,          &
+      !$OMP          singles_a, n_singles_a, singles_b, ratio,       &
+      !$OMP          n_singles_b, k8, last_found,left,right,right_max)
+
+  ! Alpha/Beta double excitations
+  ! =============================
+
+  allocate( buffer($N_int,maxab),                                     &
+      singles_a(maxab),                                              &
+      singles_b(maxab),                                              &
+      doubles(maxab),                                                &
+      idx(maxab), utl(N_st,block_size))
+
+  kcol_prev=-1
+
+  ! Check if u has multiple zeros
+  kk=1 ! Avoid division by zero
+  !$OMP DO
+  do k=1,N_det
+    umax = 0.d0
+    do l=1,N_st
+      umax = max(umax, dabs(u_t(l,k)))
+    enddo
+    if (umax < 1.d-20) then
+      !$OMP ATOMIC
+      kk = kk+1
+    endif
+  enddo
+  !$OMP END DO
+  u_is_sparse = N_det / kk < 20  ! 5%
+
+  ASSERT (iend <= N_det)
+  ASSERT (istart > 0)
+  ASSERT (istep  > 0)
+
+  !$OMP DO SCHEDULE(guided,64)
+  do k_a=istart+ishift,iend,istep    ! Loop over all determinants (/!\ not in psidet order)
+
+    krow = psi_bilinear_matrix_rows(k_a)       ! Index of alpha part of determinant k_a
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)    ! Index of beta part of determinant k_a
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+
+    if (kcol /= kcol_prev) then
+      tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+      if (compute_singles) then
+        call get_all_spin_singles_$N_int(                              &
+            psi_det_beta_unique, idx0,                                 &
+            tmp_det(1,2), N_det_beta_unique,                           &
+            singles_b, n_singles_b)
+      else
+        n_singles_b = 0
+        !DIR$ LOOP COUNT avg(1000)
+        do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1
+          n_singles_b = n_singles_b+1
+          singles_b(n_singles_b) = singles_beta_csc(k8)
+        enddo
+      endif
+    endif
+    kcol_prev = kcol
+
+    ! -> Here, tmp_det is determinant k_a
+
+    ! Loop over singly excited beta columns
+    ! -------------------------------------
+
+    !DIR$ LOOP COUNT avg(1000)
+    do i=1,n_singles_b
+      lcol = singles_b(i)
+
+      tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol)
+
+      ! tmp_det2 is a single excitation of tmp_det in the beta spin
+      ! the alpha part is not defined yet
+
+!---
+!      if (compute_singles) then
+
+        l_a = psi_bilinear_matrix_columns_loc(lcol)
+        ASSERT (l_a <= N_det)
+        ! rows  : | 1 2 3 4 | 1 3 4 6 | .... |  1  2  4  5 |
+        ! cols  : | 1 1 1 1 | 2 2 2 2 | .... |  8  8  8  8 |
+        ! index : | 1 2 3 4 | 5 6 7 8 | .... | 58 59 60 61 |
+        !                     ^                          ^
+        !                     |                          |
+        !                     l_a                       N_det
+        ! l_a is the index in the big vector os size Ndet of the position of the first element of column lcol
+
+        ! Below we identify all the determinants with the same beta part
+
+        !DIR$ UNROLL(8)
+        !DIR$ LOOP COUNT avg(50000)
+        do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
+          lrow = psi_bilinear_matrix_rows(l_a)
+          ASSERT (lrow <= N_det_alpha_unique)
+
+          buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow)  ! hot spot
+
+          ASSERT (l_a <= N_det)
+          idx(j) = l_a
+          l_a = l_a+1
+        enddo
+        j = j-1
+
+        ! Get all single excitations from tmp_det(1,1) to buffer(1,?)
+
+        call get_all_spin_singles_$N_int(                              &
+            buffer, idx, tmp_det(1,1), j,                              &
+            singles_a, n_singles_a )
+
+      ! Loop over alpha singles
+      ! -----------------------
+
+      double precision :: umax
+
+      !DIR$ LOOP COUNT avg(1000)
+      do k = 1,n_singles_a,block_size
+        umax = 0.d0
+        ! Prefetch u_t(:,l_a)
+        if (u_is_sparse) then
+          do kk=0,block_size-1
+            if (k+kk > n_singles_a) exit
+            l_a = singles_a(k+kk)
+            ASSERT (l_a <= N_det)
+
+            do l=1,N_st
+              utl(l,kk+1) = u_t(l,l_a)
+              umax = max(umax, dabs(utl(l,kk+1)))
+            enddo
+          enddo
+        else
+          do kk=0,block_size-1
+            if (k+kk > n_singles_a) exit
+            l_a = singles_a(k+kk)
+            ASSERT (l_a <= N_det)
+            utl(:,kk+1) = u_t(:,l_a)
+          enddo
+          umax = 1.d0
+        endif
+        if (umax < 1.d-20) cycle
+
+        do kk=0,block_size-1
+          if (k+kk > n_singles_a) exit
+          l_a = singles_a(k+kk)
+          lrow = psi_bilinear_matrix_rows(l_a)
+          ASSERT (lrow <= N_det_alpha_unique)
+
+          tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+!        call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! double alpha-beta
+          if(do_right)then
+           call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij)
+          else
+           call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij)
+          endif
+          !DIR$ LOOP COUNT AVG(4)
+          do l=1,N_st
+            v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+          enddo
+        enddo
+      enddo
+
+    enddo
+
+  enddo
+  !$OMP END DO
+
+  !$OMP DO SCHEDULE(guided,64)
+  do k_a=istart+ishift,iend,istep
+
+
+    ! Single and double alpha excitations
+    ! ===================================
+
+
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    ! Initial determinant is at k_b in beta-major representation
+    ! ----------------------------------------------------------------------
+
+    k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
+    ASSERT (k_b <= N_det)
+
+    spindet(1:$N_int) = tmp_det(1:$N_int,1)
+
+    ! Loop inside the beta column to gather all the connected alphas
+    lcol = psi_bilinear_matrix_columns(k_a)
+    l_a = psi_bilinear_matrix_columns_loc(lcol)
+
+    !DIR$ LOOP COUNT avg(200000)
+    do i=1,N_det_alpha_unique
+      if (l_a > N_det) exit
+      lcol = psi_bilinear_matrix_columns(l_a)
+      if (lcol /= kcol) exit
+      lrow = psi_bilinear_matrix_rows(l_a)
+      ASSERT (lrow <= N_det_alpha_unique)
+
+      buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot
+      idx(i) = l_a
+      l_a = l_a+1
+    enddo
+    i = i-1
+
+    call get_all_spin_singles_and_doubles_$N_int(                    &
+        buffer, idx, spindet, i,                                     &
+        singles_a, doubles, n_singles_a, n_doubles )
+
+    ! Compute Hij for all alpha singles
+    ! ----------------------------------
+
+    tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+    !DIR$ LOOP COUNT avg(1000)
+    do i=1,n_singles_a,block_size
+      umax = 0.d0
+      ! Prefetch u_t(:,l_a)
+      if (u_is_sparse) then
+        do kk=0,block_size-1
+          if (i+kk > n_singles_a) exit
+          l_a = singles_a(i+kk)
+          ASSERT (l_a <= N_det)
+
+          do l=1,N_st
+            utl(l,kk+1) = u_t(l,l_a)
+            umax = max(umax, dabs(utl(l,kk+1)))
+          enddo
+        enddo
+      else
+        do kk=0,block_size-1
+          if (i+kk > n_singles_a) exit
+          l_a = singles_a(i+kk)
+          ASSERT (l_a <= N_det)
+          utl(:,kk+1) = u_t(:,l_a)
+        enddo
+        umax = 1.d0
+      endif
+      if (umax < 1.d-20) cycle
+
+      do kk=0,block_size-1
+        if (i+kk > n_singles_a) exit
+        l_a = singles_a(i+kk)
+        lrow = psi_bilinear_matrix_rows(l_a)
+        ASSERT (lrow <= N_det_alpha_unique)
+
+        tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+!        call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 1, hij)
+         if(do_right)then
+          call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij)
+         else
+          call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij)
+         endif
+
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+        enddo
+      enddo
+    enddo
+
+
+    ! Compute Hij for all alpha doubles
+    ! ----------------------------------
+
+    !DIR$ LOOP COUNT avg(50000)
+    do i=1,n_doubles,block_size
+      umax = 0.d0
+      ! Prefetch u_t(:,l_a)
+      if (u_is_sparse) then
+        do kk=0,block_size-1
+          if (i+kk > n_doubles) exit
+          l_a = doubles(i+kk)
+          ASSERT (l_a <= N_det)
+
+          do l=1,N_st
+            utl(l,kk+1) = u_t(l,l_a)
+            umax = max(umax, dabs(utl(l,kk+1)))
+          enddo
+        enddo
+      else
+        do kk=0,block_size-1
+          if (i+kk > n_doubles) exit
+          l_a = doubles(i+kk)
+          ASSERT (l_a <= N_det)
+          utl(:,kk+1) = u_t(:,l_a)
+        enddo
+        umax = 1.d0
+      endif
+      if (umax < 1.d-20) cycle
+
+      do kk=0,block_size-1
+        if (i+kk > n_doubles) exit
+        l_a = doubles(i+kk)
+        lrow = psi_bilinear_matrix_rows(l_a)
+        ASSERT (lrow <= N_det_alpha_unique)
+
+        tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) 
+!        call i_H_j( tmp_det, tmp_det2, $N_int, hij)
+!        call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
+        if(do_right)then
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij)
+        else
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij)
+        endif
+
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+        enddo
+      enddo
+    enddo
+
+
+    ! Single and double beta excitations
+    ! ==================================
+
+
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    kcol = psi_bilinear_matrix_columns(k_a)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    spindet(1:$N_int) = tmp_det(1:$N_int,2)
+
+    ! Initial determinant is at k_b in beta-major representation
+    ! -----------------------------------------------------------------------
+
+    k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
+    ASSERT (k_b <= N_det)
+
+    ! Loop inside the alpha row to gather all the connected betas
+    lrow = psi_bilinear_matrix_transp_rows(k_b)
+    l_b = psi_bilinear_matrix_transp_rows_loc(lrow)
+    !DIR$ LOOP COUNT avg(200000)
+    do i=1,N_det_beta_unique
+      if (l_b > N_det) exit
+      lrow = psi_bilinear_matrix_transp_rows(l_b)
+      if (lrow /= krow) exit
+      lcol = psi_bilinear_matrix_transp_columns(l_b)
+      ASSERT (lcol <= N_det_beta_unique)
+
+      buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol)
+      idx(i) = l_b
+      l_b = l_b+1
+    enddo
+    i = i-1
+
+    call get_all_spin_singles_and_doubles_$N_int(                    &
+        buffer, idx, spindet, i,                                     &
+        singles_b, doubles, n_singles_b, n_doubles )
+
+    ! Compute Hij for all beta singles
+    ! ----------------------------------
+
+    tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    !DIR$ LOOP COUNT avg(1000)
+    do i=1,n_singles_b,block_size
+      umax = 0.d0
+      if (u_is_sparse) then
+        do kk=0,block_size-1
+          if (i+kk > n_singles_b) exit
+          l_b = singles_b(i+kk)
+          l_a = psi_bilinear_matrix_transp_order(l_b)
+          ASSERT (l_b <= N_det)
+          ASSERT (l_a <= N_det)
+
+          do l=1,N_st
+            utl(l,kk+1) = u_t(l,l_a)
+            umax = max(umax, dabs(utl(l,kk+1)))
+          enddo
+        enddo
+      else
+        do kk=0,block_size-1
+          if (i+kk > n_singles_b) exit
+          l_b = singles_b(i+kk)
+          l_a = psi_bilinear_matrix_transp_order(l_b)
+          ASSERT (l_b <= N_det)
+          ASSERT (l_a <= N_det)
+          utl(:,kk+1) = u_t(:,l_a)
+        enddo
+        umax = 1.d0
+      endif
+      if (umax < 1.d-20) cycle
+
+      do kk=0,block_size-1
+        if (i+kk > n_singles_b) exit
+        l_b = singles_b(i+kk)
+        l_a = psi_bilinear_matrix_transp_order(l_b)
+        lcol = psi_bilinear_matrix_transp_columns(l_b)
+        ASSERT (lcol <= N_det_beta_unique)
+
+        tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
+!        call i_H_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij)
+        if(do_right)then
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij)
+        else
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij)
+        endif
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+        enddo
+      enddo
+    enddo
+
+    ! Compute Hij for all beta doubles
+    ! ----------------------------------
+
+    !DIR$ LOOP COUNT avg(50000)
+    do i=1,n_doubles,block_size
+      umax = 0.d0
+      if (u_is_sparse) then
+        do kk=0,block_size-1
+          if (i+kk > n_doubles) exit
+          l_b = doubles(i+kk)
+          l_a = psi_bilinear_matrix_transp_order(l_b)
+          ASSERT (l_b <= N_det)
+          ASSERT (l_a <= N_det)
+          do l=1,N_st
+            utl(l,kk+1) = u_t(l,l_a)
+            umax = max(umax, dabs(utl(l,kk+1)))
+          enddo
+        enddo
+      else
+        do kk=0,block_size-1
+          if (i+kk > n_doubles) exit
+          l_b = doubles(i+kk)
+          l_a = psi_bilinear_matrix_transp_order(l_b)
+          ASSERT (l_b <= N_det)
+          ASSERT (l_a <= N_det)
+          utl(:,kk+1) = u_t(:,l_a)
+        enddo
+        umax = 1.d0
+      endif
+      if (umax < 1.d-20) cycle
+
+      do kk=0,block_size-1
+        if (i+kk > n_doubles) exit
+        l_b = doubles(i+kk)
+        l_a = psi_bilinear_matrix_transp_order(l_b)
+        lcol = psi_bilinear_matrix_transp_columns(l_b)
+        ASSERT (lcol <= N_det_beta_unique)
+
+        tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) 
+!        call i_H_j( tmp_det, tmp_det2, $N_int, hij)
+!        call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij)
+        if(do_right)then
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij)
+        else
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij)
+        endif
+
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+        enddo
+      enddo
+    enddo
+
+
+    ! Diagonal contribution
+    ! =====================
+
+
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+
+    if (u_is_sparse) then
+      umax = 0.d0
+      do l=1,N_st
+        umax = max(umax, dabs(u_t(l,k_a)))
+      enddo
+    else
+      umax = 1.d0
+    endif
+    if (umax < 1.d-20) cycle
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    double precision, external :: diag_H_mat_elem
+    double precision :: hmono, htwoe, hthree
+
+!    hij = diag_H_mat_elem(tmp_det,$N_int)
+    call diag_htilde_mu_mat_fock_bi_ortho  ($N_int, tmp_det, hmono, htwoe, hthree, hij)
+    !DIR$ LOOP COUNT AVG(4)
+    do l=1,N_st
+      v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a)
+    enddo
+
+  end do
+  !$OMP END DO
+  deallocate(buffer, singles_a, singles_b, doubles, idx, utl)
+  !$OMP END PARALLEL
+
+end
+
+SUBST [ N_int ]
+
+1;;
+2;;
+3;;
+4;;
+N_int;;
+
+END_TEMPLATE
+
+
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
new file mode 100644
index 00000000..e397e700
--- /dev/null
+++ b/src/tc_keywords/EZFIO.cfg
@@ -0,0 +1,217 @@
+[read_rl_eigv]
+type: logical
+doc: If |true|, read the right/left eigenvectors from ezfio
+interface: ezfio,provider,ocaml
+default: False
+
+[comp_left_eigv]
+type: logical
+doc: If |true|, computes also the left-eigenvector 
+interface: ezfio,provider,ocaml
+default: False
+
+[three_body_h_tc]
+type: logical
+doc: If |true|, three-body terms are included
+interface: ezfio,provider,ocaml
+default: True 
+
+[pure_three_body_h_tc]
+type: logical
+doc: If |true|, pure triple excitation three-body terms are included
+interface: ezfio,provider,ocaml
+default: False
+
+[double_normal_ord]
+type: logical
+doc: If |true|, contracted double excitation three-body terms are included
+interface: ezfio,provider,ocaml
+default: False
+
+[core_tc_op]
+type: logical
+doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) 
+interface: ezfio,provider,ocaml
+default: False
+
+[full_tc_h_solver]
+type: logical
+doc: If |true|, you diagonalize the full TC H matrix 
+interface: ezfio,provider,ocaml
+default: False
+
+[thresh_it_dav]
+type: Threshold
+doc: Thresholds on the energy for iterative Davidson used in TC
+interface: ezfio,provider,ocaml
+default: 1.e-5
+
+[max_it_dav]
+type: integer
+doc: nb max of iteration in Davidson used in TC
+interface: ezfio,provider,ocaml
+default: 1000
+
+[thresh_psi_r]
+type: Threshold
+doc: Thresholds on the coefficients of the right-eigenvector. Used for PT2 computation.
+interface: ezfio,provider,ocaml
+default: 0.000005
+
+[thresh_psi_r_norm]
+type: logical
+doc: If |true|, you prune the WF to compute the PT1 coef based on the norm. If False, the pruning is done through the amplitude on the right-coefficient. 
+interface: ezfio,provider,ocaml
+default: False
+
+[state_following_tc] 
+type: logical
+doc: If |true|, the states are re-ordered to match the input states
+default: False
+interface: ezfio,provider,ocaml
+
+[bi_ortho]
+type: logical
+doc: If |true|, the MO basis is assumed to be bi-orthonormal
+interface: ezfio,provider,ocaml
+default: True
+
+[symetric_fock_tc]
+type: logical
+doc: If |true|, using F+F^t as Fock TC 
+interface: ezfio,provider,ocaml
+default: False
+
+[thresh_tcscf]
+type: Threshold
+doc: Threshold on the convergence of the Hartree Fock energy.
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
+[n_it_tcscf_max]
+type: Strictly_positive_int
+doc: Maximum number of SCF iterations
+interface: ezfio,provider,ocaml
+default: 100
+
+[selection_tc]
+type: integer
+doc: if +1: only positive is selected, -1: only negative is selected, :0 both positive and negative
+interface: ezfio,provider,ocaml
+default: 0
+
+[j1b_pen]
+type: double precision
+doc: exponents of the 1-body Jastrow
+interface: ezfio
+size: (nuclei.nucl_num)
+
+[j1b_coeff]
+type: double precision
+doc: coeff of the 1-body Jastrow
+interface: ezfio
+size: (nuclei.nucl_num)
+
+[j1b_type]
+type: integer
+doc: type of 1-body Jastrow
+interface: ezfio, provider, ocaml
+default: 0
+
+[thr_degen_tc]
+type: Threshold
+doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[maxovl_tc]
+type: logical
+doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors 
+interface: ezfio,provider,ocaml
+default: False
+
+[ng_fit_jast]
+type: integer
+doc: nb of Gaussians used to fit Jastrow fcts
+interface: ezfio,provider,ocaml
+default: 20
+
+[max_dim_diis_tcscf]
+type: integer
+doc: Maximum size of the DIIS extrapolation procedure
+interface: ezfio,provider,ocaml
+default: 15
+
+[level_shift_tcscf]
+type: Positive_float
+doc: Energy shift on the virtual MOs to improve TCSCF convergence
+interface: ezfio,provider,ocaml
+default: 0.
+
+[tcscf_algorithm]
+type: character*(32)
+doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS]
+interface: ezfio,provider,ocaml
+default: Simple 
+
+[im_thresh_tcscf]
+type: Threshold
+doc: Thresholds on the Imag part of energy
+interface: ezfio,provider,ocaml
+default: 1.e-7
+
+[test_cycle_tc]
+type: logical
+doc: If |true|, the integrals of the three-body jastrow are computed with cycles
+interface: ezfio,provider,ocaml
+default: True
+
+[thresh_biorthog_diag]
+type: Threshold
+doc: Threshold to determine if diagonal elements of the bi-orthogonal condition L.T x R are close enouph to 1
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_biorthog_nondiag]
+type: Threshold
+doc: Threshold to determine if non-diagonal elements of L.T x R are close enouph to 0
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[var_tc]
+type: logical
+doc: If |true|, use VAR-TC 
+interface: ezfio,provider,ocaml
+default: False
+
+[read_tc_integ]
+type: logical
+doc: If |true|, read integrals: int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao
+interface: ezfio,provider,ocaml
+default: False
+
+[write_tc_integ]
+type: logical
+doc: If |true|, write integrals: int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao
+interface: ezfio,provider,ocaml
+default: False
+
+[debug_tc_pt2]
+type: integer
+doc: If :: 1 then you compute the TC-PT2 the old way, :: 2 then you check with the new version but without three-body
+interface: ezfio,provider,ocaml
+default: -1
+
+[only_spin_tc_right]
+type: logical
+doc: If |true|, only the right part of WF is used to compute spin dens
+interface: ezfio,provider,ocaml
+default: False
+
+[save_sorted_tc_wf]
+type: logical
+doc: If |true|, save the bi-ortho wave functions in a sorted way
+interface: ezfio,provider,ocaml
+default: True
+
+
diff --git a/src/tc_keywords/NEED b/src/tc_keywords/NEED
new file mode 100644
index 00000000..f1c051ff
--- /dev/null
+++ b/src/tc_keywords/NEED
@@ -0,0 +1,2 @@
+ezfio_files
+nuclei
diff --git a/src/tc_keywords/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f
new file mode 100644
index 00000000..57250b52
--- /dev/null
+++ b/src/tc_keywords/j1b_pen.irp.f
@@ -0,0 +1,116 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ]
+
+  BEGIN_DOC
+  ! exponents of the 1-body Jastrow
+  END_DOC
+
+  implicit none
+  logical :: exists
+
+  PROVIDE ezfio_filename
+
+  if (mpi_master) then
+    call ezfio_has_tc_keywords_j1b_pen(exists)
+  endif
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(j1b_pen, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read j1b_pen with MPI'
+    endif
+  IRP_ENDIF
+
+  if (exists) then
+
+    if (mpi_master) then
+      write(6,'(A)') '.. >>>>> [ IO READ: j1b_pen ] <<<<< ..'
+      call ezfio_get_tc_keywords_j1b_pen(j1b_pen)
+      IRP_IF MPI
+        call MPI_BCAST(j1b_pen, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          stop 'Unable to read j1b_pen with MPI'
+        endif
+      IRP_ENDIF
+    endif
+
+  else
+ 
+    integer :: i
+    do i = 1, nucl_num
+      j1b_pen(i) = 1d5
+    enddo
+
+  endif
+ print*,'parameters for nuclei jastrow'
+ do i = 1, nucl_num
+  print*,'i,Z,j1b_pen(i)',i,nucl_charge(i),j1b_pen(i)
+ enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, j1b_coeff, (nucl_num) ]
+
+  BEGIN_DOC
+  ! coefficients of the 1-body Jastrow
+  END_DOC
+
+  implicit none
+  logical :: exists
+
+  PROVIDE ezfio_filename
+
+  if (mpi_master) then
+    call ezfio_has_tc_keywords_j1b_coeff(exists)
+  endif
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(j1b_coeff, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read j1b_coeff with MPI'
+    endif
+  IRP_ENDIF
+
+  if (exists) then
+
+    if (mpi_master) then
+      write(6,'(A)') '.. >>>>> [ IO READ: j1b_coeff ] <<<<< ..'
+      call ezfio_get_tc_keywords_j1b_coeff(j1b_coeff)
+      IRP_IF MPI
+        call MPI_BCAST(j1b_coeff, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          stop 'Unable to read j1b_coeff with MPI'
+        endif
+      IRP_ENDIF
+    endif
+
+  else
+ 
+    integer :: i
+    do i = 1, nucl_num
+      j1b_coeff(i) = 0d5
+    enddo
+
+  endif
+
+END_PROVIDER
+
+! ---
diff --git a/src/tc_keywords/tc_keywords.irp.f b/src/tc_keywords/tc_keywords.irp.f
new file mode 100644
index 00000000..3bc68550
--- /dev/null
+++ b/src/tc_keywords/tc_keywords.irp.f
@@ -0,0 +1,7 @@
+program tc_keywords
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+end
diff --git a/src/tc_scf/11.tc_scf.bats b/src/tc_scf/11.tc_scf.bats
new file mode 100644
index 00000000..91b52540
--- /dev/null
+++ b/src/tc_scf/11.tc_scf.bats
@@ -0,0 +1,101 @@
+#!/usr/bin/env bats
+
+source $QP_ROOT/tests/bats/common.bats.sh
+source $QP_ROOT/quantum_package.rc
+
+
+function run_Ne() {
+  rm -rf Ne_tc_scf
+  echo Ne > Ne.xyz
+  qp create_ezfio -b cc-pcvdz Ne.xyz -o Ne_tc_scf
+  qp run scf 
+
+  qp set ao_two_e_erf_ints mu_erf 0.87
+  qp set tc_keywords j1b_type 3
+  qp set tc_keywords j1b_pen [1.5] 
+  qp set tc_keywords bi_ortho True 
+  qp set tc_keywords test_cycle_tc True
+
+  qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out 
+  eref=-128.552134
+  energy="$(qp get tc_scf bitc_energy)"
+  eq $energy $eref 1e-6
+}
+
+
+@test "Ne" {
+ run_Ne 
+}
+
+function run_C() {
+  rm -rf C_tc_scf
+  echo C  > C.xyz
+  qp create_ezfio -b cc-pcvdz C.xyz -o C_tc_scf -m 3
+  qp run scf 
+
+  qp set ao_two_e_erf_ints mu_erf 0.87
+  qp set tc_keywords j1b_type 3
+  qp set tc_keywords j1b_pen [1.5] 
+  qp set tc_keywords bi_ortho True 
+  qp set tc_keywords test_cycle_tc True
+
+  qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out 
+  eref=-37.691254356408791
+  energy="$(qp get tc_scf bitc_energy)"
+  eq $energy $eref 1e-6
+}
+
+
+@test "C" {
+ run_C 
+}
+
+
+function run_O() {
+  rm -rf O_tc_scf
+  echo O  > O.xyz
+  qp create_ezfio -b cc-pcvdz O.xyz -o O_tc_scf -m 3
+  qp run scf 
+
+  qp set ao_two_e_erf_ints mu_erf 0.87
+  qp set tc_keywords j1b_type 3
+  qp set tc_keywords j1b_pen [1.5] 
+  qp set tc_keywords bi_ortho True 
+  qp set tc_keywords test_cycle_tc True
+
+  qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out 
+  eref=-74.814687229354590
+  energy="$(qp get tc_scf bitc_energy)"
+  eq $energy $eref 1e-6
+}
+
+
+@test "O" {
+ run_O 
+}
+
+
+
+function run_ch2() {
+  rm -rf ch2_tc_scf
+  cp ${QP_ROOT}/tests/input/ch2.xyz .
+  qp create_ezfio -b "C:cc-pcvdz|H:cc-pvdz" ch2.xyz -o ch2_tc_scf 
+  qp run scf 
+
+  qp set ao_two_e_erf_ints mu_erf 0.87
+  qp set tc_keywords j1b_type 3
+  qp set tc_keywords j1b_pen '[1.5,10000,10000]'
+  qp set tc_keywords bi_ortho True
+  qp set tc_keywords test_cycle_tc True
+
+  qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out 
+  eref=-38.903247818077737
+  energy="$(qp get tc_scf bitc_energy)"
+  eq $energy $eref 1e-6
+}
+
+
+@test "ch2" {
+ run_ch2 
+}
+
diff --git a/src/tc_scf/EZFIO.cfg b/src/tc_scf/EZFIO.cfg
new file mode 100644
index 00000000..313d6f2b
--- /dev/null
+++ b/src/tc_scf/EZFIO.cfg
@@ -0,0 +1,4 @@
+[bitc_energy]
+type: Threshold
+doc: Energy bi-tc HF
+interface: ezfio
diff --git a/src/tc_scf/NEED b/src/tc_scf/NEED
new file mode 100644
index 00000000..84b0f792
--- /dev/null
+++ b/src/tc_scf/NEED
@@ -0,0 +1,6 @@
+hartree_fock
+bi_ortho_mos
+ortho_three_e_ints
+bi_ort_ints
+tc_keywords
+non_hermit_dav
diff --git a/src/tc_scf/combine_lr_tcscf.irp.f b/src/tc_scf/combine_lr_tcscf.irp.f
new file mode 100644
index 00000000..b257f4a5
--- /dev/null
+++ b/src/tc_scf/combine_lr_tcscf.irp.f
@@ -0,0 +1,74 @@
+
+! ---
+
+program combine_lr_tcscf
+
+  BEGIN_DOC
+  ! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  bi_ortho = .True.
+  touch bi_ortho
+
+  call comb_orbitals()
+
+end
+
+! ---
+
+subroutine comb_orbitals()
+
+  implicit none
+  integer                       :: i, m, n, nn, mm
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: R(:,:), L(:,:), Rnew(:,:), tmp(:,:), S(:,:)
+
+  n  = ao_num
+  m  = mo_num
+  nn = elec_alpha_num
+  mm = m - nn
+
+  allocate(L(n,m), R(n,m), Rnew(n,m), S(m,m))
+  L = mo_l_coef
+  R = mo_r_coef
+
+  call check_weighted_biorthog(n, m, ao_overlap, L, R, accu_d, accu_nd, S, .true.)
+
+  allocate(tmp(n,nn))
+  do i = 1, nn 
+    tmp(1:n,i) = R(1:n,i)
+  enddo
+  call impose_weighted_orthog_svd(n, nn, ao_overlap, tmp)
+  do i = 1, nn
+    Rnew(1:n,i) = tmp(1:n,i)
+  enddo
+  deallocate(tmp)
+
+  allocate(tmp(n,mm))
+  do i = 1, mm
+    tmp(1:n,i) = L(1:n,i+nn)
+  enddo
+  call impose_weighted_orthog_svd(n, mm, ao_overlap, tmp)
+  do i = 1, mm
+    Rnew(1:n,i+nn) = tmp(1:n,i)
+  enddo
+  deallocate(tmp)
+
+  call check_weighted_biorthog(n, m, ao_overlap, Rnew, Rnew, accu_d, accu_nd, S, .true.)
+
+  mo_r_coef = Rnew
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+
+  deallocate(L, R, Rnew, S)
+
+end subroutine comb_orbitals
+
+! ---
+
diff --git a/src/tc_scf/diago_bi_ort_tcfock.irp.f b/src/tc_scf/diago_bi_ort_tcfock.irp.f
new file mode 100644
index 00000000..02545315
--- /dev/null
+++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f
@@ -0,0 +1,205 @@
+! ---
+
+ BEGIN_PROVIDER [ double precision, fock_tc_reigvec_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, fock_tc_leigvec_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, eigval_fock_tc_mo, (mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_fock_tc_eigvec_mo, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! EIGENVECTORS OF FOCK MATRIX ON THE MO BASIS and their OVERLAP
+  END_DOC
+
+  implicit none
+  integer                       :: n_real_tc 
+  integer                       :: i, j, k, l
+  double precision              :: accu_d, accu_nd, accu_tmp
+  double precision              :: norm
+  double precision, allocatable :: eigval_right_tmp(:)
+  double precision, allocatable :: F_tmp(:,:)
+
+  allocate( eigval_right_tmp(mo_num), F_tmp(mo_num,mo_num) )
+
+  PROVIDE Fock_matrix_tc_mo_tot
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      F_tmp(j,i) = Fock_matrix_tc_mo_tot(j,i)
+    enddo
+  enddo
+  ! insert level shift here
+  do i = elec_beta_num+1, elec_alpha_num
+    F_tmp(i,i) += 0.5d0 * level_shift_tcscf
+  enddo
+  do i = elec_alpha_num+1, mo_num
+    F_tmp(i,i) += level_shift_tcscf
+  enddo
+
+  call non_hrmt_bieig( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag &
+                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo                       & 
+                     , n_real_tc, eigval_right_tmp )
+
+  deallocate(F_tmp)
+
+  eigval_fock_tc_mo = eigval_right_tmp
+
+  ! L.T x R 
+  call dgemm( "T", "N", mo_num, mo_num, mo_num, 1.d0          &
+            , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) &
+            , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
+            , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
+
+  ! ---
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, mo_num
+    do k = 1, mo_num
+      if(i==k) then
+        accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+        accu_d += dabs(accu_tmp )
+      else
+        accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+        accu_nd += accu_tmp * accu_tmp
+        if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then
+         print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
+        endif
+      endif
+    enddo 
+  enddo
+  accu_nd = dsqrt(accu_nd) / accu_d
+  if(accu_nd .gt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthog failed'
+    print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag
+    print *, ' overlap_fock_tc_eigvec_mo = '
+    do i = 1, mo_num
+      write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:)
+    enddo
+    stop
+  endif
+
+  ! ---
+
+  if(dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thresh_biorthog_diag) then
+
+    print *, ' mo_num     = ', mo_num 
+    print *, ' accu_d  MO = ', accu_d, thresh_biorthog_diag
+    print *, ' normalizing vectors ...'
+    do i = 1, mo_num
+      norm = dsqrt(dabs(overlap_fock_tc_eigvec_mo(i,i)))
+      if(norm .gt. thresh_biorthog_diag) then
+        do k = 1, mo_num
+          fock_tc_reigvec_mo(k,i) *= 1.d0/norm
+          fock_tc_leigvec_mo(k,i) *= 1.d0/norm
+        enddo
+      endif
+    enddo
+
+    call dgemm( "T", "N", mo_num, mo_num, mo_num, 1.d0          &
+              , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) &
+              , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
+              , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
+
+    accu_d  = 0.d0
+    accu_nd = 0.d0
+    do i = 1, mo_num
+      do k = 1, mo_num
+        if(i==k) then
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_d  += dabs(accu_tmp)
+        else
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_nd += accu_tmp * accu_tmp
+          if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then
+           print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
+          endif
+        endif
+      enddo 
+    enddo
+    accu_nd = dsqrt(accu_nd) / accu_d
+    if(accu_nd .gt. thresh_biorthog_diag) then
+      print *, ' bi-orthog failed'
+      print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag
+      print *, ' overlap_fock_tc_eigvec_mo = '
+      do i = 1, mo_num
+        write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:)
+      enddo
+      stop
+    endif
+
+  endif
+ 
+  ! ---
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, fock_tc_reigvec_ao, (ao_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, fock_tc_leigvec_ao, (ao_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_fock_tc_eigvec_ao, (mo_num, mo_num) ]
+
+  BEGIN_DOC
+  ! EIGENVECTORS OF FOCK MATRIX ON THE AO BASIS and their OVERLAP
+  !
+  ! THE OVERLAP SHOULD BE THE SAME AS overlap_fock_tc_eigvec_mo
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, q, p
+  double precision              :: accu, accu_d
+  double precision, allocatable :: tmp(:,:)
+
+  PROVIDE mo_l_coef mo_r_coef
+
+!  ! MO_R x R
+   call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
+             , mo_r_coef, size(mo_r_coef, 1)                   &
+             , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
+             , 0.d0, fock_tc_reigvec_ao, size(fock_tc_reigvec_ao, 1) )
+
+   ! MO_L x L
+   call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
+             , mo_l_coef, size(mo_l_coef, 1)                   &
+             , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) &
+             , 0.d0, fock_tc_leigvec_ao, size(fock_tc_leigvec_ao, 1) )
+ 
+  allocate( tmp(mo_num,ao_num) )
+
+  ! tmp <-- L.T x S_ao
+  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                           &
+            , fock_tc_leigvec_ao, size(fock_tc_leigvec_ao, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! S <-- tmp x R
+  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                             &
+            , tmp, size(tmp, 1), fock_tc_reigvec_ao, size(fock_tc_reigvec_ao, 1) &
+            , 0.d0, overlap_fock_tc_eigvec_ao, size(overlap_fock_tc_eigvec_ao, 1) )
+
+  deallocate( tmp )
+
+  ! ---
+  double precision :: norm
+  do i = 1, mo_num
+   norm = 1.d0/dsqrt(dabs(overlap_fock_tc_eigvec_ao(i,i)))
+   do j = 1, mo_num
+    fock_tc_reigvec_ao(j,i) *= norm
+    fock_tc_leigvec_ao(j,i) *= norm
+   enddo
+  enddo
+
+  allocate( tmp(mo_num,ao_num) )
+
+  ! tmp <-- L.T x S_ao
+  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                           &
+            , fock_tc_leigvec_ao, size(fock_tc_leigvec_ao, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! S <-- tmp x R
+  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                             &
+            , tmp, size(tmp, 1), fock_tc_reigvec_ao, size(fock_tc_reigvec_ao, 1) &
+            , 0.d0, overlap_fock_tc_eigvec_ao, size(overlap_fock_tc_eigvec_ao, 1) )
+
+  deallocate( tmp )
+
+END_PROVIDER
+
diff --git a/src/tc_scf/diago_vartcfock.irp.f b/src/tc_scf/diago_vartcfock.irp.f
new file mode 100644
index 00000000..0c881dcb
--- /dev/null
+++ b/src/tc_scf/diago_vartcfock.irp.f
@@ -0,0 +1,96 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, fock_vartc_eigvec_mo, (mo_num, mo_num)]
+
+  implicit none
+
+  integer                       :: i, j
+  integer                       :: liwork, lwork, n, info
+  integer,          allocatable :: iwork(:)
+  double precision, allocatable :: work(:), F(:,:), F_save(:,:)
+  double precision, allocatable :: diag(:)
+
+  PROVIDE mo_r_coef
+  PROVIDE Fock_matrix_vartc_mo_tot
+
+  allocate( F(mo_num,mo_num), F_save(mo_num,mo_num)  )
+  allocate (diag(mo_num) )
+
+  do j = 1, mo_num
+    do i = 1, mo_num
+      F(i,j) = Fock_matrix_vartc_mo_tot(i,j)
+    enddo
+  enddo
+
+  ! Insert level shift here
+  do i = elec_beta_num+1, elec_alpha_num
+    F(i,i) += 0.5d0 * level_shift_tcscf
+  enddo
+  do i = elec_alpha_num+1, mo_num
+    F(i,i) += level_shift_tcscf
+  enddo
+
+  n = mo_num
+  lwork = 1+6*n + 2*n*n
+  liwork = 3 + 5*n
+
+  allocate(work(lwork))
+  allocate(iwork(liwork) )
+
+  lwork = -1
+  liwork = -1
+
+  F_save = F
+  call dsyevd('V', 'U', mo_num, F, size(F, 1), diag, work, lwork, iwork, liwork, info)
+
+  if (info /= 0) then
+    print *,  irp_here//' DSYEVD failed : ', info
+    stop 1
+  endif
+  lwork = int(work(1))
+  liwork = iwork(1)
+  deallocate(iwork)
+  deallocate(work)
+
+  allocate(work(lwork))
+  allocate(iwork(liwork) )
+  call dsyevd('V', 'U', mo_num, F, size(F, 1), diag, work, lwork, iwork, liwork, info)
+  deallocate(iwork)
+
+  if (info /= 0) then
+    F = F_save
+    call dsyev('V', 'L', mo_num, F, size(F, 1), diag, work, lwork, info)
+
+    if (info /= 0) then
+      print *,  irp_here//' DSYEV failed : ', info
+      stop 1
+    endif
+  endif
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      fock_vartc_eigvec_mo(j,i) = F(j,i)
+    enddo
+  enddo
+
+  deallocate(work, F, F_save, diag)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, fock_vartc_eigvec_ao, (ao_num, mo_num)]
+
+  implicit none
+
+  PROVIDE mo_r_coef
+
+  call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0                                             &
+            , mo_r_coef, size(mo_r_coef, 1), fock_vartc_eigvec_mo, size(fock_vartc_eigvec_mo, 1) &
+            , 0.d0, fock_vartc_eigvec_ao, size(fock_vartc_eigvec_ao, 1))
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f
new file mode 100644
index 00000000..0b08f784
--- /dev/null
+++ b/src/tc_scf/diis_tcscf.irp.f
@@ -0,0 +1,190 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, Q_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_alpha = mo_r_coef x eta_occ_alpha x mo_l_coef.T
+  !
+  ! [Q_alpha]_ij = \sum_{k=1}^{elec_alpha_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  !
+  END_DOC
+
+  implicit none
+
+  Q_alpha = 0.d0
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0               &
+            , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, Q_alpha, size(Q_alpha, 1) )
+
+END_PROVIDER
+  
+! ---
+    
+BEGIN_PROVIDER [ double precision, Q_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_beta = mo_r_coef x eta_occ_beta x mo_l_coef.T
+  !
+  ! [Q_beta]_ij = \sum_{k=1}^{elec_beta_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  !
+  END_DOC
+
+  implicit none
+
+  Q_beta = 0.d0
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
+            , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, Q_beta, size(Q_beta, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Q_matrix, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_matrix = 2 mo_r_coef x eta_occ x mo_l_coef.T
+  ! 
+  ! with: 
+  !                        | 1   if i = j = 1, ..., nb of occ orbitals
+  !        [eta_occ]_ij =  |     
+  !                        | 0   otherwise
+  !
+  ! the diis error is defines as:
+  !                         e = F_ao x Q x ao_overlap - ao_overlap x Q x F_ao
+  ! with: 
+  !       mo_l_coef.T x ao_overlap x mo_r_coef = I
+  !       F_mo = mo_l_coef.T x F_ao x mo_r_coef
+  !       F_ao = (ao_overlap x mo_r_coef) x F_mo x (ao_overlap x mo_l_coef).T
+  !
+  ! ==> e = 2 ao_overlap x mo_r_coef x [ F_mo x eta_occ - eta_occ x F_mo ] x (ao_overlap x mo_l_coef).T
+  !
+  !      at convergence:
+  !                                      F_mo x eta_occ - eta_occ x F_mo = 0
+  !                                  ==> [F_mo]_ij ([eta_occ]_ii - [eta_occ]_jj) = 0  
+  !                                  ==> [F_mo]_ia = [F_mo]_ai = 0 where: i = occ and a = vir
+  !                                  ==> Brillouin conditions
+  !
+  END_DOC
+
+  implicit none
+
+  if(elec_alpha_num == elec_beta_num) then
+    Q_matrix = Q_alpha + Q_alpha
+  else
+    Q_matrix = Q_alpha + Q_beta
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)]
+
+  implicit none
+  integer                       :: i, j
+  double precision, allocatable :: tmp(:,:)
+  double precision, allocatable :: F(:,:)
+
+  allocate(F(ao_num,ao_num))
+  if(var_tc) then
+    do i = 1, ao_num
+      do j = 1, ao_num
+        F(j,i) = Fock_matrix_vartc_ao_tot(j,i)
+      enddo
+    enddo
+  else
+    do i = 1, ao_num
+      do j = 1, ao_num
+        F(j,i) = Fock_matrix_tc_ao_tot(j,i)
+      enddo
+    enddo
+  endif
+
+  allocate(tmp(ao_num,ao_num))
+
+  ! F x Q
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0     &
+            , F, size(F, 1), Q_matrix, size(Q_matrix, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! F x Q x S
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0             &
+            , tmp, size(tmp, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+
+  ! S x Q
+  tmp = 0.d0
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                       &
+            , ao_overlap, size(ao_overlap, 1), Q_matrix, size(Q_matrix, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! F x Q x S - S x Q x F
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, -1.d0 &
+            , tmp, size(tmp, 1), F, size(F, 1)        &
+            , 1.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+
+  deallocate(tmp)
+  deallocate(F)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)]
+
+  implicit none
+
+  call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) &
+                        , FQS_SQF_mo, size(FQS_SQF_mo, 1) )
+
+END_PROVIDER
+
+! ---
+
+! BEGIN_PROVIDER [ double precision, eigenval_Fock_tc_ao, (ao_num) ]
+!&BEGIN_PROVIDER [ double precision, eigenvec_Fock_tc_ao, (ao_num,ao_num) ]
+!
+!  BEGIN_DOC
+!  !
+!  ! Eigenvalues and eigenvectors of the Fock matrix over the ao basis
+!  !
+!  ! F' = X.T x F x X   where X = ao_overlap^(-1/2)
+!  ! 
+!  ! F'   x Cr' = Cr' x E ==> F   Cr = Cr x E with Cr = X x Cr'
+!  ! F'.T x Cl' = Cl' x E ==> F.T Cl = Cl x E with Cl = X x Cl'
+!  !
+!  END_DOC
+!
+!  implicit none
+!  double precision, allocatable :: tmp1(:,:), tmp2(:,:)
+!
+!  ! ---
+!  ! Fock matrix in orthogonal basis: F' = X.T x F x X
+!
+!  allocate(tmp1(ao_num,ao_num))
+!  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                                                 &
+!            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), S_half_inv, size(S_half_inv, 1) &
+!            , 0.d0, tmp1, size(tmp1, 1) )
+!
+!  allocate(tmp2(ao_num,ao_num))
+!  call dgemm( 'T', 'N', ao_num, ao_num, ao_num, 1.d0               &
+!            , S_half_inv, size(S_half_inv, 1), tmp1, size(tmp1, 1) &
+!            , 0.d0, tmp2, size(tmp2, 1) )
+!
+!  ! ---
+!
+!  ! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues
+!  ! TODO
+!
+!  ! Back-transform eigenvectors: C =X.C'
+!
+!END_PROVIDER
+
+! ---
+
+~                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
new file mode 100644
index 00000000..d8b962d7
--- /dev/null
+++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
@@ -0,0 +1,415 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: a, b, i, j
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_cs ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_mo_cs = 0.d0
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+   
+      do j = 1, elec_beta_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_cs(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij &
+                                            +        I_bij_ija &
+                                            +        I_bij_jai &
+                                            - 2.d0 * I_bij_aji &
+                                            - 2.d0 * I_bij_iaj &
+                                            - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)]
+
+  BEGIN_DOC
+! ALPHA part of the Fock matrix from three-electron terms 
+!
+! WARNING :: non hermitian if bi-ortho MOS used 
+  END_DOC
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_a ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+
+  fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj &
+                                           - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           - 2.d0 * I_bij_iaj &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           + I_bij_ija &
+                                           + I_bij_jai &
+                                           - I_bij_aji &
+                                           - I_bij_iaj &
+                                           - I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)]
+  BEGIN_DOC
+! BETA part of the Fock matrix from three-electron terms 
+!
+! WARNING :: non hermitian if bi-ortho MOS used 
+  END_DOC
+
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_b ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+
+  fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           - I_bij_aji )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_b =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! Equations (B6) and (B7)
+  !
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
+
+  print *, ' PROVIDING fock_3e_uhf_ao_a ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_a = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a)
+
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+
+ !$OMP DO
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_a * dm_dk_a * i_mugd_eknu &
+                                      + dm_ge_a * dm_dk_a * i_mugd_knue &
+                                      - dm_ge_a * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_a * i_mugd_kenu &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! Equations (B6) and (B7)
+  !
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
+
+  print *, ' PROVIDING fock_3e_uhf_ao_b ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_b = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b)
+
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+
+ !$OMP DO
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_b * dm_dk_b * i_mugd_eknu &
+                                      + dm_ge_b * dm_dk_b * i_mugd_knue &
+                                      - dm_ge_b * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_b * i_mugd_kenu &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/tc_scf/fock_hermit.irp.f b/src/tc_scf/fock_hermit.irp.f
new file mode 100644
index 00000000..5a51b324
--- /dev/null
+++ b/src/tc_scf/fock_hermit.irp.f
@@ -0,0 +1,107 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, good_hermit_tc_fock_mat, (mo_num, mo_num)]
+
+  BEGIN_DOC
+! good_hermit_tc_fock_mat = Hermitian Upper triangular Fock matrix 
+!
+! The converged eigenvectors of such matrix yield to orthonormal vectors satisfying the left Brillouin theorem
+  END_DOC
+  implicit none
+  integer :: i, j
+
+  good_hermit_tc_fock_mat = Fock_matrix_tc_mo_tot
+  do j = 1, mo_num
+    do i = 1, j-1
+      good_hermit_tc_fock_mat(i,j) = Fock_matrix_tc_mo_tot(j,i) 
+    enddo
+  enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, hermit_average_tc_fock_mat, (mo_num, mo_num)]
+
+  BEGIN_DOC
+! hermit_average_tc_fock_mat = (F + F^\dagger)/2
+  END_DOC
+  implicit none
+  integer :: i, j
+
+  hermit_average_tc_fock_mat = Fock_matrix_tc_mo_tot
+  do j = 1, mo_num
+    do i = 1, mo_num
+      hermit_average_tc_fock_mat(i,j) = 0.5d0 * (Fock_matrix_tc_mo_tot(j,i) + Fock_matrix_tc_mo_tot(i,j))
+    enddo
+  enddo
+
+END_PROVIDER 
+
+
+! ---
+BEGIN_PROVIDER [ double precision, grad_hermit]
+ implicit none
+ BEGIN_DOC
+ ! square of gradient of the energy
+ END_DOC
+ if(symetric_fock_tc)then
+  grad_hermit = grad_hermit_average_tc_fock_mat
+ else
+  grad_hermit = grad_good_hermit_tc_fock_mat
+ endif
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, grad_good_hermit_tc_fock_mat]
+  implicit none
+  BEGIN_DOC
+  ! grad_good_hermit_tc_fock_mat = norm of gradients of the upper triangular TC fock
+  END_DOC
+  integer :: i, j
+  grad_good_hermit_tc_fock_mat = 0.d0
+  do i = 1, elec_alpha_num
+    do j = elec_alpha_num+1, mo_num
+      grad_good_hermit_tc_fock_mat += dabs(good_hermit_tc_fock_mat(i,j))
+    enddo
+  enddo
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, grad_hermit_average_tc_fock_mat]
+  implicit none
+  BEGIN_DOC
+  ! grad_hermit_average_tc_fock_mat = norm of gradients of the upper triangular TC fock
+  END_DOC
+  integer :: i, j
+  grad_hermit_average_tc_fock_mat = 0.d0
+  do i = 1, elec_alpha_num
+    do j = elec_alpha_num+1, mo_num
+      grad_hermit_average_tc_fock_mat += dabs(hermit_average_tc_fock_mat(i,j))
+    enddo
+  enddo
+END_PROVIDER 
+
+
+! ---
+
+subroutine save_good_hermit_tc_eigvectors()
+
+  implicit none
+  integer        :: sign
+  character*(64) :: label
+  logical        :: output
+
+  sign = 1
+  label = "Canonical"
+  output = .False.
+  
+  if(symetric_fock_tc)then
+   call mo_as_eigvectors_of_mo_matrix(hermit_average_tc_fock_mat, mo_num, mo_num, label, sign, output)
+  else
+   call mo_as_eigvectors_of_mo_matrix(good_hermit_tc_fock_mat, mo_num, mo_num, label, sign, output)
+  endif
+end subroutine save_good_hermit_tc_eigvectors
+
+! ---
+
diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f
new file mode 100644
index 00000000..1d651c4e
--- /dev/null
+++ b/src/tc_scf/fock_tc.irp.f
@@ -0,0 +1,286 @@
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_beta , (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! two_e_tc_non_hermit_integral_seq_alpha(k,i) = <k| F^tc_alpha |i> ON THE AO BASIS 
+  !
+  ! where F^tc is the TWO-BODY part of the TC Fock matrix and k,i are AO basis functions
+  !
+  ! works in SEQUENTIAL 
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: density, density_a, density_b
+  double precision :: t0, t1
+
+  !print*, ' providing two_e_tc_non_hermit_integral_seq ...'
+  !call wall_time(t0)
+
+  two_e_tc_non_hermit_integral_seq_alpha = 0.d0
+  two_e_tc_non_hermit_integral_seq_beta  = 0.d0
+
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do j = 1, ao_num
+        do l = 1, ao_num
+
+          density_a = TCSCF_density_matrix_ao_alpha(l,j)
+          density_b = TCSCF_density_matrix_ao_beta (l,j)
+          density   = density_a + density_b
+
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho_a(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !!                                         rho_b(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
+
+          !                                         rho(l,j)   *      < k l| T | i j>
+          two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
+          !                                         rho(l,j)   *      < k l| T | i j>
+          two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
+          !                                         rho_a(l,j) *      < k l| T | j i>
+          two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !                                         rho_b(l,j) *      < k l| T | j i>
+          two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  !call wall_time(t1)
+  !print*, ' wall time for two_e_tc_non_hermit_integral_seq after = ', t1 - t0
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! two_e_tc_non_hermit_integral_alpha(k,i) = <k| F^tc_alpha |i> ON THE AO BASIS 
+  !
+  ! where F^tc is the TWO-BODY part of the TC Fock matrix and k,i are AO basis functions
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l
+  double precision              :: density, density_a, density_b, I_coul, I_kjli
+  double precision              :: t0, t1
+  double precision, allocatable :: tmp_a(:,:), tmp_b(:,:)
+
+  !print*, ' providing two_e_tc_non_hermit_integral ...'
+  !call wall_time(t0)
+
+  two_e_tc_non_hermit_integral_alpha = 0.d0
+  two_e_tc_non_hermit_integral_beta  = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                        &
+ !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli)              &
+ !$OMP SHARED  (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, &
+ !$OMP         two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta)
+
+  allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num))
+  tmp_a = 0.d0
+  tmp_b = 0.d0
+
+ !$OMP DO
+  do j = 1, ao_num
+    do l = 1, ao_num
+      density_a = TCSCF_density_matrix_ao_alpha(l,j)
+      density_b = TCSCF_density_matrix_ao_beta (l,j)
+      density   = density_a + density_b                      
+      do i = 1, ao_num
+        do k = 1, ao_num
+
+          I_coul = density * ao_two_e_tc_tot(k,i,l,j)
+          I_kjli = ao_two_e_tc_tot(k,j,l,i)
+
+          tmp_a(k,i) += I_coul - density_a * I_kjli
+          tmp_b(k,i) += I_coul - density_b * I_kjli
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do i = 1, ao_num
+    do j = 1, ao_num
+      two_e_tc_non_hermit_integral_alpha(j,i) += tmp_a(j,i)
+      two_e_tc_non_hermit_integral_beta (j,i) += tmp_b(j,i)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(tmp_a, tmp_b)
+ !$OMP END PARALLEL
+
+  !call wall_time(t1)
+  !print*, ' wall time for two_e_tc_non_hermit_integral after = ', t1 - t0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_alpha, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the AO basis
+  END_DOC
+
+  implicit none
+
+  Fock_matrix_tc_ao_alpha =  ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha 
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_beta, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
+  END_DOC
+
+  implicit none
+
+  Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_beta 
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]
+
+  BEGIN_DOC
+  ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis
+  END_DOC
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  if(bi_ortho) then
+
+    !allocate(tmp(ao_num,ao_num))
+    !tmp = Fock_matrix_tc_ao_alpha
+    !if(three_body_h_tc) then
+    !  tmp += fock_3e_uhf_ao_a
+    !endif
+    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1))
+    !deallocate(tmp)
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+                          , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+    if(three_body_h_tc) then
+      !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
+      Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a
+    endif
+
+  else
+    call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+                 , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ]
+
+  BEGIN_DOC
+  ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the MO basis
+  END_DOC
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  if(bi_ortho) then
+
+    !allocate(tmp(ao_num,ao_num))
+    !tmp = Fock_matrix_tc_ao_beta
+    !if(three_body_h_tc) then
+    !  tmp += fock_3e_uhf_ao_b
+    !endif
+    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1))
+    !deallocate(tmp)
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+                          , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+    if(three_body_h_tc) then
+      !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
+      Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b
+    endif
+
+  else
+
+    call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+                 , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+
+  endif
+
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, grad_non_hermit_left]
+&BEGIN_PROVIDER [ double precision, grad_non_hermit_right]
+&BEGIN_PROVIDER [ double precision, grad_non_hermit]
+
+  implicit none
+  integer :: i, k
+
+  grad_non_hermit_left  = 0.d0
+  grad_non_hermit_right = 0.d0
+
+  do i = 1, elec_beta_num ! doc --> SOMO
+    do k = elec_beta_num+1, elec_alpha_num
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+    enddo
+  enddo
+
+  do i = 1, elec_beta_num ! doc --> virt 
+    do k = elec_alpha_num+1, mo_num
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+    enddo
+  enddo
+
+  do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt 
+    do k = elec_alpha_num+1, mo_num
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+    enddo
+  enddo
+
+  grad_non_hermit = max(grad_non_hermit_left, grad_non_hermit_right)
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ]
+
+  implicit none
+
+  call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) &
+                        , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) )
+
+END_PROVIDER
+
+! ---
+
+
diff --git a/src/tc_scf/fock_tc_mo_tot.irp.f b/src/tc_scf/fock_tc_mo_tot.irp.f
new file mode 100644
index 00000000..a03a0624
--- /dev/null
+++ b/src/tc_scf/fock_tc_mo_tot.irp.f
@@ -0,0 +1,144 @@
+
+ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ double precision, Fock_matrix_tc_diag_mo_tot, (mo_num)]
+   implicit none
+   BEGIN_DOC
+   ! TC-Fock matrix on the MO basis. WARNING !!! NON HERMITIAN !!!
+   ! For open shells, the ROHF Fock Matrix is ::
+   !
+   !       |   F-K    |  F + K/2  |    F     |
+   !       |---------------------------------|
+   !       | F + K/2  |     F     |  F - K/2 |
+   !       |---------------------------------|
+   !       |    F     |  F - K/2  |  F + K   |
+   !
+   !
+   ! F = 1/2 (Fa + Fb)
+   !
+   ! K = Fb - Fa
+   !
+   END_DOC
+   integer                        :: i,j,n
+   if (elec_alpha_num == elec_beta_num) then
+     Fock_matrix_tc_mo_tot = Fock_matrix_tc_mo_alpha
+   else
+
+     do j=1,elec_beta_num
+       ! F-K
+       do i=1,elec_beta_num !CC
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))&
+             - (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+       ! F+K/2
+       do i=elec_beta_num+1,elec_alpha_num  !CA
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))&
+             + 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+       ! F
+       do i=elec_alpha_num+1, mo_num !CV
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))
+       enddo
+     enddo
+
+     do j=elec_beta_num+1,elec_alpha_num
+       ! F+K/2
+       do i=1,elec_beta_num !AC
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))&
+             + 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+       ! F
+       do i=elec_beta_num+1,elec_alpha_num !AA
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))
+       enddo
+       ! F-K/2
+       do i=elec_alpha_num+1, mo_num !AV
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))&
+             - 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+     enddo
+
+     do j=elec_alpha_num+1, mo_num
+       ! F
+       do i=1,elec_beta_num !VC
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))
+       enddo
+       ! F-K/2
+       do i=elec_beta_num+1,elec_alpha_num !VA
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))&
+             - 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+       ! F+K
+       do i=elec_alpha_num+1,mo_num !VV
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) &
+             + (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+     enddo
+     if(three_body_h_tc)then
+      ! C-O
+      do j = 1, elec_beta_num
+       do i = elec_beta_num+1, elec_alpha_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! C-V
+      do j = 1, elec_beta_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! O-V
+      do j = elec_beta_num+1, elec_alpha_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+     endif
+
+   endif
+
+   do i = 1, mo_num
+     Fock_matrix_tc_diag_mo_tot(i) = Fock_matrix_tc_mo_tot(i,i)
+   enddo
+
+
+   if(frozen_orb_scf)then
+     integer                        :: iorb,jorb
+     do i = 1, n_core_orb
+      iorb = list_core(i)
+      do j = 1, n_act_orb
+       jorb = list_act(j)
+       Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0
+       Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0
+      enddo
+     enddo
+   endif
+
+   if(no_oa_or_av_opt)then
+     do i = 1, n_act_orb
+       iorb = list_act(i)
+       do j = 1, n_inact_orb
+         jorb = list_inact(j)
+         Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0
+         Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0
+       enddo
+       do j = 1, n_virt_orb
+         jorb = list_virt(j)
+         Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0
+         Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0
+       enddo
+       do j = 1, n_core_orb
+         jorb = list_core(j)
+         Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0
+         Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0                                                                                                                 
+       enddo
+     enddo
+   endif
+  if(.not.bi_ortho .and. three_body_h_tc)then
+   Fock_matrix_tc_mo_tot += fock_3_mat
+  endif
+
+END_PROVIDER
+
diff --git a/src/tc_scf/fock_three_bi_ortho.irp.f b/src/tc_scf/fock_three_bi_ortho.irp.f
new file mode 100644
index 00000000..7c776ce5
--- /dev/null
+++ b/src/tc_scf/fock_three_bi_ortho.irp.f
@@ -0,0 +1,296 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)]
+
+  BEGIN_DOC
+! Alpha part of the Fock matrix from three-electron terms 
+!
+! WARNING :: non hermitian if bi-ortho MOS used 
+  END_DOC
+  implicit none
+  integer :: i, a
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_a_tot_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      fock_a_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth  (a,i)
+      fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp1_bi_ortho(a,i)
+      fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp2_bi_ortho(a,i)
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)]
+
+  BEGIN_DOC
+! Beta part of the Fock matrix from three-electron terms 
+!
+! WARNING :: non hermitian if bi-ortho MOS used 
+  END_DOC
+  implicit none
+  integer :: i, a
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tot_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      fock_b_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth  (a,i)
+      fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp2_bi_ortho(a,i)
+      fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp1_bi_ortho(a,i)
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_cs_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+    
+      do j = 1, elec_beta_num
+        do k = 1, elec_beta_num
+
+          !!call contrib_3e_sss(a,i,j,k,contrib_sss)
+          !!call contrib_3e_soo(a,i,j,k,contrib_soo)
+          !!call contrib_3e_sos(a,i,j,k,contrib_sos)
+          !!contrib = 0.5d0 * (contrib_sss + contrib_soo) + contrib_sos
+ 
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+
+          ! negative terms :: exchange contrib
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+
+          new = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) -1.5d0 * exch_13_int - exch_23_int
+
+          fock_cs_3e_bi_orth(a,i) += new
+        enddo
+      enddo
+    enddo
+  enddo
+ 
+  fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_a_tmp1_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+    
+      do j = elec_beta_num + 1, elec_alpha_num 
+        do k = 1, elec_beta_num
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+          
+          fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss
+
+  PROVIDE mo_l_coef mo_r_coef
+ 
+  fock_a_tmp2_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = 1, elec_alpha_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call contrib_3e_sss(a, i, j, k, contrib_sss)
+
+          fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tmp1_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = 1, elec_beta_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+
+          fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int
+        enddo
+      enddo
+    enddo
+  enddo
+
+  fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_soo
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tmp2_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = elec_beta_num + 1, elec_alpha_num 
+        do k = 1, elec_alpha_num
+          call contrib_3e_soo(a, i, j, k, contrib_soo)
+
+          fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+subroutine contrib_3e_sss(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! returns the pure same spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+  call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+  integral = direct_int + c_3_int + c_minus_3_int 
+
+  ! negative terms :: exchange contrib
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+  call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+  integral += - exch_13_int - exch_23_int  - exch_12_int 
+
+  integral = -integral
+
+end
+
+! ---
+
+subroutine contrib_3e_soo(a,i,j,k,integral)
+
+  BEGIN_DOC
+  ! returns the same spin / opposite spin / opposite spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_23_int
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
+  integral = direct_int - exch_23_int 
+
+  integral = -integral
+
+end
+
+! ---
+
+subroutine contrib_3e_sos(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 
+  integral = direct_int - exch_13_int 
+
+  integral = -integral
+
+end
+
+! ---
+
diff --git a/src/tc_scf/fock_three_hermit.irp.f b/src/tc_scf/fock_three_hermit.irp.f
new file mode 100644
index 00000000..fe8fbfd7
--- /dev/null
+++ b/src/tc_scf/fock_three_hermit.irp.f
@@ -0,0 +1,369 @@
+BEGIN_PROVIDER [ double precision, fock_3_mat, (mo_num, mo_num)] 
+ implicit none
+  integer :: i,j
+  double precision :: contrib
+  fock_3_mat = 0.d0
+  if(.not.bi_ortho.and.three_body_h_tc)then
+   call give_fock_ia_three_e_total(1,1,contrib)
+!!  !$OMP PARALLEL                  &
+!!  !$OMP DEFAULT (NONE)            &
+!!  !$OMP PRIVATE (i,j,m,integral) & 
+!!  !$OMP SHARED (mo_num,three_body_3_index)
+!!  !$OMP DO SCHEDULE (guided) COLLAPSE(3)
+   do i = 1, mo_num
+    do j = 1, mo_num
+     call give_fock_ia_three_e_total(j,i,contrib)
+     fock_3_mat(j,i) = -contrib
+    enddo
+   enddo
+  else if(bi_ortho.and.three_body_h_tc)then
+!!  !$OMP END DO
+!!  !$OMP END PARALLEL
+!!  do i = 1, mo_num
+!!   do j = 1, i-1
+!!    mat_three(j,i) = mat_three(i,j)
+!!   enddo
+!!  enddo
+ endif
+
+END_PROVIDER 
+
+
+subroutine give_fock_ia_three_e_total(i,a,contrib)
+ implicit none
+ BEGIN_DOC
+! contrib is the TOTAL (same spins / opposite spins) contribution from the three body term to the Fock operator 
+!
+ END_DOC
+ integer, intent(in) :: i,a
+ double precision, intent(out) :: contrib
+ double precision :: int_1, int_2, int_3
+ double precision :: mos_i, mos_a, w_ia
+ double precision :: mos_ia, weight
+
+ integer :: mm, ipoint,k,l
+
+ int_1 = 0.d0
+ int_2 = 0.d0
+ int_3 = 0.d0
+ do mm = 1, 3
+  do ipoint = 1, n_points_final_grid
+   weight = final_weight_at_r_vector(ipoint)                                                                          
+   mos_i  = mos_in_r_array_transp(ipoint,i) 
+   mos_a  = mos_in_r_array_transp(ipoint,a) 
+   mos_ia = mos_a * mos_i
+   w_ia   = x_W_ij_erf_rk(ipoint,mm,i,a) 
+     
+   int_1  += weight * fock_3_w_kk_sum(ipoint,mm) * (4.d0 * fock_3_rho_beta(ipoint) * w_ia               & 
+                                                  + 2.0d0 * mos_ia * fock_3_w_kk_sum(ipoint,mm)         & 
+                                                  - 2.0d0 * fock_3_w_ki_mos_k(ipoint,mm,i) * mos_a      & 
+                                                  - 2.0d0 * fock_3_w_ki_mos_k(ipoint,mm,a) * mos_i      )
+   int_2  += weight * (-1.d0) * ( 2.0d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * w_ia                     & 
+                                + 2.0d0 * fock_3_rho_beta(ipoint) * fock_3_w_ki_wk_a(ipoint,mm,i,a)   & 
+                                + 1.0d0 * mos_ia * fock_3_trace_w_tilde(ipoint,mm)                    )
+
+   int_3  += weight *   1.d0  * (fock_3_w_kl_wla_phi_k(ipoint,mm,i) * mos_a + fock_3_w_kl_wla_phi_k(ipoint,mm,a) * mos_i & 
+                                +fock_3_w_ki_mos_k(ipoint,mm,i)     * fock_3_w_ki_mos_k(ipoint,mm,a)                     )
+  enddo
+ enddo
+ contrib = int_1 + int_2 + int_3
+
+end
+
+! ---
+
+BEGIN_PROVIDER [double precision, diag_three_elem_hf]
+
+  implicit none
+  integer          :: i, j, k, ipoint, mm
+  double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231
+  double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' providing diag_three_elem_hf'
+
+  if(.not. three_body_h_tc) then
+
+    diag_three_elem_hf = 0.d0
+
+  else
+
+    if(.not. bi_ortho) then
+
+      ! ---
+
+      one_third  = 1.d0/3.d0
+      two_third  = 2.d0/3.d0
+      four_third = 4.d0/3.d0
+      diag_three_elem_hf = 0.d0
+      do i = 1, elec_beta_num
+        do j = 1, elec_beta_num
+          do k = 1, elec_beta_num
+            call give_integrals_3_body(k, j, i, j, i, k,exchange_int_231)   
+            diag_three_elem_hf += two_third * exchange_int_231
+          enddo
+        enddo
+      enddo
+      do mm = 1, 3
+        do ipoint = 1, n_points_final_grid
+          weight  = final_weight_at_r_vector(ipoint)                                                                          
+          contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & 
+                  - 2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm)                     & 
+                  - 1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm)
+          contrib *= four_third
+          contrib += -two_third  * fock_3_rho_beta(ipoint)    * fock_3_w_kl_w_kl(ipoint,mm) & 
+                     -four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm)
+          diag_three_elem_hf += weight * contrib
+       enddo
+      enddo
+
+      diag_three_elem_hf = - diag_three_elem_hf
+
+      ! ---
+
+    else
+
+      provide mo_l_coef mo_r_coef
+      call give_aaa_contrib(integral_aaa)
+      call give_aab_contrib(integral_aab)
+      call give_abb_contrib(integral_abb)
+      call give_bbb_contrib(integral_bbb)
+      diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+!      print*,'integral_aaa + integral_aab + integral_abb + integral_bbb'
+!      print*,integral_aaa , integral_aab , integral_abb , integral_bbb
+
+    endif
+
+  endif
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh, (mo_num, mo_num)]
+ implicit none 
+ integer :: h,p,i,j
+ double precision :: direct_int, exch_int, exchange_int_231, exchange_int_312
+ double precision :: exchange_int_23, exchange_int_12, exchange_int_13 
+
+ fock_3_mat_a_op_sh = 0.d0
+ do h = 1, mo_num
+  do p = 1, mo_num
+   !F_a^{ab}(h,p) 
+   do i = 1, elec_beta_num ! beta 
+    do j = elec_beta_num+1, elec_alpha_num ! alpha
+     call  give_integrals_3_body(h,j,i,p,j,i,direct_int)    ! <hji|pji>
+     call  give_integrals_3_body(h,j,i,j,p,i,exch_int)   
+     fock_3_mat_a_op_sh(h,p) -= direct_int - exch_int
+    enddo
+   enddo
+   !F_a^{aa}(h,p)
+   do i = 1, elec_beta_num ! alpha 
+    do j = elec_beta_num+1, elec_alpha_num ! alpha
+       call  give_integrals_3_body(h,j,i,p,j,i,direct_int) 
+       call  give_integrals_3_body(h,j,i,i,p,j,exchange_int_231)
+       call  give_integrals_3_body(h,j,i,j,i,p,exchange_int_312) 
+       call  give_integrals_3_body(h,j,i,p,i,j,exchange_int_23) 
+       call  give_integrals_3_body(h,j,i,i,j,p,exchange_int_12)
+       call  give_integrals_3_body(h,j,i,j,p,i,exchange_int_13)  
+       fock_3_mat_a_op_sh(h,p) -= ( direct_int + exchange_int_231 + exchange_int_312 & 
+              -  exchange_int_23 & ! i <-> j
+              -  exchange_int_12 & ! p <-> j
+              -  exchange_int_13  )! p <-> i
+    enddo 
+   enddo
+  enddo
+ enddo
+! symmetrized 
+! do p = 1, elec_beta_num
+!  do h = elec_alpha_num +1, mo_num
+!   fock_3_mat_a_op_sh(h,p) = fock_3_mat_a_op_sh(p,h)
+!  enddo
+! enddo
+ 
+! do h = elec_beta_num+1, elec_alpha_num
+!  do p = elec_alpha_num +1, mo_num
+!   !F_a^{bb}(h,p) 
+!   do i = 1, elec_beta_num
+!    do j = i+1, elec_beta_num
+!     call  give_integrals_3_body(h,j,i,p,j,i,direct_int)   
+!     call  give_integrals_3_body(h,j,i,p,i,j,exch_int)   
+!     fock_3_mat_a_op_sh(h,p) -= direct_int - exch_int
+!    enddo
+!   enddo
+!  enddo
+! enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_3_mat_b_op_sh, (mo_num, mo_num)]
+ implicit none 
+ integer :: h,p,i,j
+ double precision :: direct_int, exch_int
+ fock_3_mat_b_op_sh = 0.d0
+ do h = 1, elec_beta_num
+  do p = elec_alpha_num +1, mo_num
+   !F_b^{aa}(h,p) 
+   do i = 1, elec_beta_num
+    do j = elec_beta_num+1, elec_alpha_num
+     call  give_integrals_3_body(h,j,i,p,j,i,direct_int)   
+     call  give_integrals_3_body(h,j,i,p,i,j,exch_int)   
+     fock_3_mat_b_op_sh(h,p) += direct_int - exch_int
+    enddo
+   enddo
+
+   !F_b^{ab}(h,p) 
+   do i = elec_beta_num+1, elec_beta_num
+    do j = 1, elec_beta_num
+     call  give_integrals_3_body(h,j,i,p,j,i,direct_int)   
+     call  give_integrals_3_body(h,j,i,j,p,i,exch_int)   
+     fock_3_mat_b_op_sh(h,p) += direct_int - exch_int
+    enddo
+   enddo
+ 
+  enddo
+ enddo
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, fock_3_w_kk_sum, (n_points_final_grid,3)]
+ implicit none
+ integer :: mm, ipoint,k
+ double precision :: w_kk
+ fock_3_w_kk_sum = 0.d0
+ do k = 1, elec_beta_num
+  do mm = 1, 3
+   do ipoint = 1, n_points_final_grid
+    w_kk   = x_W_ij_erf_rk(ipoint,mm,k,k) 
+    fock_3_w_kk_sum(ipoint,mm) += w_kk
+   enddo
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_3_w_ki_mos_k, (n_points_final_grid,3,mo_num)]
+ implicit none
+ integer :: mm, ipoint,k,i
+ double precision :: w_ki, mo_k
+ fock_3_w_ki_mos_k = 0.d0
+ do i = 1, mo_num
+  do k = 1, elec_beta_num
+   do mm = 1, 3
+    do ipoint = 1, n_points_final_grid
+     w_ki   = x_W_ij_erf_rk(ipoint,mm,k,i) 
+     mo_k = mos_in_r_array(k,ipoint)
+     fock_3_w_ki_mos_k(ipoint,mm,i) += w_ki * mo_k
+    enddo
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_3_w_kl_w_kl, (n_points_final_grid,3)]
+ implicit none
+ integer :: k,j,ipoint,mm
+ double precision :: w_kj
+ fock_3_w_kl_w_kl = 0.d0
+ do j = 1, elec_beta_num
+  do k = 1, elec_beta_num
+   do mm = 1, 3
+    do ipoint = 1, n_points_final_grid
+     w_kj   = x_W_ij_erf_rk(ipoint,mm,k,j) 
+     fock_3_w_kl_w_kl(ipoint,mm) += w_kj * w_kj
+    enddo
+   enddo
+  enddo
+ enddo
+
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_3_rho_beta, (n_points_final_grid)]
+ implicit none
+ integer :: ipoint,k
+ fock_3_rho_beta = 0.d0
+ do ipoint = 1, n_points_final_grid
+  do k = 1, elec_beta_num
+   fock_3_rho_beta(ipoint) += mos_in_r_array(k,ipoint) * mos_in_r_array(k,ipoint)
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_3_w_kl_mo_k_mo_l, (n_points_final_grid,3)]
+ implicit none
+ integer :: ipoint,k,l,mm
+ double precision :: mos_k, mos_l, w_kl
+ fock_3_w_kl_mo_k_mo_l = 0.d0
+ do k = 1, elec_beta_num
+  do l = 1, elec_beta_num
+   do mm = 1, 3
+    do ipoint = 1, n_points_final_grid
+     mos_k  = mos_in_r_array_transp(ipoint,k) 
+     mos_l  = mos_in_r_array_transp(ipoint,l) 
+     w_kl   = x_W_ij_erf_rk(ipoint,mm,l,k)
+     fock_3_w_kl_mo_k_mo_l(ipoint,mm) += w_kl * mos_k * mos_l 
+    enddo
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_3_w_ki_wk_a, (n_points_final_grid,3,mo_num, mo_num)]
+ implicit none
+ integer :: ipoint,i,a,k,mm
+ double precision :: w_ki,w_ka
+ fock_3_w_ki_wk_a = 0.d0
+ do i = 1, mo_num
+  do a = 1, mo_num
+   do mm = 1, 3
+    do ipoint = 1, n_points_final_grid
+     do k = 1, elec_beta_num
+      w_ki   = x_W_ij_erf_rk(ipoint,mm,k,i)
+      w_ka   = x_W_ij_erf_rk(ipoint,mm,k,a)
+      fock_3_w_ki_wk_a(ipoint,mm,a,i) += w_ki * w_ka
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_3_trace_w_tilde, (n_points_final_grid,3)]
+ implicit none
+ integer :: ipoint,k,mm
+ fock_3_trace_w_tilde = 0.d0
+ do k = 1, elec_beta_num
+   do mm = 1, 3
+    do ipoint = 1, n_points_final_grid
+     fock_3_trace_w_tilde(ipoint,mm) += fock_3_w_ki_wk_a(ipoint,mm,k,k)
+    enddo
+   enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_3_w_kl_wla_phi_k, (n_points_final_grid,3,mo_num)]
+ implicit none
+ integer :: ipoint,a,k,mm,l
+ double precision :: w_kl,w_la, mo_k
+ fock_3_w_kl_wla_phi_k = 0.d0
+ do a = 1, mo_num
+  do k = 1, elec_beta_num 
+   do l = 1, elec_beta_num
+    do mm = 1, 3
+     do ipoint = 1, n_points_final_grid
+      w_kl   = x_W_ij_erf_rk(ipoint,mm,l,k)
+      w_la   = x_W_ij_erf_rk(ipoint,mm,l,a)
+      mo_k  = mos_in_r_array_transp(ipoint,k) 
+      fock_3_w_kl_wla_phi_k(ipoint,mm,a) += w_kl * w_la * mo_k
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER 
+
diff --git a/src/tc_scf/fock_vartc.irp.f b/src/tc_scf/fock_vartc.irp.f
new file mode 100644
index 00000000..03899b07
--- /dev/null
+++ b/src/tc_scf/fock_vartc.irp.f
@@ -0,0 +1,287 @@
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, two_e_vartc_integral_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_vartc_integral_beta , (ao_num, ao_num)]
+
+  implicit none
+  integer                       :: i, j, k, l
+  double precision              :: density, density_a, density_b, I_coul, I_kjli
+  double precision              :: t0, t1
+  double precision, allocatable :: tmp_a(:,:), tmp_b(:,:)
+
+  two_e_vartc_integral_alpha = 0.d0
+  two_e_vartc_integral_beta  = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                           &
+ !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli)                 &
+ !$OMP SHARED  (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_vartc_tot, &
+ !$OMP         two_e_vartc_integral_alpha, two_e_vartc_integral_beta)
+
+  allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num))
+  tmp_a = 0.d0
+  tmp_b = 0.d0
+
+ !$OMP DO
+  do j = 1, ao_num
+    do l = 1, ao_num
+      density_a = TCSCF_density_matrix_ao_alpha(l,j)
+      density_b = TCSCF_density_matrix_ao_beta (l,j)
+      density   = density_a + density_b                      
+      do i = 1, ao_num
+        do k = 1, ao_num
+
+          I_coul = density * ao_two_e_vartc_tot(k,i,l,j)
+          I_kjli = ao_two_e_vartc_tot(k,j,l,i)
+
+          tmp_a(k,i) += I_coul - density_a * I_kjli
+          tmp_b(k,i) += I_coul - density_b * I_kjli
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do i = 1, ao_num
+    do j = 1, ao_num
+      two_e_vartc_integral_alpha(j,i) += tmp_a(j,i)
+      two_e_vartc_integral_beta (j,i) += tmp_b(j,i)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(tmp_a, tmp_b)
+ !$OMP END PARALLEL
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_ao_alpha, (ao_num, ao_num)]
+
+  implicit none
+
+  Fock_matrix_vartc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_vartc_integral_alpha 
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_ao_beta, (ao_num, ao_num)]
+
+  implicit none
+
+  Fock_matrix_vartc_ao_beta = ao_one_e_integrals_tc_tot + two_e_vartc_integral_beta 
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_mo_alpha, (mo_num, mo_num) ]
+
+  implicit none
+
+  call ao_to_mo_bi_ortho( Fock_matrix_vartc_ao_alpha, size(Fock_matrix_vartc_ao_alpha, 1) &
+                        , Fock_matrix_vartc_mo_alpha, size(Fock_matrix_vartc_mo_alpha, 1) )
+  if(three_body_h_tc) then
+    Fock_matrix_vartc_mo_alpha += fock_3e_uhf_mo_a
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_mo_beta, (mo_num,mo_num) ]
+
+  implicit none
+
+  call ao_to_mo_bi_ortho( Fock_matrix_vartc_ao_beta, size(Fock_matrix_vartc_ao_beta, 1) &
+                        , Fock_matrix_vartc_mo_beta, size(Fock_matrix_vartc_mo_beta, 1) )
+  if(three_body_h_tc) then
+    Fock_matrix_vartc_mo_beta += fock_3e_uhf_mo_b
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, grad_vartc]
+
+  implicit none
+  integer          :: i, k
+  double precision :: grad_left, grad_right
+
+  grad_left  = 0.d0
+  grad_right = 0.d0
+
+  do i = 1, elec_beta_num ! doc --> SOMO
+    do k = elec_beta_num+1, elec_alpha_num
+      grad_left  = max(grad_left , dabs(Fock_matrix_vartc_mo_tot(k,i)))
+      grad_right = max(grad_right, dabs(Fock_matrix_vartc_mo_tot(i,k)))
+    enddo
+  enddo
+
+  do i = 1, elec_beta_num ! doc --> virt 
+    do k = elec_alpha_num+1, mo_num
+      grad_left  = max(grad_left , dabs(Fock_matrix_vartc_mo_tot(k,i)))
+      grad_right = max(grad_right, dabs(Fock_matrix_vartc_mo_tot(i,k)))
+    enddo
+  enddo
+
+  do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt 
+    do k = elec_alpha_num+1, mo_num
+      grad_left  = max(grad_left , dabs(Fock_matrix_vartc_mo_tot(k,i)))
+      grad_right = max(grad_right, dabs(Fock_matrix_vartc_mo_tot(i,k)))
+    enddo
+  enddo
+
+  grad_vartc = grad_left + grad_right
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_ao_tot, (ao_num, ao_num) ]
+
+  implicit none
+
+  call mo_to_ao_bi_ortho( Fock_matrix_vartc_mo_tot, size(Fock_matrix_vartc_mo_tot, 1) &
+                        , Fock_matrix_vartc_ao_tot, size(Fock_matrix_vartc_ao_tot, 1) )
+
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_mo_tot, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_diag_mo_tot, (mo_num)]
+
+  implicit none
+  integer :: i, j, n
+
+  if(elec_alpha_num == elec_beta_num) then
+    Fock_matrix_vartc_mo_tot = Fock_matrix_vartc_mo_alpha
+  else
+
+    do j = 1, elec_beta_num
+      ! F-K
+      do i = 1, elec_beta_num !CC
+         Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))&
+            - (Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j))
+      enddo
+      ! F+K/2
+      do i = elec_beta_num+1, elec_alpha_num  !CA
+         Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))&
+             + 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j))
+      enddo
+      ! F
+      do i = elec_alpha_num+1, mo_num !CV
+         Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))
+      enddo
+    enddo
+
+    do j = elec_beta_num+1, elec_alpha_num
+      ! F+K/2
+      do i = 1, elec_beta_num !AC
+         Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))&
+             + 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j))
+      enddo
+      ! F
+      do i = elec_beta_num+1, elec_alpha_num !AA
+         Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))
+      enddo
+      ! F-K/2
+      do i = elec_alpha_num+1, mo_num !AV
+         Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))&
+             - 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j))
+      enddo
+    enddo
+    
+    do j = elec_alpha_num+1, mo_num
+      ! F
+      do i = 1, elec_beta_num !VC
+         Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))
+      enddo
+      ! F-K/2
+      do i = elec_beta_num+1, elec_alpha_num !VA
+         Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))&
+             - 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j))
+      enddo
+      ! F+K
+      do i = elec_alpha_num+1, mo_num !VV
+         Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j)) &
+             + (Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j))
+      enddo
+    enddo
+    if(three_body_h_tc)then
+      ! C-O
+      do j = 1, elec_beta_num
+        do i = elec_beta_num+1, elec_alpha_num
+          Fock_matrix_vartc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+          Fock_matrix_vartc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+        enddo
+      enddo
+      ! C-V
+      do j = 1, elec_beta_num
+        do i = elec_alpha_num+1, mo_num
+          Fock_matrix_vartc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+          Fock_matrix_vartc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+        enddo
+      enddo
+      ! O-V
+      do j = elec_beta_num+1, elec_alpha_num
+        do i = elec_alpha_num+1, mo_num
+          Fock_matrix_vartc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+          Fock_matrix_vartc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+        enddo
+      enddo
+    endif
+
+  endif
+
+  do i = 1, mo_num
+    Fock_matrix_vartc_diag_mo_tot(i) = Fock_matrix_vartc_mo_tot(i,i)
+  enddo
+
+  if(frozen_orb_scf)then
+    integer :: iorb, jorb
+    do i = 1, n_core_orb
+     iorb = list_core(i)
+     do j = 1, n_act_orb
+      jorb = list_act(j)
+      Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0
+      Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0
+     enddo
+    enddo
+  endif
+
+  if(no_oa_or_av_opt)then
+    do i = 1, n_act_orb
+      iorb = list_act(i)
+      do j = 1, n_inact_orb
+        jorb = list_inact(j)
+        Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0
+        Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0
+      enddo
+      do j = 1, n_virt_orb
+        jorb = list_virt(j)
+        Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0
+        Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0
+      enddo
+      do j = 1, n_core_orb
+        jorb = list_core(j)
+        Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0
+        Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0                                                                                                                 
+      enddo
+    enddo
+  endif
+
+  !call check_sym(Fock_matrix_vartc_mo_tot, mo_num)
+  !do i = 1, mo_num
+  !  write(*,'(100(F15.8, I4))') Fock_matrix_vartc_mo_tot(i,:)
+  !enddo
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/tc_scf/integrals_in_r_stuff.irp.f b/src/tc_scf/integrals_in_r_stuff.irp.f
new file mode 100644
index 00000000..3ce85a97
--- /dev/null
+++ b/src/tc_scf/integrals_in_r_stuff.irp.f
@@ -0,0 +1,391 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, tc_scf_dm_in_r, (n_points_final_grid) ]
+
+  implicit none
+  integer :: i, j
+
+  tc_scf_dm_in_r = 0.d0
+  do i = 1, n_points_final_grid
+    do j = 1, elec_beta_num
+      tc_scf_dm_in_r(i) += mos_r_in_r_array(j,i) * mos_l_in_r_array(j,i)
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, w_sum_in_r, (n_points_final_grid, 3)]
+
+  implicit none
+  integer :: ipoint, j, xi
+
+  w_sum_in_r = 0.d0
+  do j = 1, elec_beta_num
+    do xi = 1, 3
+      do ipoint = 1, n_points_final_grid
+        !w_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,j)
+        w_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j)
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, ww_sum_in_r, (n_points_final_grid, 3)]
+
+  implicit none
+  integer          :: ipoint, j, xi
+  double precision :: tmp
+
+  ww_sum_in_r = 0.d0
+  do j = 1, elec_beta_num
+    do xi = 1, 3
+      do ipoint = 1, n_points_final_grid
+        tmp = x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j)
+        ww_sum_in_r(ipoint,xi) += tmp * tmp
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_r_in_r, (n_points_final_grid, 3, mo_num)]
+
+  implicit none
+  integer :: i, j, xi, ipoint
+
+  ! TODO: call lapack
+
+  W1_r_in_r = 0.d0
+  do i = 1, mo_num
+    do j = 1, elec_beta_num
+      do xi = 1, 3
+        do ipoint = 1, n_points_final_grid
+          W1_r_in_r(ipoint,xi,i) += mos_r_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_l_in_r, (n_points_final_grid, 3, mo_num)]
+
+  implicit none
+  integer :: i, j, xi, ipoint
+
+  ! TODO: call lapack
+
+  W1_l_in_r = 0.d0
+  do i = 1, mo_num
+    do j = 1, elec_beta_num
+      do xi = 1, 3
+        do ipoint = 1, n_points_final_grid
+          W1_l_in_r(ipoint,xi,i) += mos_l_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_in_r, (n_points_final_grid, 3)]
+
+  implicit none
+  integer :: j, xi, ipoint
+
+  ! TODO: call lapack
+
+  W1_in_r = 0.d0
+  do j = 1, elec_beta_num
+    do xi = 1, 3
+      do ipoint = 1, n_points_final_grid
+        W1_in_r(ipoint,xi) += W1_l_in_r(ipoint,xi,j) * mos_r_in_r_array_transp(ipoint,j)
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_diag_in_r, (n_points_final_grid, 3)]
+
+  implicit none
+  integer :: j, xi, ipoint
+
+  ! TODO: call lapack
+
+  W1_diag_in_r = 0.d0
+  do j = 1, elec_beta_num
+    do xi = 1, 3
+      do ipoint = 1, n_points_final_grid
+        W1_diag_in_r(ipoint,xi) += mos_r_in_r_array_transp(ipoint,j) * mos_l_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j)
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_sum_in_r, (n_points_final_grid, 3)]
+
+  implicit none
+  integer :: i, j, xi, ipoint
+
+  ! TODO: call lapack
+  v_sum_in_r = 0.d0
+  do i = 1, elec_beta_num
+    do j = 1, elec_beta_num
+      do xi = 1, 3
+        do ipoint = 1, n_points_final_grid
+          v_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_W1_r_in_r, (n_points_final_grid, 3, mo_num)]
+
+  implicit none
+  integer :: i, m, xi, ipoint
+
+  ! TODO: call lapack
+
+  W1_W1_r_in_r = 0.d0
+  do i = 1, mo_num
+    do m = 1, elec_beta_num
+      do xi = 1, 3
+        do ipoint = 1, n_points_final_grid
+          W1_W1_r_in_r(ipoint,xi,i) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,m,i) * W1_r_in_r(ipoint,xi,m)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_W1_l_in_r, (n_points_final_grid, 3, mo_num)]
+
+  implicit none
+  integer :: i, j, xi, ipoint
+
+  ! TODO: call lapack
+
+  W1_W1_l_in_r = 0.d0
+  do i = 1, mo_num
+    do j = 1, elec_beta_num
+      do xi = 1, 3
+        do ipoint = 1, n_points_final_grid
+          W1_W1_l_in_r(ipoint,xi,i) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) * W1_l_in_r(ipoint,xi,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+subroutine direct_term_imj_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j,m = 1, elec_beta_num) < a m j | i m j > with bi ortho mos
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+
+  integer                       :: ipoint, xi
+  double precision              :: weight, tmp
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+      !integral += ( mos_l_in_r_array(a,ipoint) * mos_r_in_r_array(i,ipoint) * w_sum_in_r(ipoint,xi) * w_sum_in_r(ipoint,xi) & 
+      !            + 2.d0 * tc_scf_dm_in_r(ipoint) * w_sum_in_r(ipoint,xi) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) ) * weight
+
+      tmp = w_sum_in_r(ipoint,xi)
+
+      integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * tmp * tmp & 
+                  + 2.d0 * tc_scf_dm_in_r(ipoint) * tmp * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i)       &
+                  ) * weight
+    enddo
+  enddo
+
+end 
+
+! ---
+
+subroutine exch_term_jmi_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j,m = 1, elec_beta_num) < a m j | j m i > with bi ortho mos
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+
+  integer                       :: ipoint, xi, j
+  double precision              :: weight, tmp
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+        
+      tmp = 0.d0
+      do j = 1, elec_beta_num
+        tmp = tmp + x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i) 
+      enddo
+
+      integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_r_in_r(ipoint,xi,i) * w_sum_in_r(ipoint,xi) & 
+                  + tc_scf_dm_in_r(ipoint) * tmp                                                       &
+                  + mos_r_in_r_array_transp(ipoint,i) * W1_l_in_r(ipoint,xi,a) * w_sum_in_r(ipoint,xi) & 
+                  ) * weight
+
+    enddo
+  enddo
+
+end
+
+! ---
+
+subroutine exch_term_ijm_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j,m = 1, elec_beta_num) < a m j | i j m > with bi ortho mos
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+
+  integer                       :: ipoint, xi
+  double precision              :: weight
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+        
+      integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * v_sum_in_r(ipoint,xi) & 
+                  + 2.d0 * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) * W1_in_r(ipoint,xi)                             &
+                  ) * weight
+
+    enddo
+  enddo
+
+end
+
+! ---
+
+subroutine direct_term_ijj_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j = 1, elec_beta_num) < a j j | i j j > with bi ortho mos
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+
+  integer                       :: ipoint, xi
+  double precision              :: weight
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+
+      integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * ww_sum_in_r(ipoint,xi) & 
+                  + 2.d0 * W1_diag_in_r(ipoint, xi) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i)                        &
+                  ) * weight
+    enddo
+  enddo
+
+end 
+
+! ---
+
+subroutine cyclic_term_jim_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j,m = 1, elec_beta_num) < a m j | j i m > with bi ortho mos
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+
+  integer                       :: ipoint, xi
+  double precision              :: weight
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+        
+      integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_W1_r_in_r(ipoint,xi,i)  & 
+                  + W1_W1_l_in_r(ipoint,xi,a) * mos_r_in_r_array_transp(ipoint,i)  &
+                  + W1_l_in_r(ipoint,xi,a) * W1_r_in_r(ipoint,xi,i)                &
+                  ) * weight
+
+    enddo
+  enddo
+
+end
+
+! ---
+
+subroutine cyclic_term_mji_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j,m = 1, elec_beta_num) < a m j | m j i > with bi ortho mos
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+
+  integer                       :: ipoint, xi
+  double precision              :: weight
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+        
+      integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_W1_r_in_r(ipoint,xi,i)  & 
+                  + W1_l_in_r(ipoint,xi,a) * W1_r_in_r(ipoint,xi,i)                &
+                  + W1_W1_l_in_r(ipoint,xi,a) * mos_r_in_r_array_transp(ipoint,i)  &
+                  ) * weight
+
+    enddo
+  enddo
+
+end
+
+! ---
+
diff --git a/src/tc_scf/minimize_tc_angles.irp.f b/src/tc_scf/minimize_tc_angles.irp.f
new file mode 100644
index 00000000..1363e62b
--- /dev/null
+++ b/src/tc_scf/minimize_tc_angles.irp.f
@@ -0,0 +1,13 @@
+program print_angles
+ implicit none
+ BEGIN_DOC
+! program that minimizes the angle between left- and right-orbitals when degeneracies are found in the TC-Fock matrix
+ END_DOC
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  touch my_n_pt_r_grid my_n_pt_a_grid
+!  call sort_by_tc_fock
+  call minimize_tc_orb_angles
+end
+
diff --git a/src/tc_scf/molden_lr_mos.irp.f b/src/tc_scf/molden_lr_mos.irp.f
new file mode 100644
index 00000000..735349ba
--- /dev/null
+++ b/src/tc_scf/molden_lr_mos.irp.f
@@ -0,0 +1,176 @@
+program molden
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  print *, 'starting ...'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  call molden_lr
+end
+subroutine molden_lr
+  implicit none
+  BEGIN_DOC
+  ! Produces a Molden file
+  END_DOC
+  character*(128)                :: output
+  integer                        :: i_unit_output,getUnitAndOpen
+  integer                        :: i,j,k,l
+  double precision, parameter :: a0 = 0.529177249d0
+
+  PROVIDE ezfio_filename
+
+  output=trim(ezfio_filename)//'.mol'
+  print*,'output = ',trim(output)
+
+  i_unit_output = getUnitAndOpen(output,'w')
+
+  write(i_unit_output,'(A)') '[Molden Format]'
+
+  write(i_unit_output,'(A)') '[Atoms] Angs'
+  do i = 1, nucl_num
+    write(i_unit_output,'(A2,2X,I4,2X,I4,3(2X,F15.10))')                   &
+        trim(element_name(int(nucl_charge(i)))),                     &
+        i,                                                           &
+        int(nucl_charge(i)),                                         &
+        nucl_coord(i,1)*a0, nucl_coord(i,2)*a0, nucl_coord(i,3)*a0
+  enddo
+
+  write(i_unit_output,'(A)') '[GTO]'
+
+  character*(1)                  :: character_shell
+  integer                        :: i_shell,i_prim,i_ao
+  integer                        :: iorder(ao_num)
+  integer                        :: nsort(ao_num)
+
+  i_shell = 0
+  i_prim = 0
+  do i=1,nucl_num
+    write(i_unit_output,*) i, 0
+    do j=1,nucl_num_shell_aos(i)
+      i_shell +=1
+      i_ao = nucl_list_shell_aos(i,j)
+      character_shell = trim(ao_l_char(i_ao))
+      write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00'
+      do k = 1, ao_prim_num(i_ao)
+        i_prim +=1
+        write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k)
+      enddo
+      l = i_ao
+      do while ( ao_l(l) == ao_l(i_ao) )
+        nsort(l) = i*10000 + j*100
+        l += 1
+        if (l > ao_num) exit
+      enddo
+    enddo
+    write(i_unit_output,*)''
+  enddo
+
+
+  do i=1,ao_num
+    iorder(i) = i
+    ! p
+    if      ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 1
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 2
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 3
+    ! d
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 1
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 2
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 3
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 4
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 5
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 6
+    ! f
+    else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 1
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 2
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 3 )) then
+      nsort(i) += 3
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 4
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 5
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 6
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 7
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 8
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 9
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 10
+    ! g
+    else if ((ao_power(i,1) == 4 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 1
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 4 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 2
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 4 )) then
+      nsort(i) += 3
+    else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 4
+    else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 5
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 6
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 7
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 3 )) then
+      nsort(i) += 8
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 3 )) then
+      nsort(i) += 9
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 10
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 11
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 12
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 13
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 14
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 15
+    endif
+  enddo
+
+  call isort(nsort,iorder,ao_num)
+  write(i_unit_output,'(A)') '[MO]'
+  do i=1,mo_num
+    write (i_unit_output,*) 'Sym= 1'
+    write (i_unit_output,*) 'Ene=', Fock_matrix_tc_mo_tot(i,i)
+    write (i_unit_output,*) 'Spin= Alpha'
+    write (i_unit_output,*) 'Occup=', mo_occ(i)
+    do j=1,ao_num
+      write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i)
+    enddo
+
+    write (i_unit_output,*) 'Sym= 1'
+    write (i_unit_output,*) 'Ene=', Fock_matrix_tc_mo_tot(i,i)
+    write (i_unit_output,*) 'Spin= Alpha'
+    write (i_unit_output,*) 'Occup=', mo_occ(i)
+    do j=1,ao_num
+      write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i)
+    enddo
+  enddo
+  close(i_unit_output)
+end
+
diff --git a/src/tc_scf/print_fit_param.irp.f b/src/tc_scf/print_fit_param.irp.f
new file mode 100644
index 00000000..f8bcfa7f
--- /dev/null
+++ b/src/tc_scf/print_fit_param.irp.f
@@ -0,0 +1,60 @@
+program print_fit_param
+
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  !call create_guess
+  !call orthonormalize_mos
+
+  call main()
+
+end
+
+! ---
+
+subroutine main()
+
+  implicit none
+  integer :: i
+
+  mu_erf = 1.d0
+  touch mu_erf
+
+  print *, ' fit for (1 - erf(x))^2'
+  do i = 1, n_max_fit_slat
+    print*, expo_gauss_1_erf_x_2(i), coef_gauss_1_erf_x_2(i)
+  enddo
+
+  print *, ''
+  print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]'
+  do i = 1, n_max_fit_slat
+    print *, expo_gauss_j_mu_x(i), 2.d0 * coef_gauss_j_mu_x(i)
+  enddo
+
+  print *, ''
+  print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]^2'
+  do i = 1, n_max_fit_slat
+    print *, expo_gauss_j_mu_x_2(i), 4.d0 * coef_gauss_j_mu_x_2(i)
+  enddo
+
+  print *, ''
+  print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)] x [1 - erf(mu * r12)]'
+  do i = 1, n_max_fit_slat
+    print *, expo_gauss_j_mu_1_erf(i), 4.d0 * coef_gauss_j_mu_1_erf(i)
+  enddo
+
+  return
+end subroutine main
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f
new file mode 100644
index 00000000..645742c8
--- /dev/null
+++ b/src/tc_scf/rh_tcscf_diis.irp.f
@@ -0,0 +1,374 @@
+! ---
+
+subroutine rh_tcscf_diis()
+
+  implicit none
+
+  integer                       :: i, j, it
+  integer                       :: dim_DIIS, index_dim_DIIS
+  double precision              :: etc_tot, etc_1e, etc_2e, etc_3e, e_save, e_delta
+  double precision              :: tc_grad, g_save, g_delta, g_delta_th
+  double precision              :: level_shift_save, rate_th
+  double precision              :: t0, t1
+  double precision              :: er_DIIS, er_delta, er_save, er_delta_th
+  double precision, allocatable :: F_DIIS(:,:,:), E_DIIS(:,:,:)
+  double precision, allocatable :: mo_r_coef_save(:,:), mo_l_coef_save(:,:)
+
+  logical, external             :: qp_stop
+
+  it          = 0
+  e_save      = 0.d0
+  dim_DIIS    = 0
+  g_delta_th  = 1d0
+  er_delta_th = 1d0
+  rate_th     = 0.1d0 
+
+  allocate(mo_r_coef_save(ao_num,mo_num), mo_l_coef_save(ao_num,mo_num))
+  mo_l_coef_save = 0.d0
+  mo_r_coef_save = 0.d0
+
+  allocate(F_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF), E_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF))
+  F_DIIS = 0.d0
+  E_DIIS = 0.d0
+
+  call write_time(6)
+
+  ! ---
+
+  PROVIDE level_shift_TCSCF
+  PROVIDE mo_l_coef mo_r_coef
+
+  !write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+  !  '====', '================', '================', '================', '================', '================' &
+  !        , '================', '================', '================', '====', '========'
+  !write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+  !  ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+  !        , '    gradient    ', '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+  !write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+  !  '====', '================', '================', '================', '================', '================' &
+  !        , '================', '================', '================', '====', '========'
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')                      &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '====', '========'
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')                      &
+    ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+          , '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')                      &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '====', '========'
+
+
+  ! first iteration (HF orbitals)
+  call wall_time(t0)
+
+  etc_tot = TC_HF_energy
+  etc_1e  = TC_HF_one_e_energy
+  etc_2e  = TC_HF_two_e_energy
+  etc_3e  = 0.d0
+  if(three_body_h_tc) then
+    etc_3e = diag_three_elem_hf
+  endif
+  !tc_grad = grad_non_hermit
+  er_DIIS = maxval(abs(FQS_SQF_mo))
+  e_delta = dabs(etc_tot - e_save)
+
+  e_save  = etc_tot
+  !g_save  = tc_grad
+  er_save = er_DIIS
+
+  call wall_time(t1)
+  !write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+  !  it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+  write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+    it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+  ! ---
+
+  PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot
+
+  !do while((tc_grad .gt. dsqrt(thresh_tcscf)) .and. (er_DIIS .gt. dsqrt(thresh_tcscf)))
+  do while(er_DIIS .gt. dsqrt(thresh_tcscf))
+
+    call wall_time(t0)
+
+    it += 1
+    if(it > n_it_TCSCF_max) then
+      print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+      stop
+    endif
+
+    dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF)
+
+    ! ---
+
+    if(dabs(e_delta) > 1.d-12) then
+
+      index_dim_DIIS = mod(dim_DIIS-1, max_dim_DIIS_TCSCF) + 1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          F_DIIS(i,j,index_dim_DIIS) = Fock_matrix_tc_ao_tot(i,j)
+          E_DIIS(i,j,index_dim_DIIS) = FQS_SQF_ao           (i,j)
+        enddo
+      enddo
+
+      call extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), it, dim_DIIS)
+
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                            , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) )
+      TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot
+    endif
+
+    ! ---
+
+    mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+    mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+    !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+    !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    TOUCH mo_l_coef mo_r_coef
+
+    ! ---
+
+    !g_delta  = grad_non_hermit         -  g_save
+    er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+    if((er_delta > rate_th * er_save) .and. (it > 1)) then
+
+      Fock_matrix_tc_ao_tot(1:ao_num,1:ao_num) = F_DIIS(1:ao_num,1:ao_num,index_dim_DIIS)
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                            , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) )
+      TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+    endif
+
+    ! ---
+
+    !g_delta  = grad_non_hermit         -  g_save
+    er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+    mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num)
+    mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num)
+
+    do while((er_delta > rate_th * er_save) .and. (it > 1))
+      print *, ' big or bad step '
+      !print *, g_delta , rate_th *  g_save
+      print *, er_delta, rate_th * er_save
+
+      mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num) 
+      mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num) 
+      if(level_shift_TCSCF <= .1d0) then
+        level_shift_TCSCF = 1.d0
+      else
+        level_shift_TCSCF = level_shift_TCSCF * 3.0d0
+      endif
+      TOUCH mo_l_coef mo_r_coef level_shift_TCSCF
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+      !g_delta  = grad_non_hermit         -  g_save
+      er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+      if(level_shift_TCSCF - level_shift_save > 40.d0) then
+        level_shift_TCSCF = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift_TCSCF
+        exit
+      endif
+
+      dim_DIIS = 0
+    enddo
+
+    ! ---
+
+    level_shift_TCSCF = level_shift_TCSCF * 0.5d0
+    SOFT_TOUCH level_shift_TCSCF
+
+    etc_tot = TC_HF_energy
+    etc_1e  = TC_HF_one_e_energy
+    etc_2e  = TC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    !tc_grad  = grad_non_hermit
+    er_DIIS  = maxval(abs(FQS_SQF_mo))
+    e_delta  = dabs(etc_tot - e_save)
+    !g_delta  = tc_grad - g_save
+    er_delta = er_DIIS - er_save
+    
+    e_save           = etc_tot
+    !g_save           = tc_grad
+    level_shift_save = level_shift_TCSCF
+    er_save          = er_DIIS
+
+    !g_delta_th  = dabs(tc_grad) ! g_delta)
+    er_delta_th = dabs(er_DIIS) !er_delta)
+
+    call wall_time(t1)
+    !write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+    !  it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+    if(er_delta .lt. 0.d0) then
+      call ezfio_set_tc_scf_bitc_energy(etc_tot)
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    endif
+
+    if(qp_stop()) exit
+  enddo
+
+  ! ---
+
+  print *, ' TCSCF DIIS converged !'
+  call print_energy_and_mos()
+
+  call write_time(6)
+
+  deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS)
+
+  call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+
+end
+
+! ---
+
+subroutine extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, F_ao, size_F_ao, it, dim_DIIS)
+
+  BEGIN_DOC
+  !
+  ! Compute the extrapolated Fock matrix using the DIIS procedure
+  !
+  ! e = \sum_i c_i e_i and \sum_i c_i = 1 
+  ! ==> lagrange multiplier with L = |e|^2 - \lambda (\sum_i c_i = 1)
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)    :: it, size_F_ao
+  integer,          intent(inout) :: dim_DIIS
+  double precision, intent(in)    :: F_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(in)    :: E_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(inout) :: F_ao(size_F_ao,ao_num)
+
+  double precision, allocatable   :: B_matrix_DIIS(:,:), X_vector_DIIS(:), C_vector_DIIS(:)
+
+  integer                         :: i, j, k, l, i_DIIS, j_DIIS
+  integer                         :: lwork
+  double precision                :: rcond, ferr, berr
+  integer,          allocatable   :: iwork(:)
+  double precision, allocatable   :: scratch(:,:)
+
+  if(dim_DIIS < 1) then
+    return
+  endif
+
+  allocate( B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), X_vector_DIIS(dim_DIIS+1) &
+          , C_vector_DIIS(dim_DIIS+1), scratch(ao_num,ao_num) )
+
+  ! Compute the matrices B and X
+  B_matrix_DIIS(:,:) = 0.d0
+  do j = 1, dim_DIIS
+    j_DIIS = min(dim_DIIS, mod(it-j, max_dim_DIIS_TCSCF)+1)
+
+    do i = 1, dim_DIIS
+      i_DIIS = min(dim_DIIS, mod(it-i, max_dim_DIIS_TCSCF)+1)
+
+      ! Compute product of two errors vectors
+      do l = 1, ao_num
+        do k = 1, ao_num
+          B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + E_DIIS(k,l,i_DIIS) * E_DIIS(k,l,j_DIIS)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+  ! Pad B matrix and build the X matrix
+
+  C_vector_DIIS(:) = 0.d0
+  do i = 1, dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+  enddo
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+
+  deallocate(scratch)
+
+  ! Estimate condition number of B
+  integer                       :: info
+  double precision              :: anorm
+  integer,          allocatable :: ipiv(:)
+  double precision, allocatable :: AF(:,:)
+  double precision, external :: dlange
+
+  lwork = max((dim_DIIS+1)**2, (dim_DIIS+1)*5)
+  allocate(AF(dim_DIIS+1,dim_DIIS+1))
+  allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) )
+  allocate(scratch(lwork,1))
+  scratch(:,1) = 0.d0
+
+  anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, size(B_matrix_DIIS, 1), scratch(1,1))
+
+  AF(:,:) = B_matrix_DIIS(:,:)
+  call dgetrf(dim_DIIS+1, dim_DIIS+1, AF, size(AF, 1), ipiv, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  call dgecon('1', dim_DIIS+1, AF, size(AF, 1), anorm, rcond, scratch, iwork, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  if(rcond < 1.d-14) then
+    dim_DIIS = 0
+    return
+  endif
+
+  ! solve the linear system C = B x X
+
+  X_vector_DIIS = C_vector_DIIS
+  call dgesv(dim_DIIS+1, 1, B_matrix_DIIS, size(B_matrix_DIIS, 1), ipiv , X_vector_DIIS, size(X_vector_DIIS, 1), info)
+
+  deallocate(scratch, AF, iwork)
+  if(info < 0) then
+    stop ' bug in TC-DIIS'
+  endif
+
+  ! Compute extrapolated Fock matrix
+
+  !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num > 200)
+  do j = 1, ao_num
+    do i = 1, ao_num
+      F_ao(i,j) = 0.d0
+    enddo
+    do k = 1, dim_DIIS
+      if(dabs(X_vector_DIIS(k)) < 1.d-10) cycle
+      do i = 1,ao_num
+        ! FPE here
+        F_ao(i,j) = F_ao(i,j) + X_vector_DIIS(k) * F_DIIS(i,j,dim_DIIS-k+1)
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf_simple.irp.f b/src/tc_scf/rh_tcscf_simple.irp.f
new file mode 100644
index 00000000..30798e3d
--- /dev/null
+++ b/src/tc_scf/rh_tcscf_simple.irp.f
@@ -0,0 +1,129 @@
+! ---
+
+subroutine rh_tcscf_simple()
+
+  implicit none
+  integer                       :: i, j, it, dim_DIIS
+  double precision              :: t0, t1
+  double precision              :: e_save, e_delta, rho_delta
+  double precision              :: etc_tot, etc_1e, etc_2e, etc_3e, tc_grad
+  double precision              :: er_DIIS
+  double precision, allocatable :: rho_old(:,:), rho_new(:,:)
+
+  allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num))
+
+  it       = 0
+  e_save   = 0.d0
+  dim_DIIS = 0
+
+  ! ---
+
+  if(.not. bi_ortho) then
+   print *, ' grad_hermit = ', grad_hermit
+   call save_good_hermit_tc_eigvectors
+   TOUCH mo_coef 
+   call save_mos
+  endif
+
+  ! ---
+
+  if(bi_ortho) then
+
+    PROVIDE level_shift_tcscf
+    PROVIDE mo_l_coef mo_r_coef
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      '====', '================', '================', '================', '================', '================' &
+            , '================', '================', '================', '====', '========'
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+            , '    gradient    ', '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      '====', '================', '================', '================', '================', '================' &
+            , '================', '================', '================', '====', '========'
+
+
+    ! first iteration (HF orbitals)
+    call wall_time(t0)
+
+    etc_tot = TC_HF_energy
+    etc_1e  = TC_HF_one_e_energy
+    etc_2e  = TC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    tc_grad = grad_non_hermit
+    er_DIIS = maxval(abs(FQS_SQF_mo))
+    e_delta = dabs(etc_tot - e_save)
+    e_save  = etc_tot
+
+    call wall_time(t1)
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+    do while(tc_grad .gt. dsqrt(thresh_tcscf))
+      call wall_time(t0)
+
+      it += 1
+      if(it > n_it_tcscf_max) then
+        print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+        stop
+      endif
+
+      mo_l_coef = fock_tc_leigvec_ao
+      mo_r_coef = fock_tc_reigvec_ao
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+      etc_tot = TC_HF_energy
+      etc_1e  = TC_HF_one_e_energy
+      etc_2e  = TC_HF_two_e_energy
+      etc_3e  = 0.d0
+      if(three_body_h_tc) then
+        etc_3e = diag_three_elem_hf
+      endif
+      tc_grad = grad_non_hermit
+      er_DIIS = maxval(abs(FQS_SQF_mo))
+      e_delta = dabs(etc_tot - e_save)
+      e_save  = etc_tot
+
+      call ezfio_set_tc_scf_bitc_energy(etc_tot)
+
+      call wall_time(t1)
+      write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+        it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+    enddo
+
+  else
+
+   do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. (it.lt.n_it_tcscf_max) )
+      print*,'grad_hermit = ',grad_hermit
+      it += 1
+      print *, 'iteration = ', it
+      print *, '***'
+      print *, 'TC HF total energy = ', TC_HF_energy
+      print *, 'TC HF 1 e   energy = ', TC_HF_one_e_energy
+      print *, 'TC HF 2 e   energy = ', TC_HF_two_e_energy
+      print *, 'TC HF 3 body       = ', diag_three_elem_hf
+      print *, '***'
+      print *, ''
+      call save_good_hermit_tc_eigvectors
+      TOUCH mo_coef 
+      call save_mos
+    enddo
+
+  endif
+
+  print *, ' TCSCF Simple converged !'
+  call print_energy_and_mos()
+
+  deallocate(rho_old, rho_new)
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rh_vartcscf_simple.irp.f b/src/tc_scf/rh_vartcscf_simple.irp.f
new file mode 100644
index 00000000..ecb0709e
--- /dev/null
+++ b/src/tc_scf/rh_vartcscf_simple.irp.f
@@ -0,0 +1,89 @@
+! ---
+
+subroutine rh_vartcscf_simple()
+
+  implicit none
+  integer          :: i, j, it, dim_DIIS
+  double precision :: t0, t1
+  double precision :: e_save, e_delta, rho_delta
+  double precision :: etc_tot, etc_1e, etc_2e, etc_3e
+  double precision :: er_DIIS
+
+
+  it       = 0
+  e_save   = 0.d0
+  dim_DIIS = 0
+
+  ! ---
+
+  PROVIDE level_shift_tcscf
+  PROVIDE mo_r_coef
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')                      &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '====', '========'
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')                      &
+    ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+          , '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')                      &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '====', '========'
+
+
+  ! first iteration (HF orbitals)
+  call wall_time(t0)
+
+  etc_tot = VARTC_HF_energy
+  etc_1e  = VARTC_HF_one_e_energy
+  etc_2e  = VARTC_HF_two_e_energy
+  etc_3e  = 0.d0
+  if(three_body_h_tc) then
+    etc_3e = diag_three_elem_hf
+  endif
+  er_DIIS = maxval(abs(FQS_SQF_mo))
+  e_delta = dabs(etc_tot - e_save)
+  e_save  = etc_tot
+
+  call wall_time(t1)
+  write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+    it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+  do while(er_DIIS .gt. dsqrt(thresh_tcscf))
+    call wall_time(t0)
+
+    it += 1
+    if(it > n_it_tcscf_max) then
+      print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+      stop
+    endif
+
+    mo_r_coef = fock_vartc_eigvec_ao
+    mo_l_coef = mo_r_coef
+    call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+    call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    TOUCH mo_l_coef mo_r_coef
+
+    etc_tot = VARTC_HF_energy
+    etc_1e  = VARTC_HF_one_e_energy
+    etc_2e  = VARTC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    er_DIIS = maxval(abs(FQS_SQF_mo))
+    e_delta = dabs(etc_tot - e_save)
+    e_save  = etc_tot
+
+    call ezfio_set_tc_scf_bitc_energy(etc_tot)
+
+    call wall_time(t1)
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+  enddo
+
+  print *, ' VAR-TCSCF Simple converged !'
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f
new file mode 100644
index 00000000..31999c18
--- /dev/null
+++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f
@@ -0,0 +1,367 @@
+
+! ---
+
+program rotate_tcscf_orbitals
+
+  BEGIN_DOC
+  ! TODO : Rotate the bi-orthonormal orbitals in order to minimize left-right angles when degenerate
+  END_DOC
+
+  implicit none
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  bi_ortho = .True.
+  touch bi_ortho
+
+  call maximize_overlap()
+
+end
+
+! ---
+
+subroutine maximize_overlap()
+
+  implicit none
+  integer                       :: i, m, n
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: C(:,:), R(:,:), L(:,:), W(:,:), e(:)
+  double precision, allocatable :: S(:,:)
+
+  n = ao_num
+  m = mo_num
+
+  allocate(L(n,m), R(n,m), C(n,m), W(n,n), e(m))
+  L = mo_l_coef
+  R = mo_r_coef
+  C = mo_coef
+  W = ao_overlap
+
+  print*, ' fock matrix diag elements'
+  do i = 1, m
+    e(i) = Fock_matrix_tc_mo_tot(i,i)
+    print*, e(i)
+  enddo
+
+  ! ---
+   
+  print *, ' overlap before :'
+  print *, ' '
+
+  allocate(S(m,m)) 
+
+  call LTxSxR(n, m, L, W, R, S)
+  !print*, " L.T x R"
+  !do i = 1, m
+  !  write(*, '(100(F16.10,X))') S(i,i)
+  !enddo
+  call LTxSxR(n, m, L, W, C, S)
+  print*, " L.T x C"
+  do i = 1, m
+    write(*, '(100(F16.10,X))') S(i,:)
+  enddo
+  call LTxSxR(n, m, C, W, R, S)
+  print*, " C.T x R"
+  do i = 1, m
+    write(*, '(100(F16.10,X))') S(i,:)
+  enddo
+
+  deallocate(S)
+
+  ! ---
+
+  call rotate_degen_eigvec_to_maximize_overlap(n, m, e, C, W, L, R)
+
+  ! ---
+   
+  print *, ' overlap after :'
+  print *, ' '
+
+  allocate(S(m,m)) 
+
+  call LTxSxR(n, m, L, W, R, S)
+  !print*, " L.T x R"
+  !do i = 1, m
+  !  write(*, '(100(F16.10,X))') S(i,i)
+  !enddo
+  call LTxSxR(n, m, L, W, C, S)
+  print*, " L.T x C"
+  do i = 1, m
+    write(*, '(100(F16.10,X))') S(i,:)
+  enddo
+  call LTxSxR(n, m, C, W, R, S)
+  print*, " C.T x R"
+  do i = 1, m
+    write(*, '(100(F16.10,X))') S(i,:)
+  enddo
+
+  deallocate(S)
+
+  ! ---
+
+  mo_l_coef = L
+  mo_r_coef = R
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+
+  ! ---
+
+  deallocate(L, R, C, W, e)
+
+end subroutine maximize_overlap
+
+! ---
+
+subroutine rotate_degen_eigvec_to_maximize_overlap(n, m, e0, C0, W0, L0, R0)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(in)    :: e0(m), W0(n,n), C0(n,m)
+  double precision, intent(inout) :: L0(n,m), R0(n,m)
+
+
+  integer                         :: i, j, k, kk, mm, id1, tot_deg
+  double precision                :: ei, ej, de, de_thr
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), C(:,:), Lnew(:,:), Rnew(:,:), tmp(:,:)
+  !double precision, allocatable   :: S(:,:), Snew(:,:), T(:,:), Ttmp(:,:), Stmp(:,:)
+  double precision, allocatable   :: S(:,:), Snew(:,:), T(:,:), Ttmp(:,:), Stmp(:,:)
+  !real*8                          :: S(m,m), Snew(m,m), T(m,m)
+
+  id1 = 700
+  allocate(S(id1,id1), Snew(id1,id1), T(id1,id1))
+
+  ! ---
+
+  allocate( deg_num(m) )
+  do i = 1, m
+    deg_num(i) = 1
+  enddo
+
+  de_thr = thr_degen_tc
+
+  do i = 1, m-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, m
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+
+  tot_deg = 0
+  do i = 1, m
+    if(deg_num(i).gt.1) then
+      print *, ' degen on', i, deg_num(i)
+      tot_deg = tot_deg + 1
+    endif
+  enddo
+
+  if(tot_deg .eq. 0) then
+    print *, ' no degen'
+    return
+  endif
+
+  ! ---
+
+  do i = 1, m
+    mm = deg_num(i)
+
+    if(mm .gt. 1) then
+
+      allocate(L(n,mm), R(n,mm), C(n,mm))
+      do j = 1, mm
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+        C(1:n,j) = C0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      ! C.T x W0 x R
+      allocate(tmp(mm,n), Stmp(mm,mm))
+      call dgemm( 'T', 'N', mm, n, n, 1.d0       &
+                , C, size(C, 1), W0, size(W0, 1) &
+                , 0.d0, tmp, size(tmp, 1) )
+      call dgemm( 'N', 'N', mm, mm, n, 1.d0        &
+                , tmp, size(tmp, 1), R, size(R, 1) &
+                , 0.d0, Stmp, size(Stmp, 1) )
+      deallocate(C, tmp)
+
+      S = 0.d0
+      do k = 1, mm
+        do kk = 1, mm
+          S(kk,k) = Stmp(kk,k)
+        enddo
+      enddo
+      deallocate(Stmp)
+
+      !print*, " overlap bef"
+      !do k = 1, mm
+      !  write(*, '(100(F16.10,X))') (S(k,kk), kk=1, mm)
+      !enddo
+    
+      T    = 0.d0
+      Snew = 0.d0
+      call maxovl(mm, mm, S, T, Snew)
+
+      !print*, " overlap aft"
+      !do k = 1, mm
+      !  write(*, '(100(F16.10,X))') (Snew(k,kk), kk=1, mm)
+      !enddo
+
+      allocate(Ttmp(mm,mm))
+      Ttmp(1:mm,1:mm) = T(1:mm,1:mm)
+
+      allocate(Lnew(n,mm), Rnew(n,mm))
+      call dgemm( 'N', 'N', n, mm, mm, 1.d0               &
+                , R, size(R, 1), Ttmp(1,1), size(Ttmp, 1) &
+                , 0.d0, Rnew, size(Rnew, 1) )
+      call dgemm( 'N', 'N', n, mm, mm, 1.d0               &
+                , L, size(L, 1), Ttmp(1,1), size(Ttmp, 1) &
+                , 0.d0, Lnew, size(Lnew, 1) )
+
+      deallocate(L, R)
+      deallocate(Ttmp)
+
+      ! ---
+
+      do j = 1, mm
+        L0(1:n,i+j-1) = Lnew(1:n,j)
+        R0(1:n,i+j-1) = Rnew(1:n,j)
+      enddo
+      deallocate(Lnew, Rnew)
+
+    endif
+  enddo
+
+  deallocate(S, Snew, T)
+
+end subroutine rotate_degen_eigvec_to_maximize_overlap
+
+! ---
+
+subroutine fix_right_to_one()
+
+  implicit none
+  integer                       :: i, j, m, n, mm, tot_deg
+  double precision              :: accu_d, accu_nd
+  double precision              :: de_thr, ei, ej, de
+  integer,          allocatable :: deg_num(:)
+  double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:)
+  double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:)
+
+  n = ao_num
+  m = mo_num
+
+  allocate(L0(n,m), R0(n,m), W(n,n), e0(m))
+  L0 = mo_l_coef
+  R0 = mo_r_coef
+  W  = ao_overlap
+
+  print*, ' fock matrix diag elements'
+  do i = 1, m
+    e0(i) = Fock_matrix_tc_mo_tot(i,i)
+    print*, e0(i)
+  enddo
+
+  ! ---
+
+  allocate( deg_num(m) )
+  do i = 1, m
+    deg_num(i) = 1
+  enddo
+
+  de_thr = 1d-6
+
+  do i = 1, m-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, m
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+
+  deallocate(e0)
+
+  tot_deg = 0
+  do i = 1, m
+    if(deg_num(i).gt.1) then
+      print *, ' degen on', i, deg_num(i)
+      tot_deg = tot_deg + 1
+    endif
+  enddo
+
+  if(tot_deg .eq. 0) then
+    print *, ' no degen'
+    return
+  endif
+
+  ! ---
+
+  do i = 1, m
+    mm = deg_num(i)
+
+    if(mm .gt. 1) then
+
+      allocate(L(n,mm), R(n,mm))
+      do j = 1, mm
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      call impose_weighted_orthog_svd(n, mm, W, R)
+      call impose_weighted_biorthog_qr(n, mm, thresh_biorthog_diag, thresh_biorthog_nondiag, R, W, L)
+
+      ! ---
+
+      do j = 1, mm
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+      deallocate(L, R)
+
+    endif
+  enddo
+
+  call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thresh_biorthog_diag, thresh_biorthog_nondiag, .true.)
+
+  deallocate(W, deg_num)
+
+  mo_l_coef = L0
+  mo_r_coef = R0
+  deallocate(L0, R0)
+
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+  print *, ' orbitals are rotated '
+
+  return
+end subroutine fix_right_to_one
+
+! ---
diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f
new file mode 100644
index 00000000..3c12118f
--- /dev/null
+++ b/src/tc_scf/routines_rotates.irp.f
@@ -0,0 +1,406 @@
+
+
+! ---
+
+subroutine LTxSxR(n, m, L, S, R, C)
+
+  implicit none
+  integer,          intent(in)  :: n, m
+  double precision, intent(in)  :: L(n,m), S(n,n), R(n,m)
+  double precision, intent(out) :: C(m,m)
+  integer                       :: i, j
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: tmp(:,:)
+
+  ! L.T x S x R
+  allocate(tmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0      &
+            , L, size(L, 1), S, size(S, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0          &
+            , tmp, size(tmp, 1), R, size(R, 1) &
+            , 0.d0, C, size(C, 1) )
+  deallocate(tmp)
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(j.eq.i) then
+        accu_d += dabs(C(j,i))
+      else
+        accu_nd += C(j,i) * C(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  print*, ' accu_d  = ', accu_d
+  print*, ' accu_nd = ', accu_nd
+
+end subroutine LTxR
+
+! ---
+
+
+! ---
+
+subroutine minimize_tc_orb_angles()
+  BEGIN_DOC
+  ! routine that minimizes the angle between left- and right-orbitals when degeneracies are found
+  END_DOC
+
+  implicit none
+  logical          :: good_angles
+  integer          :: i
+  double precision :: thr_deg
+
+  good_angles = .False.
+  thr_deg = thr_degen_tc
+
+  call print_energy_and_mos()
+
+  print *, ' Minimizing the angles between the TC orbitals'
+  i = 1
+  do while (.not. good_angles)
+    print *, ' iteration = ', i
+    call routine_save_rotated_mos(thr_deg, good_angles)
+    thr_deg *= 10.d0
+    i += 1
+    if(i .gt. 100) then
+      print *, ' minimize_tc_orb_angles does not seem to converge ..'
+      print *, ' Something is weird in the tc orbitals ...'
+      print *, ' STOPPING'
+      stop
+    endif
+  enddo
+  print *, ' Converged ANGLES MINIMIZATION !!'
+
+  call print_angles_tc()
+  call print_energy_and_mos()
+
+end
+
+! ---
+
+subroutine routine_save_rotated_mos(thr_deg, good_angles)
+
+  implicit none
+
+  double precision, intent(in)  :: thr_deg
+  logical,          intent(out) :: good_angles
+
+  integer                       :: i, j, k, n_degen_list, m, n, n_degen, ilast, ifirst
+  double precision              :: max_angle, norm
+  integer,          allocatable :: list_degen(:,:)
+  double precision, allocatable :: new_angles(:)
+  double precision, allocatable :: mo_r_coef_good(:,:), mo_l_coef_good(:,:)
+  double precision, allocatable :: mo_r_coef_new(:,:)
+  double precision, allocatable :: fock_diag(:),s_mat(:,:)
+  double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:)
+  double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:)
+
+  good_angles = .False.
+
+  allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num))
+
+  print *, ' ***************************************'
+  print *, ' ***************************************'
+  print *, ' THRESHOLD FOR DEGENERACIES ::: ', thr_deg
+  print *, ' ***************************************'
+  print *, ' ***************************************'
+  print *, ' Starting with the following TC energy gradient :', grad_non_hermit
+
+  mo_r_coef_good = mo_r_coef
+  mo_l_coef_good = mo_l_coef
+
+  allocate(mo_r_coef_new(ao_num, mo_num))
+  mo_r_coef_new = mo_r_coef
+  do i = 1, mo_num
+    norm = 1.d0/dsqrt(overlap_mo_r(i,i))
+    do j = 1, ao_num
+      mo_r_coef_new(j,i) *= norm
+    enddo
+  enddo
+
+  allocate(list_degen(mo_num,0:mo_num), s_mat(mo_num,mo_num), fock_diag(mo_num))
+  do i = 1, mo_num
+    fock_diag(i) = Fock_matrix_tc_mo_tot(i,i)
+  enddo
+
+ ! compute the overlap between the left and rescaled right
+  call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat)
+! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list)
+  call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list)
+  print *, ' fock_matrix_mo'
+  do i = 1, mo_num
+    print *, i, fock_diag(i), angle_left_right(i)
+  enddo
+   
+  do i = 1, n_degen_list
+!  ifirst = list_degen(1,i)
+!  ilast  = list_degen(2,i)
+!  n_degen = ilast - ifirst +1
+
+    n_degen = list_degen(i,0)
+    if(n_degen .eq. 1) cycle
+
+    allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen))
+    allocate(mo_r_coef_tmp(ao_num,n_degen), mo_l_coef_tmp(ao_num,n_degen), mo_l_coef_new(ao_num,n_degen))
+    allocate(T(n_degen,n_degen), Snew(n_degen,n_degen))
+
+    do j = 1, n_degen
+      mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j))
+      mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j))
+    enddo
+    ! Orthogonalization of right functions
+    print *, ' Orthogonalization of RIGHT functions'
+    print *, ' ------------------------------------'
+    call orthog_functions(ao_num, n_degen, mo_r_coef_tmp, ao_overlap)
+  
+    ! Orthogonalization of left functions
+    print *, ' Orthogonalization of LEFT functions'
+    print *, ' ------------------------------------'
+    call orthog_functions(ao_num, n_degen, mo_l_coef_tmp, ao_overlap)
+
+    print *, ' Overlap left-right '
+    call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp)
+    do j = 1, n_degen
+     write(*,'(100(F8.4,X))') stmp(:,j)
+    enddo
+    call build_s_matrix(ao_num, n_degen, mo_l_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp)
+
+    !print*,'LEFT/LEFT OVERLAP '
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+    call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_r_coef_tmp, ao_overlap, stmp)
+    !print*,'RIGHT/RIGHT OVERLAP '
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+
+    if(maxovl_tc) then
+      T    = 0.d0
+      Snew = 0.d0
+      call maxovl(n_degen, n_degen, stmp, T, Snew)
+    !print*,'overlap after'
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')Snew(:,j)
+    !enddo
+      call dgemm( 'N', 'N', ao_num, n_degen, n_degen, 1.d0                  &
+                , mo_l_coef_tmp, size(mo_l_coef_tmp, 1), T(1,1), size(T, 1) &
+                , 0.d0, mo_l_coef_new, size(mo_l_coef_new, 1) )
+     call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_r_coef_tmp, ao_overlap, stmp)
+    !print*,'Overlap test'
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+    else 
+      mo_l_coef_new = mo_l_coef_tmp
+    endif
+
+    call impose_weighted_biorthog_svd(ao_num, n_degen, ao_overlap, mo_l_coef_new, mo_r_coef_tmp)
+
+    !call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_r_coef_tmp, ao_overlap, stmp)
+    !print*,'LAST OVERLAP '
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+    !call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_l_coef_new, ao_overlap, stmp)
+    !print*,'LEFT OVERLAP '
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+    !call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_r_coef_tmp, ao_overlap, stmp)
+    !print*,'RIGHT OVERLAP '
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+    do j = 1, n_degen
+!!!   mo_l_coef_good(1:ao_num,j+ifirst-1) = mo_l_coef_new(1:ao_num,j)
+!!!   mo_r_coef_good(1:ao_num,j+ifirst-1) = mo_r_coef_tmp(1:ao_num,j)
+      mo_l_coef_good(1:ao_num,list_degen(i,j)) = mo_l_coef_new(1:ao_num,j)
+      mo_r_coef_good(1:ao_num,list_degen(i,j)) = mo_r_coef_tmp(1:ao_num,j)
+    enddo
+
+    deallocate(stmp, smat2)
+    deallocate(mo_r_coef_tmp, mo_l_coef_tmp, mo_l_coef_new)
+    deallocate(T, Snew)
+  enddo
+
+  !allocate(stmp(mo_num, mo_num))
+  !call build_s_matrix(ao_num, mo_num, mo_l_coef_good, mo_r_coef_good, ao_overlap, stmp)
+  !print*,'LEFT/RIGHT OVERLAP '
+  !do j = 1, mo_num
+  ! write(*,'(100(F16.10,X))')stmp(:,j)
+  !enddo
+  !call build_s_matrix(ao_num, mo_num, mo_l_coef_good, mo_l_coef_good, ao_overlap, stmp)
+  !print*,'LEFT/LEFT OVERLAP '
+  !do j = 1, mo_num
+  ! write(*,'(100(F16.10,X))')stmp(:,j)
+  !enddo
+  !call build_s_matrix(ao_num, mo_num, mo_r_coef_good, mo_r_coef_good, ao_overlap, stmp)
+  !print*,'RIGHT/RIGHT OVERLAP '
+  !do j = 1, mo_num
+  ! write(*,'(100(F16.10,X))')stmp(:,j)
+  !enddo
+
+  mo_r_coef = mo_r_coef_good
+  mo_l_coef = mo_l_coef_good
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+  TOUCH mo_l_coef mo_r_coef
+
+  allocate(new_angles(mo_num))
+  new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num))
+  max_angle = maxval(new_angles)
+  good_angles = max_angle.lt.45.d0
+  print *, ' max_angle = ', max_angle
+  
+end
+
+! ---
+
+subroutine build_s_matrix(m, n, C1, C2, overlap, smat)
+
+  implicit none
+  integer,          intent(in)  :: m, n
+  double precision, intent(in)  :: C1(m,n), C2(m,n), overlap(m,m)
+  double precision, intent(out) :: smat(n,n)
+  integer                       :: i, j, k, l
+  double precision, allocatable :: S_tmp(:,:)
+
+  smat = 0.d0
+
+  !do i = 1, n
+  !  do j = 1, n
+  !    do k = 1, m
+  !      do l = 1, m
+  !        smat(i,j) += C1(k,i) * overlap(l,k) * C2(l,j) 
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  ! C1.T x overlap
+  allocate(S_tmp(n,m))
+  call dgemm( 'T', 'N', n, m, m, 1.d0                    &
+            , C1, size(C1, 1), overlap, size(overlap, 1) &
+            , 0.d0, S_tmp, size(S_tmp, 1) )
+  ! C1.T x overlap x C2
+  call dgemm( 'N', 'N', n, n, m, 1.d0                     &
+            , S_tmp, size(S_tmp, 1), C2(1,1), size(C2, 1) &
+            , 0.d0, smat, size(smat, 1) )
+  deallocate(S_tmp)
+
+end
+
+! ---
+
+subroutine orthog_functions(m, n, coef, overlap)
+
+  implicit none
+
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: overlap(m,m)
+  double precision, intent(inout) :: coef(m,n)
+  double precision, allocatable   :: stmp(:,:)
+  integer                         :: j, k
+
+  allocate(stmp(n,n))
+  call build_s_matrix(m, n, coef, coef, overlap, stmp)
+! print*,'overlap before'
+! do j = 1, n
+!  write(*,'(100(F16.10,X))')stmp(:,j)
+! enddo
+  call impose_orthog_svd_overlap(m, n, coef, overlap)
+  call build_s_matrix(m, n, coef, coef, overlap, stmp)
+  do j = 1, n
+    ! ---
+    ! TODO: MANU check ici
+    !coef(1,:m) *= 1.d0/dsqrt(stmp(j,j))
+    do k = 1, m
+      coef(k,j) *= 1.d0/dsqrt(stmp(j,j))
+    enddo
+    ! ---
+  enddo
+  call build_s_matrix(m, n, coef, coef, overlap, stmp)
+
+ !print*,'overlap after'
+ !do j = 1, n
+ ! write(*,'(100(F16.10,X))')stmp(:,j)
+ !enddo
+
+ deallocate(stmp)
+
+end
+
+! ---
+
+subroutine print_angles_tc()
+
+  implicit none
+  integer          :: i, j
+  double precision :: left, right
+
+  print *, ' product of norms, angle between vectors'                                                                  
+  do i = 1, mo_num
+    left  = overlap_mo_l(i,i)
+    right = overlap_mo_r(i,i)
+!  print*,Fock_matrix_tc_mo_tot(i,i),left*right,angle_left_right(i)
+    print *, left*right, angle_left_right(i)
+  enddo
+
+end
+
+! ---
+
+subroutine print_energy_and_mos()
+
+  implicit none
+  integer :: i
+
+  print *, ' '
+  print *, ' TC energy = ', TC_HF_energy
+  print *, ' TC SCF energy gradient = ', grad_non_hermit
+  print *, ' Max angle Left/right   = ', max_angle_left_right
+
+  if(max_angle_left_right .lt. 45.d0) then
+    print *, ' Maximum angle BELOW 45 degrees, everthing is OK !'
+  else if(max_angle_left_right .gt. 45.d0 .and. max_angle_left_right .lt. 75.d0) then
+    print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...'
+  else if(max_angle_left_right .gt. 75.d0) then
+    print *, ' Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...'
+  endif
+
+  print *, ' Diag Fock elem, product of left/right norm, angle left/right '
+  do i = 1, mo_num
+    write(*, '(I3,X,100(F16.10,X))') i, Fock_matrix_tc_mo_tot(i,i), overlap_mo_l(i,i)*overlap_mo_r(i,i), angle_left_right(i)
+  enddo
+
+end
+
+! ---
+
+subroutine sort_by_tc_fock
+ implicit none 
+ integer, allocatable :: iorder(:)
+ double precision, allocatable :: mo_l_tmp(:,:), mo_r_tmp(:,:),fock(:)
+ allocate(iorder(mo_num),fock(mo_num),mo_l_tmp(ao_num, mo_num),mo_r_tmp(ao_num,mo_num))
+ integer :: i
+ mo_l_tmp = mo_l_coef
+ mo_r_tmp = mo_r_coef
+ do i = 1, mo_num
+  iorder(i) = i
+  fock(i) = Fock_matrix_tc_mo_tot(i,i)
+ enddo
+ call dsort(fock,iorder,mo_num)
+ do i = 1, mo_num
+  mo_l_coef(1:ao_num,i) = mo_l_tmp(1:ao_num,iorder(i))
+  mo_r_coef(1:ao_num,i) = mo_r_tmp(1:ao_num,iorder(i))
+ enddo
+ touch mo_l_coef mo_r_coef
+
+end
+
diff --git a/src/tc_scf/tc_petermann_factor.irp.f b/src/tc_scf/tc_petermann_factor.irp.f
new file mode 100644
index 00000000..d3722098
--- /dev/null
+++ b/src/tc_scf/tc_petermann_factor.irp.f
@@ -0,0 +1,78 @@
+
+! ---
+
+program tc_petermann_factor
+
+  BEGIN_DOC
+  ! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  call main()
+
+end
+
+! ---
+
+subroutine main()
+
+  implicit none
+  integer                       :: i, j
+  double precision              :: Pf_diag_av
+  double precision, allocatable :: Sl(:,:), Sr(:,:), Pf(:,:)
+
+  allocate(Sl(mo_num,mo_num), Sr(mo_num,mo_num), Pf(mo_num,mo_num))
+
+  call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0                       &
+            , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, Sl, size(Sl, 1) )
+
+  print *, ''
+  print *, ' left-orthog matrix:'
+  do i = 1, mo_num
+    write(*,'(100(F8.4,X))') Sl(:,i)
+  enddo
+
+  call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0                       &
+            , mo_r_coef, size(mo_r_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
+            , 0.d0, Sr, size(Sr, 1) )
+
+  print *, ''
+  print *, ' right-orthog matrix:'
+  do i = 1, mo_num
+    write(*,'(100(F8.4,X))') Sr(:,i)
+  enddo
+
+  print *, ''
+  print *, ' Petermann matrix:'
+  do i = 1, mo_num
+    do j = 1, mo_num
+      Pf(j,i) = Sl(j,i) * Sr(j,i)
+    enddo
+    write(*,'(100(F8.4,X))') Pf(:,i)
+  enddo
+
+  Pf_diag_av = 0.d0
+  do i = 1, mo_num
+    Pf_diag_av = Pf_diag_av + Pf(i,i)
+  enddo
+  Pf_diag_av = Pf_diag_av / dble(mo_num)
+
+  print *, ''
+  print *, ' mean of the diagonal Petermann factor = ', Pf_diag_av
+
+  deallocate(Sl, Sr, Pf)
+
+  return
+end subroutine
+
+! ---
+
diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f
new file mode 100644
index 00000000..85389f30
--- /dev/null
+++ b/src/tc_scf/tc_scf.irp.f
@@ -0,0 +1,96 @@
+! ---
+
+program tc_scf
+
+  BEGIN_DOC
+  ! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  print *, ' starting ...'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  PROVIDE mu_erf 
+  print *, ' mu = ', mu_erf
+  PROVIDE j1b_type
+  print *, ' j1b_type = ', j1b_type
+  print *, j1b_pen
+
+  !call create_guess()
+  !call orthonormalize_mos()
+
+  PROVIDE tcscf_algorithm
+  PROVIDE var_tc
+
+  if(var_tc) then
+
+    print *, ' VAR-TC'
+
+    if(tcscf_algorithm == 'DIIS') then
+      print*, ' NOT implemented yet'
+    elseif(tcscf_algorithm == 'Simple') then
+      call rh_vartcscf_simple()
+    else
+      print *, ' not implemented yet', tcscf_algorithm
+      stop
+    endif
+
+  else
+
+    if(tcscf_algorithm == 'DIIS') then
+      call rh_tcscf_diis()
+    elseif(tcscf_algorithm == 'Simple') then
+      call rh_tcscf_simple()
+    else
+      print *, ' not implemented yet', tcscf_algorithm
+      stop
+    endif
+
+    call minimize_tc_orb_angles()
+    call print_energy_and_mos()
+
+  endif
+
+
+end
+
+! ---
+
+subroutine create_guess()
+
+  implicit none
+  logical :: exists
+
+  PROVIDE ezfio_filename
+  !call ezfio_has_mo_basis_mo_coef(exists)
+  exists = .false.
+
+  if(.not.exists) then
+    mo_label = 'Guess'
+    if(mo_guess_type == "HCore") then
+      mo_coef = ao_ortho_lowdin_coef
+      call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
+      TOUCH mo_coef
+      call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals, size(mo_one_e_integrals, 1), size(mo_one_e_integrals, 2), mo_label, 1, .false.)
+      call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
+      SOFT_TOUCH mo_coef
+    elseif (mo_guess_type == "Huckel") then
+      call huckel_guess
+    else
+      print *,  'Unrecognized MO guess type : '//mo_guess_type
+      stop 1
+    endif
+    SOFT_TOUCH mo_label
+  endif
+
+end subroutine create_guess
+
+! ---
+
diff --git a/src/tc_scf/tc_scf_dm.irp.f b/src/tc_scf/tc_scf_dm.irp.f
new file mode 100644
index 00000000..07da8a58
--- /dev/null
+++ b/src/tc_scf/tc_scf_dm.irp.f
@@ -0,0 +1,46 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TC-SCF transition density matrix on the AO basis for BETA electrons 
+  END_DOC
+  implicit none
+
+  if(bi_ortho) then
+    PROVIDE mo_l_coef mo_r_coef
+    TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta
+  else
+    TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta
+  endif
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TC-SCF transition density matrix on the AO basis for ALPHA electrons 
+  END_DOC
+  implicit none
+
+  if(bi_ortho) then
+    PROVIDE mo_l_coef mo_r_coef
+    TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha
+  else
+    TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha
+  endif
+END_PROVIDER 
+
+
+! ---
+
+BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num) ]
+  implicit none
+  BEGIN_DOC
+  ! TC-SCF transition density matrix on the AO basis for ALPHA+BETA electrons 
+  END_DOC
+  TCSCF_density_matrix_ao_tot = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha
+END_PROVIDER 
+
+
diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f
new file mode 100644
index 00000000..c3de0322
--- /dev/null
+++ b/src/tc_scf/tc_scf_energy.irp.f
@@ -0,0 +1,63 @@
+
+ BEGIN_PROVIDER [ double precision, TC_HF_energy]
+&BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy]
+&BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy]
+
+  BEGIN_DOC
+  ! TC Hartree-Fock energy containing the nuclear repulsion, and its one- and two-body components.
+  END_DOC
+
+  implicit none
+  integer :: i, j
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  TC_HF_energy = nuclear_repulsion
+  TC_HF_one_e_energy = 0.d0
+  TC_HF_two_e_energy = 0.d0
+
+  do j = 1, ao_num
+    do i = 1, ao_num
+      TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_non_hermit_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) &
+                                    + two_e_tc_non_hermit_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) )
+      TC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) &
+                          * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
+    enddo
+  enddo
+
+  TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy
+  TC_HF_energy += diag_three_elem_hf
+
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, VARTC_HF_energy]
+&BEGIN_PROVIDER [ double precision, VARTC_HF_one_e_energy]
+&BEGIN_PROVIDER [ double precision, VARTC_HF_two_e_energy]
+
+  implicit none
+  integer :: i, j
+
+  PROVIDE mo_r_coef
+
+  VARTC_HF_energy = nuclear_repulsion
+  VARTC_HF_one_e_energy = 0.d0
+  VARTC_HF_two_e_energy = 0.d0
+
+  do j = 1, ao_num
+    do i = 1, ao_num
+      VARTC_HF_two_e_energy += 0.5d0 * ( two_e_vartc_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) &
+                                       + two_e_vartc_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) )
+      VARTC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) &
+                             * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
+    enddo
+  enddo
+
+  VARTC_HF_energy += VARTC_HF_one_e_energy + VARTC_HF_two_e_energy
+  VARTC_HF_energy += diag_three_elem_hf
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
new file mode 100644
index 00000000..0866cdaf
--- /dev/null
+++ b/src/tc_scf/test_int.irp.f
@@ -0,0 +1,1059 @@
+program test_ints
+
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  print *, ' starting test_ints ...'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 15 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  my_extra_grid_becke = .True.
+  my_n_pt_r_extra_grid = 30
+  my_n_pt_a_extra_grid = 50 ! small extra_grid for quick debug
+  touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
+
+!! OK 
+!call routine_int2_u_grad1u_j1b2 
+!! OK
+!call routine_v_ij_erf_rk_cst_mu_j1b
+!! OK 
+! call routine_x_v_ij_erf_rk_cst_mu_j1b
+!! OK
+! call routine_v_ij_u_cst_mu_j1b
+
+!! OK
+!call routine_int2_u2_j1b2
+
+!! OK
+!call routine_int2_u_grad1u_x_j1b2
+
+!! OK 
+! call routine_int2_grad1u2_grad2u2_j1b2
+! call routine_int2_u_grad1u_j1b2
+! call test_total_grad_lapl
+! call test_total_grad_square
+! call test_ao_tc_int_chemist
+! call test_grid_points_ao
+! call test_tc_scf
+ !call test_int_gauss
+
+  !call test_fock_3e_uhf_ao()
+  !call test_fock_3e_uhf_mo()
+
+  !call test_tc_grad_and_lapl_ao()
+  !call test_tc_grad_square_ao()
+
+  !call test_two_e_tc_non_hermit_integral()
+
+  call test_tc_grad_square_ao_test()
+
+  PROVIDE TC_HF_energy VARTC_HF_energy
+  print *, '    TC_HF_energy = ',    TC_HF_energy
+  print *, ' VARTC_HF_energy = ', VARTC_HF_energy
+
+end
+
+! ---
+
+subroutine test_tc_scf
+ implicit none
+ integer :: i
+! provide int2_u_grad1u_x_j1b2_test
+ provide x_v_ij_erf_rk_cst_mu_j1b_test
+! do i = 1, ng_fit_jast
+!  print*,expo_gauss_1_erf_x_2(i),coef_gauss_1_erf_x_2(i)
+! enddo
+! provide tc_grad_square_ao_test
+!  provide tc_grad_and_lapl_ao_test
+! provide int2_u_grad1u_x_j1b2_test
+! provide x_v_ij_erf_rk_cst_mu_j1b_test
+! print*,'TC_HF_energy = ',TC_HF_energy
+! print*,'grad_non_hermit = ',grad_non_hermit
+end
+
+subroutine test_ao_tc_int_chemist
+ implicit none
+ provide ao_tc_int_chemist
+! provide ao_tc_int_chemist_test
+! provide tc_grad_square_ao_test
+! provide tc_grad_and_lapl_ao_test
+end
+
+! ---
+
+subroutine routine_test_j1b
+ implicit none
+ integer :: i,icount,j
+ icount = 0
+ do i = 1, List_all_comb_b3_size
+  if(dabs(List_all_comb_b3_coef(i)).gt.1.d-10)then
+   print*,''
+   print*,List_all_comb_b3_expo(i),List_all_comb_b3_coef(i)
+   print*,List_all_comb_b3_cent(1:3,i)
+   print*,''
+   icount += 1
+  endif
+  
+ enddo
+ print*,'List_all_comb_b3_coef,icount = ',List_all_comb_b3_size,icount
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do icount = 1, List_comb_thr_b3_size(j,i)
+    print*,'',j,i
+    print*,List_comb_thr_b3_expo(icount,j,i),List_comb_thr_b3_coef(icount,j,i)
+    print*,List_comb_thr_b3_cent(1:3,icount,j,i)
+    print*,''
+   enddo
+!   enddo
+  enddo
+ enddo
+ print*,'max_List_comb_thr_b3_size = ',max_List_comb_thr_b3_size,List_all_comb_b3_size
+
+end
+
+subroutine routine_int2_u_grad1u_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u_grad1u_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u_grad1u_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_erf_rk_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_erf_rk_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_x_v_ij_erf_rk_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += x_v_ij_erf_rk_cst_mu_j1b     (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+
+subroutine routine_v_ij_u_cst_mu_j1b_test
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_int2_grad1u2_grad2u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ integer :: ii , jj
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+ double precision, allocatable :: ints(:,:,:)
+ allocate(ints(ao_num, ao_num, n_points_final_grid))
+! do ipoint = 1, n_points_final_grid
+!  do i = 1, ao_num
+!   do j = 1, ao_num
+!    read(33,*)ints(j,i,ipoint)
+!   enddo
+!  enddo
+! enddo
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     !array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     !array(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!       array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+!       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+!        print*,j,i,ipoint
+!        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint))
+!        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint))
+!        stop
+!       endif
+!      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ double precision :: e_ref, e_new
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+ e_ref = 0.d0
+ e_new = 0.d0
+ do ii = 1, elec_alpha_num
+  do jj = ii, elec_alpha_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     do i = 1, ao_num
+      do j = 1, ao_num
+       e_ref += mo_coef(j,ii) * mo_coef(i,ii) * array_ref(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       e_new += mo_coef(j,ii) * mo_coef(i,ii) * array(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+       accu_abs += contrib
+!       if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+!        accu_relat += contrib/dabs(array_ref(j,i,l,k))
+!       endif
+      enddo
+     enddo
+    enddo
+   enddo
+
+  enddo
+ enddo
+ print*,'e_ref = ',e_ref
+ print*,'e_new = ',e_new
+! print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+! print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_int2_u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_int2_u_grad1u_x_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += int2_u_grad1u_x_j1b2_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += int2_u_grad1u_x_j1b2     (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_u_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+end
+
+! ---
+
+subroutine test_fock_3e_uhf_ao()
+
+  implicit none
+  integer                       :: i, j
+  double precision              :: diff_tot, diff_ij, thr_ih, norm
+  double precision, allocatable :: fock_3e_uhf_ao_a_mo(:,:), fock_3e_uhf_ao_b_mo(:,:)
+
+  thr_ih = 1d-7
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_ao_a fock_3e_uhf_ao_b
+
+  ! ---
+
+  allocate(fock_3e_uhf_ao_a_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_a   , size(fock_3e_uhf_ao_a   , 1) &
+                        , fock_3e_uhf_ao_a_mo, size(fock_3e_uhf_ao_a_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_ao_a_mo(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_a_mo  (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_a = ', diff_tot / norm
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_a_mo)
+
+  ! ---
+
+  allocate(fock_3e_uhf_ao_b_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_b   , size(fock_3e_uhf_ao_b   , 1) &
+                        , fock_3e_uhf_ao_b_mo, size(fock_3e_uhf_ao_b_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_ao_b_mo(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_b_mo  (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_b = ', diff_tot/norm
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_b_mo)
+
+  ! ---
+
+end subroutine test_fock_3e_uhf_ao()
+
+! ---
+
+subroutine test_fock_3e_uhf_mo()
+
+  implicit none
+  integer          :: i, j
+  double precision :: diff_tot, diff_ij, thr_ih, norm
+
+  thr_ih = 1d-12
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_b
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_a     (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_a = ', diff_tot / norm
+  print *, '      norm_a = ', norm
+  print *, ' '
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_b     (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_b = ', diff_tot/norm
+  print *, '      norm_b = ', norm
+  print *, ' '
+
+  ! ---
+
+end subroutine test_fock_3e_uhf_mo
+
+! ---
+
+subroutine test_total_grad_lapl
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_and_lapl_ao_test(j,i,l,k) - tc_grad_and_lapl_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_and_lapl_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_and_lapl_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
+end
+
+subroutine test_total_grad_square
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_square_ao_test(j,i,l,k) - tc_grad_square_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_square_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_square_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
+end
+
+subroutine test_grid_points_ao
+ implicit none
+ integer :: i,j,ipoint,icount,icount_good, icount_bad,icount_full
+ double precision :: thr
+ thr = 1.d-10
+! print*,'max_n_pts_grid_ao_prod = ',max_n_pts_grid_ao_prod
+! print*,'n_pts_grid_ao_prod'
+ do i = 1, ao_num
+  do j = i, ao_num
+  icount = 0
+  icount_good = 0
+  icount_bad = 0
+  icount_full = 0
+  do ipoint = 1, n_points_final_grid
+!   if(dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,1)) & 
+!    + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,2)) &
+!    + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,3)) )
+!   if(dabs(int2_u2_j1b2_test(j,i,ipoint)).gt.thr)then
+!    icount += 1
+!   endif
+   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_full += 1
+   endif
+   if(dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)).gt.thr)then
+    icount += 1
+    if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_good += 1
+    else
+    print*,j,i,ipoint
+    print*,dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint))/dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint))
+    icount_bad  += 1
+    endif
+   endif
+!   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr)then
+!   endif
+  enddo
+   print*,''
+   print*,j,i
+   print*,icount,icount_full, icount_bad!,n_pts_grid_ao_prod(j,i)
+   print*,dble(icount)/dble(n_points_final_grid),dble(icount_full)/dble(n_points_final_grid)
+!          dble(n_pts_grid_ao_prod(j,i))/dble(n_points_final_grid)
+!   if(icount.gt.n_pts_grid_ao_prod(j,i))then
+!    print*,'pb !!'
+!   endif
+  enddo
+ enddo
+end
+
+subroutine test_int_gauss
+ implicit none
+ integer :: i,j
+ print*,'center'
+ do i = 1, ao_num
+  do j = i, ao_num
+   print*,j,i
+   print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i)
+   print*,ao_prod_center(1:3,j,i)
+  enddo
+ enddo
+ print*,''
+ double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2
+ center = 0.d0
+ pi = dacos(-1.d0)
+ integral_1 = 0.d0
+ integral_2 = 0.d0
+ alpha = 0.75d0
+ do i = 1,  n_points_final_grid
+  ! you get x, y and z of the ith grid point
+  r(1) = final_grid_points(1,i)
+  r(2) = final_grid_points(2,i)
+  r(3) = final_grid_points(3,i)
+  weight = final_weight_at_r_vector(i)
+  distance = dsqrt( (r(1) - center(1))**2 +  (r(2) - center(2))**2 + (r(3) - center(3))**2 )
+  f_r = dexp(-alpha * distance*distance)
+  ! you add the contribution of the grid point to the integral
+  integral_1 += f_r * weight
+  integral_2 += f_r * distance * weight
+ enddo
+ print*,'integral_1      =',integral_1
+ print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5
+ print*,'integral_2      =',integral_2
+ print*,'(pi/alpha)**1.5 =',2.d0*pi / (alpha)**2
+
+
+end
+
+! ---
+
+subroutine test_tc_grad_and_lapl_ao()
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE tc_grad_and_lapl_ao tc_grad_and_lapl_ao_loop
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          diff = dabs(tc_grad_and_lapl_ao_loop(l,k,j,i) - tc_grad_and_lapl_ao(l,k,j,i))
+          if(diff .gt. thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' loops : ', tc_grad_and_lapl_ao_loop(l,k,j,i)
+            print *, ' lapack: ', tc_grad_and_lapl_ao     (l,k,j,i)
+            !stop
+          endif
+
+          norm     += dabs(tc_grad_and_lapl_ao_loop(l,k,j,i))
+          diff_tot += diff
+        enddo
+      enddo
+    enddo
+  enddo
+
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+  return
+
+end
+
+! ---
+
+subroutine test_tc_grad_square_ao()
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE tc_grad_square_ao tc_grad_square_ao_loop
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          diff = dabs(tc_grad_square_ao_loop(l,k,j,i) - tc_grad_square_ao(l,k,j,i))
+          if(diff .gt. thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' loops : ', tc_grad_square_ao_loop(l,k,j,i)
+            print *, ' lapack: ', tc_grad_square_ao     (l,k,j,i)
+            !stop
+          endif
+
+          norm     += dabs(tc_grad_square_ao_loop(l,k,j,i))
+          diff_tot += diff
+        enddo
+      enddo
+    enddo
+  enddo
+
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+  return
+
+end
+
+! ---
+
+subroutine test_two_e_tc_non_hermit_integral()
+
+  implicit none
+  integer          :: i, j
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE two_e_tc_non_hermit_integral_beta two_e_tc_non_hermit_integral_alpha
+  PROVIDE two_e_tc_non_hermit_integral_seq_beta two_e_tc_non_hermit_integral_seq_alpha
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      diff = dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i) - two_e_tc_non_hermit_integral_alpha(j,i))
+      if(diff .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' seq         : ', two_e_tc_non_hermit_integral_seq_alpha(j,i)
+        print *, ' //          : ', two_e_tc_non_hermit_integral_alpha    (j,i)
+        !stop
+      endif
+
+      norm     += dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i))
+      diff_tot += diff
+    enddo
+  enddo
+
+  print *, ' diff tot a = ', diff_tot / norm
+  print *, '     norm a = ', norm
+  print *, ' '
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      diff = dabs(two_e_tc_non_hermit_integral_seq_beta(j,i) - two_e_tc_non_hermit_integral_beta(j,i))
+      if(diff .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' seq         : ', two_e_tc_non_hermit_integral_seq_beta(j,i)
+        print *, ' //          : ', two_e_tc_non_hermit_integral_beta    (j,i)
+        !stop
+      endif
+
+      norm     += dabs(two_e_tc_non_hermit_integral_seq_beta(j,i))
+      diff_tot += diff
+    enddo
+  enddo
+
+  print *, ' diff tot b = ', diff_tot / norm
+  print *, '     norm b = ', norm
+  print *, ' '
+
+  ! ---
+
+  return
+
+end
+
+! ---
+
+subroutine test_tc_grad_square_ao_test()
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  print*, ' test_tc_grad_square_ao_test '
+
+  thr_ih = 1d-7
+
+  PROVIDE tc_grad_square_ao_test tc_grad_square_ao_test_ref
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+      
+
+          diff = dabs(tc_grad_square_ao_test(l,k,j,i) - tc_grad_square_ao_test_ref(l,k,j,i))
+          if(diff .gt. thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' new         : ', tc_grad_square_ao_test    (l,k,j,i)
+            print *, ' ref         : ', tc_grad_square_ao_test_ref(l,k,j,i)
+            !stop
+          endif
+
+          norm     += dabs(tc_grad_square_ao_test_ref(l,k,j,i))
+          diff_tot += diff
+        enddo
+      enddo
+    enddo
+  enddo
+
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+  return
+end
+
+! ---
+
+
diff --git a/src/tc_scf/three_e_energy_bi_ortho.irp.f b/src/tc_scf/three_e_energy_bi_ortho.irp.f
new file mode 100644
index 00000000..64212da8
--- /dev/null
+++ b/src/tc_scf/three_e_energy_bi_ortho.irp.f
@@ -0,0 +1,174 @@
+
+subroutine contrib_3e_diag_sss(i,j,k,integral)
+ implicit none
+ integer, intent(in) :: i,j,k
+ BEGIN_DOC
+ ! returns the pure same spin contribution to diagonal matrix element of 3e term
+ END_DOC
+ double precision, intent(out) :: integral
+ double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+ call  give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int )!!! < i k j | i k j >
+ call  give_integrals_3_body_bi_ort(i, k, j, j, i, k, c_3_int)      ! < i k j | j i k >
+ call  give_integrals_3_body_bi_ort(i, k, j, k, j, i, c_minus_3_int)! < i k j | k j i >
+ integral = direct_int + c_3_int + c_minus_3_int 
+ ! negative terms :: exchange contrib
+ call  give_integrals_3_body_bi_ort(i, k, j, j, k, i, exch_13_int)!!! < i k j | j k i > : E_13 
+ call  give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)!!! < i k j | i j k > : E_23
+ call  give_integrals_3_body_bi_ort(i, k, j, k, i, j, exch_12_int)!!! < i k j | k i j > : E_12
+ integral += - exch_13_int - exch_23_int  - exch_12_int 
+ integral = -integral
+end
+
+subroutine contrib_3e_diag_soo(i,j,k,integral)
+ implicit none
+ integer, intent(in) :: i,j,k
+ BEGIN_DOC
+ ! returns the pure same spin contribution to diagonal matrix element of 3e term
+ END_DOC
+ double precision, intent(out) :: integral
+ double precision :: direct_int, exch_23_int
+ call  give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int) ! < i k j | i k j >
+ call  give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)! < i k j | i j k > : E_23
+ integral = direct_int - exch_23_int 
+ integral = -integral
+end
+
+
+subroutine give_aaa_contrib_bis(integral_aaa)
+ implicit none
+ double precision, intent(out) :: integral_aaa
+ double precision :: integral
+ integer :: i,j,k
+ integral_aaa = 0.d0
+ do i = 1, elec_alpha_num
+  do j = i+1, elec_alpha_num
+   do k = j+1, elec_alpha_num
+    call contrib_3e_diag_sss(i,j,k,integral)
+    integral_aaa += integral
+   enddo
+  enddo
+ enddo
+
+end
+
+subroutine give_aaa_contrib(integral_aaa)
+ implicit none
+ double precision, intent(out) :: integral_aaa
+ double precision :: integral
+ integer :: i,j,k
+ integral_aaa = 0.d0
+ do i = 1, elec_alpha_num
+  do j = 1, elec_alpha_num
+   do k = 1, elec_alpha_num
+    call contrib_3e_diag_sss(i,j,k,integral)
+    integral_aaa += integral
+   enddo
+  enddo
+ enddo
+ integral_aaa *= 1.d0/6.d0 
+end
+
+
+subroutine give_aab_contrib(integral_aab)
+ implicit none
+ double precision, intent(out) :: integral_aab
+ double precision :: integral
+ integer :: i,j,k
+ integral_aab = 0.d0
+ do i = 1, elec_beta_num
+  do j = 1, elec_alpha_num
+   do k = 1, elec_alpha_num
+    call contrib_3e_diag_soo(i,j,k,integral)
+    integral_aab += integral
+   enddo
+  enddo
+ enddo
+ integral_aab *= 0.5d0
+end
+
+
+subroutine give_aab_contrib_bis(integral_aab)
+ implicit none
+ double precision, intent(out) :: integral_aab
+ double precision :: integral
+ integer :: i,j,k
+ integral_aab = 0.d0
+ do i = 1, elec_beta_num
+  do j = 1, elec_alpha_num
+   do k = j+1, elec_alpha_num
+    call contrib_3e_diag_soo(i,j,k,integral)
+    integral_aab += integral
+   enddo
+  enddo
+ enddo
+end
+
+
+subroutine give_abb_contrib(integral_abb)
+ implicit none
+ double precision, intent(out) :: integral_abb
+ double precision :: integral
+ integer :: i,j,k
+ integral_abb = 0.d0
+ do i = 1, elec_alpha_num
+  do j = 1, elec_beta_num
+   do k = 1, elec_beta_num
+    call contrib_3e_diag_soo(i,j,k,integral)
+    integral_abb += integral
+   enddo
+  enddo
+ enddo
+ integral_abb *= 0.5d0
+end
+
+subroutine give_abb_contrib_bis(integral_abb)
+ implicit none
+ double precision, intent(out) :: integral_abb
+ double precision :: integral
+ integer :: i,j,k
+ integral_abb = 0.d0
+ do i = 1, elec_alpha_num
+  do j = 1, elec_beta_num
+   do k = j+1, elec_beta_num
+    call contrib_3e_diag_soo(i,j,k,integral)
+    integral_abb += integral
+   enddo
+  enddo
+ enddo
+end
+
+subroutine give_bbb_contrib_bis(integral_bbb)
+ implicit none
+ double precision, intent(out) :: integral_bbb
+ double precision :: integral
+ integer :: i,j,k
+ integral_bbb = 0.d0
+ do i = 1, elec_beta_num
+  do j = i+1, elec_beta_num
+   do k = j+1, elec_beta_num
+    call contrib_3e_diag_sss(i,j,k,integral)
+    integral_bbb += integral
+   enddo
+  enddo
+ enddo
+
+end
+
+subroutine give_bbb_contrib(integral_bbb)
+ implicit none
+ double precision, intent(out) :: integral_bbb
+ double precision :: integral
+ integer :: i,j,k
+ integral_bbb = 0.d0
+ do i = 1, elec_beta_num
+  do j = 1, elec_beta_num
+   do k = 1, elec_beta_num
+    call contrib_3e_diag_sss(i,j,k,integral)
+    integral_bbb += integral
+   enddo
+  enddo
+ enddo
+ integral_bbb *= 1.d0/6.d0 
+end
+
+
diff --git a/src/tools/print_sorted_wf_coef.irp.f b/src/tools/print_sorted_wf_coef.irp.f
new file mode 100644
index 00000000..fa0f1eab
--- /dev/null
+++ b/src/tools/print_sorted_wf_coef.irp.f
@@ -0,0 +1,19 @@
+program sort_wf
+ implicit none
+ read_wf =.True.
+ call routine
+
+end
+
+subroutine routine
+ implicit none
+ integer :: i
+ character*(128) :: output
+ integer :: i_unit_output,getUnitAndOpen
+ output=trim(ezfio_filename)//'.wf_sorted'
+ i_unit_output = getUnitAndOpen(output,'w')
+ do i= 1, N_det
+  write(i_unit_output,*)i,dabs(psi_coef_sorted(i,1))
+ enddo
+
+end
diff --git a/src/tools/save_natorb.irp.f b/src/tools/save_natorb.irp.f
index f6331d13..b4e9f5dc 100644
--- a/src/tools/save_natorb.irp.f
+++ b/src/tools/save_natorb.irp.f
@@ -12,6 +12,7 @@ program save_natorb
 ! matrices of each state with the corresponding
 ! :option:`determinants state_average_weight`
   END_DOC
+  PROVIDE nucl_coord
   read_wf = .True.
   touch read_wf
   call save_natural_mos
diff --git a/src/two_body_rdm/act_2_rdm.irp.f b/src/two_body_rdm/act_2_rdm.irp.f
index 41b28aea..c550e991 100644
--- a/src/two_body_rdm/act_2_rdm.irp.f
+++ b/src/two_body_rdm/act_2_rdm.irp.f
@@ -4,21 +4,18 @@
  BEGIN_DOC
 !                             12 12
 !                 1 2 1 2 == <ij|kl>
-! act_2_rdm_ab_mo(i,j,k,l,istate) =  STATE SPECIFIC physicist notation for 2RDM of alpha/beta electrons 
+! act_2_rdm_ab_mo(i,j,k,l,istate) =  STATE SPECIFIC physicist notation for 2RDM of alpha/beta+beta/alpha electrons 
 ! 
-! <Psi_{istate}| a^{\dagger}_{i \alpha} a^{\dagger}_{j \beta} a_{l \beta} a_{k \alpha} |Psi_{istate}>
+!   <Psi_{istate}| a^{\dagger}_{i \alpha} a^{\dagger}_{j \beta} a_{l \beta} a_{k \alpha} |Psi_{istate}>
+!
+! + <Psi_{istate}| a^{\dagger}_{i \beta} a^{\dagger}_{j \alpha} a_{l \alpha} a_{k \beta} |Psi_{istate}>
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha}^{act} * N_{\beta}^{act}
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha}^{act} * N_{\beta}^{act} * 2
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! !!!!! WARNING !!!!! For efficiency reasons, electron 1 is alpha, electron 2 is beta
-!
-!  act_2_rdm_ab_mo(i,j,k,l,istate) = i:alpha, j:beta, j:alpha, l:beta
-!                      
-!                      Therefore you don't necessary have symmetry between electron 1 and 2 
  END_DOC 
  integer :: ispin
  double precision :: wall_1, wall_2
@@ -44,6 +41,7 @@
  endif
  call wall_time(wall_2)
  print*,'Wall time to provide act_2_rdm_ab_mo',wall_2 - wall_1
+ act_2_rdm_ab_mo *= 2.d0
  END_PROVIDER 
 
 
@@ -56,7 +54,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha}^{act} * (N_{\alpha}^{act} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha}^{act} * (N_{\alpha}^{act} - 1)
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
  END_DOC 
@@ -84,6 +82,7 @@
 
  call wall_time(wall_2)
  print*,'Wall time to provide act_2_rdm_aa_mo',wall_2 - wall_1
+ act_2_rdm_aa_mo *= 2.d0
  END_PROVIDER 
  
 
@@ -96,7 +95,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta}^{act} * (N_{\beta}^{act} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta}^{act} * (N_{\beta}^{act} - 1)
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
  END_DOC 
@@ -124,6 +123,7 @@
 
  call wall_time(wall_2)
  print*,'Wall time to provide act_2_rdm_bb_mo',wall_2 - wall_1
+ act_2_rdm_bb_mo *= 2.d0
  END_PROVIDER 
 
  BEGIN_PROVIDER [double precision, act_2_rdm_spin_trace_mo, (n_act_orb,n_act_orb,n_act_orb,n_act_orb,N_states)]
@@ -135,7 +135,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec}^{act} * (N_{elec}^{act} - 1)/2 
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec}^{act} * (N_{elec}^{act} - 1)
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
  END_DOC 
@@ -161,6 +161,7 @@
   call ezfio_set_two_body_rdm_io_two_body_rdm_spin_trace("Read")
  endif
 
+ act_2_rdm_spin_trace_mo *= 2.d0
  call wall_time(wall_2)
  print*,'Wall time to provide act_2_rdm_spin_trace_mo',wall_2 - wall_1
  END_PROVIDER 
diff --git a/src/two_body_rdm/example.irp.f b/src/two_body_rdm/example.irp.f
index 6cf0209e..30e2685a 100644
--- a/src/two_body_rdm/example.irp.f
+++ b/src/two_body_rdm/example.irp.f
@@ -17,6 +17,7 @@ subroutine routine_active_only
  double precision :: wee_ab_st_av, rdm_ab_st_av
  double precision :: wee_tot_st_av, rdm_tot_st_av,spin_trace
  double precision :: wee_aa_st_av_2,wee_ab_st_av_2,wee_bb_st_av_2,wee_tot_st_av_2,wee_tot_st_av_3
+ double precision :: accu_aa, accu_bb, accu_ab, accu_tot
 
  wee_ab  = 0.d0
  wee_bb  = 0.d0
@@ -64,14 +65,23 @@ subroutine routine_active_only
  do istate = 1, N_states
    !! PURE ACTIVE PART 
    !! 
+   accu_aa  = 0.d0
+   accu_bb  = 0.d0
+   accu_ab  = 0.d0
+   accu_tot = 0.d0
    do i = 1, n_act_orb
     iorb = list_act(i)
     do j = 1, n_act_orb
      jorb = list_act(j)
+     accu_bb += act_2_rdm_bb_mo(j,i,j,i,1)
+     accu_aa += act_2_rdm_aa_mo(j,i,j,i,1)
+     accu_ab += act_2_rdm_ab_mo(j,i,j,i,1)
+     accu_tot += act_2_rdm_spin_trace_mo(j,i,j,i,1)
      do k = 1, n_act_orb
       korb = list_act(k)
       do l = 1, n_act_orb
        lorb = list_act(l)
+       !                               1 2 1 2                                   2 1 2 1
        if(dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(j,i,l,k,istate)).gt.1.d-10)then
         print*,'Error in act_2_rdm_spin_trace_mo'
         print*,"dabs(act_2_rdm_spin_trace_mo(i,j,k,l) - act_2_rdm_spin_trace_mo(j,i,l,k)).gt.1.d-10"
@@ -79,6 +89,7 @@ subroutine routine_active_only
         print*,act_2_rdm_spin_trace_mo(i,j,k,l,istate),act_2_rdm_spin_trace_mo(j,i,l,k,istate),dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(j,i,l,k,istate))
        endif
 
+      !                                1 2 1 2                                   1 2 1 2 
        if(dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(k,l,i,j,istate)).gt.1.d-10)then
         print*,'Error in act_2_rdm_spin_trace_mo'
         print*,"dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(k,l,i,j,istate),istate).gt.1.d-10"
@@ -106,10 +117,10 @@ subroutine routine_active_only
        endif
  
 
-       wee_ab(istate)    += vijkl * rdmab
-       wee_aa(istate)    += vijkl * rdmaa
-       wee_bb(istate)    += vijkl * rdmbb
-       wee_tot(istate)    += vijkl * rdmtot
+       wee_ab(istate)    += 0.5d0 * vijkl * rdmab
+       wee_aa(istate)    += 0.5d0 * vijkl * rdmaa
+       wee_bb(istate)    += 0.5d0 * vijkl * rdmbb
+       wee_tot(istate)   += 0.5d0 * vijkl * rdmtot
 
       enddo
      enddo
@@ -131,6 +142,15 @@ subroutine routine_active_only
    print*,'wee_tot                 = ',wee_tot(istate)
    print*,'Full energy '
    print*,'psi_energy_two_e(istate)= ',psi_energy_two_e(istate)
+   print*,'--------------------------'
+   print*,'accu_aa       = ',accu_aa
+   print*,'N_a (N_a-1)   = ', elec_alpha_num*(elec_alpha_num-1)
+   print*,'accu_bb       = ',accu_bb
+   print*,'2 N_b (N_b-1) = ', elec_beta_num*(elec_beta_num-1)*2
+   print*,'accu_ab       = ',accu_ab
+   print*,'N_a N_b       = ', elec_beta_num*elec_alpha_num
+   print*,'accu_tot      = ',accu_tot
+   print*,'Ne(Ne-1)      = ',(elec_num-1)*elec_num 
   enddo
  wee_aa_st_av  = 0.d0
  wee_bb_st_av  = 0.d0
@@ -172,10 +192,10 @@ subroutine routine_active_only
       print*,spin_trace,rdm_tot_st_av,dabs(spin_trace - rdm_tot_st_av)
      endif
 
-     wee_aa_st_av += vijkl * rdm_aa_st_av
-     wee_bb_st_av += vijkl * rdm_bb_st_av
-     wee_ab_st_av += vijkl * rdm_ab_st_av
-     wee_tot_st_av += vijkl * rdm_tot_st_av
+     wee_aa_st_av  += 0.5d0 * vijkl * rdm_aa_st_av
+     wee_bb_st_av  += 0.5d0 * vijkl * rdm_bb_st_av
+     wee_ab_st_av  += 0.5d0 * vijkl * rdm_ab_st_av
+     wee_tot_st_av += 0.5d0 * vijkl * rdm_tot_st_av
     enddo
    enddo
   enddo
@@ -259,10 +279,10 @@ subroutine routine_full_mos
        rdmbb  =  full_occ_2_rdm_bb_mo(l,k,j,i,istate)
        rdmtot =  full_occ_2_rdm_spin_trace_mo(l,k,j,i,istate)
 
-       wee_ab(istate) += vijkl * rdmab
-       wee_aa(istate) += vijkl * rdmaa
-       wee_bb(istate) += vijkl * rdmbb
-       wee_tot(istate)+= vijkl * rdmtot
+       wee_ab(istate) += 0.5d0 * vijkl * rdmab
+       wee_aa(istate) += 0.5d0 * vijkl * rdmaa
+       wee_bb(istate) += 0.5d0 * vijkl * rdmbb
+       wee_tot(istate)+= 0.5d0 * vijkl * rdmtot
       enddo
      enddo
      aa_norm(istate) += full_occ_2_rdm_aa_mo(j,i,j,i,istate)
@@ -290,18 +310,19 @@ subroutine routine_full_mos
    print*,'Normalization of two-rdms '
    print*,''
    print*,'aa_norm(istate)         = ',aa_norm(istate)
-   print*,'N_alpha(N_alpha-1)/2    = ',elec_num_tab(1) * (elec_num_tab(1) - 1)/2
+   print*,'N_alpha(N_alpha-1)      = ',elec_num_tab(1) * (elec_num_tab(1) - 1)
    print*,''
    print*,'bb_norm(istate)         = ',bb_norm(istate)
-   print*,'N_alpha(N_alpha-1)/2    = ',elec_num_tab(2) * (elec_num_tab(2) - 1)/2
+   print*,'N_alpha(N_alpha-1)      = ',elec_num_tab(2) * (elec_num_tab(2) - 1)
    print*,''
    print*,'ab_norm(istate)         = ',ab_norm(istate)
-   print*,'N_alpha * N_beta        = ',elec_num_tab(1) * elec_num_tab(2)
+   print*,'N_alpha * N_beta *2     = ',elec_num_tab(1) * elec_num_tab(2) * 2
    print*,''
    print*,'tot_norm(istate)        = ',tot_norm(istate)
-   print*,'N(N-1)/2                = ',elec_num*(elec_num - 1)/2
+   print*,'N(N-1)                  = ',elec_num*(elec_num - 1)
   enddo
 
+! return 
  wee_aa_st_av  = 0.d0
  wee_bb_st_av  = 0.d0
  wee_ab_st_av  = 0.d0
@@ -321,10 +342,10 @@ subroutine routine_full_mos
      rdm_ab_st_av  = state_av_full_occ_2_rdm_ab_mo(l,k,j,i)
      rdm_tot_st_av = state_av_full_occ_2_rdm_spin_trace_mo(l,k,j,i)
 
-     wee_aa_st_av  += vijkl * rdm_aa_st_av
-     wee_bb_st_av  += vijkl * rdm_bb_st_av
-     wee_ab_st_av  += vijkl * rdm_ab_st_av
-     wee_tot_st_av += vijkl * rdm_tot_st_av
+     wee_aa_st_av  += 0.5d0 * vijkl * rdm_aa_st_av
+     wee_bb_st_av  += 0.5d0 * vijkl * rdm_bb_st_av
+     wee_ab_st_av  += 0.5d0 * vijkl * rdm_ab_st_av
+     wee_tot_st_av += 0.5d0 * vijkl * rdm_tot_st_av
     enddo
    enddo
   enddo
@@ -334,17 +355,12 @@ subroutine routine_full_mos
  print*,''
  print*,'STATE AVERAGE ENERGY '
    print*,'wee_aa_st_av            = ',wee_aa_st_av
-   print*,'wee_aa_st_av_2          = ',wee_aa_st_av_2
    print*,'wee_bb_st_av            = ',wee_bb_st_av
-   print*,'wee_bb_st_av_2          = ',wee_bb_st_av_2
    print*,'wee_ab_st_av            = ',wee_ab_st_av
-   print*,'wee_ab_st_av_2          = ',wee_ab_st_av_2
    print*,'Sum of components       = ',wee_aa_st_av   + wee_bb_st_av   + wee_ab_st_av
-   print*,'Sum of components_2     = ',wee_aa_st_av_2 + wee_bb_st_av_2 + wee_ab_st_av_2
    print*,''
    print*,'Full energy '
    print*,'wee_tot_st_av           = ',wee_tot_st_av
-   print*,'wee_tot_st_av_2         = ',wee_tot_st_av_2
    print*,'wee_tot_st_av_3         = ',wee_tot_st_av_3
 
 end
diff --git a/src/two_body_rdm/full_orb_2_rdm.irp.f b/src/two_body_rdm/full_orb_2_rdm.irp.f
index fba88172..78a28acf 100644
--- a/src/two_body_rdm/full_orb_2_rdm.irp.f
+++ b/src/two_body_rdm/full_orb_2_rdm.irp.f
@@ -4,22 +4,18 @@
  full_occ_2_rdm_ab_mo = 0.d0
  integer :: i,j,k,l,iorb,jorb,korb,lorb,istate
  BEGIN_DOC
-! full_occ_2_rdm_ab_mo(i,j,k,l,istate) =  STATE SPECIFIC physicist notation for 2RDM of alpha/beta electrons 
+! full_occ_2_rdm_ab_mo(i,j,k,l,istate) =  STATE SPECIFIC physicist notation for 2RDM of alpha/beta + beta/alpha electrons 
 !
-! <Psi| a^{\dagger}_{i \alpha} a^{\dagger}_{j \beta} a_{l \beta} a_{k \alpha} |Psi>
+!   <Psi| a^{\dagger}_{i \alpha} a^{\dagger}_{j \beta} a_{l \beta} a_{k \alpha} |Psi>
+!
+! + <Psi| a^{\dagger}_{i \beta} a^{\dagger}_{j \alpha} a_{l \alpha} a_{k \beta} |Psi>
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * N_{\beta}
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * N_{\beta} * 2
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! !!!!! WARNING !!!!! For efficiency reasons, electron 1 is ALPHA, electron 2 is BETA 
-!
-!  full_occ_2_rdm_ab_mo(i,j,k,l,istate) = i:alpha, j:beta, j:alpha, l:beta
-!                      
-!                      Therefore you don't necessary have symmetry between electron 1 and 2 
-!
 !  !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO ARE SET TO ZERO 
  END_DOC 
  full_occ_2_rdm_ab_mo = 0.d0
@@ -50,7 +46,7 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                     alph beta alph beta
-      full_occ_2_rdm_ab_mo(korb,jorb,korb,iorb,istate) = one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_ab_mo(korb,jorb,korb,iorb,istate) = 2.d0 * one_e_dm_mo_beta(jorb,iorb,istate)
      enddo
     enddo
    enddo
@@ -64,7 +60,7 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                     alph beta alph beta
-      full_occ_2_rdm_ab_mo(jorb,korb,iorb,korb,istate) = one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_ab_mo(jorb,korb,iorb,korb,istate) = 2.d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
      enddo
     enddo
    enddo
@@ -76,7 +72,7 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                     alph beta alph beta
-      full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb,istate) = 1.D0
+      full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb,istate) = 2.D0
      enddo
     enddo
 
@@ -93,7 +89,7 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                     alph beta alph beta
-       full_occ_2_rdm_ab_mo(korb,jorb,korb,iorb,istate) = one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_ab_mo(korb,jorb,korb,iorb,istate) = 2.d0 * one_e_dm_mo_beta(jorb,iorb,istate)
       enddo
      enddo
     enddo
@@ -107,7 +103,7 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                     alph beta alph beta
-       full_occ_2_rdm_ab_mo(jorb,korb,iorb,korb,istate) = one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_ab_mo(jorb,korb,iorb,korb,istate) = 2.d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
       enddo
      enddo
     enddo
@@ -119,7 +115,7 @@
      do k = 1, n_core_orb
       korb = list_core(k)
       !                                     alph beta alph beta
-      full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb,istate) = 1.D0
+      full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb,istate) = 2.D0
      enddo
     enddo
    endif
@@ -139,7 +135,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * (N_{\alpha} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * (N_{\alpha} - 1)
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
@@ -172,11 +168,11 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                       1     2   1    2    : DIRECT TERM 
-      full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb,istate) +=  0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
-      full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb,istate) +=  0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb,istate) +=  1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb,istate) +=  1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
       !                                       1     2   1    2    : EXCHANGE TERM 
-      full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb,istate) += -0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
-      full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb,istate) += -0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb,istate) += -1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb,istate) += -1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
      enddo
     enddo
    enddo
@@ -186,8 +182,8 @@
     jorb = list_inact(j)
     do k = 1, n_inact_orb
      korb = list_inact(k)
-     full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb,istate) +=  0.5d0 
-     full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb,istate) -=  0.5d0 
+     full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb,istate) +=  1.0d0 
+     full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb,istate) -=  1.0d0 
     enddo
    enddo
 
@@ -203,11 +199,11 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                       1     2   1    2    : DIRECT TERM 
-       full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb,istate) +=  0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
-       full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb,istate) +=  0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb,istate) +=  1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb,istate) +=  1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
        !                                       1     2   1    2    : EXCHANGE TERM 
-       full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb,istate) += -0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
-       full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb,istate) += -0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb,istate) += -1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb,istate) += -1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
       enddo
      enddo
     enddo
@@ -217,8 +213,8 @@
      jorb = list_core(j)
      do k = 1, n_core_orb
       korb = list_core(k)
-      full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb,istate) +=  0.5d0 
-      full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb,istate) -=  0.5d0 
+      full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb,istate) +=  1.0d0 
+      full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb,istate) -=  1.0d0 
      enddo
     enddo
    endif
@@ -237,7 +233,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta} * (N_{\beta} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta} * (N_{\beta} - 1)
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
@@ -270,11 +266,11 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                       1     2   1    2    : DIRECT TERM 
-      full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb,istate) +=  0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
-      full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb,istate) +=  0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb,istate) +=  1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb,istate) +=  1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
       !                                       1     2   1    2    : EXCHANGE TERM 
-      full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb,istate) += -0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
-      full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb,istate) += -0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb,istate) += -1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb,istate) += -1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
      enddo
     enddo
    enddo
@@ -284,8 +280,8 @@
     jorb = list_inact(j)
     do k = 1, n_inact_orb
      korb = list_inact(k)
-     full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb,istate) +=  0.5d0 
-     full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb,istate) -=  0.5d0 
+     full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb,istate) +=  1.0d0 
+     full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb,istate) -=  1.0d0 
     enddo
    enddo
 
@@ -301,11 +297,11 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                       1     2   1    2    : DIRECT TERM 
-       full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb,istate) +=  0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
-       full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb,istate) +=  0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb,istate) +=  1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb,istate) +=  1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
        !                                       1     2   1    2    : EXCHANGE TERM 
-       full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb,istate) += -0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
-       full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb,istate) += -0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb,istate) += -1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb,istate) += -1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
       enddo
      enddo
     enddo
@@ -315,8 +311,8 @@
      jorb = list_core(j)
      do k = 1, n_core_orb
       korb = list_core(k)
-      full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb,istate) +=  0.5d0 
-      full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb,istate) -=  0.5d0 
+      full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb,istate) +=  1.0d0 
+      full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb,istate) -=  1.0d0 
      enddo
     enddo
    endif
@@ -335,14 +331,14 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1)
 !
 !  !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
 !  !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero 
 ! The two-electron energy of each state can be computed as:
 !
-!   \sum_{i,j,k,l = 1, n_core_inact_act_orb} full_occ_2_rdm_spin_trace_mo(i,j,k,l,istate) * < ii jj | kk ll > 
+!   \sum_{i,j,k,l = 1, n_core_inact_act_orb} full_occ_2_rdm_spin_trace_mo(i,j,k,l,istate) * 1/2 * < ii jj | kk ll > 
 !
 !   with ii = list_core_inact_act(i), jj = list_core_inact_act(j), kk = list_core_inact_act(k), ll = list_core_inact_act(l)
  END_DOC 
@@ -377,11 +373,11 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                       1     2   1    2    : DIRECT TERM 
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) +=  0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
-      full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) +=  0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) +=  1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) +=  1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
       !                                       1     2   1    2    : EXCHANGE TERM 
-      full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb,istate) += -0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb,istate) += -0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb,istate) += -1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb,istate) += -1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
      enddo
     enddo
    enddo
@@ -390,8 +386,8 @@
     jorb = list_inact(j)
     do k = 1, n_inact_orb
      korb = list_inact(k)
-     full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) +=  0.5d0 
-     full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb,istate) -=  0.5d0 
+     full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) +=  1.0d0 
+     full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb,istate) -=  1.0d0 
     enddo
    enddo
    if (.not.no_core_density)then
@@ -403,11 +399,11 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                       1     2   1    2    : DIRECT TERM 
-       full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) +=  0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
-       full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) +=  0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) +=  1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) +=  1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
        !                                       1     2   1    2    : EXCHANGE TERM 
-       full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb,istate) += -0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
-       full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb,istate) += -0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb,istate) += -1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb,istate) += -1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
       enddo
      enddo
     enddo
@@ -416,8 +412,8 @@
      jorb = list_core(j)
      do k = 1, n_core_orb
       korb = list_core(k)
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) +=  0.5d0 
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb,istate) -=  0.5d0 
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) +=  1.0d0 
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb,istate) -=  1.0d0 
      enddo
     enddo
    endif
@@ -433,11 +429,11 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                       1     2   1    2    : DIRECT TERM 
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) +=  0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
-      full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) +=  0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) +=  1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) +=  1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
       !                                       1     2   1    2    : EXCHANGE TERM 
-      full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb,istate) += -0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb,istate) += -0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb,istate) += -1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb,istate) += -1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
      enddo
     enddo
    enddo
@@ -446,8 +442,8 @@
     jorb = list_inact(j)
     do k = 1, n_inact_orb
      korb = list_inact(k)
-     full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) +=  0.5d0 
-     full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb,istate) -=  0.5d0 
+     full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) +=  1.0d0 
+     full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb,istate) -=  1.0d0 
     enddo
    enddo
    if (.not.no_core_density)then
@@ -459,11 +455,11 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                       1     2   1    2    : DIRECT TERM 
-       full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) +=  0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
-       full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) +=  0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) +=  1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) +=  1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
        !                                       1     2   1    2    : EXCHANGE TERM 
-       full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb,istate) += -0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
-       full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb,istate) += -0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb,istate) += -1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb,istate) += -1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
       enddo
      enddo
     enddo
@@ -472,8 +468,8 @@
      jorb = list_core(j)
      do k = 1, n_core_orb
       korb = list_core(k)
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) +=  0.5d0 
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb,istate) -=  0.5d0 
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) +=  1.0d0 
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb,istate) -=  1.0d0 
      enddo
     enddo
    endif
@@ -489,14 +485,14 @@
       korb = list_inact(k)
       ! ALPHA INACTIVE - BETA ACTIVE
       !                                     alph beta alph beta
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) += 0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) += 1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
       !                                     beta alph beta alph
-      full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) += 0.5d0 * one_e_dm_mo_beta(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) += 1.0d0 * one_e_dm_mo_beta(jorb,iorb,istate)
       ! BETA INACTIVE - ALPHA ACTIVE
       !                                     beta alph beta alpha 
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) += 0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) += 1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
       !                                     alph beta alph beta 
-      full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) += 0.5d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+      full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) += 1.0d0 * one_e_dm_mo_alpha(jorb,iorb,istate)
      enddo
     enddo
    enddo
@@ -506,8 +502,8 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                     alph beta alph beta
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) += 0.5D0
-      full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb,istate) += 0.5D0
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) += 1.0D0
+      full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb,istate) += 1.0D0
      enddo
     enddo
 
@@ -523,14 +519,14 @@
        korb = list_core(k)
        !! BETA ACTIVE - ALPHA CORE 
        !                                     alph beta alph beta
-       full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) += 0.5D0 * one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) += 1.0D0 * one_e_dm_mo_beta(jorb,iorb,istate)
        !                                     beta alph beta alph 
-       full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) += 0.5D0 * one_e_dm_mo_beta(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) += 1.0D0 * one_e_dm_mo_beta(jorb,iorb,istate)
        !! ALPHA ACTIVE - BETA CORE 
        !                                     alph beta alph beta
-       full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) += 0.5D0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb,istate) += 1.0D0 * one_e_dm_mo_alpha(jorb,iorb,istate)
        !                                     beta alph beta alph 
-       full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) += 0.5D0 * one_e_dm_mo_alpha(jorb,iorb,istate)
+       full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb,istate) += 1.0D0 * one_e_dm_mo_alpha(jorb,iorb,istate)
       enddo
      enddo
     enddo
@@ -540,8 +536,8 @@
      do k = 1, n_core_orb
       korb = list_core(k)
       !                                     alph beta alph beta
-      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) += 0.5D0
-      full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb,istate) += 0.5D0
+      full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb,istate) += 1.0D0
+      full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb,istate) += 1.0D0
      enddo
     enddo
 
diff --git a/src/two_body_rdm/state_av_act_2rdm.irp.f b/src/two_body_rdm/state_av_act_2rdm.irp.f
index db793047..cd417a9d 100644
--- a/src/two_body_rdm/state_av_act_2rdm.irp.f
+++ b/src/two_body_rdm/state_av_act_2rdm.irp.f
@@ -2,21 +2,16 @@
  implicit none
  double precision, allocatable :: state_weights(:) 
  BEGIN_DOC
-! state_av_act_2_rdm_ab_mo(i,j,k,l) = state average physicist two-body rdm restricted to the ACTIVE indices for alpha-beta electron pairs
+! state_av_act_2_rdm_ab_mo(i,j,k,l) = state average physicist two-body rdm restricted to the ACTIVE indices for alpha/beta+beta/alpha electron pairs
 !
 !                                     = \sum_{istate} w(istate) * <Psi_{istate}| a^{\dagger}_{i,alpha} a^{\dagger}_{j,beta} a_{l,beta} a_{k,alpha} |Psi_{istate}>
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha}^{act} * N_{\beta}^{act}
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha}^{act} * N_{\beta}^{act} * 2
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! !!!!! WARNING !!!!! For efficiency reasons, electron 1 is alpha, electron 2 is beta
-!
-!  state_av_act_2_rdm_ab_mo(i,j,k,l) = i:alpha, j:beta, j:alpha, l:beta
-!                      
-!                      Therefore you don't necessary have symmetry between electron 1 and 2 
  END_DOC 
  allocate(state_weights(N_states))
  state_weights = state_average_weight
@@ -34,6 +29,8 @@
  call orb_range_2_rdm_state_av_openmp(state_av_act_2_rdm_ab_mo,n_act_orb,n_act_orb,list_act,state_weights,ispin,psi_coef,size(psi_coef,2),size(psi_coef,1))
  call wall_time(wall_2)
  print*,'Wall time to provide state_av_act_2_rdm_ab_mo',wall_2 - wall_1
+ ! factor 2 to have the correct normalization factor 
+ state_av_act_2_rdm_ab_mo *= 2.d0
 
  END_PROVIDER 
 
@@ -48,7 +45,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha}^{act} * (N_{\alpha}^{act} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha}^{act} * (N_{\alpha}^{act} - 1)
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
  END_DOC 
@@ -63,6 +60,8 @@
  call orb_range_2_rdm_state_av_openmp(state_av_act_2_rdm_aa_mo,n_act_orb,n_act_orb,list_act,state_weights,ispin,psi_coef,size(psi_coef,2),size(psi_coef,1))
  call wall_time(wall_2)
  print*,'Wall time to provide state_av_act_2_rdm_aa_mo',wall_2 - wall_1
+ ! factor 2 to have the correct normalization factor 
+ state_av_act_2_rdm_aa_mo *= 2.d0
 
  END_PROVIDER 
 
@@ -76,7 +75,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta}^{act} * (N_{\beta}^{act} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta}^{act} * (N_{\beta}^{act} - 1)
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
  END_DOC 
@@ -91,6 +90,8 @@
  call orb_range_2_rdm_state_av_openmp(state_av_act_2_rdm_bb_mo,n_act_orb,n_act_orb,list_act,state_weights,ispin,psi_coef,size(psi_coef,2),size(psi_coef,1))
  call wall_time(wall_2)
  print*,'Wall time to provide state_av_act_2_rdm_bb_mo',wall_2 - wall_1
+ ! factor 2 to have the correct normalization factor 
+ state_av_act_2_rdm_bb_mo *= 2.d0
 
  END_PROVIDER 
 
@@ -104,7 +105,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1)
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
  END_DOC
diff --git a/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f b/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f
index b3a5fe65..2e44665d 100644
--- a/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f
+++ b/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f
@@ -4,22 +4,16 @@
  state_av_full_occ_2_rdm_ab_mo = 0.d0
  integer :: i,j,k,l,iorb,jorb,korb,lorb
  BEGIN_DOC
-! state_av_full_occ_2_rdm_ab_mo(i,j,k,l) =  STATE AVERAGE physicist notation for 2RDM of alpha/beta electrons 
+! state_av_full_occ_2_rdm_ab_mo(i,j,k,l) =  STATE AVERAGE physicist notation for 2RDM of alpha/beta + beta/alpha electrons 
 !
 !                                     = \sum_{istate} w(istate) * <Psi_{istate}| a^{\dagger}_{i,alpha} a^{\dagger}_{j,beta} a_{l,beta} a_{k,alpha} |Psi_{istate}>
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * N_{\beta}
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * N_{\beta} * 2
 !
 !  !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
 ! 
-!  !!!!! WARNING !!!!! For efficiency reasons, electron 1 is ALPHA, electron 2 is BETA 
-!
-!  state_av_full_occ_2_rdm_ab_mo(i,j,k,l) = i:alpha, j:beta, j:alpha, l:beta
-!                      
-!                      Therefore you don't necessary have symmetry between electron 1 and 2 
-!
 !  !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero 
  END_DOC 
  state_av_full_occ_2_rdm_ab_mo = 0.d0
@@ -47,7 +41,7 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                     alph beta alph beta
-      state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,iorb) = one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,iorb) = 2.d0 * one_e_dm_mo_beta_average(jorb,iorb)
      enddo
     enddo
    enddo
@@ -61,7 +55,7 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                     alph beta alph beta
-      state_av_full_occ_2_rdm_ab_mo(jorb,korb,iorb,korb) = one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_ab_mo(jorb,korb,iorb,korb) = 2.d0 * one_e_dm_mo_alpha_average(jorb,iorb)
      enddo
     enddo
    enddo
@@ -73,7 +67,7 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                     alph beta alph beta
-      state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 1.D0
+      state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 2.D0
      enddo
     enddo
 
@@ -90,7 +84,7 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                     alph beta alph beta
-       state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,iorb) = one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,iorb) = 2.d0 * one_e_dm_mo_beta_average(jorb,iorb)
       enddo
      enddo
     enddo
@@ -104,7 +98,7 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                     alph beta alph beta
-       state_av_full_occ_2_rdm_ab_mo(jorb,korb,iorb,korb) = one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_ab_mo(jorb,korb,iorb,korb) = 2.d0 * one_e_dm_mo_alpha_average(jorb,iorb)
       enddo
      enddo
     enddo
@@ -116,7 +110,7 @@
      do k = 1, n_core_orb
       korb = list_core(k)
       !                                     alph beta alph beta
-      state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 1.D0
+      state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 2.D0
      enddo
     enddo
    endif
@@ -135,7 +129,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * (N_{\alpha} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * (N_{\alpha} - 1)
 !
 !  !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
@@ -167,11 +161,11 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                       1     2   1    2    : DIRECT TERM 
-      state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) +=  0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
-      state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) +=  0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) +=  1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) +=  1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
       !                                       1     2   1    2    : EXCHANGE TERM 
-      state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
-      state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
      enddo
     enddo
    enddo
@@ -181,8 +175,8 @@
     jorb = list_inact(j)
     do k = 1, n_inact_orb
      korb = list_inact(k)
-     state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) +=  0.5d0 
-     state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -=  0.5d0 
+     state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) +=  1.0d0 
+     state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -=  1.0d0 
     enddo
    enddo
 
@@ -198,11 +192,11 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                       1     2   1    2    : DIRECT TERM 
-       state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) +=  0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
-       state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) +=  0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) +=  1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) +=  1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
        !                                       1     2   1    2    : EXCHANGE TERM 
-       state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
-       state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
       enddo
      enddo
     enddo
@@ -212,8 +206,8 @@
      jorb = list_core(j)
      do k = 1, n_core_orb
       korb = list_core(k)
-      state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) +=  0.5d0 
-      state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -=  0.5d0 
+      state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) +=  1.0d0 
+      state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -=  1.0d0 
      enddo
     enddo
    endif
@@ -231,7 +225,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta} * (N_{\beta} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta} * (N_{\beta} - 1)
 !
 ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
@@ -263,11 +257,11 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                       1     2   1    2    : DIRECT TERM 
-      state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) +=  0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
-      state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) +=  0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) +=  1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) +=  1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
       !                                       1     2   1    2    : EXCHANGE TERM 
-      state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
-      state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
      enddo
     enddo
    enddo
@@ -277,8 +271,8 @@
     jorb = list_inact(j)
     do k = 1, n_inact_orb
      korb = list_inact(k)
-     state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) +=  0.5d0 
-     state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -=  0.5d0 
+     state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) +=  1.0d0 
+     state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -=  1.0d0 
     enddo
    enddo
 
@@ -294,11 +288,11 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                       1     2   1    2    : DIRECT TERM 
-       state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) +=  0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
-       state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) +=  0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) +=  1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) +=  1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
        !                                       1     2   1    2    : EXCHANGE TERM 
-       state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
-       state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
       enddo
      enddo
     enddo
@@ -308,8 +302,8 @@
      jorb = list_core(j)
      do k = 1, n_core_orb
       korb = list_core(k)
-      state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) +=  0.5d0 
-      state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -=  0.5d0 
+      state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) +=  1.0d0 
+      state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -=  1.0d0 
      enddo
     enddo
    endif
@@ -328,7 +322,7 @@
 !
 ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active
 !
-! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1)/2
+! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1)
 !
 !  !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" 
 !
@@ -364,11 +358,11 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                       1     2   1    2    : DIRECT TERM 
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) +=  0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
-      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) +=  0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) +=  1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) +=  1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
       !                                       1     2   1    2    : EXCHANGE TERM 
-      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
      enddo
     enddo
    enddo
@@ -377,8 +371,8 @@
     jorb = list_inact(j)
     do k = 1, n_inact_orb
      korb = list_inact(k)
-     state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) +=  0.5d0 
-     state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -=  0.5d0 
+     state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) +=  1.0d0 
+     state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -=  1.0d0 
     enddo
    enddo
    if (.not.no_core_density)then
@@ -390,11 +384,11 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                       1     2   1    2    : DIRECT TERM 
-       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) +=  0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
-       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) +=  0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) +=  1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) +=  1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
        !                                       1     2   1    2    : EXCHANGE TERM 
-       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
-       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
       enddo
      enddo
     enddo
@@ -403,8 +397,8 @@
      jorb = list_core(j)
      do k = 1, n_core_orb
       korb = list_core(k)
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) +=  0.5d0 
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -=  0.5d0 
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) +=  1.0d0 
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -=  1.0d0 
      enddo
     enddo
    endif
@@ -420,11 +414,11 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                       1     2   1    2    : DIRECT TERM 
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) +=  0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
-      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) +=  0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) +=  1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) +=  1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
       !                                       1     2   1    2    : EXCHANGE TERM 
-      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
      enddo
     enddo
    enddo
@@ -433,8 +427,8 @@
     jorb = list_inact(j)
     do k = 1, n_inact_orb
      korb = list_inact(k)
-     state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) +=  0.5d0 
-     state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -=  0.5d0 
+     state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) +=  1.0d0 
+     state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -=  1.0d0 
     enddo
    enddo
    if (.not.no_core_density)then
@@ -446,11 +440,11 @@
       do k = 1, n_core_orb
        korb = list_core(k)
        !                                       1     2   1    2    : DIRECT TERM 
-       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) +=  0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
-       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) +=  0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) +=  1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) +=  1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
        !                                       1     2   1    2    : EXCHANGE TERM 
-       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
-       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
       enddo
      enddo
     enddo
@@ -459,8 +453,8 @@
      jorb = list_core(j)
      do k = 1, n_core_orb
       korb = list_core(k)
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) +=  0.5d0 
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -=  0.5d0 
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) +=  1.0d0 
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -=  1.0d0 
      enddo
     enddo
    endif
@@ -476,14 +470,14 @@
       korb = list_inact(k)
       ! ALPHA INACTIVE - BETA ACTIVE
       !                                     alph beta alph beta
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
       !                                     beta alph beta alph
-      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 0.5d0 * one_e_dm_mo_beta_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb)
       ! BETA INACTIVE - ALPHA ACTIVE
       !                                     beta alph beta alpha 
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
       !                                     alph beta alph beta 
-      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 0.5d0 * one_e_dm_mo_alpha_average(jorb,iorb)
+      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb)
      enddo
     enddo
    enddo
@@ -493,8 +487,8 @@
      do k = 1, n_inact_orb
       korb = list_inact(k)
       !                                     alph beta alph beta
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 0.5D0
-      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 0.5D0
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0
+      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 1.0d0
      enddo
     enddo
 
@@ -510,14 +504,14 @@
        korb = list_core(k)
        !! BETA ACTIVE - ALPHA CORE 
        !                                     alph beta alph beta
-       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 0.5D0 * one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0D0 * one_e_dm_mo_beta_average(jorb,iorb)
        !                                     beta alph beta alph 
-       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 0.5D0 * one_e_dm_mo_beta_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0D0 * one_e_dm_mo_beta_average(jorb,iorb)
        !! ALPHA ACTIVE - BETA CORE 
        !                                     alph beta alph beta
-       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 0.5D0 * one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0D0 * one_e_dm_mo_alpha_average(jorb,iorb)
        !                                     beta alph beta alph 
-       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 0.5D0 * one_e_dm_mo_alpha_average(jorb,iorb)
+       state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0D0 * one_e_dm_mo_alpha_average(jorb,iorb)
       enddo
      enddo
     enddo
@@ -527,8 +521,8 @@
      do k = 1, n_core_orb
       korb = list_core(k)
       !                                     alph beta alph beta
-      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 0.5D0
-      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 0.5D0
+      state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0D0
+      state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 1.0D0
      enddo
     enddo
 
diff --git a/src/two_body_rdm/test_2_rdm.irp.f b/src/two_body_rdm/test_2_rdm.irp.f
index 123261d8..4eb8f9f0 100644
--- a/src/two_body_rdm/test_2_rdm.irp.f
+++ b/src/two_body_rdm/test_2_rdm.irp.f
@@ -2,7 +2,7 @@ program test_2_rdm
  implicit none
  read_wf = .True.
  touch read_wf 
- call routine_active_only
+! call routine_active_only
  call routine_full_mos
 end
 
diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f
index a4dea15f..7e35fc7b 100644
--- a/src/two_body_rdm/two_e_dm_mo.irp.f
+++ b/src/two_body_rdm/two_e_dm_mo.irp.f
@@ -29,7 +29,8 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)]
      enddo
     enddo
    enddo
-   two_e_dm_mo(:,:,:,:) = two_e_dm_mo(:,:,:,:) * 2.d0
+   two_e_dm_mo(:,:,:,:) = two_e_dm_mo(:,:,:,:) 
+!   * 2.d0
 
  END_PROVIDER
 
diff --git a/src/two_rdm_routines/davidson_like_2rdm.irp.f b/src/two_rdm_routines/davidson_like_2rdm.irp.f
index 2e5aa4d1..ad7a3b21 100644
--- a/src/two_rdm_routines/davidson_like_2rdm.irp.f
+++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f
@@ -4,8 +4,8 @@ subroutine orb_range_2_rdm_openmp(big_array,dim1,norb,list_orb,ispin,u_0,N_st,sz
    BEGIN_DOC
    ! if ispin == 1 :: alpha/alpha 2rdm 
    !          == 2 :: beta /beta  2rdm 
-   !          == 3 :: alpha/beta  2rdm 
-   !          == 4 :: spin traced 2rdm :: aa + bb + 0.5 (ab + ba))
+   !          == 3 :: alpha/beta + beta/alpha  2rdm 
+   !          == 4 :: spin traced 2rdm :: aa + bb + ab + ba
    !
    ! Assumes that the determinants are in psi_det
    !
diff --git a/src/utils/block_diag_degen.irp.f b/src/utils/block_diag_degen.irp.f
new file mode 100644
index 00000000..188bfa58
--- /dev/null
+++ b/src/utils/block_diag_degen.irp.f
@@ -0,0 +1,218 @@
+
+subroutine diag_mat_per_fock_degen(fock_diag, mat_ref, n, thr_d, thr_nd, thr_deg, leigvec, reigvec, eigval)
+
+
+  BEGIN_DOC
+  !
+  ! subroutine that diagonalizes a matrix mat_ref BY BLOCK
+  !
+  ! the blocks are defined by the elements having the SAME DEGENERACIES in the entries "fock_diag"
+  !
+  ! examples : all elements having degeneracy 1 in fock_diag (i.e. not being degenerated) will be treated together
+  !
+  !          : all elements having degeneracy 2 in fock_diag (i.e. two elements are equal) will be treated together
+  !
+  !          : all elements having degeneracy 3 in fock_diag (i.e. two elements are equal) will be treated together
+  !
+  ! etc... the advantage is to guarentee no spurious mixing because of numerical problems. 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: fock_diag(n), mat_ref(n,n), thr_d, thr_nd, thr_deg
+  double precision, intent(out) :: leigvec(n,n), reigvec(n,n), eigval(n)
+
+  integer                       :: n_degen_list, n_degen,size_mat, i, j, k, icount, m, index_degen
+  integer                       :: ii, jj, i_good, j_good, n_real
+  integer                       :: icount_eigval
+  logical,          allocatable :: is_ok(:)
+  integer,          allocatable :: list_degen(:,:), list_same_degen(:)
+  integer,          allocatable :: iorder(:), list_degen_sorted(:)
+  double precision, allocatable :: leigvec_unsrtd(:,:), reigvec_unsrtd(:,:), eigval_unsrtd(:)
+  double precision, allocatable :: mat_tmp(:,:), eigval_tmp(:), leigvec_tmp(:,:), reigvec_tmp(:,:)
+
+  allocate(leigvec_unsrtd(n,n), reigvec_unsrtd(n,n), eigval_unsrtd(n))
+  leigvec_unsrtd = 0.d0
+  reigvec_unsrtd = 0.d0
+  eigval_unsrtd  = 0.d0
+
+  ! obtain degeneracies 
+  allocate(list_degen(n,0:n))
+  call give_degen_full_list(fock_diag, n, thr_deg, list_degen, n_degen_list)
+
+  allocate(iorder(n_degen_list), list_degen_sorted(n_degen_list))
+  do i = 1, n_degen_list
+    n_degen = list_degen(i,0) 
+    list_degen_sorted(i) = n_degen
+    iorder(i) = i
+  enddo
+
+  ! sort by number of degeneracies 
+  call isort(list_degen_sorted, iorder, n_degen_list)
+
+  allocate(is_ok(n_degen_list))
+  is_ok = .True.
+  icount_eigval = 0
+
+  ! loop over degeneracies 
+  do i = 1, n_degen_list
+    if(.not.is_ok(i)) cycle
+
+    is_ok(i) = .False.
+    n_degen  = list_degen_sorted(i)
+
+    print *, ' diagonalizing for n_degen = ', n_degen
+
+    k = 1
+
+   ! group all the entries having the same degeneracies 
+!!  do while (list_degen_sorted(i+k)==n_degen)
+    do m = i+1, n_degen_list
+      if(list_degen_sorted(m)==n_degen) then
+        is_ok(i+k) = .False.
+        k += 1
+      endif
+    enddo
+
+    print *, ' number of identical degeneracies = ', k
+    size_mat = k*n_degen  
+    print *, ' size_mat = ', size_mat
+    allocate(mat_tmp(size_mat,size_mat), list_same_degen(size_mat))
+    allocate(eigval_tmp(size_mat), leigvec_tmp(size_mat,size_mat), reigvec_tmp(size_mat,size_mat))
+    ! group all the elements sharing the same degeneracy
+    icount = 0
+    do j = 1, k ! jth set of degeneracy
+      index_degen = iorder(i+j-1)
+      do m = 1, n_degen
+        icount += 1
+        list_same_degen(icount) = list_degen(index_degen,m)
+      enddo
+    enddo
+
+    print *, ' list of elements '
+    do icount = 1, size_mat
+      print *, icount, list_same_degen(icount)
+    enddo
+
+    ! you copy subset of matrix elements having all the same degeneracy in mat_tmp
+    do ii = 1, size_mat
+      i_good = list_same_degen(ii)
+      do jj = 1, size_mat
+        j_good = list_same_degen(jj)
+        mat_tmp(jj,ii) = mat_ref(j_good,i_good)
+      enddo
+    enddo
+
+    call non_hrmt_bieig( size_mat, mat_tmp, thr_d, thr_nd &
+                       , leigvec_tmp, reigvec_tmp         & 
+                       , n_real, eigval_tmp )
+
+    do ii = 1, size_mat
+      icount_eigval += 1
+      eigval_unsrtd(icount_eigval) = eigval_tmp(ii) ! copy eigenvalues 
+      do jj = 1, size_mat ! copy the eigenvectors 
+        j_good = list_same_degen(jj)
+        leigvec_unsrtd(j_good,icount_eigval) = leigvec_tmp(jj,ii)
+        reigvec_unsrtd(j_good,icount_eigval) = reigvec_tmp(jj,ii)
+      enddo
+    enddo
+
+    deallocate(mat_tmp, list_same_degen)
+    deallocate(eigval_tmp, leigvec_tmp, reigvec_tmp)
+  enddo
+
+  if(icount_eigval .ne. n) then
+    print *, ' pb !! (icount_eigval.ne.n)'
+    print *, ' icount_eigval,n', icount_eigval, n
+    stop
+  endif
+ 
+  deallocate(iorder)
+  allocate(iorder(n))
+  do i = 1, n
+    iorder(i) = i
+  enddo
+  call dsort(eigval_unsrtd, iorder, n)
+
+  do i = 1, n
+    print*,'sorted eigenvalues '
+    i_good = iorder(i)
+    eigval(i) = eigval_unsrtd(i)
+    print*,'i,eigval(i) = ',i,eigval(i)
+    do j = 1, n
+      leigvec(j,i) = leigvec_unsrtd(j,i_good)
+      reigvec(j,i) = reigvec_unsrtd(j,i_good)
+    enddo
+  enddo
+
+  deallocate(leigvec_unsrtd, reigvec_unsrtd, eigval_unsrtd)
+  deallocate(list_degen)
+  deallocate(iorder, list_degen_sorted)
+  deallocate(is_ok)
+
+end
+
+! ---
+
+subroutine give_degen_full_list(A, n, thr, list_degen, n_degen_list)
+
+  BEGIN_DOC
+  ! you enter with an array A(n) and spits out all the elements degenerated up to thr
+  !
+  ! the elements of A(n) DON'T HAVE TO BE SORTED IN THE ENTRANCE: TOTALLY GENERAL 
+  !
+  ! list_degen(i,0) = number of degenerate entries 
+  !
+  ! list_degen(i,1) = index of the first degenerate entry
+  !
+  ! list_degen(i,2:list_degen(i,0)) = list of all other dengenerate entries 
+  !
+  ! if list_degen(i,0) == 1 it means that there is no degeneracy for that element
+  END_DOC
+
+  implicit none
+
+  double precision, intent(in)  :: A(n)
+  double precision, intent(in)  :: thr
+  integer,          intent(in)  :: n
+  integer,          intent(out) :: list_degen(n,0:n), n_degen_list
+  integer                       :: i, j, icount, icheck
+  logical, allocatable          :: is_ok(:)
+
+
+  allocate(is_ok(n))
+  n_degen_list = 0
+  is_ok = .True.
+  do i = 1, n
+    if(.not.is_ok(i)) cycle
+    n_degen_list +=1
+    is_ok(i) = .False.
+    list_degen(n_degen_list,1) = i
+    icount = 1
+    do j = i+1, n
+      if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then
+        is_ok(j) = .False.
+        icount += 1
+        list_degen(n_degen_list,icount) = j
+      endif
+    enddo
+
+    list_degen(n_degen_list,0) = icount
+  enddo
+
+  icheck = 0
+  do i = 1, n_degen_list
+    icheck += list_degen(i,0)
+  enddo
+
+  if(icheck.ne.n)then
+    print *, ' pb ! :: icheck.ne.n'
+    print *, icheck, n
+    stop
+  endif
+
+end
+
+! ---
+
diff --git a/src/utils/constants.include.F b/src/utils/constants.include.F
index 297a839e..d1727701 100644
--- a/src/utils/constants.include.F
+++ b/src/utils/constants.include.F
@@ -14,6 +14,7 @@ double precision, parameter :: thresh = 1.d-15
 double precision, parameter :: cx_lda = -0.73855876638202234d0
 double precision, parameter :: c_2_4_3 = 2.5198420997897464d0
 double precision, parameter :: cst_lda = -0.93052573634909996d0
-double precision, parameter :: c_4_3 = 1.3333333333333333d0
-double precision, parameter :: c_1_3 = 0.3333333333333333d0
-
+double precision, parameter :: c_4_3 = 4.d0/3.d0
+double precision, parameter :: c_1_3 = 1.d0/3.d0
+double precision, parameter :: sq_op5 = dsqrt(0.5d0)
+double precision, parameter :: dlog_2pi =  dlog(2.d0*dacos(-1.d0))
diff --git a/src/utils/format_w_error.irp.f b/src/utils/format_w_error.irp.f
new file mode 100644
index 00000000..1378d367
--- /dev/null
+++ b/src/utils/format_w_error.irp.f
@@ -0,0 +1,71 @@
+subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_error)
+
+  implicit none
+ 
+  BEGIN_DOC
+  ! Format for double precision, value(error)
+  END_DOC
+
+  ! in
+  ! | value         | double precision | value...      |
+  ! | error         | double precision | error...      |
+  ! | size_nb       | integer          | X in FX.Y     |
+  ! | max_nb_digits | integer          | Max Y in FX.Y |
+
+  ! out
+  ! | format_value  | character | string FX.Y for the format |
+  ! | str_error     | character | string of the error |
+
+  ! internal
+  ! | str_size      | character | size in string format                             |
+  ! | nb_digits     | integer   | number of digits Y in FX.Y depending of the error |
+  ! | str_nb_digits | character | nb_digits in string format                        |
+  ! | str_exp       | character | string of the value in exponential format         |
+
+  ! in
+  double precision, intent(in)   :: error, value
+  integer, intent(in)            :: size_nb, max_nb_digits
+
+  ! out
+  character(len=20), intent(out) :: str_error, format_value
+
+  ! internal
+  character(len=20)              :: str_size, str_nb_digits, str_exp
+  integer                        :: nb_digits
+
+  ! max_nb_digit: Y max
+  ! size_nb = Size of the double: X (FX.Y)
+  write(str_size,'(I3)') size_nb
+
+  ! Error
+  write(str_exp,'(1pE20.0)') error
+  str_error = trim(adjustl(str_exp))
+  
+  ! Number of digit: Y (FX.Y) from the exponent
+  str_nb_digits = str_exp(19:20)
+  read(str_nb_digits,*) nb_digits
+ 
+  ! If the error is 0d0
+  if (error <= 1d-16) then 
+    write(str_nb_digits,*) max_nb_digits
+  endif
+
+  ! If the error is too small 
+  if (nb_digits > max_nb_digits) then
+      write(str_nb_digits,*) max_nb_digits
+      str_error(1:1) = '0'
+  endif
+
+  ! If the error is too big (>= 0.5)
+  if (error >= 0.5d0) then
+    str_nb_digits = '1'
+    str_error(1:1) = '*'
+  endif
+
+  ! FX.Y,A1,A1,A1 for value(str_error)
+  !string = 'F'//trim(adjustl(str_size))//'.'//trim(adjustl(str_nb_digits))//',A1,A1,A1'
+
+  ! FX.Y just for the value 
+  format_value = 'F'//trim(adjustl(str_size))//'.'//trim(adjustl(str_nb_digits))
+
+end
diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f
index 38e198dc..15d79622 100644
--- a/src/utils/integration.irp.f
+++ b/src/utils/integration.irp.f
@@ -129,6 +129,106 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
 
 end
 
+subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, LD_A, B_center, n_points)
+
+  BEGIN_DOC
+  ! Transforms the product of
+  !          (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta)
+  ! into
+  !        fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 )
+  !               * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 )
+  !               * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 )
+  !
+  ! WARNING                      :: : IF fact_k is too smal then:
+  ! returns a "s" function centered in zero
+  ! with an inifinite exponent and a zero polynom coef
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+  integer,          intent(in)  :: n_points, ldp, LD_A
+  integer,          intent(in)  :: a(3), b(3)              ! powers : (x-xa)**a_x = (x-A(1))**a(1)
+  double precision, intent(in)  :: alpha, beta             ! exponents
+  double precision, intent(in)  :: A_center(LD_A,3)        ! A center
+  double precision, intent(in)  :: B_center(3)             ! B center
+  integer,          intent(out) :: iorder(3)               ! i_order(i) = order of the polynomials
+  double precision, intent(out) :: P_center(n_points,3)    ! new center
+  double precision, intent(out) :: p                       ! new exponent
+  double precision, intent(out) :: fact_k(n_points)        ! constant factor
+  double precision, intent(out) :: P_new(n_points,0:ldp,3) ! polynomial
+
+  integer                       :: n_new, i, j, ipoint, lda, ldb, xyz
+  double precision, allocatable :: P_a(:,:,:), P_b(:,:,:)
+
+
+  call gaussian_product_v(alpha, A_center, LD_A, beta, B_center, fact_k, p, P_center, n_points)
+
+  if(ior(ior(b(1), b(2)), b(3)) == 0) then  ! b == (0,0,0)
+
+    iorder(1:3) = a(1:3)
+
+    lda = maxval(a)
+    allocate(P_a(n_points,0:lda,3))
+    !ldb = 0
+    !allocate(P_b(n_points,0:0,3))
+
+    !call recentered_poly2_v0(P_a, lda, A_center, LD_A, P_center, a, P_b, B_center, P_center, n_points)
+    call recentered_poly2_v0(P_a, lda, A_center, LD_A, P_center, a, n_points)
+
+    do ipoint = 1, n_points
+      do xyz = 1, 3
+        !P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz)
+        do i = 1, a(xyz)
+          !P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+          P_new(ipoint,i,xyz) = P_a(ipoint,i,xyz)
+        enddo
+      enddo
+    enddo
+
+    deallocate(P_a)
+    !deallocate(P_b)
+
+    return
+  endif
+
+  lda = maxval(a)
+  ldb = maxval(b)
+  allocate(P_a(n_points,0:lda,3), P_b(n_points,0:ldb,3))
+
+  call recentered_poly2_v(P_a, lda, A_center, LD_A, P_center, a, P_b, ldb, B_center, P_center, b, n_points)
+
+  iorder(1:3) = a(1:3) + b(1:3)
+
+  do xyz = 1, 3
+    if(b(xyz) == 0) then
+
+      do ipoint = 1, n_points
+        !P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz)
+        do i = 1, a(xyz)
+          !P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+          P_new(ipoint,i,xyz) = P_a(ipoint,i,xyz)
+        enddo
+      enddo
+
+    else
+
+      do i = 0, iorder(xyz)
+        do ipoint = 1, n_points
+          P_new(ipoint,i,xyz) = 0.d0
+        enddo
+      enddo
+
+      call multiply_poly_v(P_a(1,0,xyz), a(xyz), P_b(1,0,xyz), b(xyz), P_new(1,0,xyz), ldp, n_points)
+
+    endif
+  enddo
+
+end subroutine give_explicit_poly_and_gaussian_v
+
+! ---
 
 subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim)
   BEGIN_DOC
@@ -232,6 +332,64 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp)
 end subroutine
 
 
+subroutine gaussian_product_v(a, xa, LD_xa, b, xb, k, p, xp, n_points)
+
+  BEGIN_DOC
+  !
+  ! Gaussian product in 1D.
+  ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2}
+  !
+  ! Using multiple A centers
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: LD_xa, n_points
+  double precision, intent(in)  :: a, b                  ! Exponents
+  double precision, intent(in)  :: xa(LD_xa,3), xb(3)    ! Centers
+  double precision, intent(out) :: p                     ! New exponent
+  double precision, intent(out) :: xp(n_points,3)        ! New center
+  double precision, intent(out) :: k(n_points)           ! Constant
+
+  integer                       :: ipoint
+  double precision              :: p_inv
+  double precision              :: xab(3), ab, ap, bp, bpxb(3)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab
+
+  ASSERT (a>0.)
+  ASSERT (b>0.)
+
+  p = a+b
+  p_inv = 1.d0/(a+b)
+  ab = a*b*p_inv
+  ap = a*p_inv
+  bp = b*p_inv
+  bpxb(1) = bp*xb(1)
+  bpxb(2) = bp*xb(2)
+  bpxb(3) = bp*xb(3)
+
+  do ipoint = 1, n_points
+    xab(1) = xa(ipoint,1)-xb(1)
+    xab(2) = xa(ipoint,2)-xb(2)
+    xab(3) = xa(ipoint,3)-xb(3)
+    k(ipoint) = ab*(xab(1)*xab(1)+xab(2)*xab(2)+xab(3)*xab(3))
+    if (k(ipoint) > 40.d0) then
+      k(ipoint)=0.d0
+      xp(ipoint,1) = 0.d0
+      xp(ipoint,2) = 0.d0
+      xp(ipoint,3) = 0.d0
+    else
+      k(ipoint) = dexp(-k(ipoint))
+      xp(ipoint,1) = ap*xa(ipoint,1)+bpxb(1)
+      xp(ipoint,2) = ap*xa(ipoint,2)+bpxb(2)
+      xp(ipoint,3) = ap*xa(ipoint,3)+bpxb(3)
+    endif
+  enddo
+
+end subroutine gaussian_product_v
+
+! ---
 
 
 subroutine gaussian_product_x(a,xa,b,xb,k,p,xp)
@@ -313,6 +471,43 @@ subroutine multiply_poly(b,nb,c,nc,d,nd)
 
 end
 
+subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Multiply pairs of polynomials
+  ! D(t) += B(t)*C(t)
+  END_DOC
+
+  integer, intent(in)            :: nb, nc, n_points
+  integer, intent(in)            :: nd
+  double precision, intent(in)   :: b(n_points,0:nb), c(n_points,0:nc)
+  double precision, intent(inout) :: d(n_points,0:nd)
+
+  integer                        :: ib, ic, id, k, ipoint
+  if (nd < nb+nc) then
+     print *, nd,  nb, nc
+     print *, irp_here, ': nd < nb+nc'
+     stop 1
+  endif
+
+  do ic = 0,nc
+    do ipoint=1, n_points
+      d(ipoint,ic) = d(ipoint,ic) + c(ipoint,ic) * b(ipoint,0)
+    enddo
+  enddo
+
+  do ib=1,nb
+    do ipoint=1, n_points
+      d(ipoint, ib) = d(ipoint, ib) + c(ipoint,0) * b(ipoint, ib)
+    enddo
+    do ic = 1,nc
+      do ipoint=1, n_points
+        d(ipoint, ib+ic) = d(ipoint, ib+ic) + c(ipoint,ic) * b(ipoint, ib)
+      enddo
+    enddo
+  enddo
+end
+
 subroutine add_poly(b,nb,c,nc,d,nd)
   implicit none
   BEGIN_DOC
@@ -369,6 +564,152 @@ subroutine add_poly_multiply(b,nb,cst,d,nd)
 end
 
 
+subroutine recentered_poly2_v(P_new, lda, x_A, LD_xA, x_P, a, P_new2, ldb, x_B, x_Q, b, n_points)
+
+  BEGIN_DOC
+  ! Recenter two polynomials
+  END_DOC
+
+  implicit none
+  integer, intent(in)            :: a(3), b(3), n_points, lda, ldb, LD_xA
+  double precision, intent(in)   :: x_A(LD_xA,3), x_P(n_points,3), x_B(3), x_Q(n_points,3)
+  double precision, intent(out)  :: P_new(n_points,0:lda,3),P_new2(n_points,0:ldb,3)
+  double precision               :: binom_func
+  integer                        :: i,j,k,l, minab(3), maxab(3),ipoint, xyz
+  double precision, allocatable  :: pows_a(:,:), pows_b(:,:)
+  double precision :: fa, fb
+
+  maxab(1:3) = max(a(1:3),b(1:3))
+  minab(1:3) = max(min(a(1:3),b(1:3)),(/0,0,0/))
+
+  allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) )
+
+  do xyz=1,3
+    if ((a(xyz)<0).or.(b(xyz)<0) ) cycle
+    do ipoint=1,n_points
+      pows_a(ipoint,0) = 1.d0
+      pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz))
+      pows_b(ipoint,0) = 1.d0
+      pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz))
+    enddo
+    do i =  2,maxab(xyz)
+      do ipoint=1,n_points
+        pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1)
+        pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1)
+      enddo
+    enddo
+    do ipoint=1,n_points
+      P_new (ipoint,0,xyz) =  pows_a(ipoint,a(xyz))
+      P_new2(ipoint,0,xyz) =  pows_b(ipoint,b(xyz))
+    enddo
+    do i =  1,min(minab(xyz),20)
+      fa =  binom_transp(a(xyz)-i,a(xyz))
+      fb =  binom_transp(b(xyz)-i,b(xyz))
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+    do i =  minab(xyz)+1,min(a(xyz),20)
+      fa =  binom_transp(a(xyz)-i,a(xyz))
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i =  minab(xyz)+1,min(b(xyz),20)
+      fb =  binom_transp(b(xyz)-i,b(xyz))
+      do ipoint=1,n_points
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+    do i =  21,a(xyz)
+      fa =  binom_func(a(xyz),a(xyz)-i)
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i =  21,b(xyz)
+      fb = binom_func(b(xyz),b(xyz)-i)
+      do ipoint=1,n_points
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+  enddo
+
+end subroutine recentered_poly2_v
+
+! ---
+
+subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, n_points)
+
+  BEGIN_DOC
+  ! 
+  ! Recenter two polynomials. Special case for b=(0,0,0)
+  ! 
+  ! (x - A)^a (x - B)^0 = (x - P + P - A)^a  (x - Q + Q - B)^0
+  !                     = (x - P + P - A)^a 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a(3), n_points, lda, LD_xA
+  double precision, intent(in)  :: x_A(LD_xA,3), x_P(n_points,3)
+  !double precision, intent(in)  :: x_B(3), x_Q(n_points,3)
+  double precision, intent(out) :: P_new(n_points,0:lda,3)
+  !double precision, intent(out) :: P_new2(n_points,3)
+
+  integer                       :: i, j, k, l, xyz, ipoint, maxab(3)
+  double precision              :: fa
+  double precision, allocatable :: pows_a(:,:)
+  !double precision, allocatable :: pows_b(:,:)
+
+  double precision              :: binom_func
+
+  maxab(1:3) = max(a(1:3), (/0,0,0/))
+
+  allocate(pows_a(n_points,-2:maxval(maxab)+4))
+  !allocate(pows_b(n_points,-2:maxval(maxab)+4))
+
+  do xyz = 1, 3
+    if(a(xyz) < 0) cycle
+
+    do ipoint = 1, n_points
+      pows_a(ipoint,0) = 1.d0
+      pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz))
+      !pows_b(ipoint,0) = 1.d0
+      !pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz))
+    enddo
+
+    do i = 2, maxab(xyz)
+      do ipoint = 1, n_points
+        pows_a(ipoint,i) = pows_a(ipoint,i-1) * pows_a(ipoint,1)
+        !pows_b(ipoint,i) = pows_b(ipoint,i-1) * pows_b(ipoint,1)
+      enddo
+    enddo
+
+    do ipoint = 1, n_points
+      P_new (ipoint,0,xyz) =  pows_a(ipoint,a(xyz))
+      !P_new2(ipoint,xyz)   =  pows_b(ipoint,0)
+    enddo
+    do i = 1, min(a(xyz), 20)
+      fa = binom_transp(a(xyz)-i, a(xyz))
+      do ipoint = 1, n_points
+        P_new(ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i = 21, a(xyz)
+      fa = binom_func(a(xyz), a(xyz)-i)
+      do ipoint = 1, n_points
+        P_new(ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+
+  enddo !xyz
+
+  deallocate(pows_a)
+  !deallocate(pows_b)
+
+end subroutine recentered_poly2_v0
 
 subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b)
   implicit none
@@ -412,6 +753,79 @@ subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b)
   enddo
 end
 
+subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol)
+
+  BEGIN_DOC
+  !
+  ! Transform the pol centerd on A:
+  !       [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ]
+  ! to a pol centered on B
+  !       [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ]
+  !
+  END_DOC
+
+  ! useful for max_dim
+  include 'constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: iorder(3)
+  double precision, intent(in)  :: A_center(3), B_center(3)
+  double precision, intent(in)  :: A_pol(0:max_dim, 3)
+  double precision, intent(out) :: B_pol(0:max_dim, 3)
+
+  integer                       :: i, Lmax
+
+  do i = 1, 3
+    Lmax = iorder(i)
+    call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) )
+  enddo
+
+  return
+end subroutine pol_modif_center
+
+
+
+subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol)
+
+  BEGIN_DOC
+  !
+  ! Transform the pol centerd on A:
+  !       [ \sum_i ax_i (x-x_A)^i ]
+  ! to a pol centered on B
+  !       [ \sum_i bx_i (x-x_B)^i ]
+  !
+  ! bx_i = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) j! / [ i! (j-i)! ]
+  !      = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) binom_func(j,i)
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: iorder
+  double precision, intent(in)  :: A_center, B_center
+  double precision, intent(in)  :: A_pol(0:iorder)
+  double precision, intent(out) :: B_pol(0:iorder)
+
+  integer                       :: i, j
+  double precision              :: fact_tmp, dx
+
+  double precision              :: binom_func
+
+  dx = B_center - A_center
+
+  do i = 0, iorder
+    fact_tmp = 0.d0
+    do j = i, iorder
+      fact_tmp += A_pol(j) * binom_func(j, i) * dx**dble(j-i)
+    enddo
+    B_pol(i) = fact_tmp
+  enddo
+
+  return
+end subroutine pol_modif_center_x
+
+
 
 
 
diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index 405d2d20..c02560e3 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1574,78 +1574,291 @@ subroutine nullify_small_elements(m,n,A,LDA,thresh)
 end
 
 subroutine restore_symmetry(m,n,A,LDA,thresh)
+
   implicit none
+
   BEGIN_DOC
-! Tries to find the matrix elements that are the same, and sets them
-! to the average value.
-! If restore_symm is False, only nullify small elements
+  ! Tries to find the matrix elements that are the same, and sets them
+  ! to the average value.
+  ! If restore_symm is False, only nullify small elements
   END_DOC
+
   integer, intent(in) :: m,n,LDA
   double precision, intent(inout) :: A(LDA,n)
   double precision, intent(in) :: thresh
-  integer :: i,j,k,l
-  logical, allocatable :: done(:,:)
-  double precision :: f, g, count, thresh2
+
+  double precision, allocatable :: copy(:), copy_sign(:)
+  integer, allocatable :: key(:), ii(:), jj(:)
+  integer :: sze, pi, pf, idx, i,j,k
+  double precision :: average, val, thresh2
+
   thresh2 = dsqrt(thresh)
-  call nullify_small_elements(m,n,A,LDA,thresh)
 
-!  if (.not.restore_symm) then
-!    return
-!  endif
+  sze = m * n
 
-  ! TODO:  Costs O(n^4), but can be improved to (2 n^2 * log(n)):
-  ! - copy all values in a 1D array
-  ! - sort 1D array
-  ! - average nearby elements 
-  ! - for all elements, find matching value in the sorted 1D array
+  allocate(copy(sze),copy_sign(sze),key(sze),ii(sze),jj(sze))
 
-  allocate(done(m,n))
-
-  do j=1,n
-    do i=1,m
-      done(i,j) = A(i,j) == 0.d0
+  ! Copy to 1D
+  !$OMP PARALLEL if (m>100) &
+  !$OMP SHARED(A,m,n,sze,copy_sign,copy,key,ii,jj) &
+  !$OMP PRIVATE(i,j,k) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do j = 1, n
+    do i = 1, m
+      k = i+(j-1)*m
+      copy(k) = A(i,j)
+      copy_sign(k) = sign(1.d0,copy(k))
+      copy(k) = -dabs(copy(k))
+      key(k) = k
+      ii(k) = i
+      jj(k) = j
     enddo
   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
 
-  do j=1,n
-    do i=1,m
-      if ( done(i,j) ) cycle
-      done(i,j) = .True.
-      count = 1.d0
-      f = 1.d0/A(i,j)
-      do l=1,n
-        do k=1,m
-          if ( done(k,l) ) cycle
-          g = f * A(k,l)
-          if ( dabs(dabs(g) - 1.d0) < thresh2 ) then
-            count = count + 1.d0
-            if (g>0.d0) then
-              A(i,j) = A(i,j) + A(k,l)
-            else
-              A(i,j) = A(i,j) - A(k,l)
-            end if
-          endif
-        enddo
-      enddo
-      if (count > 1.d0) then
-        A(i,j) = A(i,j) / count
-        do l=1,n
-          do k=1,m
-            if ( done(k,l) ) cycle
-            g = f * A(k,l)
-            if ( dabs(dabs(g) - 1.d0) < thresh2 ) then
-              done(k,l) = .True.
-              if (g>0.d0) then
-                A(k,l) = A(i,j)
-              else
-                A(k,l) = -A(i,j)
-              end if
-            endif
-          enddo
-        enddo
+  ! Sort
+  call dsort(copy,key,sze)
+  call iset_order(ii,key,sze)
+  call iset_order(jj,key,sze)
+  call dset_order(copy_sign,key,sze)
+
+  !TODO
+  ! Parallelization with OMP
+
+  ! Symmetrize
+  i = 1
+  do while (i < sze)
+    pi = i
+    pf = i
+
+    ! Exit if the remaining elements are below thresh
+    if (-copy(i) <= thresh) exit
+
+    val = 1d0/copy(i)
+    do while (dabs(val * copy(pf+1) - 1d0) < thresh2)
+      pf = pf + 1
+      ! if pf == sze, copy(pf+1) will not be valid
+      if (pf == sze) then
+        exit
       endif
-
     enddo
+    ! if pi and pf are different do the average from pi to pf
+    if (pf - pi > 0) then
+      average = 0d0
+      do j = pi, pf
+        average = average + copy(j)
+      enddo
+      average = average / (pf-pi+1.d0)
+      do j = pi, pf
+        copy(j) = average
+      enddo
+      ! Update i
+      i = pf
+    endif
+
+    ! Update i
+    i = i + 1
   enddo
+  copy(i:) = 0.d0
+
+  !$OMP PARALLEL if (sze>10000) &
+  !$OMP SHARED(m,sze,copy_sign,copy,key,A,ii,jj) &
+  !$OMP PRIVATE(i,j,k,idx) &
+  !$OMP DEFAULT(NONE)
+  ! copy -> A
+  !$OMP DO
+  do k = 1, sze
+      i = ii(k)
+      j = jj(k)
+      A(i,j) = sign(copy(k),copy_sign(k))
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(copy,copy_sign,key,ii,jj)
 
 end
+
+subroutine diag_nonsym_right(n, A, A_ldim, V, V_ldim, energy, E_ldim)
+
+  implicit none
+
+  integer,          intent(in)  :: n, A_ldim, V_ldim, E_ldim
+  double precision, intent(in)  :: A(A_ldim,n)
+  double precision, intent(out) :: energy(E_ldim), V(V_ldim,n)
+
+  character*1                   :: JOBVL, JOBVR, BALANC, SENSE
+  integer                       :: i, j
+  integer                       :: ILO, IHI, lda, ldvl, ldvr, LWORK, INFO
+  double precision              :: ABNRM
+  integer,          allocatable :: iorder(:), IWORK(:)
+  double precision, allocatable :: WORK(:), SCALE_array(:), RCONDE(:), RCONDV(:)
+  double precision, allocatable :: Atmp(:,:), WR(:), WI(:), VL(:,:), VR(:,:), Vtmp(:)
+  double precision, allocatable :: energy_loc(:), V_loc(:,:)
+
+  allocate( Atmp(n,n), WR(n), WI(n), VL(1,1), VR(n,n) )
+  do i = 1, n
+    do j = 1, n
+      Atmp(j,i) = A(j,i)
+    enddo
+  enddo
+
+  JOBVL  = "N" ! computes the left  eigenvectors
+  JOBVR  = "V" ! computes the right eigenvectors
+  BALANC = "B" ! Diagonal scaling and Permutation for optimization
+  SENSE  = "V" ! Determines which reciprocal condition numbers are computed
+  lda  = n
+  ldvr = n
+  ldvl = 1
+
+  allocate( WORK(1), SCALE_array(n), RCONDE(n), RCONDV(n), IWORK(2*n-2) )
+
+  LWORK = -1 ! to ask for the optimal size of WORK
+  call dgeevx( BALANC, JOBVL, JOBVR, SENSE                  & ! CHARACTERS
+             , n, Atmp, lda                                 & ! MATRIX TO DIAGONALIZE
+             , WR, WI                                       & ! REAL AND IMAGINARY PART OF EIGENVALUES
+             , VL, ldvl, VR, ldvr                           & ! LEFT AND RIGHT EIGENVECTORS
+             , ILO, IHI, SCALE_array, ABNRM, RCONDE, RCONDV & ! OUTPUTS OF OPTIMIZATION
+             , WORK, LWORK, IWORK, INFO )
+
+  if(INFO .ne. 0) then
+    print*, 'dgeevx failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(int(work(1)), 1) ! this is the optimal size of WORK
+  deallocate(WORK)
+  allocate(WORK(LWORK))
+  call dgeevx( BALANC, JOBVL, JOBVR, SENSE                  &
+             , n, Atmp, lda                                 &
+             , WR, WI                                       &
+             , VL, ldvl, VR, ldvr                           &
+             , ILO, IHI, SCALE_array, ABNRM, RCONDE, RCONDV &
+             , WORK, LWORK, IWORK, INFO )
+  if(INFO .ne. 0) then
+    print*, 'dgeevx failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, SCALE_array, RCONDE, RCONDV, IWORK )
+  deallocate( VL, Atmp )
+
+
+  allocate( energy_loc(n), V_loc(n,n) )
+  energy_loc = 0.d0
+  V_loc = 0.d0
+
+  i = 1
+  do while(i .le. n)
+
+!    print*, i, WR(i), WI(i)
+
+    if( dabs(WI(i)) .gt. 1e-7 ) then
+
+      print*, ' Found an imaginary component to eigenvalue'
+      print*, ' Re(i) + Im(i)', i, WR(i), WI(i)
+
+      energy_loc(i) = WR(i)
+      do j = 1, n
+        V_loc(j,i) = WR(i) * VR(j,i) - WI(i) * VR(j,i+1)
+      enddo
+      energy_loc(i+1) = WI(i)
+      do j = 1, n
+        V_loc(j,i+1) = WR(i) * VR(j,i+1) + WI(i) * VR(j,i)
+      enddo
+      i = i + 2
+
+    else
+
+      energy_loc(i) = WR(i)
+      do j = 1, n
+        V_loc(j,i) = VR(j,i)
+      enddo
+      i = i + 1
+
+    endif
+
+  enddo
+
+  deallocate(WR, WI, VR)
+
+
+  ! ordering
+!  do j = 1, n
+!    write(444, '(100(1X, F16.10))') (V_loc(j,i), i=1,5)
+!  enddo
+  allocate( iorder(n) )
+  do i = 1, n
+    iorder(i) = i
+  enddo
+  call dsort(energy_loc, iorder, n)
+  do i = 1, n
+    energy(i) = energy_loc(i)
+    do j = 1, n
+      V(j,i) = V_loc(j,iorder(i))
+    enddo
+  enddo
+  deallocate(iorder)
+!  do j = 1, n
+!    write(445, '(100(1X, F16.10))') (V_loc(j,i), i=1,5)
+!  enddo
+  deallocate(V_loc, energy_loc)
+
+end subroutine diag_nonsym_right
+
+! ---
+
+! Taken from GammCor thanks to Michal Hapka :-)
+
+
+subroutine pivoted_cholesky( A, rank, tol, ndim, U)
+!
+! A = U**T * U
+!
+! matrix A is destroyed inside this subroutine
+! Cholesky vectors are stored in U
+! dimension of U: U(1:rank, 1:n)
+! U is allocated inside this subroutine
+! rank is the number of Cholesky vectors depending on tol
+!
+integer :: ndim
+integer, intent(inout)                                        :: rank
+double precision, dimension(ndim, ndim), intent(inout)        :: A
+double precision, dimension(ndim, rank), intent(out)          :: U
+double precision, intent(in)                                  :: tol
+
+integer, dimension(:), allocatable          :: piv
+double precision, dimension(:), allocatable :: work
+character, parameter :: uplo = "U"
+integer :: N, LDA
+integer :: info
+integer :: k, l, rank0
+external :: dpstrf
+
+rank0 = rank
+N = size(A, dim=1)
+LDA = N
+allocate(piv(N))
+allocate(work(2*N))
+call dpstrf(uplo, N, A, LDA, piv, rank, tol, work, info)
+
+if (rank > rank0) then
+  print *, 'Bug: rank > rank0 in pivoted cholesky. Increase rank before calling'
+  stop
+end if
+
+do k = 1, N
+  A(k+1:, k) = 0.00D+0
+end do
+! TODO: It should be possible to use only one vector of size (1:rank) as a buffer
+! to do the swapping in-place
+U = 0.00D+0
+do k = 1, N
+  l = piv(k)
+  U(l, :) = A(1:rank, k)
+end do
+
+end subroutine pivoted_cholesky
+
diff --git a/src/utils/loc.f b/src/utils/loc.f
new file mode 100644
index 00000000..02693281
--- /dev/null
+++ b/src/utils/loc.f
@@ -0,0 +1,327 @@
+c************************************************************************
+      subroutine maxovl(n,m,s,t,w)
+C
+C     This subprogram contains an iterative procedure to find the
+C     unitary transformation of a set of n vectors which maximizes
+C     the sum of their square overlaps with a set of m reference
+C     vectors (m.le.n)
+C
+C     S: overlap matrix <ref|vec>
+C     T: rotation matrix
+C     W: new overlap matrix
+C
+C
+      implicit real*8(a-h,o-y),logical*1(z)
+!      parameter (id1=700)
+!      dimension s(id1,id1),t(id1,id1),w(id1,id1)
+      double precision, intent(inout) :: s(n,n)
+      double precision, intent(out) :: t(n,n),w(n,n)
+      data small/1.d-6/
+
+      zprt=.true.
+      niter=1000000
+      conv=1.d-12
+
+C      niter=1000000
+C      conv=1.d-6
+      write (6,5) n,m,conv
+    5 format (//5x,'Unitary transformation of',i3,'  vectors'/
+     * 5x,'following the principle of maximum overlap with a set of',
+     * i3,' reference vectors'/5x,'required convergence on rotation ',
+     * 'angle =',f13.10///5x,'Starting overlap matrix'/)
+      do i=1,m
+        write (6,145) i
+        write (6,150) (s(i,j),j=1,n)
+      end do
+    8 mm=m-1
+      if (m.lt.n) mm=m
+      iter=0
+      do j=1,n
+        do i=1,n
+          t(i,j)=0.d0
+        end do
+        do i=1,m
+          w(i,j)=s(i,j)
+        enddo
+        t(j,j)=1.d0
+      enddo
+      sum=0.d0
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      end do
+      sum=sum/m
+      if (zprt) write (6,12) sum
+   12 format (//5x,'Average square overlap =',f10.6)
+      if (n.eq.1) goto 100
+      last=n
+      j=1
+   21 if (j.ge.last) goto 30
+      sum=0.d0
+      do i=1,n
+        sum=sum+s(i,j)*s(i,j)
+      enddo
+      if (sum.gt.small) goto 28
+      do i=1,n
+        sij=s(i,j)
+        s(i,j)=-s(i,last)
+        s(i,last)=sij
+        tij=t(i,j)
+        t(i,j)=-t(i,last)
+        t(i,last)=tij
+      end do
+      last=last-1
+      goto 21
+   28 j=j+1
+      goto 21
+   30 iter=iter+1
+      imax=0
+      jmax=0
+      dmax=0.d0
+      amax=0.d0
+      do 60 i=1,mm
+      ip=i+1
+      do 50 j=ip,n
+      a=s(i,j)*s(i,j)-s(i,i)*s(i,i)
+      b=-s(i,i)*s(i,j)
+      if (j.gt.m) goto 31
+      a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j)
+      b=b+s(j,i)*s(j,j)
+   31 b=b+b
+      if (a.eq.0.d0) goto 32
+      ba=b/a
+      if (dabs(ba).gt.small) goto 32
+      if (a.gt.0.d0) goto 33
+      tang=-0.5d0*ba
+      cosine=1.d0/dsqrt(1.d0+tang*tang)
+      sine=tang*cosine
+      goto 34
+   32 tang=0.d0
+      if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b
+      cosine=1.d0/dsqrt(1.d0+tang*tang)
+      sine=tang*cosine
+      goto 34
+   33 cosine=0.d0
+      sine=1.d0
+   34 delta=sine*(a*sine+b*cosine)
+      if (zprt.and.delta.lt.0.d0) write (6,71) i,j,a,b,sine,cosine,delta
+      do k=1,m
+        p=s(k,i)*cosine-s(k,j)*sine
+        q=s(k,i)*sine+s(k,j)*cosine
+        s(k,i)=p
+        s(k,j)=q
+      enddo
+      do k=1,n
+        p=t(k,i)*cosine-t(k,j)*sine
+        q=t(k,i)*sine+t(k,j)*cosine
+        t(k,i)=p
+        t(k,j)=q
+      enddo
+   45 d=dabs(sine)
+      if (d.le.amax) goto 50
+      imax=i
+      jmax=j
+      amax=d
+      dmax=delta
+   50 continue
+   60 continue
+      if (zprt) write (6,70) iter,amax,imax,jmax,dmax
+   70 format (' iter=',i4,' largest rotation=',f12.8,
+     * ', vectors',i3,' and',i3,', incr. of diag. squares=',g12.5)
+   71 format (' i,j,a,b,sin,cos,delta =',2i3,5f10.5)
+      if (amax.lt.conv) goto 100
+      if (iter.lt.niter) goto 30
+      write (6,80)
+      write (6,*) 'niter=',niter
+   80 format (//5x,'*** maximum number of cycles exceeded ',
+     * 'in subroutine maxovl ***'//)
+      stop
+  100 continue
+      do j=1,n
+        if (s(j,j).gt.0.d0) cycle
+        do i=1,m
+          s(i,j)=-s(i,j)
+        enddo
+        do i=1,n
+          t(i,j)=-t(i,j)
+        enddo
+      enddo
+      sum=0.d0
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
+      sum=sum/m
+      do i=1,m
+        do j=1,n
+          sw=s(i,j)
+          s(i,j)=w(i,j)
+          w(i,j)=sw
+        enddo
+      enddo
+      if (.not.zprt) return
+      write (6,12) sum
+      write (6,130)
+  130 format (//5x,'transformation matrix')
+      do i=1,n
+        write (6,145) i
+        write (6,150) (t(i,j),j=1,n)
+      enddo
+  145 format (i8)
+  150 format (2x,10f12.8)
+      write (6,160)
+  160 format (//5x,'new overlap matrix'/)
+      do i=1,m
+        write (6,145) i
+        write (6,150) (w(i,j),j=1,n)
+      enddo
+      return
+      end
+
+
+c************************************************************************
+      subroutine maxovl_no_print(n,m,s,t,w)
+C
+C     This subprogram contains an iterative procedure to find the
+C     unitary transformation of a set of n vectors which maximizes
+C     the sum of their square overlaps with a set of m reference
+C     vectors (m.le.n)
+C
+C     S: overlap matrix <ref|vec>
+C     T: rotation matrix
+C     W: new overlap matrix
+C
+C
+      implicit real*8(a-h,o-y),logical*1(z)
+      parameter (id1=300)
+      dimension s(id1,id1),t(id1,id1),w(id1,id1)
+      data small/1.d-6/
+
+      zprt=.false.
+      niter=1000000
+      conv=1.d-8
+
+C      niter=1000000
+C      conv=1.d-6
+    8 mm=m-1
+      if (m.lt.n) mm=m
+      iter=0
+      do j=1,n
+        do i=1,n
+          t(i,j)=0.d0
+        enddo
+        do i=1,m
+          w(i,j)=s(i,j)
+        enddo
+        t(j,j)=1.d0
+      enddo
+      sum=0.d0
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
+      sum=sum/m
+   12 format (//5x,'Average square overlap =',f10.6)
+      if (n.eq.1) goto 100
+      last=n
+      j=1
+   21 if (j.ge.last) goto 30
+      sum=0.d0
+
+      do i=1,n
+        sum=sum+s(i,j)*s(i,j)
+      enddo
+      if (sum.gt.small) goto 28
+      do i=1,n
+        sij=s(i,j)
+        s(i,j)=-s(i,last)
+        s(i,last)=sij
+        tij=t(i,j)
+        t(i,j)=-t(i,last)
+        t(i,last)=tij
+      end do
+      last=last-1
+      goto 21
+   28 j=j+1
+      goto 21
+   30 iter=iter+1
+      imax=0
+      jmax=0
+      dmax=0.d0
+      amax=0.d0
+      do i=1,mm
+        ip=i+1
+        do j=ip,n
+          a=s(i,j)*s(i,j)-s(i,i)*s(i,i)
+          b=-s(i,i)*s(i,j)
+          if (j.gt.m) goto 31
+          a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j)
+          b=b+s(j,i)*s(j,j)
+  31      b=b+b
+          if (a.eq.0.d0) goto 32
+          ba=b/a
+          if (dabs(ba).gt.small) goto 32
+          if (a.gt.0.d0) goto 33
+          tang=-0.5d0*ba
+          cosine=1.d0/dsqrt(1.d0+tang*tang)
+          sine=tang*cosine
+          goto 34
+  32      tang=0.d0
+          if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b
+          cosine=1.d0/dsqrt(1.d0+tang*tang)
+          sine=tang*cosine
+          goto 34
+  33      cosine=0.d0
+          sine=1.d0
+  34      delta=sine*(a*sine+b*cosine)
+          do k=1,m
+            p=s(k,i)*cosine-s(k,j)*sine
+            q=s(k,i)*sine+s(k,j)*cosine
+            s(k,i)=p
+            s(k,j)=q
+          enddo
+          do k=1,n
+            p=t(k,i)*cosine-t(k,j)*sine
+            q=t(k,i)*sine+t(k,j)*cosine
+            t(k,i)=p
+            t(k,j)=q
+          enddo
+  45      d=dabs(sine)
+          if (d.le.amax) goto 50
+          imax=i
+          jmax=j
+          amax=d
+          dmax=delta
+  50      continue
+        end do
+      end do
+   70 format (' iter=',i4,' largest rotation=',f12.8,
+     * ', vectors',i3,' and',i3,', incr. of diag. squares=',g12.5)
+   71 format (' i,j,a,b,sin,cos,delta =',2i3,5f10.5)
+      if (amax.lt.conv) goto 100
+      if (iter.lt.niter) goto 30
+   80 format (//5x,'*** maximum number of cycles exceeded ',
+     * 'in subroutine maxovl ***'//)
+      stop
+  100 continue
+      do j=1,n
+        if (s(j,j).gt.0.d0) cycle
+        do i=1,m
+          s(i,j)=-s(i,j)
+        enddo
+        do i=1,n
+          t(i,j)=-t(i,j)
+        enddo
+      enddo
+      sum=0.d0
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
+      sum=sum/m
+      do i=1,m
+        do j=1,n
+          sw=s(i,j)
+          s(i,j)=w(i,j)
+          w(i,j)=sw
+        enddo
+      enddo
+      return
+      end
+
diff --git a/src/utils/map_module.f90 b/src/utils/map_module.f90
index 98e73470..ceaec874 100644
--- a/src/utils/map_module.f90
+++ b/src/utils/map_module.f90
@@ -238,11 +238,11 @@ subroutine cache_map_sort(map)
       iorder(i) = i
     enddo
     if (cache_key_kind == 2) then
-      call i2radix_sort(map%key,iorder,map%n_elements,-1)
+      call i2sort(map%key,iorder,map%n_elements,-1)
     else if (cache_key_kind == 4) then
-      call iradix_sort(map%key,iorder,map%n_elements,-1)
+      call isort(map%key,iorder,map%n_elements,-1)
     else if (cache_key_kind == 8) then
-      call i8radix_sort(map%key,iorder,map%n_elements,-1)
+      call i8sort(map%key,iorder,map%n_elements,-1)
     endif
     if (integral_kind == 4) then
       call set_order(map%value,iorder,map%n_elements)
diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f
index cacc3bf7..081adee3 100644
--- a/src/utils/one_e_integration.irp.f
+++ b/src/utils/one_e_integration.irp.f
@@ -145,3 +145,72 @@ end
 
 
 
+subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, power_B, overlap, n_points)
+
+  BEGIN_DOC
+  !.. math::
+  !
+  !   S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\
+  !   S = S_x S_y S_z
+  !
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: n_points
+  integer,          intent(in)  :: power_A(3), power_B(3)             ! power of the x1 functions
+  double precision, intent(in)  :: A_center(n_points,3), B_center(3)  ! center of the x1 functions
+  double precision, intent(in)  :: alpha, beta
+  double precision, intent(out) :: overlap(n_points)
+
+  integer                       :: i
+  integer                       :: iorder_p(3), ipoint, ldp
+  integer                       :: nmax
+  double precision              :: F_integral_tab(0:max_dim)
+  double precision              :: p, overlap_x, overlap_y, overlap_z
+  double precision              :: F_integral
+  double precision, allocatable :: P_new(:,:,:), P_center(:,:), fact_p(:)
+
+  ldp = maxval(power_A(1:3) + power_B(1:3))
+
+  allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points))
+
+  call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, n_points, B_center, n_points)
+
+  nmax = maxval(iorder_p)
+  do i = 0, nmax
+    F_integral_tab(i) = F_integral(i,p)
+  enddo
+
+  do ipoint = 1, n_points
+
+    if(fact_p(ipoint) .lt. 1d-20) then
+      overlap(ipoint) = 1.d-10
+      cycle
+    endif
+
+    overlap_x = P_new(ipoint,0,1) * F_integral_tab(0)
+    do i = 1, iorder_p(1)
+      overlap_x = overlap_x + P_new(ipoint,i,1) * F_integral_tab(i)
+    enddo
+
+    overlap_y = P_new(ipoint,0,2) * F_integral_tab(0)
+    do i = 1, iorder_p(2)
+      overlap_y = overlap_y + P_new(ipoint,i,2) * F_integral_tab(i)
+    enddo
+
+    overlap_z = P_new(ipoint,0,3) * F_integral_tab(0)
+    do i = 1, iorder_p(3)
+      overlap_z = overlap_z + P_new(ipoint,i,3) * F_integral_tab(i)
+    enddo
+
+    overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_p(ipoint)
+  enddo
+
+  deallocate(P_new, P_center, fact_p)
+
+end subroutine overlap_gaussian_xyz_v
+
+! ---
diff --git a/src/utils/qsort.c b/src/utils/qsort.c
new file mode 100644
index 00000000..c011b35a
--- /dev/null
+++ b/src/utils/qsort.c
@@ -0,0 +1,373 @@
+/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */
+#include <stdlib.h>
+#include <stdint.h>
+
+struct int16_t_comp {
+  int16_t    x;
+  int32_t i;
+};
+
+int compare_int16_t( const void * l, const void * r )
+{
+  const int16_t * restrict _l= l;
+  const int16_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int16_t_comp), compare_int16_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int16_t_noidx(int16_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int16_t), compare_int16_t);
+}
+
+
+struct int16_t_comp_big {
+  int16_t    x;
+  int64_t i;
+};
+
+int compare_int16_t_big( const void * l, const void * r )
+{
+  const int16_t * restrict _l= l;
+  const int16_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int16_t_comp_big), compare_int16_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int16_t_noidx_big(int16_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int16_t), compare_int16_t_big);
+}
+
+
+struct int32_t_comp {
+  int32_t    x;
+  int32_t i;
+};
+
+int compare_int32_t( const void * l, const void * r )
+{
+  const int32_t * restrict _l= l;
+  const int32_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int32_t_comp), compare_int32_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int32_t_noidx(int32_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int32_t), compare_int32_t);
+}
+
+
+struct int32_t_comp_big {
+  int32_t    x;
+  int64_t i;
+};
+
+int compare_int32_t_big( const void * l, const void * r )
+{
+  const int32_t * restrict _l= l;
+  const int32_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int32_t_comp_big), compare_int32_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int32_t_noidx_big(int32_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int32_t), compare_int32_t_big);
+}
+
+
+struct int64_t_comp {
+  int64_t    x;
+  int32_t i;
+};
+
+int compare_int64_t( const void * l, const void * r )
+{
+  const int64_t * restrict _l= l;
+  const int64_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int64_t_comp), compare_int64_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int64_t_noidx(int64_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int64_t), compare_int64_t);
+}
+
+
+struct int64_t_comp_big {
+  int64_t    x;
+  int64_t i;
+};
+
+int compare_int64_t_big( const void * l, const void * r )
+{
+  const int64_t * restrict _l= l;
+  const int64_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int64_t_comp_big), compare_int64_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int64_t_noidx_big(int64_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int64_t), compare_int64_t_big);
+}
+
+
+struct double_comp {
+  double    x;
+  int32_t i;
+};
+
+int compare_double( const void * l, const void * r )
+{
+  const double * restrict _l= l;
+  const double * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct double_comp* A = malloc(isize * sizeof(struct double_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct double_comp), compare_double);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_double_noidx(double* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(double), compare_double);
+}
+
+
+struct double_comp_big {
+  double    x;
+  int64_t i;
+};
+
+int compare_double_big( const void * l, const void * r )
+{
+  const double * restrict _l= l;
+  const double * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct double_comp_big), compare_double_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_double_noidx_big(double* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(double), compare_double_big);
+}
+
+
+struct float_comp {
+  float    x;
+  int32_t i;
+};
+
+int compare_float( const void * l, const void * r )
+{
+  const float * restrict _l= l;
+  const float * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct float_comp* A = malloc(isize * sizeof(struct float_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct float_comp), compare_float);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_float_noidx(float* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(float), compare_float);
+}
+
+
+struct float_comp_big {
+  float    x;
+  int64_t i;
+};
+
+int compare_float_big( const void * l, const void * r )
+{
+  const float * restrict _l= l;
+  const float * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct float_comp_big), compare_float_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_float_noidx_big(float* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(float), compare_float_big);
+}
+/* Generated C file:1 ends here */
diff --git a/src/utils/qsort.org b/src/utils/qsort.org
new file mode 100644
index 00000000..abf0d54a
--- /dev/null
+++ b/src/utils/qsort.org
@@ -0,0 +1,169 @@
+#+TITLE: Quick sort binding for Fortran
+
+* C template
+
+#+NAME: c_template
+#+BEGIN_SRC c
+struct TYPE_comp_big {
+  TYPE    x;
+  int32_t i;
+};
+
+int compare_TYPE_big( const void * l, const void * r )
+{
+  const TYPE * restrict _l= l;
+  const TYPE * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct TYPE_comp_big), compare_TYPE_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_TYPE_noidx_big(TYPE* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(TYPE), compare_TYPE_big);
+}
+#+END_SRC
+
+* Fortran template
+
+#+NAME:f_template
+#+BEGIN_SRC f90
+subroutine Lsort_big_c(A, iorder, isize) bind(C, name="qsort_TYPE_big")
+  use iso_c_binding
+  integer(c_int32_t), value :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_TYPE)         :: A(isize)
+end subroutine Lsort_big_c
+
+subroutine Lsort_noidx_big_c(A, isize) bind(C, name="qsort_TYPE_noidx_big")
+  use iso_c_binding
+  integer(c_int32_t), value :: isize
+  real   (c_TYPE)         :: A(isize)
+end subroutine Lsort_noidx_big_c
+
+#+END_SRC
+
+#+NAME:f_template2
+#+BEGIN_SRC f90
+subroutine Lsort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_TYPE)         :: A(isize)
+  call Lsort_big_c(A, iorder, isize)
+end subroutine Lsort_big
+
+subroutine Lsort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_TYPE)    :: A(isize)
+  call Lsort_noidx_big_c(A, isize)
+end subroutine Lsort_noidx_big
+
+#+END_SRC
+
+* Python scripts for type replacements
+
+#+NAME: replaced
+#+begin_src python :results output :noweb yes
+data = """
+<<c_template>>
+"""
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+#+NAME: replaced_f
+#+begin_src python :results output :noweb yes
+data = """
+<<f_template>>
+"""
+c1 = {
+    "int16_t": "i2",
+    "int32_t": "i",
+    "int64_t": "i8",
+    "double": "d",
+    "float": ""
+}
+c2 = {
+    "int16_t": "integer",
+    "int32_t": "integer",
+    "int64_t": "integer",
+    "double": "real",
+    "float": "real"
+}
+
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+#+NAME: replaced_f2
+#+begin_src python :results output :noweb yes
+data = """
+<<f_template2>>
+"""
+c1 = {
+    "int16_t": "i2",
+    "int32_t": "i",
+    "int64_t": "i8",
+    "double": "d",
+    "float": ""
+}
+c2 = {
+    "int16_t": "integer",
+    "int32_t": "integer",
+    "int64_t": "integer",
+    "double": "real",
+    "float": "real"
+}
+
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+* Generated C file
+  
+#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes
+#include <stdlib.h>
+#include <stdint.h>
+<<replaced()>>
+#+END_SRC
+
+* Generated Fortran file
+
+#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes
+module qsort_module
+  use iso_c_binding
+  
+  interface
+     <<replaced_f()>>
+  end interface
+
+end module qsort_module
+
+<<replaced_f2()>>
+
+#+END_SRC
+
diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90
new file mode 100644
index 00000000..a72a4f9e
--- /dev/null
+++ b/src/utils/qsort_module.f90
@@ -0,0 +1,347 @@
+module qsort_module
+  use iso_c_binding
+  
+  interface
+     
+     subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_c
+     
+     subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_noidx_c
+     
+     
+     
+     subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_big_c
+     
+     subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_noidx_big_c
+     
+     
+     
+     subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_c
+     
+     subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_noidx_c
+     
+     
+     
+     subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_big_c
+     
+     subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_noidx_big_c
+     
+     
+     
+     subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_c
+     
+     subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_noidx_c
+     
+     
+     
+     subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_big_c
+     
+     subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_noidx_big_c
+     
+     
+     
+     subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_c
+     
+     subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_noidx_c
+     
+     
+     
+     subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_big_c
+     
+     subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_noidx_big_c
+     
+     
+     
+     subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       real   (c_float)         :: A(isize)
+     end subroutine sort_c
+     
+     subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       real   (c_float)         :: A(isize)
+     end subroutine sort_noidx_c
+     
+     
+     
+     subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       real   (c_float)         :: A(isize)
+     end subroutine sort_big_c
+     
+     subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       real   (c_float)         :: A(isize)
+     end subroutine sort_noidx_big_c
+     
+     
+     
+  end interface
+
+end module qsort_module
+
+
+subroutine i2sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int16_t)         :: A(isize)
+  call i2sort_c(A, iorder, isize)
+end subroutine i2sort
+
+subroutine i2sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int16_t)    :: A(isize)
+  call i2sort_noidx_c(A, isize)
+end subroutine i2sort_noidx
+
+
+
+subroutine i2sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int16_t)         :: A(isize)
+  call i2sort_big_c(A, iorder, isize)
+end subroutine i2sort_big
+
+subroutine i2sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int16_t)    :: A(isize)
+  call i2sort_noidx_big_c(A, isize)
+end subroutine i2sort_noidx_big
+
+
+
+subroutine isort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int32_t)         :: A(isize)
+  call isort_c(A, iorder, isize)
+end subroutine isort
+
+subroutine isort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int32_t)    :: A(isize)
+  call isort_noidx_c(A, isize)
+end subroutine isort_noidx
+
+
+
+subroutine isort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int32_t)         :: A(isize)
+  call isort_big_c(A, iorder, isize)
+end subroutine isort_big
+
+subroutine isort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int32_t)    :: A(isize)
+  call isort_noidx_big_c(A, isize)
+end subroutine isort_noidx_big
+
+
+
+subroutine i8sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int64_t)         :: A(isize)
+  call i8sort_c(A, iorder, isize)
+end subroutine i8sort
+
+subroutine i8sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int64_t)    :: A(isize)
+  call i8sort_noidx_c(A, isize)
+end subroutine i8sort_noidx
+
+
+
+subroutine i8sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int64_t)         :: A(isize)
+  call i8sort_big_c(A, iorder, isize)
+end subroutine i8sort_big
+
+subroutine i8sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int64_t)    :: A(isize)
+  call i8sort_noidx_big_c(A, isize)
+end subroutine i8sort_noidx_big
+
+
+
+subroutine dsort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_double)         :: A(isize)
+  call dsort_c(A, iorder, isize)
+end subroutine dsort
+
+subroutine dsort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_double)    :: A(isize)
+  call dsort_noidx_c(A, isize)
+end subroutine dsort_noidx
+
+
+
+subroutine dsort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  real   (c_double)         :: A(isize)
+  call dsort_big_c(A, iorder, isize)
+end subroutine dsort_big
+
+subroutine dsort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  real   (c_double)    :: A(isize)
+  call dsort_noidx_big_c(A, isize)
+end subroutine dsort_noidx_big
+
+
+
+subroutine sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_float)         :: A(isize)
+  call sort_c(A, iorder, isize)
+end subroutine sort
+
+subroutine sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_float)    :: A(isize)
+  call sort_noidx_c(A, isize)
+end subroutine sort_noidx
+
+
+
+subroutine sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  real   (c_float)         :: A(isize)
+  call sort_big_c(A, iorder, isize)
+end subroutine sort_big
+
+subroutine sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  real   (c_float)    :: A(isize)
+  call sort_noidx_big_c(A, isize)
+end subroutine sort_noidx_big
diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f
index ff40263c..089c3871 100644
--- a/src/utils/sort.irp.f
+++ b/src/utils/sort.irp.f
@@ -1,222 +1,4 @@
 BEGIN_TEMPLATE
- subroutine insertion_$Xsort (x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the insertion sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  $type                          :: xtmp
-  integer                        :: i, i0, j, jmax
-
-  do i=2,isize
-    xtmp = x(i)
-    i0 = iorder(i)
-    j=i-1
-    do while (j>0)
-      if ((x(j) <= xtmp)) exit
-      x(j+1) = x(j)
-      iorder(j+1) = iorder(j)
-      j=j-1
-    enddo
-    x(j+1) = xtmp
-    iorder(j+1) = i0
-  enddo
- end subroutine insertion_$Xsort
-
- subroutine quick_$Xsort(x, iorder, isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the quicksort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer, external              :: omp_get_num_threads
-  call rec_$X_quicksort(x,iorder,isize,1,isize,nproc)
- end
-
- recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level)
-  implicit none
-  integer, intent(in)            :: isize, first, last, level
-  integer,intent(inout)          :: iorder(isize)
-  $type, intent(inout)           :: x(isize)
-  $type                          :: c, tmp
-  integer                        :: itmp
-  integer                        :: i, j
-
-  if(isize<2)return
-
-  c = x( shiftr(first+last,1) )
-  i = first
-  j = last
-  do
-    do while (x(i) < c)
-      i=i+1
-    end do
-    do while (c < x(j))
-      j=j-1
-    end do
-    if (i >= j) exit
-    tmp  = x(i)
-    x(i) = x(j)
-    x(j) = tmp
-    itmp      = iorder(i)
-    iorder(i) = iorder(j)
-    iorder(j) = itmp
-    i=i+1
-    j=j-1
-  enddo
-  if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then
-    if (first < i-1) then
-      call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2)
-    endif
-    if (j+1 < last) then
-      call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2)
-    endif
-  else
-    if (first < i-1) then
-      call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2)
-    endif
-    if (j+1 < last) then
-      call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2)
-    endif
-  endif
- end
-
- subroutine heap_$Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the heap sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-
-  integer                        :: i, k, j, l, i0
-  $type                          :: xtemp
-
-  l = isize/2+1
-  k = isize
-  do while (.True.)
-    if (l>1) then
-      l=l-1
-      xtemp = x(l)
-      i0 = iorder(l)
-    else
-      xtemp = x(k)
-      i0 = iorder(k)
-      x(k) = x(1)
-      iorder(k) = iorder(1)
-      k = k-1
-      if (k == 1) then
-        x(1) = xtemp
-        iorder(1) = i0
-        exit
-      endif
-    endif
-    i=l
-    j = shiftl(l,1)
-    do while (j<k)
-      if ( x(j) < x(j+1) ) then
-        j=j+1
-      endif
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    enddo
-    if (j==k) then
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    endif
-    x(i) = xtemp
-    iorder(i) = i0
-  enddo
- end subroutine heap_$Xsort
-
- subroutine heap_$Xsort_big(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the heap sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! This is a version for very large arrays where the indices need
-  ! to be in integer*8 format
-  END_DOC
-  integer*8,intent(in)           :: isize
-  $type,intent(inout)            :: x(isize)
-  integer*8,intent(inout)        :: iorder(isize)
-
-  integer*8                      :: i, k, j, l, i0
-  $type                          :: xtemp
-
-  l = isize/2+1
-  k = isize
-  do while (.True.)
-    if (l>1) then
-      l=l-1
-      xtemp = x(l)
-      i0 = iorder(l)
-    else
-      xtemp = x(k)
-      i0 = iorder(k)
-      x(k) = x(1)
-      iorder(k) = iorder(1)
-      k = k-1
-      if (k == 1) then
-        x(1) = xtemp
-        iorder(1) = i0
-        exit
-      endif
-    endif
-    i=l
-    j = shiftl(l,1)
-    do while (j<k)
-      if ( x(j) < x(j+1) ) then
-        j=j+1
-      endif
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    enddo
-    if (j==k) then
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    endif
-    x(i) = xtemp
-    iorder(i) = i0
-  enddo
-
- end subroutine heap_$Xsort_big
 
  subroutine sorted_$Xnumber(x,isize,n)
   implicit none
@@ -250,222 +32,6 @@ SUBST [ X, type ]
 END_TEMPLATE
 
 
-!---------------------- INTEL
-IRP_IF INTEL
-
-BEGIN_TEMPLATE
- subroutine $Xsort(x,iorder,isize)
-  use intel
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  character, allocatable         :: tmp(:)
-  if (isize < 2) return
-  call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
-  allocate(tmp(n))
-  call ippsSortRadixIndexAscend_$ityp(x, $n, iorder, isize, tmp)
-  deallocate(tmp)
-  iorder(1:isize) = iorder(1:isize)+1
-  call $Xset_order(x,iorder,isize)
- end
-
- subroutine $Xsort_noidx(x,isize)
-  use intel
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer                        :: n
-  character, allocatable         :: tmp(:)
-  if (isize < 2) return
-  call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
-  allocate(tmp(n))
-  call ippsSortRadixAscend_$ityp_I(x, isize, tmp)
-  deallocate(tmp)
- end
-
-SUBST [ X, type, ityp, n, ippsz ]
-   ; real ; 32f ; 4 ; 13 ;;
- i ; integer ; 32s ; 4 ; 11 ;;
- i2 ; integer*2 ; 16s ; 2 ; 7 ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-!  call sorted_$Xnumber(x,isize,n)
-!  if (isize == n) then
-!    return
-!  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call heap_$Xsort(x,iorder,isize)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- d ; double precision ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-  call sorted_$Xnumber(x,isize,n)
-  if (isize == n) then
-    return
-  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call $Xradix_sort(x,iorder,isize,-1)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- i8 ; integer*8 ;;
-END_TEMPLATE
-
-!---------------------- END INTEL
-IRP_ELSE
-!---------------------- NON-INTEL
-BEGIN_TEMPLATE
-
- subroutine $Xsort_noidx(x,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer, allocatable           :: iorder(:)
-  integer                        :: i
-  allocate(iorder(isize))
-  do i=1,isize
-   iorder(i)=i
-  enddo
-  call $Xsort(x,iorder,isize)
-  deallocate(iorder)
- end subroutine $Xsort_noidx
-
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
- i ; integer ;;
- i8 ; integer*8 ;;
- i2 ; integer*2 ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-!  call sorted_$Xnumber(x,isize,n)
-!  if (isize == n) then
-!    return
-!  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call heap_$Xsort(x,iorder,isize)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-  call sorted_$Xnumber(x,isize,n)
-  if (isize == n) then
-    return
-  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call $Xradix_sort(x,iorder,isize,-1)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- i ; integer ;;
- i8 ; integer*8 ;;
- i2 ; integer*2 ;;
-END_TEMPLATE
-
-IRP_ENDIF
-!---------------------- END NON-INTEL
-
-
 
 BEGIN_TEMPLATE
  subroutine $Xset_order(x,iorder,isize)
@@ -491,47 +57,6 @@ BEGIN_TEMPLATE
   deallocate(xtmp)
  end
 
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
- i ; integer ;;
- i8; integer*8 ;;
- i2; integer*2 ;;
-END_TEMPLATE
-
-
-BEGIN_TEMPLATE
- subroutine insertion_$Xsort_big (x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the insertion sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! This is a version for very large arrays where the indices need
-  ! to be in integer*8 format
-  END_DOC
-  integer*8,intent(in)           :: isize
-  $type,intent(inout)            :: x(isize)
-  integer*8,intent(inout)        :: iorder(isize)
-  $type                          :: xtmp
-  integer*8                      :: i, i0, j, jmax
-
-  do i=2_8,isize
-    xtmp = x(i)
-    i0 = iorder(i)
-    j = i-1_8
-    do while (j>0_8)
-      if (x(j)<=xtmp) exit
-      x(j+1_8) = x(j)
-      iorder(j+1_8) = iorder(j)
-      j = j-1_8
-    enddo
-    x(j+1_8) = xtmp
-    iorder(j+1_8) = i0
-  enddo
-
- end subroutine insertion_$Xsort_big
-
  subroutine $Xset_order_big(x,iorder,isize)
   implicit none
   BEGIN_DOC
@@ -565,223 +90,3 @@ SUBST [ X, type ]
 END_TEMPLATE
 
 
-BEGIN_TEMPLATE
-
-recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix)
-  implicit none
-
-  BEGIN_DOC
-  ! Sort integer array x(isize) using the radix sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! iradix should be -1 in input.
-  END_DOC
-  integer*$int_type, intent(in)  :: isize
-  integer*$int_type, intent(inout) :: iorder(isize)
-  integer*$type, intent(inout)   :: x(isize)
-  integer, intent(in)            :: iradix
-  integer                        :: iradix_new
-  integer*$type, allocatable     :: x2(:), x1(:)
-  integer*$type                  :: i4               ! data type
-  integer*$int_type, allocatable :: iorder1(:),iorder2(:)
-  integer*$int_type              :: i0, i1, i2, i3, i ! index type
-  integer*$type                  :: mask
-  integer                        :: err
-  !DIR$ ATTRIBUTES ALIGN : 128   :: iorder1,iorder2, x2, x1
-
-  if (isize < 2) then
-    return
-  endif
-
-  if (iradix == -1) then ! Sort Positive and negative
-
-    allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to allocate arrays'
-      stop
-    endif
-
-    i1=1_$int_type
-    i2=1_$int_type
-    do i=1_$int_type,isize
-      if (x(i) < 0_$type) then
-        iorder1(i1) = iorder(i)
-        x1(i1) = -x(i)
-        i1 = i1+1_$int_type
-      else
-        iorder2(i2) = iorder(i)
-        x2(i2) = x(i)
-        i2 = i2+1_$int_type
-      endif
-    enddo
-    i1=i1-1_$int_type
-    i2=i2-1_$int_type
-
-    do i=1_$int_type,i2
-      iorder(i1+i) = iorder2(i)
-      x(i1+i) = x2(i)
-    enddo
-    deallocate(x2,iorder2,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x2, iorder2'
-      stop
-    endif
-
-
-    if (i1 > 1_$int_type) then
-      call $Xradix_sort$big(x1,iorder1,i1,-2)
-      do i=1_$int_type,i1
-        x(i) = -x1(1_$int_type+i1-i)
-        iorder(i) = iorder1(1_$int_type+i1-i)
-      enddo
-    endif
-
-    if (i2>1_$int_type) then
-      call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2)
-    endif
-
-    deallocate(x1,iorder1,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x1, iorder1'
-      stop
-    endif
-    return
-
-  else if (iradix == -2) then ! Positive
-
-    ! Find most significant bit
-
-    i0 = 0_$int_type
-    i4 = maxval(x)
-
-    iradix_new = max($integer_size-1-leadz(i4),1)
-    mask = ibset(0_$type,iradix_new)
-
-    allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to allocate arrays'
-      stop
-    endif
-
-    i1=1_$int_type
-    i2=1_$int_type
-
-    do i=1_$int_type,isize
-      if (iand(mask,x(i)) == 0_$type) then
-        iorder1(i1) = iorder(i)
-        x1(i1) = x(i)
-        i1 = i1+1_$int_type
-      else
-        iorder2(i2) = iorder(i)
-        x2(i2) = x(i)
-        i2 = i2+1_$int_type
-      endif
-    enddo
-    i1=i1-1_$int_type
-    i2=i2-1_$int_type
-
-    do i=1_$int_type,i1
-      iorder(i0+i) = iorder1(i)
-      x(i0+i) = x1(i)
-    enddo
-    i0 = i0+i1
-    i3 = i0
-    deallocate(x1,iorder1,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x1, iorder1'
-      stop
-    endif
-
-
-    do i=1_$int_type,i2
-      iorder(i0+i) = iorder2(i)
-      x(i0+i) = x2(i)
-    enddo
-    i0 = i0+i2
-    deallocate(x2,iorder2,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x2, iorder2'
-      stop
-    endif
-
-
-    if (i3>1_$int_type) then
-      call $Xradix_sort$big(x,iorder,i3,iradix_new-1)
-    endif
-
-    if (isize-i3>1_$int_type) then
-      call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1)
-    endif
-
-    return
-  endif
-
-  ASSERT (iradix >= 0)
-
-  if (isize < 48) then
-    call insertion_$Xsort$big(x,iorder,isize)
-    return
-  endif
-
-
-  allocate(x2(isize),iorder2(isize),stat=err)
-  if (err /= 0) then
-    print *,  irp_here, ': Unable to allocate arrays x1, iorder1'
-    stop
-  endif
-
-
-  mask = ibset(0_$type,iradix)
-  i0=1_$int_type
-  i1=1_$int_type
-
-  do i=1_$int_type,isize
-    if (iand(mask,x(i)) == 0_$type) then
-      iorder(i0) = iorder(i)
-      x(i0) = x(i)
-      i0 = i0+1_$int_type
-    else
-      iorder2(i1) = iorder(i)
-      x2(i1) = x(i)
-      i1 = i1+1_$int_type
-    endif
-  enddo
-  i0=i0-1_$int_type
-  i1=i1-1_$int_type
-
-  do i=1_$int_type,i1
-    iorder(i0+i) = iorder2(i)
-    x(i0+i) = x2(i)
-  enddo
-
-  deallocate(x2,iorder2,stat=err)
-  if (err /= 0) then
-    print *,  irp_here, ': Unable to allocate arrays x2, iorder2'
-    stop
-  endif
-
-
-  if (iradix == 0) then
-    return
-  endif
-
-
-  if (i1>1_$int_type) then
-    call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1)
-  endif
-  if (i0>1) then
-    call $Xradix_sort$big(x,iorder,i0,iradix-1)
-  endif
-
- end
-
-SUBST [ X, type, integer_size, is_big, big, int_type ]
- i  ; 4 ; 32 ; .False. ;      ; 4 ;;
- i8 ; 8 ; 64 ; .False. ;      ; 4 ;;
- i2 ; 2 ; 16 ; .False. ;      ; 4 ;;
- i  ; 4 ; 32 ; .True.  ; _big ; 8 ;;
- i8 ; 8 ; 64 ; .True.  ; _big ; 8 ;;
-END_TEMPLATE
-
-
-
diff --git a/src/utils/units.irp.f b/src/utils/units.irp.f
new file mode 100644
index 00000000..51dcec82
--- /dev/null
+++ b/src/utils/units.irp.f
@@ -0,0 +1,32 @@
+BEGIN_PROVIDER [double precision, ha_to_ev]
+&BEGIN_PROVIDER [double precision, au_to_D]
+&BEGIN_PROVIDER [double precision, planck_cte]
+&BEGIN_PROVIDER [double precision, light_speed]
+&BEGIN_PROVIDER [double precision, Ha_to_J]
+&BEGIN_PROVIDER [double precision, Ha_to_nm]
+
+  implicit none
+
+  BEGIN_DOC
+  ! Some conversion between different units
+  END_DOC
+
+  ! Hartree to eV
+  Ha_to_eV = 27.211396641308d0
+
+  ! au to Debye
+  au_to_D = 2.5415802529d0
+
+  ! Planck's constant in SI units
+  planck_cte = 6.62606957d-34
+
+  ! Light speed in SI units
+  light_speed = 2.99792458d10
+
+  ! Hartree to Joule
+  Ha_to_J = 4.35974434d-18
+
+  ! Hartree to nm
+  Ha_to_nm = 1d9 * (planck_cte * light_speed) / Ha_to_J
+
+END_PROVIDER
diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f
index ef846bdb..41e7cad6 100644
--- a/src/utils/util.irp.f
+++ b/src/utils/util.irp.f
@@ -37,6 +37,10 @@ double precision function binom_func(i,j)
   else
     binom_func = dexp( logfact(i)-logfact(j)-logfact(i-j) )
   endif
+
+  ! To avoid .999999 numbers
+  binom_func = floor(binom_func + 0.5d0)
+
 end
 
 
@@ -132,7 +136,7 @@ double precision function logfact(n)
   enddo
 end function
 
-
+! ---
 
 BEGIN_PROVIDER [ double precision, fact_inv, (128) ]
   implicit none
@@ -146,6 +150,29 @@ BEGIN_PROVIDER [ double precision, fact_inv, (128) ]
   enddo
 END_PROVIDER
 
+! ---
+
+BEGIN_PROVIDER [ double precision, shiftfact_op5_inv, (128) ]
+
+  BEGIN_DOC
+  !
+  ! 1 / Gamma(n + 0.5)
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: tmp
+
+  do i = 1, size(shiftfact_op5_inv)
+    !tmp = dgamma(dble(i) + 0.5d0)
+    tmp = gamma(dble(i) + 0.5d0)
+    shiftfact_op5_inv(i) = 1.d0 / tmp
+  enddo
+
+END_PROVIDER
+
+! ---
 
 double precision function dble_fact(n)
   implicit none
@@ -300,12 +327,12 @@ subroutine wall_time(t)
 end
 
 BEGIN_PROVIDER [ integer, nproc ]
-  use omp_lib
   implicit none
   BEGIN_DOC
   ! Number of current OpenMP threads
   END_DOC
 
+  integer, external              :: omp_get_num_threads
   nproc = 1
   !$OMP PARALLEL
   !$OMP MASTER
@@ -407,3 +434,63 @@ subroutine lowercase(txt,n)
   enddo
 end
 
+subroutine v2_over_x(v,x,res)
+
+  !BEGIN_DOC
+  ! Two by two diagonalization to avoid the divergence in v^2/x when x goes to 0
+  !END_DOC
+
+  implicit none
+
+  double precision, intent(in)  :: v, x
+  double precision, intent(out) :: res
+
+  double precision :: delta_E, tmp, val
+
+  res = 0d0
+  delta_E = x
+  if (v == 0.d0) return
+
+  val = 2d0 * v
+  tmp = dsqrt(delta_E * delta_E + val * val)
+  if (delta_E < 0.d0) then
+      tmp = -tmp
+  endif
+  res = 0.5d0 * (tmp - delta_E)
+
+end
+
+subroutine sum_A_At(A, N)
+
+  !BEGIN_DOC
+  ! useful for symmetrizing a tensor without a temporary tensor
+  !END_DOC
+
+  implicit none
+  integer,          intent(in)    :: N
+  double precision, intent(inout) :: A(N,N)
+  integer                         :: i, j
+
+ !$OMP PARALLEL       &
+ !$OMP DEFAULT (NONE) &
+ !$OMP PRIVATE (i, j) & 
+ !$OMP SHARED (A, N)
+ !$OMP DO 
+  do j = 1, N
+    do i = j, N
+      A(i,j) += A(j,i)
+    enddo
+  enddo
+ !$OMP END DO
+
+ !$OMP DO 
+  do j = 2, N
+    do i = 1, j-1
+      A(i,j) = A(j,i)
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+end
+
diff --git a/src/utils_cc/EZFIO.cfg b/src/utils_cc/EZFIO.cfg
new file mode 100644
index 00000000..71ee87e3
--- /dev/null
+++ b/src/utils_cc/EZFIO.cfg
@@ -0,0 +1,77 @@
+[cc_thresh_conv]
+type: double precision
+doc: Threshold for the convergence of the residual equations.
+interface: ezfio,ocaml,provider
+default: 1e-6
+
+[cc_max_iter]
+type: integer
+doc: Maximum number of iterations.
+interface: ezfio,ocaml,provider
+default: 100
+
+[cc_diis_depth]
+type: integer
+doc: Maximum depth of the DIIS, i.e., maximum number of iterations that the DIIS keeps in memory. Warning, we allocate matrices with the diis depth at the beginning without update. If you don't have enough memory it should crash in memory.
+interface: ezfio,ocaml,provider
+default: 8
+
+[cc_level_shift]
+type: double precision
+doc: Level shift for the CC
+interface: ezfio,ocaml,provider
+default: 0.0
+
+[cc_level_shift_guess]
+type: double precision
+doc: Level shift for the guess of the CC amplitudes
+interface: ezfio,ocaml,provider
+default: 0.0
+
+[cc_update_method]
+type: character*(32)
+doc: Method used to update the CC amplitudes. none -> normal, diis -> with diis.
+interface: ezfio,ocaml,provider
+default: diis
+
+[cc_guess_t1]
+type: character*(32)
+doc: Guess used to initialize the T1 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk.
+interface: ezfio,ocaml,provider
+default: MP
+
+[cc_guess_t2]
+type: character*(32)
+doc: Guess used to initialize the T2 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk.
+interface: ezfio,ocaml,provider
+default: MP
+
+[cc_write_t1]
+type: logical
+doc: If true, it will write on disk the T1 amplitudes at the end of the calculation.
+interface: ezfio,ocaml,provider
+default: False
+
+[cc_write_t2]
+type: logical
+doc: If true, it will write on disk the T2 amplitudes at the end of the calculation.
+interface: ezfio,ocaml,provider
+default: False
+
+[cc_par_t]
+type: logical
+doc: If true, the CCSD(T) will be computed.
+interface: ezfio,ocaml,provider
+default: False
+
+[cc_dev]
+type: logical
+doc: Only for dev purposes.
+interface: ezfio,ocaml,provider
+default: False
+
+[cc_ref]
+type: integer
+doc: Index of the reference determinant in psi_det for CC calculation.
+interface: ezfio,ocaml,provider
+default: 1
diff --git a/src/utils_cc/NEED b/src/utils_cc/NEED
new file mode 100644
index 00000000..bd5a151f
--- /dev/null
+++ b/src/utils_cc/NEED
@@ -0,0 +1,4 @@
+hartree_fock
+two_body_rdm
+bitmask
+determinants
diff --git a/src/utils_cc/README.md b/src/utils_cc/README.md
new file mode 100644
index 00000000..87cde388
--- /dev/null
+++ b/src/utils_cc/README.md
@@ -0,0 +1,34 @@
+# Utils for CC
+
+Utils for the CC modules.
+
+## Contents
+- Providers related to reference occupancy
+- Integrals related to the reference
+- Diis for CC (but can be used for something else if you provide your own error vector)
+- Guess for CC amplitudes
+- Routines to update the CC amplitudes
+- Phase between to arbitrary determinants
+- print of the qp edit wf
+
+## Keywords
+- cc_thresh_conv: Threshold for the convergence of the residual equations. Default: 1e-6.
+- cc_max_iter: Maximum number of iterations. Default: 100.
+- cc_diis_depth: Diis depth. Default: 8.
+- cc_level_shift: Level shift for the CC. Default: 0.0.
+- cc_level_shift_guess: Level shift for the MP guess of the amplitudes. Default: 0.0.
+- cc_update_method: Method used to update the CC amplitudes. none -> normal, diis -> with diis. Default: diis.
+- cc_guess_t1: Guess used to initialize the T1 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. Default: MP.
+- cc_guess_t2: Guess used to initialize the T2 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. Default: MP.
+- cc_write_t1: If true, it will write on disk the T1 amplitudes at the end of the calculation. Default: False.
+- cc_write_t2: If true, it will write on disk the T2 amplitudes at the end of the calculation. Default: False.
+- cc_par_t: If true, the CCSD(T) will be computed.
+- cc_ref: Index of the reference determinant in psi_det for CC calculation. Default: 1.
+
+## Org files
+The org files are stored in the directory org in order to avoid overwriting on user changes.
+The org files can be modified, to export the change to the source code, run
+```
+./TANGLE_org_mode.sh and 
+mv *.irp.f ../.
+```
diff --git a/src/utils_cc/diis.irp.f b/src/utils_cc/diis.irp.f
new file mode 100644
index 00000000..fe771373
--- /dev/null
+++ b/src/utils_cc/diis.irp.f
@@ -0,0 +1,529 @@
+! Code
+
+subroutine diis_cc(all_err,all_t,sze,m,iter,t)
+
+  implicit none
+
+  BEGIN_DOC
+  ! DIIS. Take the error vectors and the amplitudes of the previous
+  ! iterations to compute the new amplitudes
+  END_DOC
+  
+  ! {err_i}_{i=1}^{m_it} -> B -> c
+  ! {t_i}_{i=1}^{m_it}, c, {err_i}_{i=1}^{m_it} -> t_{m_it+1}
+
+  integer, intent(in)             :: m,iter,sze
+  double precision, intent(in)    :: all_err(sze,m)
+  double precision, intent(in)    :: all_t(sze,m)
+  
+  double precision, intent(out)   :: t(sze)
+  
+  double precision, allocatable   :: B(:,:), c(:), zero(:)
+  integer                         :: m_iter
+  integer                         :: i,j,k
+  integer                         :: info
+  integer, allocatable            :: ipiv(:)
+  double precision                :: accu
+  
+  m_iter = min(m,iter)
+  !print*,'m_iter',m_iter
+  allocate(B(m_iter+1,m_iter+1), c(m_iter), zero(m_iter+1))
+  allocate(ipiv(m+1))
+
+  ! B(i,j) =  < err(iter-m_iter+j),err(iter-m_iter+i) > ! iter-m_iter will be zero for us
+  B = 0d0
+  !$OMP PARALLEL &
+  !$OMP SHARED(B,m,m_iter,sze,all_err) &
+  !$OMP PRIVATE(i,j,k,accu) &
+  !$OMP DEFAULT(NONE)
+  do j = 1, m_iter
+    do i = 1, m_iter
+      accu = 0d0
+      !$OMP DO
+      do k = 1, sze
+        ! the errors of the ith iteration are in all_err(:,m+1-i)
+        accu = accu + all_err(k,m+1-i) * all_err(k,m+1-j)
+      enddo
+      !$OMP END DO NOWAIT
+      !$OMP CRITICAL
+      B(i,j) = B(i,j) + accu
+      !$OMP END CRITICAL
+    enddo
+  enddo
+  !$OMP END PARALLEL
+  
+  do i = 1, m_iter
+    B(i,m_iter+1) = -1
+  enddo
+  do j = 1, m_iter
+    B(m_iter+1,j) = -1
+  enddo
+  ! Debug
+  !print*,'B'
+  !do i = 1, m_iter+1
+  !  write(*,'(100(F10.6))') B(i,:)
+  !enddo
+
+  ! (0 0 .... 0 -1)
+  zero = 0d0
+  zero(m_iter+1) = -1d0
+
+  ! Solve B.c = zero
+  call dgesv(m_iter+1, 1, B, size(B,1), ipiv, zero, size(zero,1), info)
+  if (info /= 0) then
+    print*,'DIIS error in dgesv:', info
+    call abort
+  endif
+  ! c corresponds to the m_iter first solutions
+  c = zero(1:m_iter)
+  ! Debug
+  !print*,'c',c
+  !print*,'all_t' 
+  !do i = 1, m
+  !  write(*,'(100(F10.6))') all_t(:,i)
+  !enddo
+  !print*,'all_err' 
+  !do i = 1, m
+  !  write(*,'(100(F10.6))') all_err(:,i)
+  !enddo
+
+  ! update T
+  !$OMP PARALLEL &
+  !$OMP SHARED(t,c,m,all_err,all_t,sze,m_iter) &
+  !$OMP PRIVATE(i,j,accu) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do i = 1, sze
+    t(i) = 0d0
+  enddo
+  !$OMP END DO
+  do i = 1, m_iter
+    !$OMP DO
+    do j = 1, sze
+      t(j) = t(j) + c(i) * (all_t(j,m+1-i) + all_err(j,m+1-i))
+    enddo
+    !$OMP END DO
+  enddo
+  !$OMP END PARALLEL
+
+  !print*,'new t',t
+
+  deallocate(ipiv,B,c,zero)
+
+end
+
+! Update all err
+
+subroutine update_all_err(err,all_err,sze,m,iter)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Shift all the err vectors of the previous iterations to add the new one
+  ! The last err vector is placed in the last position and all the others are
+  ! moved toward the first one.
+  END_DOC
+
+  integer, intent(in)             :: m, iter, sze
+  double precision, intent(in)    :: err(sze)
+  double precision, intent(inout) :: all_err(sze,m)
+  integer                         :: i,j
+  integer                         :: m_iter
+
+  m_iter = min(m,iter)
+
+  ! Shift
+  !$OMP PARALLEL &
+  !$OMP SHARED(m,all_err,err,sze) &
+  !$OMP PRIVATE(i,j) &
+  !$OMP DEFAULT(NONE)
+  do i = 1, m-1
+    !$OMP DO
+    do j = 1, sze
+      all_err(j,i) = all_err(j,i+1)
+    enddo
+    !$OMP END DO
+  enddo
+  
+  ! Debug
+  !print*,'shift err'
+  !do i = 1, m
+  !  print*,i, all_err(:,i)
+  !enddo
+
+  ! New
+  !$OMP DO
+  do i = 1, sze
+    all_err(i,m) = err(i)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  ! Debug
+  !print*,'Updated err'
+  !do i = 1, m
+  !  print*,i, all_err(:,i)
+  !enddo
+
+end
+
+! Update all t
+
+subroutine update_all_t(t,all_t,sze,m,iter)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Shift all the t vectors of the previous iterations to add the new one
+  ! The last t vector is placed in the last position and all the others are
+  ! moved toward the first one.
+  END_DOC
+
+  integer, intent(in)             :: m, iter, sze
+  double precision, intent(in)    :: t(sze)
+  double precision, intent(inout) :: all_t(sze,m)
+  integer                         :: i,j
+  integer                         :: m_iter
+
+  m_iter = min(m,iter)
+
+  ! Shift
+  !$OMP PARALLEL &
+  !$OMP SHARED(m,all_t,t,sze) &
+  !$OMP PRIVATE(i,j) &
+  !$OMP DEFAULT(NONE)
+  do i = 1, m-1
+    !$OMP DO
+    do j = 1, sze
+      all_t(j,i) = all_t(j,i+1)
+    enddo
+    !$OMP END DO
+  enddo
+
+  ! New
+  !$OMP DO
+  do i = 1, sze
+    all_t(i,m) = t(i)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  ! Debug
+  !print*,'Updated t'
+  !do i = 1, m
+  !  print*,i, all_t(:,i)
+  !enddo
+
+end
+
+! Err1
+
+subroutine compute_err1(nO,nV,f_o,f_v,r1,err1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the error vector for the t1
+  END_DOC
+
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: f_o(nO), f_v(nV), r1(nO,nV)
+  
+  double precision, intent(out) :: err1(nO,nV)
+
+  integer                       :: i,a
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(err1,r1,f_o,f_v,nO,nV,cc_level_shift) &
+  !$OMP PRIVATE(i,a) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do a = 1, nV
+    do i = 1, nO
+      err1(i,a) = - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+end
+
+! Err2
+
+subroutine compute_err2(nO,nV,f_o,f_v,r2,err2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the error vector for the t2
+  END_DOC
+
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: f_o(nO), f_v(nV), r2(nO,nO,nV,nV)
+  
+  double precision, intent(out) :: err2(nO,nO,nV,nV)
+
+  integer                       :: i,j,a,b
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(err2,r2,f_o,f_v,nO,nV,cc_level_shift) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO       
+          err2(i,j,a,b) = - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+end
+
+! Update t
+
+subroutine update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV,nb_iter
+  double precision, intent(in)    :: f_o(nO), f_v(nV)
+  double precision, intent(in)    :: r1(nO,nV), r2(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV)
+  double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth)
+  double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth)
+
+  double precision, allocatable   :: err1(:,:), err2(:,:,:,:)
+  double precision, allocatable   :: tmp_err1(:), tmp_err2(:)
+  double precision, allocatable   :: tmp_t1(:), tmp_t2(:)
+  
+  if (cc_update_method == 'diis') then
+
+    allocate(err1(nO,nV), err2(nO,nO,nV,nV))
+    allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV))
+    allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV))
+
+    ! DIIS T1, it is not always good since the t1 can be small
+    ! That's why there is a call to update the t1 in the standard way
+    ! T1 error tensor
+    !call compute_err1(nO,nV,f_o,f_v,r1,err1)
+    ! Transfo errors and parameters in vectors
+    !tmp_err1 = reshape(err1,(/nO*nV/))
+    !tmp_t1   = reshape(t1  ,(/nO*nV/))
+    ! Add the error and parameter vectors with those of the previous iterations
+    !call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1)
+    !call update_all_t  (tmp_t1  ,all_t1  ,nO*nV,cc_diis_depth,nb_iter+1)
+    ! Diis and reshape T as a tensor
+    !call diis_cc(all_err1,all_t1,nO*nV,cc_diis_depth,nb_iter+1,tmp_t1)
+    !t1 = reshape(tmp_t1  ,(/nO,nV/))
+    call update_t1(nO,nV,f_o,f_v,r1,t1)
+
+    ! DIIS T2
+    ! T2 error tensor
+    call compute_err2(nO,nV,f_o,f_v,r2,err2)
+    ! Transfo errors and parameters in vectors
+    tmp_err2 = reshape(err2,(/nO*nO*nV*nV/))
+    tmp_t2   = reshape(t2  ,(/nO*nO*nV*nV/))
+    ! Add the error and parameter vectors with those of the previous iterations
+    call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+    call update_all_t  (tmp_t2  ,all_t2  ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+    ! Diis and reshape T as a tensor
+    call diis_cc(all_err2,all_t2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t2)
+    t2 = reshape(tmp_t2  ,(/nO,nO,nV,nV/))
+
+    deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2)
+
+  ! Standard update as T = T - Delta
+  elseif (cc_update_method == 'none') then
+     
+    call update_t1(nO,nV,f_o,f_v,r1,t1)
+    call update_t2(nO,nV,f_o,f_v,r2,t2)
+    
+  else
+    print*,'Unkonw cc_method_method: '//cc_update_method
+  endif
+  
+end
+
+! Update t v2
+
+subroutine update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV,nb_iter
+  double precision, intent(in)    :: f_o(nO), f_v(nV)
+  double precision, intent(in)    :: r1(nO,nV), r2(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV)
+  double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth)
+  double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth)
+
+  double precision, allocatable   :: all_t(:,:), all_err(:,:), tmp_t(:)
+  double precision, allocatable   :: err1(:,:), err2(:,:,:,:)
+  double precision, allocatable   :: tmp_err1(:), tmp_err2(:)
+  double precision, allocatable   :: tmp_t1(:), tmp_t2(:)
+
+  integer                         :: i,j
+  
+  ! Allocate
+  allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth))
+  allocate(tmp_t(nO*nV+nO*nO*nV*nV))
+  allocate(err1(nO,nV), err2(nO,nO,nV,nV))
+  allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV))
+  allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV))
+
+  ! Compute the errors and reshape them as vector
+  call compute_err1(nO,nV,f_o,f_v,r1,err1)
+  call compute_err2(nO,nV,f_o,f_v,r2,err2)
+  tmp_err1 = reshape(err1,(/nO*nV/))
+  tmp_err2 = reshape(err2,(/nO*nO*nV*nV/))
+  tmp_t1   = reshape(t1  ,(/nO*nV/))
+  tmp_t2   = reshape(t2  ,(/nO*nO*nV*nV/))
+  
+  ! Update the errors and parameters for the diis
+  call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1)
+  call update_all_t  (tmp_t1  ,all_t1  ,nO*nV,cc_diis_depth,nb_iter+1)
+  call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+  call update_all_t  (tmp_t2  ,all_t2  ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+
+  ! Gather the different parameters and errors
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,all_err,all_err1,all_err2,cc_diis_depth,&
+  !$OMP all_t,all_t1,all_t2) &
+  !$OMP PRIVATE(i,j) &
+  !$OMP DEFAULT(NONE)
+  do j = 1, cc_diis_depth
+    !$OMP DO 
+    do i = 1, nO*nV
+      all_err(i,j) = all_err1(i,j)
+    enddo
+    !$OMP END DO NOWAIT
+  enddo
+  do j = 1, cc_diis_depth
+    !$OMP DO
+    do i = 1, nO*nO*nV*nV
+      all_err(i+nO*nV,j) = all_err2(i,j)
+    enddo
+    !$OMP END DO NOWAIT
+  enddo
+  do j = 1, cc_diis_depth
+    !$OMP DO 
+    do i = 1, nO*nV
+      all_t(i,j) = all_t1(i,j)
+    enddo
+    !$OMP END DO NOWAIT
+  enddo
+  do j = 1, cc_diis_depth
+    !$OMP DO 
+    do i = 1, nO*nO*nV*nV
+      all_t(i+nO*nV,j) = all_t2(i,j)
+    enddo
+    !$OMP END DO
+  enddo
+  !$OMP END PARALLEL
+  
+  ! Diis
+  call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t)
+
+  ! Split the resulting vector
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,tmp_t,tmp_t1,tmp_t2) &
+  !$OMP PRIVATE(i) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do i = 1, nO*nV
+    tmp_t1(i) = tmp_t(i)
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP DO
+  do i = 1, nO*nO*nV*nV
+    tmp_t2(i) = tmp_t(i+nO*nV) 
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  ! Reshape as tensors
+  t1 = reshape(tmp_t1 ,(/nO,nV/))
+  t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/))
+
+  ! Deallocate
+  deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2,all_t,all_err)
+
+end
+
+! Update t v3
+
+subroutine update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV,nb_iter
+  double precision, intent(in)    :: f_o(nO), f_v(nV)
+  double precision, intent(in)    :: r1(nO,nV), r2(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: t1(nO*nV), t2(nO*nO*nV*nV)
+  double precision, intent(inout) :: all_err(nO*nV+nO*nO*nV*nV, cc_diis_depth)
+  double precision, intent(inout) :: all_t(nO*nV+nO*nO*nV*nV, cc_diis_depth)
+
+  double precision, allocatable   :: tmp(:)
+
+  integer                         :: i,j
+  
+  ! Allocate
+  allocate(tmp(nO*nV+nO*nO*nV*nV))
+
+  ! Compute the errors
+  call compute_err1(nO,nV,f_o,f_v,r1,tmp(1:nO*nV))
+  call compute_err2(nO,nV,f_o,f_v,r2,tmp(nO*nV+1:nO*nV+nO*nO*nV*nV))
+  
+  ! Update the errors and parameters for the diis
+  call update_all_err(tmp,all_err,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,tmp,t1,t2) &
+  !$OMP PRIVATE(i) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do i = 1, nO*nV
+    tmp(i) = t1(i)
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP DO
+  do i = 1, nO*nO*nV*nV
+    tmp(i+nO*nV) = t2(i)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+   
+  call update_all_t(tmp,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+
+  ! Diis
+  call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp)
+
+  ! Split the resulting vector
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,tmp,t1,t2) &
+  !$OMP PRIVATE(i) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do i = 1, nO*nV
+    t1(i) = tmp(i)
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP DO
+  do i = 1, nO*nO*nV*nV
+    t2(i) = tmp(i+nO*nV) 
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  ! Deallocate
+  deallocate(tmp)
+
+end
diff --git a/src/utils_cc/energy.irp.f b/src/utils_cc/energy.irp.f
new file mode 100644
index 00000000..33e0cbae
--- /dev/null
+++ b/src/utils_cc/energy.irp.f
@@ -0,0 +1,13 @@
+subroutine det_energy(det,energy)
+
+  implicit none
+
+  integer(bit_kind), intent(in) :: det
+
+  double precision, intent(out) :: energy
+
+  call i_H_j(det,det,N_int,energy)
+
+  energy = energy + nuclear_repulsion
+  
+end
diff --git a/src/utils_cc/guess_t.irp.f b/src/utils_cc/guess_t.irp.f
new file mode 100644
index 00000000..42acdf78
--- /dev/null
+++ b/src/utils_cc/guess_t.irp.f
@@ -0,0 +1,213 @@
+! T1
+
+subroutine guess_t1(nO,nV,f_o,f_v,f_ov,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Update the T1 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: f_o(nO), f_v(nV), f_ov(nO,nV)
+
+  ! inout
+  double precision, intent(out) :: t1(nO, nV)
+
+  ! internal
+  integer                       :: i,a
+
+  if (trim(cc_guess_t1) == 'none') then
+     t1 = 0d0
+  else if (trim(cc_guess_t1) == 'MP') then
+    do a = 1, nV
+      do i = 1, nO
+        t1(i,a) = f_ov(i,a) / (f_o(i) - f_v(a) - cc_level_shift_guess)
+      enddo
+    enddo
+  else if (trim(cc_guess_t1) == 'read') then
+    call read_t1(nO,nV,t1)
+  else
+    print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t1)
+    call abort
+  endif
+  
+end
+
+! T2
+
+subroutine guess_t2(nO,nV,f_o,f_v,v_oovv,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Update the T2 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: f_o(nO), f_v(nV), v_oovv(nO, nO, nV, nV)
+
+  ! inout
+  double precision, intent(out) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer                       :: i,j,a,b
+
+  if (trim(cc_guess_t2) == 'none') then
+    t2 = 0d0
+  else if (trim(cc_guess_t2) == 'MP') then
+    do b = 1, nV
+      do a = 1, nV
+        do j = 1, nO
+          do i = 1, nO
+            t2(i,j,a,b) = v_oovv(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift_guess)
+          enddo
+        enddo
+      enddo
+    enddo
+  else if (trim(cc_guess_t2) == 'read') then
+    call read_t2(nO,nV,t2)
+  else
+    print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t2)
+    call abort
+  endif
+  
+end
+
+! T1
+
+subroutine write_t1(nO,nV,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Write the T1 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)          :: nO, nV
+  double precision, intent(in) :: t1(nO, nV)
+
+  ! internal
+  integer                      :: i,a
+
+  if (cc_write_t1) then
+    open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1')
+    do a = 1, nV
+      do i = 1, nO
+         write(11,'(F20.12)') t1(i,a)
+      enddo
+    enddo
+    close(11)
+  endif
+  
+end
+
+! T2
+
+subroutine write_t2(nO,nV,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Write the T2 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)          :: nO, nV
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer                      :: i,j,a,b
+
+  if (cc_write_t2) then
+    open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2')
+    do b = 1, nV
+      do a = 1, nV
+        do j = 1, nO
+          do i = 1, nO
+             write(11,'(F20.12)') t2(i,j,a,b)
+          enddo
+        enddo
+      enddo
+    enddo
+    close(11)
+  endif
+  
+end
+
+! T1
+
+subroutine read_t1(nO,nV,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Read the T1 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(out) :: t1(nO, nV)
+
+  ! internal
+  integer                       :: i,a
+  logical                       :: ok
+
+  inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok)
+  if (.not. ok) then
+     print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1'
+     print*, 'Do a first calculation with cc_write_t1 = True'
+     print*, 'and cc_guess_t1 /= read before setting cc_guess_t1 = read'
+     call abort
+  endif
+  open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1')
+  do a = 1, nV
+    do i = 1, nO
+       read(11,'(F20.12)') t1(i,a)
+    enddo
+  enddo
+  close(11)
+  
+end
+
+! T2
+
+subroutine read_t2(nO,nV,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Read the T2 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(out) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer                       :: i,j,a,b
+  logical                       :: ok
+
+  inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok)
+  if (.not. ok) then
+     print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1'
+     print*, 'Do a first calculation with cc_write_t2 = True'
+     print*, 'and cc_guess_t2 /= read before setting cc_guess_t2 = read'
+     call abort
+  endif
+  open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2')
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+           read(11,'(F20.12)') t2(i,j,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  close(11)
+  
+end
diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f
new file mode 100644
index 00000000..9e244d82
--- /dev/null
+++ b/src/utils_cc/mo_integrals_cc.irp.f
@@ -0,0 +1,1256 @@
+! F
+
+subroutine gen_f_space(det,n1,n2,list1,list2,f)
+
+  implicit none
+
+  integer, intent(in)           :: n1,n2
+  integer, intent(in)           :: list1(n1),list2(n2)
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  double precision, intent(out) :: f(n1,n2)
+
+  double precision, allocatable :: tmp_F(:,:)
+  integer                       :: i1,i2,idx1,idx2
+
+  allocate(tmp_F(mo_num,mo_num))
+  
+  call get_fock_matrix_spin(det,1,tmp_F)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(tmp_F,f,n1,n2,list1,list2) &
+  !$OMP PRIVATE(idx1,idx2,i1,i2)&
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do i2 = 1, n2
+    do i1 = 1, n1
+      idx2 = list2(i2)
+      idx1 = list1(i1)
+      f(i1,i2) = tmp_F(idx1,idx2)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(tmp_F)
+  
+end
+
+! V
+
+subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
+
+  implicit none
+
+  integer, intent(in)           :: n1,n2,n3,n4
+  integer, intent(in)           :: list1(n1),list2(n2),list3(n3),list4(n4)
+  double precision, intent(out) :: v(n1,n2,n3,n4)
+
+  integer                       :: i1,i2,i3,i4,idx1,idx2,idx3,idx4
+  double precision              :: get_two_e_integral
+  
+  PROVIDE mo_two_e_integrals_in_map
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) &
+  !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)&
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do i4 = 1, n4
+    do i3 = 1, n3
+      do i2 = 1, n2
+        do i1 = 1, n1
+          idx4 = list4(i4)
+          idx3 = list3(i3)
+          idx2 = list2(i2)
+          idx1 = list1(i1)
+          v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+end
+
+! full
+
+BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)]
+
+  implicit none
+
+  integer          :: i,j,k,l
+  double precision :: get_two_e_integral
+  
+  PROVIDE mo_two_e_integrals_in_map
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) &
+  !$OMP PRIVATE(i,j,k,l) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do l = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do i = 1, mo_num
+          cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+       
+END_PROVIDER
+
+! oooo
+
+BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo)
+
+END_PROVIDER
+
+! vooo
+
+BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo)
+
+END_PROVIDER
+
+! ovoo
+
+BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo)
+
+END_PROVIDER
+
+! oovo
+
+BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo)
+
+END_PROVIDER
+
+! ooov
+
+BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov)
+
+END_PROVIDER
+
+! vvoo
+
+BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo)
+
+END_PROVIDER
+
+! vovo
+
+BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo)
+
+END_PROVIDER
+
+! voov
+
+BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov)
+
+END_PROVIDER
+
+! ovvo
+
+BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo)
+
+END_PROVIDER
+
+! ovov
+
+BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov)
+
+END_PROVIDER
+
+! oovv
+
+BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv)
+
+END_PROVIDER
+
+! vvvo
+
+BEGIN_PROVIDER [double precision, cc_space_v_vvvo, (cc_nVa, cc_nVa, cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nVa,cc_nVa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_vvvo)
+
+END_PROVIDER
+
+! vvov
+
+BEGIN_PROVIDER [double precision, cc_space_v_vvov, (cc_nVa, cc_nVa, cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nVa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_vvov)
+
+END_PROVIDER
+
+! vovv
+
+BEGIN_PROVIDER [double precision, cc_space_v_vovv, (cc_nVa, cc_nOa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_vovv)
+
+END_PROVIDER
+
+! ovvv
+
+BEGIN_PROVIDER [double precision, cc_space_v_ovvv, (cc_nOa, cc_nVa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_vir, cc_space_v_ovvv)
+
+END_PROVIDER
+
+! vvvv
+
+BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nVa,cc_nVa,cc_nVa, cc_list_vir,cc_list_vir,cc_list_vir,cc_list_vir, cc_space_v_vvvv)
+
+END_PROVIDER
+
+! ppqq
+
+BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <pp|qq> integrals for general MOs (excepted core and deleted ones)
+  END_DOC
+
+  integer                       :: p,q
+  double precision, allocatable :: tmp_v(:,:,:,:)
+
+  allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo))
+
+  call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v)
+  
+  do q = 1, cc_n_mo
+    do p = 1, cc_n_mo
+      cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q)
+    enddo
+  enddo
+
+  deallocate(tmp_v)
+
+END_PROVIDER
+
+! aaii
+
+BEGIN_PROVIDER [double precision, cc_space_v_aaii, (cc_nVa,cc_nOa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <aa|ii> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do i = 1, cc_nOa
+    do a = 1, cc_nVa
+      cc_space_v_aaii(a,i) = cc_space_v_vvoo(a,a,i,i)
+    enddo
+  enddo
+
+  FREE cc_space_v_vvoo
+
+END_PROVIDER
+
+! iiaa
+
+BEGIN_PROVIDER [double precision, cc_space_v_iiaa, (cc_nOa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ii|aa> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do a = 1, cc_nVa
+    do i = 1, cc_nOa
+      cc_space_v_iiaa(i,a) = cc_space_v_oovv(i,i,a,a)
+    enddo
+  enddo
+
+  FREE cc_space_v_oovv
+
+END_PROVIDER
+
+! iijj
+
+BEGIN_PROVIDER [double precision, cc_space_v_iijj, (cc_nOa,cc_nOa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ii|jj> integrals
+  ! i,j: occupied MO
+  END_DOC
+
+  integer :: i,j
+
+  do j = 1, cc_nOa
+    do i = 1, cc_nOa
+      cc_space_v_iijj(i,j) = cc_space_v_oooo(i,i,j,j)
+    enddo
+  enddo
+
+  FREE cc_space_v_oooo
+
+END_PROVIDER
+
+! aabb
+
+BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <aa|bb> integrals
+  ! a,b: virtual MO
+  END_DOC
+
+  integer :: a,b
+
+  do b = 1, cc_nVa
+    do a = 1, cc_nVa
+      cc_space_v_aabb(a,b) = cc_space_v_vvvv(a,a,b,b)
+    enddo
+  enddo
+
+  FREE cc_space_v_vvvv
+  
+END_PROVIDER
+
+! iaia
+
+BEGIN_PROVIDER [double precision, cc_space_v_iaia, (cc_nOa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ia|ia> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do a = 1, cc_nVa
+    do i = 1, cc_nOa
+      cc_space_v_iaia(i,a) = cc_space_v_ovov(i,a,i,a)
+    enddo
+  enddo
+
+  FREE cc_space_v_ovov
+
+END_PROVIDER
+
+! iaai
+
+BEGIN_PROVIDER [double precision, cc_space_v_iaai, (cc_nOa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ia|ai> integrals
+  ! a: virtual MO
+  ! i: inactive MO
+  END_DOC
+
+  integer :: a,i
+
+  do a = 1, cc_nVa
+    do i = 1, cc_nOa
+      cc_space_v_iaai(i,a) = cc_space_v_ovvo(i,a,a,i)
+    enddo
+  enddo
+
+  FREE cc_space_v_ovvo
+
+END_PROVIDER
+
+! aiia
+
+BEGIN_PROVIDER [double precision, cc_space_v_aiia, (cc_nVa,cc_nOa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ai|ia> integrals
+  ! a: virtual MO
+  ! i: inactive MO
+  END_DOC
+
+  integer :: a,i
+
+  do i = 1, cc_nOa
+    do a = 1, cc_nVa
+      cc_space_v_aiia(a,i) = cc_space_v_voov(a,i,i,a)
+    enddo
+  enddo
+
+  FREE cc_space_v_voov
+
+END_PROVIDER
+
+! oovv
+
+BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  double precision, allocatable :: tmp_v(:,:,:,:)
+  integer :: i,j,a,b
+
+  allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa))
+  
+  call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_oovv) &
+  !$OMP PRIVATE(i,j,a,b)&
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do b = 1, cc_nVa
+    do a = 1, cc_nVa
+      do j = 1, cc_nOa
+        do i = 1, cc_nOa
+          cc_space_w_oovv(i,j,a,b) = 2d0 * tmp_v(i,j,a,b) - tmp_v(j,i,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(tmp_v)
+
+END_PROVIDER
+
+! vvoo
+
+BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  double precision, allocatable :: tmp_v(:,:,:,:)
+  integer :: i,j,a,b
+
+  allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa))
+  
+  call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_vvoo) &
+  !$OMP PRIVATE(i,j,a,b)&
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do j = 1, cc_nOa
+    do i = 1, cc_nOa
+      do b = 1, cc_nVa
+        do a = 1, cc_nVa
+          cc_space_w_vvoo(a,b,i,j) = 2d0 * tmp_v(a,b,i,j) - tmp_v(b,a,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(tmp_v)
+
+END_PROVIDER
+
+! F_oo
+
+BEGIN_PROVIDER [double precision, cc_space_f_oo, (cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nOa, cc_list_occ,cc_list_occ, cc_space_f_oo)
+
+END_PROVIDER
+
+! F_ov
+
+BEGIN_PROVIDER [double precision, cc_space_f_ov, (cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nVa, cc_list_occ,cc_list_vir, cc_space_f_ov)
+
+END_PROVIDER
+
+! F_vo
+
+BEGIN_PROVIDER [double precision, cc_space_f_vo, (cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nOa, cc_list_vir,cc_list_occ, cc_space_f_vo)
+
+END_PROVIDER
+
+! F_vv
+
+BEGIN_PROVIDER [double precision, cc_space_f_vv, (cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nVa, cc_list_vir,cc_list_vir, cc_space_f_vv)
+
+END_PROVIDER
+
+! F_o
+
+BEGIN_PROVIDER [double precision, cc_space_f_o, (cc_nOa)]
+
+  implicit none
+
+  integer :: i
+
+  do i = 1, cc_nOa
+    cc_space_f_o(i) = cc_space_f_oo(i,i)
+  enddo
+
+END_PROVIDER
+
+! F_v
+
+BEGIN_PROVIDER [double precision, cc_space_f_v, (cc_nVa)]
+
+  implicit none
+
+  integer :: i
+
+  do i = 1, cc_nVa
+    cc_space_f_v(i) = cc_space_f_vv(i,i)
+  enddo
+
+END_PROVIDER
+
+! Shift
+
+subroutine shift_idx_spin(s,n_S,shift)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Shift for the partitionning alpha/beta of the spin orbitals
+  ! n_S(1): number of spin alpha in the correspondong list
+  ! n_S(2): number of spin beta in the correspondong list
+  END_DOC
+
+  integer, intent(in)  :: s, n_S(2)
+  integer, intent(out) :: shift
+
+  if (s == 1) then
+    shift = 0
+  else
+    shift = n_S(1)
+  endif
+  
+end
+
+! F
+
+subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Fock matrix corresponding to two lists of spin orbitals.
+  ! Ex: occ/occ, occ/vir,...
+  END_DOC
+  
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  integer, intent(in)           :: n1,n2, n1_S(2), n2_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2)
+  integer, intent(in)           :: dim1, dim2
+  
+  double precision, intent(out) :: f(dim1, dim2)
+
+  double precision, allocatable :: tmp_F(:,:)
+  integer                       :: i,j, idx_i,idx_j,i_shift,j_shift
+  integer                       :: tmp_i,tmp_j
+  integer                       :: si,sj,s
+
+  allocate(tmp_F(mo_num,mo_num))
+  
+  do sj = 1, 2
+    call shift_idx_spin(sj,n2_S,j_shift)
+    do si = 1, 2
+      call shift_idx_spin(si,n1_S,i_shift)
+      s = si + sj
+
+      if (s == 2 .or. s == 4) then
+        call get_fock_matrix_spin(det,sj,tmp_F)
+      else
+        do j = 1, mo_num
+          do i = 1, mo_num
+            tmp_F(i,j) = 0d0
+          enddo
+        enddo
+      endif
+
+      do tmp_j = 1, n2_S(sj)
+        j = list2(tmp_j,sj)
+        idx_j = tmp_j + j_shift
+        do tmp_i = 1, n1_S(si)
+          i = list1(tmp_i,si)
+          idx_i = tmp_i + i_shift
+          f(idx_i,idx_j) = tmp_F(i,j)
+        enddo
+      enddo
+
+    enddo
+  enddo
+  
+  deallocate(tmp_F)
+  
+end
+
+! Get F
+
+subroutine get_fock_matrix_spin(det,s,f)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Fock matrix alpha or beta of an arbitrary det
+  END_DOC
+  
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  integer, intent(in)           :: s
+  
+  double precision, intent(out) :: f(mo_num,mo_num)
+  
+  integer                       :: p,q,i,s1,s2
+  integer(bit_kind)             :: res(N_int,2)
+  logical                       :: ok
+  double precision              :: mo_two_e_integral
+
+  if (s == 1) then
+    s1 = 1
+    s2 = 2
+  else
+    s1 = 2
+    s2 = 1
+  endif
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) &
+  !$OMP PRIVATE(p,q,ok,i,res)&
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do q = 1, mo_num
+    do p = 1, mo_num
+      f(p,q) = mo_one_e_integrals(p,q)
+      do i = 1, mo_num
+        call apply_hole(det, s1, i, res, ok, N_int)
+        if (ok) then
+          f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q)
+        endif
+      enddo
+      do i = 1, mo_num
+        call apply_hole(det, s2, i, res, ok, N_int)
+        if (ok) then
+          f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i)
+        endif
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+    
+end
+
+! V
+
+subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3,dim4, v)
+
+  implicit none
+
+   BEGIN_DOC
+  ! Compute the bi electronic integrals corresponding to four lists of spin orbitals.
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4,n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3, dim4
+  double precision, intent(out) :: v(dim1,dim2,dim3,dim4)
+
+  double precision              :: mo_two_e_integral
+  integer                       :: i,j,k,l,idx_i,idx_j,idx_k,idx_l
+  integer                       :: i_shift,j_shift,k_shift,l_shift
+  integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer                       :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) &
+  !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, &
+  !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,&
+  !$OMP tmp_i,tmp_j,tmp_k,tmp_l)&
+  !$OMP DEFAULT(NONE)
+  
+  do sl = 1, 2
+    call shift_idx_spin(sl,n4_S,l_shift)
+    do sk = 1, 2
+      call shift_idx_spin(sk,n3_S,k_shift)
+      do sj = 1, 2
+        call shift_idx_spin(sj,n2_S,j_shift)
+        do si = 1, 2
+          call shift_idx_spin(si,n1_S,i_shift)
+    
+          s = si+sj+sk+sl
+          ! <aa||aa> or <bb||bb>
+          if (s == 4 .or. s == 8) then
+            !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)  
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                       !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                       v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+            
+          ! <ab||ab> or <ba||ba>
+          elseif (si == sk .and. sj == sl) then
+            !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)  
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                       !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                       v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l)
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+            
+          ! <ab||ba> or <ba||ab>
+          elseif (si == sl .and. sj == sk) then
+            !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)  
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                       !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                       v(idx_i,idx_j,idx_k,idx_l) = - cc_space_v(j,i,k,l)
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          else
+             !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)  
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                       v(idx_i,idx_j,idx_k,idx_l) = 0d0
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          endif
+          
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+  
+end
+
+! V_3idx
+
+subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_l)
+
+  implicit none
+
+   BEGIN_DOC
+  ! Compute the bi electronic integrals corresponding to four lists of spin orbitals.
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4,idx_l,n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3
+  double precision, intent(out) :: v_l(dim1,dim2,dim3)
+
+  double precision              :: mo_two_e_integral
+  integer                       :: i,j,k,l,idx_i,idx_j,idx_k
+  integer                       :: i_shift,j_shift,k_shift,l_shift
+  integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer                       :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+
+  if (idx_l <= n4_S(1)) then
+    sl = 1
+  else
+    sl = 2
+  endif
+  call shift_idx_spin(sl,n4_S,l_shift)
+  tmp_l = idx_l - l_shift
+  l = list4(tmp_l,sl)
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) &
+  !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, &
+  !$OMP i,j,k,idx_i,idx_j,idx_k,&
+  !$OMP tmp_i,tmp_j,tmp_k)&
+  !$OMP DEFAULT(NONE)
+  
+  do sk = 1, 2
+    call shift_idx_spin(sk,n3_S,k_shift)
+    do sj = 1, 2
+      call shift_idx_spin(sj,n2_S,j_shift)
+      do si = 1, 2
+        call shift_idx_spin(si,n1_S,i_shift)
+  
+        s = si+sj+sk+sl
+        ! <aa||aa> or <bb||bb>
+        if (s == 4 .or. s == 8) then
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                   v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ab> or <ba||ba>
+        elseif (si == sk .and. sj == sl) then
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                   v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ba> or <ba||ab>
+        elseif (si == sl .and. sj == sk) then
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                   v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        else
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   v_l(idx_i,idx_j,idx_k) = 0d0
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        endif
+        
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+  
+end
+
+! V_3idx_ij_l
+
+subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_k)
+
+  implicit none
+
+   BEGIN_DOC
+  ! Compute the bi electronic integrals corresponding to four lists of spin orbitals.
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4,idx_k,n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3
+  double precision, intent(out) :: v_k(dim1,dim2,dim3)
+
+  double precision              :: mo_two_e_integral
+  integer                       :: i,j,k,l,idx_i,idx_j,idx_l
+  integer                       :: i_shift,j_shift,k_shift,l_shift
+  integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer                       :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+
+  if (idx_k <= n3_S(1)) then
+    sk = 1
+  else
+    sk = 2
+  endif
+  call shift_idx_spin(sk,n3_S,k_shift)
+  tmp_k = idx_k - k_shift
+  k = list3(tmp_k,sk)
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) &
+  !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, &
+  !$OMP i,j,l,idx_i,idx_j,idx_l,&
+  !$OMP tmp_i,tmp_j,tmp_l)&
+  !$OMP DEFAULT(NONE)
+  
+  do sl = 1, 2
+    call shift_idx_spin(sl,n4_S,l_shift)
+    do sj = 1, 2
+      call shift_idx_spin(sj,n2_S,j_shift)
+      do si = 1, 2
+        call shift_idx_spin(si,n1_S,i_shift)
+  
+        s = si+sj+sk+sl
+        ! <aa||aa> or <bb||bb>
+        if (s == 4 .or. s == 8) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                   v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ab> or <ba||ba>
+        elseif (si == sk .and. sj == sl) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                   v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ba> or <ba||ab>
+        elseif (si == sl .and. sj == sk) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                   v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        else
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   v_k(idx_i,idx_j,idx_l) = 0d0
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        endif
+        
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+  
+end
+
+! V_3idx_i_kl
+
+subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_j)
+
+  implicit none
+
+   BEGIN_DOC
+  ! Compute the bi electronic integrals corresponding to four lists of spin orbitals.
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4,idx_j,n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3
+  double precision, intent(out) :: v_j(dim1,dim2,dim3)
+
+  double precision              :: mo_two_e_integral
+  integer                       :: i,j,k,l,idx_i,idx_k,idx_l
+  integer                       :: i_shift,j_shift,k_shift,l_shift
+  integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer                       :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+
+  if (idx_j <= n2_S(1)) then
+    sj = 1
+  else
+    sj = 2
+  endif
+  call shift_idx_spin(sj,n2_S,j_shift)
+  tmp_j = idx_j - j_shift
+  j = list2(tmp_j,sj)
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) &
+  !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, &
+  !$OMP i,k,l,idx_i,idx_k,idx_l,&
+  !$OMP tmp_i,tmp_k,tmp_l)&
+  !$OMP DEFAULT(NONE)
+  
+  do sl = 1, 2
+    call shift_idx_spin(sl,n4_S,l_shift)
+    do sk = 1, 2
+      call shift_idx_spin(sk,n3_S,k_shift)
+      do si = 1, 2
+        call shift_idx_spin(si,n1_S,i_shift)
+  
+        s = si+sj+sk+sl
+        ! <aa||aa> or <bb||bb>
+        if (s == 4 .or. s == 8) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                   v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ab> or <ba||ba>
+        elseif (si == sk .and. sj == sl) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                   v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ba> or <ba||ab>
+        elseif (si == sl .and. sj == sk) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                   v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        else
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   v_j(idx_i,idx_k,idx_l) = 0d0
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        endif
+        
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+  
+end
diff --git a/src/utils_cc/occupancy.irp.f b/src/utils_cc/occupancy.irp.f
new file mode 100644
index 00000000..c6139bb3
--- /dev/null
+++ b/src/utils_cc/occupancy.irp.f
@@ -0,0 +1,328 @@
+! N spin orb
+
+subroutine extract_n_spin(det,n)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Returns the number of occupied alpha, occupied beta, virtual alpha, virtual beta spin orbitals
+  ! in det without counting the core and deleted orbitals in the format n(nOa,nOb,nVa,nVb)
+  END_DOC
+
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  
+  integer, intent(out)          :: n(4)
+  
+  integer(bit_kind)             :: res(N_int,2)
+  integer                       :: i, si
+  logical                       :: ok, is_core, is_del
+
+  ! Init
+  n = 0
+
+  ! Loop over the spin
+  do si = 1, 2
+    do i = 1, mo_num
+      call apply_hole(det, si, i, res, ok, N_int)
+      
+      ! in core ?
+      if (is_core(i)) cycle
+      ! in del ?
+      if (is_del(i)) cycle
+      
+      if (ok) then
+        ! particle
+        n(si) = n(si) + 1
+      else
+        ! hole
+        n(si+2) = n(si+2) + 1
+      endif
+    enddo
+  enddo
+
+  !print*,n(1),n(2),n(3),n(4)
+
+end
+
+! Spin
+
+subroutine extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Returns the the list of occupied alpha/beta, virtual alpha/beta spin orbitals
+  ! size(nO_m,1) must be max(nOa,nOb) and size(nV_m,1) must be max(nVa,nVb)
+  END_DOC
+  
+  integer, intent(in)           :: nO_m, nV_m
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  
+  integer, intent(out)          :: list_occ(nO_m,2), list_vir(nV_m,2)
+  
+  integer(bit_kind)             :: res(N_int,2)
+  integer                       :: i, si, idx_o, idx_v, idx_i, idx_b
+  logical                       :: ok, is_core, is_del
+
+  list_occ = 0
+  list_vir = 0
+
+  ! List of occ/vir alpha/beta
+
+  ! occ alpha -> list_occ(:,1)
+  ! occ beta -> list_occ(:,2)
+  ! vir alpha -> list_vir(:,1)
+  ! vir beta -> list_vir(:,2)
+
+  ! Loop over the spin 
+  do si = 1, 2
+    ! tmp idx
+    idx_o = 1
+    idx_v = 1
+    do i = 1, mo_num
+      call apply_hole(det, si, i, res, ok, N_int)
+
+      ! in core ?
+      if (is_core(i)) cycle
+      ! in del ?
+      if (is_del(i)) cycle
+            
+      if (ok) then
+        ! particle
+        list_occ(idx_o,si) = i
+        idx_o = idx_o + 1
+      else
+        ! hole
+        list_vir(idx_v,si) = i
+        idx_v = idx_v + 1
+      endif
+    enddo
+  enddo
+
+end
+
+! Space
+
+subroutine extract_list_orb_space(det,nO,nV,list_occ,list_vir)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Returns the the list of occupied and virtual alpha spin orbitals
+  END_DOC
+  
+  integer, intent(in)           :: nO, nV
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  
+  integer, intent(out)          :: list_occ(nO), list_vir(nV)
+  
+  integer(bit_kind)             :: res(N_int,2)
+  integer                       :: i, si, idx_o, idx_v, idx_i, idx_b
+  logical                       :: ok, is_core, is_del
+  
+  if (elec_alpha_num /= elec_beta_num) then
+    print*,'Error elec_alpha_num /= elec_beta_num, impossible to create cc_list_occ and cc_list_vir, abort'
+    call abort
+  endif
+
+  list_occ = 0
+  list_vir = 0
+
+  ! List of occ/vir alpha
+
+  ! occ alpha -> list_occ(:,1)
+  ! vir alpha -> list_vir(:,1)
+
+  ! tmp idx
+  idx_o = 1
+  idx_v = 1
+  do i = 1, mo_num
+    call apply_hole(det, 1, i, res, ok, N_int)
+
+    ! in core ?
+    if (is_core(i)) cycle
+    ! in del ?
+    if (is_del(i)) cycle
+
+    if (ok) then
+      ! particle
+      list_occ(idx_o) = i
+      idx_o = idx_o + 1
+    else
+      ! hole
+      list_vir(idx_v) = i
+      idx_v = idx_v + 1
+    endif
+  enddo
+
+end
+
+! is_core
+
+function is_core(i)
+
+  implicit none
+
+  BEGIN_DOC
+  ! True if the orbital i is a core orbital
+  END_DOC
+
+  integer, intent(in) :: i
+  logical             :: is_core
+
+  integer             :: j
+
+  ! Init
+  is_core = .False.
+
+  ! Search
+  do j = 1, dim_list_core_orb
+    if (list_core(j) == i) then
+      is_core = .True.
+      exit
+    endif
+  enddo
+
+end
+
+! is_del
+
+function is_del(i)
+
+  implicit none
+
+  BEGIN_DOC
+  ! True if the orbital i is a deleted orbital
+  END_DOC
+
+  integer, intent(in) :: i
+  logical             :: is_del
+
+  integer             :: j
+
+  ! Init
+  is_del = .False.
+
+  ! Search
+  do j = 1, dim_list_core_orb
+    if (list_core(j) == i) then
+      is_del = .True.
+      exit
+    endif
+  enddo
+
+end
+
+! N orb
+
+BEGIN_PROVIDER [integer, cc_nO_m]
+&BEGIN_PROVIDER [integer, cc_nOa]
+&BEGIN_PROVIDER [integer, cc_nOb]
+&BEGIN_PROVIDER [integer, cc_nOab]
+&BEGIN_PROVIDER [integer, cc_nV_m]
+&BEGIN_PROVIDER [integer, cc_nVa]
+&BEGIN_PROVIDER [integer, cc_nVb]
+&BEGIN_PROVIDER [integer, cc_nVab]
+&BEGIN_PROVIDER [integer, cc_n_mo]
+&BEGIN_PROVIDER [integer, cc_nO_S, (2)]
+&BEGIN_PROVIDER [integer, cc_nV_S, (2)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! Number of orbitals without core and deleted ones of the cc_ref det in psi_det
+  ! a: alpha, b: beta
+  ! nO_m: max(a,b) occupied 
+  ! nOa: nb a occupied 
+  ! nOb: nb b occupied 
+  ! nOab: nb a+b occupied 
+  ! nV_m: max(a,b) virtual 
+  ! nVa: nb a virtual 
+  ! nVb: nb b virtual 
+  ! nVab: nb a+b virtual 
+  END_DOC
+
+  integer :: n_spin(4)
+
+  ! Extract number of occ/vir alpha/beta spin orbitals
+  call extract_n_spin(psi_det(1,1,cc_ref),n_spin)
+
+  cc_nOa  = n_spin(1)
+  cc_nOb  = n_spin(2)
+  cc_nOab = cc_nOa + cc_nOb    !n_spin(1) + n_spin(2)
+  cc_nO_m = max(cc_nOa,cc_nOb) !max(n_spin(1), n_spin(2))
+  cc_nVa  = n_spin(3)
+  cc_nVb  = n_spin(4)
+  cc_nVab = cc_nVa + cc_nVb    !n_spin(3) + n_spin(4)
+  cc_nV_m = max(cc_nVa,cc_nVb) !max(n_spin(3), n_spin(4))
+  cc_n_mo = cc_nVa + cc_nVb    !n_spin(1) + n_spin(3)
+  cc_nO_S = (/cc_nOa,cc_nOb/)
+  cc_nV_S = (/cc_nVa,cc_nVb/)
+
+END_PROVIDER
+
+! General
+
+BEGIN_PROVIDER [integer, cc_list_gen, (cc_n_mo)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! List of general orbitals without core and deleted ones
+  END_DOC
+
+  integer :: i,j
+  logical :: is_core, is_del
+  
+  j = 1
+  do i = 1, mo_num
+    ! in core ?
+    if (is_core(i)) cycle
+    ! in del ?
+    if (is_del(i)) cycle
+    cc_list_gen(j) = i
+    j = j+1
+  enddo
+
+END_PROVIDER
+
+! Space
+
+BEGIN_PROVIDER [integer, cc_list_occ, (cc_nOa)]
+&BEGIN_PROVIDER [integer, cc_list_vir, (cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! List of occupied and virtual spatial orbitals without core and deleted ones
+  END_DOC
+
+  call extract_list_orb_space(psi_det(1,1,cc_ref),cc_nOa,cc_nVa,cc_list_occ,cc_list_vir)
+
+END_PROVIDER
+
+! Spin
+
+BEGIN_PROVIDER [integer, cc_list_occ_spin, (cc_nO_m,2)]
+&BEGIN_PROVIDER [integer, cc_list_vir_spin, (cc_nV_m,2)]
+&BEGIN_PROVIDER [logical, cc_ref_is_open_shell]
+
+  implicit none
+
+  BEGIN_DOC
+  ! List of occupied and virtual spin orbitals without core and deleted ones
+  END_DOC
+
+  integer :: i
+
+  call extract_list_orb_spin(psi_det(1,1,cc_ref),cc_nO_m,cc_nV_m,cc_list_occ_spin,cc_list_vir_spin)
+
+  cc_ref_is_open_shell = .False.
+  do i = 1, cc_nO_m
+    if (cc_list_occ_spin(i,1) /= cc_list_occ_spin(i,2)) then
+       cc_ref_is_open_shell = .True.
+    endif
+  enddo
+
+
+END_PROVIDER
diff --git a/src/utils_cc/org/TANGLE_org_mode.sh b/src/utils_cc/org/TANGLE_org_mode.sh
new file mode 100755
index 00000000..059cbe7d
--- /dev/null
+++ b/src/utils_cc/org/TANGLE_org_mode.sh
@@ -0,0 +1,7 @@
+#!/bin/sh   
+
+list='ls *.org'
+for element in $list    
+do   
+		emacs --batch $element -f org-babel-tangle
+done
diff --git a/src/utils_cc/org/diis.org b/src/utils_cc/org/diis.org
new file mode 100644
index 00000000..c48b917e
--- /dev/null
+++ b/src/utils_cc/org/diis.org
@@ -0,0 +1,574 @@
+* DIIS
+https://hal.archives-ouvertes.fr/hal-02492983/document
+Maxime Chupin, Mi-Song Dupuy, Guillaume Legendre, Eric Séré. Convergence analysis of adaptive
+DIIS algorithms witerh application to electronic ground state calculations. 
+ESAIM: Mathematical Modelling and Numerical Analysis, EDP Sciences, 2021, 55 (6), pp.2785 - 2825. 10.1051/m2an/2021069ff.ffhal-02492983v5
+
+t_{k+1} = g(t_k)
+err_k = f(t_k) = t_{k+1} - t_k
+
+m_k = min(m,k)
+m maximal depth
+t_{k+1} = \sum_{i=0}^{m_k} c_i^k g(t_{k-m_k+i})
+\sum_{i=0}^{m_k} c_i^k = 1
+
+b_{ij}^k = < err^{k-m_k+j}, err^{k-m_k+i} >
+
+(b   -1) ( c^k    ) = (  0 )
+(-1   0) ( \lambda)   ( -1 )
+
+lambda is used to put the constraint \sum_{i=0}^{m_k} c_i^k = 1
+
+In: t_0, err_0, m
+err_0 = g(t_0)
+k = 0
+m_k = 0
+while ||err_k|| > CC
+  A.x=b
+  t_{k+1} = \sum_{i=0}^{m_k} c_i^k g(t_{k-m_k+i})
+  err_{k+1} = f(t_{k+1})
+  m_{k+1} = min(m_k+1,m)
+  k = k +1
+end
+
+* Code
+#+begin_src f90 :comments org :tangle diis.irp.f 
+subroutine diis_cc(all_err,all_t,sze,m,iter,t)
+
+  implicit none
+
+  BEGIN_DOC
+  ! DIIS. Take the error vectors and the amplitudes of the previous
+  ! iterations to compute the new amplitudes
+  END_DOC
+  
+  ! {err_i}_{i=1}^{m_it} -> B -> c
+  ! {t_i}_{i=1}^{m_it}, c, {err_i}_{i=1}^{m_it} -> t_{m_it+1}
+
+  integer, intent(in)             :: m,iter,sze
+  double precision, intent(in)    :: all_err(sze,m)
+  double precision, intent(in)    :: all_t(sze,m)
+  
+  double precision, intent(out)   :: t(sze)
+  
+  double precision, allocatable   :: B(:,:), c(:), zero(:)
+  integer                         :: m_iter
+  integer                         :: i,j,k
+  integer                         :: info
+  integer, allocatable            :: ipiv(:)
+  double precision                :: accu
+  
+  m_iter = min(m,iter)
+  !print*,'m_iter',m_iter
+  allocate(B(m_iter+1,m_iter+1), c(m_iter), zero(m_iter+1))
+  allocate(ipiv(m+1))
+
+  ! B(i,j) =  < err(iter-m_iter+j),err(iter-m_iter+i) > ! iter-m_iter will be zero for us
+  B = 0d0
+  !$OMP PARALLEL &
+  !$OMP SHARED(B,m,m_iter,sze,all_err) &
+  !$OMP PRIVATE(i,j,k,accu) &
+  !$OMP DEFAULT(NONE)
+  do j = 1, m_iter
+    do i = 1, m_iter
+      accu = 0d0
+      !$OMP DO
+      do k = 1, sze
+        ! the errors of the ith iteration are in all_err(:,m+1-i)
+        accu = accu + all_err(k,m+1-i) * all_err(k,m+1-j)
+      enddo
+      !$OMP END DO NOWAIT
+      !$OMP CRITICAL
+      B(i,j) = B(i,j) + accu
+      !$OMP END CRITICAL
+    enddo
+  enddo
+  !$OMP END PARALLEL
+  
+  do i = 1, m_iter
+    B(i,m_iter+1) = -1
+  enddo
+  do j = 1, m_iter
+    B(m_iter+1,j) = -1
+  enddo
+  ! Debug
+  !print*,'B'
+  !do i = 1, m_iter+1
+  !  write(*,'(100(F10.6))') B(i,:)
+  !enddo
+
+  ! (0 0 .... 0 -1)
+  zero = 0d0
+  zero(m_iter+1) = -1d0
+
+  ! Solve B.c = zero
+  call dgesv(m_iter+1, 1, B, size(B,1), ipiv, zero, size(zero,1), info)
+  if (info /= 0) then
+    print*,'DIIS error in dgesv:', info
+    call abort
+  endif
+  ! c corresponds to the m_iter first solutions
+  c = zero(1:m_iter)
+  ! Debug
+  !print*,'c',c
+  !print*,'all_t' 
+  !do i = 1, m
+  !  write(*,'(100(F10.6))') all_t(:,i)
+  !enddo
+  !print*,'all_err' 
+  !do i = 1, m
+  !  write(*,'(100(F10.6))') all_err(:,i)
+  !enddo
+
+  ! update T
+  !$OMP PARALLEL &
+  !$OMP SHARED(t,c,m,all_err,all_t,sze,m_iter) &
+  !$OMP PRIVATE(i,j,accu) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do i = 1, sze
+    t(i) = 0d0
+  enddo
+  !$OMP END DO
+  do i = 1, m_iter
+    !$OMP DO
+    do j = 1, sze
+      t(j) = t(j) + c(i) * (all_t(j,m+1-i) + all_err(j,m+1-i))
+    enddo
+    !$OMP END DO
+  enddo
+  !$OMP END PARALLEL
+
+  !print*,'new t',t
+
+  deallocate(ipiv,B,c,zero)
+
+end
+#+end_src
+
+** Update all err
+#+begin_src f90 :comments org :tangle diis.irp.f
+subroutine update_all_err(err,all_err,sze,m,iter)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Shift all the err vectors of the previous iterations to add the new one
+  ! The last err vector is placed in the last position and all the others are
+  ! moved toward the first one.
+  END_DOC
+
+  integer, intent(in)             :: m, iter, sze
+  double precision, intent(in)    :: err(sze)
+  double precision, intent(inout) :: all_err(sze,m)
+  integer                         :: i,j
+  integer                         :: m_iter
+
+  m_iter = min(m,iter)
+
+  ! Shift
+  !$OMP PARALLEL &
+  !$OMP SHARED(m,all_err,err,sze) &
+  !$OMP PRIVATE(i,j) &
+  !$OMP DEFAULT(NONE)
+  do i = 1, m-1
+    !$OMP DO
+    do j = 1, sze
+      all_err(j,i) = all_err(j,i+1)
+    enddo
+    !$OMP END DO
+  enddo
+  
+  ! Debug
+  !print*,'shift err'
+  !do i = 1, m
+  !  print*,i, all_err(:,i)
+  !enddo
+
+  ! New
+  !$OMP DO
+  do i = 1, sze
+    all_err(i,m) = err(i)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  ! Debug
+  !print*,'Updated err'
+  !do i = 1, m
+  !  print*,i, all_err(:,i)
+  !enddo
+
+end
+#+end_src
+
+** Update all t
+#+begin_src f90 :comments org :tangle diis.irp.f
+subroutine update_all_t(t,all_t,sze,m,iter)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Shift all the t vectors of the previous iterations to add the new one
+  ! The last t vector is placed in the last position and all the others are
+  ! moved toward the first one.
+  END_DOC
+
+  integer, intent(in)             :: m, iter, sze
+  double precision, intent(in)    :: t(sze)
+  double precision, intent(inout) :: all_t(sze,m)
+  integer                         :: i,j
+  integer                         :: m_iter
+
+  m_iter = min(m,iter)
+
+  ! Shift
+  !$OMP PARALLEL &
+  !$OMP SHARED(m,all_t,t,sze) &
+  !$OMP PRIVATE(i,j) &
+  !$OMP DEFAULT(NONE)
+  do i = 1, m-1
+    !$OMP DO
+    do j = 1, sze
+      all_t(j,i) = all_t(j,i+1)
+    enddo
+    !$OMP END DO
+  enddo
+
+  ! New
+  !$OMP DO
+  do i = 1, sze
+    all_t(i,m) = t(i)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  ! Debug
+  !print*,'Updated t'
+  !do i = 1, m
+  !  print*,i, all_t(:,i)
+  !enddo
+
+end
+#+end_src
+
+** Err
+*** Err1
+#+begin_src f90 :comments org :tangle diis.irp.f
+subroutine compute_err1(nO,nV,f_o,f_v,r1,err1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the error vector for the t1
+  END_DOC
+
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: f_o(nO), f_v(nV), r1(nO,nV)
+  
+  double precision, intent(out) :: err1(nO,nV)
+
+  integer                       :: i,a
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(err1,r1,f_o,f_v,nO,nV,cc_level_shift) &
+  !$OMP PRIVATE(i,a) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do a = 1, nV
+    do i = 1, nO
+      err1(i,a) = - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+end
+#+end_src
+
+*** Err2
+#+begin_src f90 :comments org :tangle diis.irp.f
+subroutine compute_err2(nO,nV,f_o,f_v,r2,err2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the error vector for the t2
+  END_DOC
+
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: f_o(nO), f_v(nV), r2(nO,nO,nV,nV)
+  
+  double precision, intent(out) :: err2(nO,nO,nV,nV)
+
+  integer                       :: i,j,a,b
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(err2,r2,f_o,f_v,nO,nV,cc_level_shift) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO       
+          err2(i,j,a,b) = - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+end
+#+end_src
+
+* Gather call diis
+** Update t
+#+begin_src f90 :comments org :tangle diis.irp.f
+subroutine update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV,nb_iter
+  double precision, intent(in)    :: f_o(nO), f_v(nV)
+  double precision, intent(in)    :: r1(nO,nV), r2(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV)
+  double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth)
+  double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth)
+
+  double precision, allocatable   :: err1(:,:), err2(:,:,:,:)
+  double precision, allocatable   :: tmp_err1(:), tmp_err2(:)
+  double precision, allocatable   :: tmp_t1(:), tmp_t2(:)
+  
+  if (cc_update_method == 'diis') then
+
+    allocate(err1(nO,nV), err2(nO,nO,nV,nV))
+    allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV))
+    allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV))
+
+    ! DIIS T1, it is not always good since the t1 can be small
+    ! That's why there is a call to update the t1 in the standard way
+    ! T1 error tensor
+    !call compute_err1(nO,nV,f_o,f_v,r1,err1)
+    ! Transfo errors and parameters in vectors
+    !tmp_err1 = reshape(err1,(/nO*nV/))
+    !tmp_t1   = reshape(t1  ,(/nO*nV/))
+    ! Add the error and parameter vectors with those of the previous iterations
+    !call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1)
+    !call update_all_t  (tmp_t1  ,all_t1  ,nO*nV,cc_diis_depth,nb_iter+1)
+    ! Diis and reshape T as a tensor
+    !call diis_cc(all_err1,all_t1,nO*nV,cc_diis_depth,nb_iter+1,tmp_t1)
+    !t1 = reshape(tmp_t1  ,(/nO,nV/))
+    call update_t1(nO,nV,f_o,f_v,r1,t1)
+
+    ! DIIS T2
+    ! T2 error tensor
+    call compute_err2(nO,nV,f_o,f_v,r2,err2)
+    ! Transfo errors and parameters in vectors
+    tmp_err2 = reshape(err2,(/nO*nO*nV*nV/))
+    tmp_t2   = reshape(t2  ,(/nO*nO*nV*nV/))
+    ! Add the error and parameter vectors with those of the previous iterations
+    call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+    call update_all_t  (tmp_t2  ,all_t2  ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+    ! Diis and reshape T as a tensor
+    call diis_cc(all_err2,all_t2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t2)
+    t2 = reshape(tmp_t2  ,(/nO,nO,nV,nV/))
+
+    deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2)
+
+  ! Standard update as T = T - Delta
+  elseif (cc_update_method == 'none') then
+     
+    call update_t1(nO,nV,f_o,f_v,r1,t1)
+    call update_t2(nO,nV,f_o,f_v,r2,t2)
+    
+  else
+    print*,'Unkonw cc_method_method: '//cc_update_method
+  endif
+  
+end
+ #+end_src
+ 
+** Update t v2
+#+begin_src f90 :comments org :tangle diis.irp.f
+subroutine update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV,nb_iter
+  double precision, intent(in)    :: f_o(nO), f_v(nV)
+  double precision, intent(in)    :: r1(nO,nV), r2(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV)
+  double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth)
+  double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth)
+
+  double precision, allocatable   :: all_t(:,:), all_err(:,:), tmp_t(:)
+  double precision, allocatable   :: err1(:,:), err2(:,:,:,:)
+  double precision, allocatable   :: tmp_err1(:), tmp_err2(:)
+  double precision, allocatable   :: tmp_t1(:), tmp_t2(:)
+
+  integer                         :: i,j
+  
+  ! Allocate
+  allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth))
+  allocate(tmp_t(nO*nV+nO*nO*nV*nV))
+  allocate(err1(nO,nV), err2(nO,nO,nV,nV))
+  allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV))
+  allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV))
+
+  ! Compute the errors and reshape them as vector
+  call compute_err1(nO,nV,f_o,f_v,r1,err1)
+  call compute_err2(nO,nV,f_o,f_v,r2,err2)
+  tmp_err1 = reshape(err1,(/nO*nV/))
+  tmp_err2 = reshape(err2,(/nO*nO*nV*nV/))
+  tmp_t1   = reshape(t1  ,(/nO*nV/))
+  tmp_t2   = reshape(t2  ,(/nO*nO*nV*nV/))
+  
+  ! Update the errors and parameters for the diis
+  call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1)
+  call update_all_t  (tmp_t1  ,all_t1  ,nO*nV,cc_diis_depth,nb_iter+1)
+  call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+  call update_all_t  (tmp_t2  ,all_t2  ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+
+  ! Gather the different parameters and errors
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,all_err,all_err1,all_err2,cc_diis_depth,&
+  !$OMP all_t,all_t1,all_t2) &
+  !$OMP PRIVATE(i,j) &
+  !$OMP DEFAULT(NONE)
+  do j = 1, cc_diis_depth
+    !$OMP DO 
+    do i = 1, nO*nV
+      all_err(i,j) = all_err1(i,j)
+    enddo
+    !$OMP END DO NOWAIT
+  enddo
+  do j = 1, cc_diis_depth
+    !$OMP DO
+    do i = 1, nO*nO*nV*nV
+      all_err(i+nO*nV,j) = all_err2(i,j)
+    enddo
+    !$OMP END DO NOWAIT
+  enddo
+  do j = 1, cc_diis_depth
+    !$OMP DO 
+    do i = 1, nO*nV
+      all_t(i,j) = all_t1(i,j)
+    enddo
+    !$OMP END DO NOWAIT
+  enddo
+  do j = 1, cc_diis_depth
+    !$OMP DO 
+    do i = 1, nO*nO*nV*nV
+      all_t(i+nO*nV,j) = all_t2(i,j)
+    enddo
+    !$OMP END DO
+  enddo
+  !$OMP END PARALLEL
+  
+  ! Diis
+  call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t)
+
+  ! Split the resulting vector
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,tmp_t,tmp_t1,tmp_t2) &
+  !$OMP PRIVATE(i) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do i = 1, nO*nV
+    tmp_t1(i) = tmp_t(i)
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP DO
+  do i = 1, nO*nO*nV*nV
+    tmp_t2(i) = tmp_t(i+nO*nV) 
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  ! Reshape as tensors
+  t1 = reshape(tmp_t1 ,(/nO,nV/))
+  t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/))
+
+  ! Deallocate
+  deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2,all_t,all_err)
+
+end
+ #+end_src
+ 
+
+** Update t v3
+#+begin_src f90 :comments org :tangle diis.irp.f
+subroutine update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t)
+
+  implicit none
+
+  integer, intent(in)             :: nO,nV,nb_iter
+  double precision, intent(in)    :: f_o(nO), f_v(nV)
+  double precision, intent(in)    :: r1(nO,nV), r2(nO,nO,nV,nV)
+  
+  double precision, intent(inout) :: t1(nO*nV), t2(nO*nO*nV*nV)
+  double precision, intent(inout) :: all_err(nO*nV+nO*nO*nV*nV, cc_diis_depth)
+  double precision, intent(inout) :: all_t(nO*nV+nO*nO*nV*nV, cc_diis_depth)
+
+  double precision, allocatable   :: tmp(:)
+
+  integer                         :: i,j
+  
+  ! Allocate
+  allocate(tmp(nO*nV+nO*nO*nV*nV))
+
+  ! Compute the errors
+  call compute_err1(nO,nV,f_o,f_v,r1,tmp(1:nO*nV))
+  call compute_err2(nO,nV,f_o,f_v,r2,tmp(nO*nV+1:nO*nV+nO*nO*nV*nV))
+  
+  ! Update the errors and parameters for the diis
+  call update_all_err(tmp,all_err,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,tmp,t1,t2) &
+  !$OMP PRIVATE(i) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do i = 1, nO*nV
+    tmp(i) = t1(i)
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP DO
+  do i = 1, nO*nO*nV*nV
+    tmp(i+nO*nV) = t2(i)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+   
+  call update_all_t(tmp,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1)
+
+  ! Diis
+  call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp)
+
+  ! Split the resulting vector
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,tmp,t1,t2) &
+  !$OMP PRIVATE(i) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do i = 1, nO*nV
+    t1(i) = tmp(i)
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP DO
+  do i = 1, nO*nO*nV*nV
+    t2(i) = tmp(i+nO*nV) 
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  ! Deallocate
+  deallocate(tmp)
+
+end
+ #+end_src
+ 
diff --git a/src/utils_cc/org/energy.org b/src/utils_cc/org/energy.org
new file mode 100644
index 00000000..2ec5c8ef
--- /dev/null
+++ b/src/utils_cc/org/energy.org
@@ -0,0 +1,15 @@
+#+begin_src f90 :comments org :tangle energy.irp.f
+subroutine det_energy(det,energy)
+
+  implicit none
+
+  integer(bit_kind), intent(in) :: det
+
+  double precision, intent(out) :: energy
+
+  call i_H_j(det,det,N_int,energy)
+
+  energy = energy + nuclear_repulsion
+  
+end  
+#+end_src
diff --git a/src/utils_cc/org/guess_t.org b/src/utils_cc/org/guess_t.org
new file mode 100644
index 00000000..9e162242
--- /dev/null
+++ b/src/utils_cc/org/guess_t.org
@@ -0,0 +1,222 @@
+* Guess
+** T1
+#+begin_src f90 :comments org :tangle guess_t.irp.f
+subroutine guess_t1(nO,nV,f_o,f_v,f_ov,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Update the T1 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: f_o(nO), f_v(nV), f_ov(nO,nV)
+
+  ! inout
+  double precision, intent(out) :: t1(nO, nV)
+
+  ! internal
+  integer                       :: i,a
+
+  if (trim(cc_guess_t1) == 'none') then
+     t1 = 0d0
+  else if (trim(cc_guess_t1) == 'MP') then
+    do a = 1, nV
+      do i = 1, nO
+        t1(i,a) = f_ov(i,a) / (f_o(i) - f_v(a) - cc_level_shift_guess)
+      enddo
+    enddo
+  else if (trim(cc_guess_t1) == 'read') then
+    call read_t1(nO,nV,t1)
+  else
+    print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t1)
+    call abort
+  endif
+  
+end  
+#+end_src
+
+** T2
+#+begin_src f90 :comments org :tangle guess_t.irp.f
+subroutine guess_t2(nO,nV,f_o,f_v,v_oovv,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Update the T2 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(in)  :: f_o(nO), f_v(nV), v_oovv(nO, nO, nV, nV)
+
+  ! inout
+  double precision, intent(out) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer                       :: i,j,a,b
+
+  if (trim(cc_guess_t2) == 'none') then
+    t2 = 0d0
+  else if (trim(cc_guess_t2) == 'MP') then
+    do b = 1, nV
+      do a = 1, nV
+        do j = 1, nO
+          do i = 1, nO
+            t2(i,j,a,b) = v_oovv(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift_guess)
+          enddo
+        enddo
+      enddo
+    enddo
+  else if (trim(cc_guess_t2) == 'read') then
+    call read_t2(nO,nV,t2)
+  else
+    print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t2)
+    call abort
+  endif
+  
+end  
+#+end_src
+
+* Write
+** T1
+#+begin_src f90 :comments org :tangle guess_t.irp.f
+subroutine write_t1(nO,nV,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Write the T1 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)          :: nO, nV
+  double precision, intent(in) :: t1(nO, nV)
+
+  ! internal
+  integer                      :: i,a
+
+  if (cc_write_t1) then
+    open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1')
+    do a = 1, nV
+      do i = 1, nO
+         write(11,'(F20.12)') t1(i,a)
+      enddo
+    enddo
+    close(11)
+  endif
+  
+end  
+#+end_src
+
+** T2
+#+begin_src f90 :comments org :tangle guess_t.irp.f
+subroutine write_t2(nO,nV,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Write the T2 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)          :: nO, nV
+  double precision, intent(in) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer                      :: i,j,a,b
+
+  if (cc_write_t2) then
+    open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2')
+    do b = 1, nV
+      do a = 1, nV
+        do j = 1, nO
+          do i = 1, nO
+             write(11,'(F20.12)') t2(i,j,a,b)
+          enddo
+        enddo
+      enddo
+    enddo
+    close(11)
+  endif
+  
+end  
+#+end_src
+
+* Read
+** T1
+#+begin_src f90 :comments org :tangle guess_t.irp.f
+subroutine read_t1(nO,nV,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Read the T1 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(out) :: t1(nO, nV)
+
+  ! internal
+  integer                       :: i,a
+  logical                       :: ok
+
+  inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok)
+  if (.not. ok) then
+     print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1'
+     print*, 'Do a first calculation with cc_write_t1 = True'
+     print*, 'and cc_guess_t1 /= read before setting cc_guess_t1 = read'
+     call abort
+  endif
+  open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1')
+  do a = 1, nV
+    do i = 1, nO
+       read(11,'(F20.12)') t1(i,a)
+    enddo
+  enddo
+  close(11)
+  
+end  
+#+end_src
+
+** T2
+#+begin_src f90 :comments org :tangle guess_t.irp.f
+subroutine read_t2(nO,nV,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Read the T2 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)           :: nO, nV
+  double precision, intent(out) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer                       :: i,j,a,b
+  logical                       :: ok
+
+  inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok)
+  if (.not. ok) then
+     print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1'
+     print*, 'Do a first calculation with cc_write_t2 = True'
+     print*, 'and cc_guess_t2 /= read before setting cc_guess_t2 = read'
+     call abort
+  endif
+  open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2')
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+           read(11,'(F20.12)') t2(i,j,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  close(11)
+  
+end  
+#+end_src
diff --git a/src/utils_cc/org/mo_integrals_cc.org b/src/utils_cc/org/mo_integrals_cc.org
new file mode 100644
index 00000000..ff3d229c
--- /dev/null
+++ b/src/utils_cc/org/mo_integrals_cc.org
@@ -0,0 +1,1305 @@
+* mo two e integrals
+** Space
+*** F
+#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine gen_f_space(det,n1,n2,list1,list2,f)
+
+  implicit none
+
+  integer, intent(in)           :: n1,n2
+  integer, intent(in)           :: list1(n1),list2(n2)
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  double precision, intent(out) :: f(n1,n2)
+
+  double precision, allocatable :: tmp_F(:,:)
+  integer                       :: i1,i2,idx1,idx2
+
+  allocate(tmp_F(mo_num,mo_num))
+  
+  call get_fock_matrix_spin(det,1,tmp_F)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(tmp_F,f,n1,n2,list1,list2) &
+  !$OMP PRIVATE(idx1,idx2,i1,i2)&
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do i2 = 1, n2
+    do i1 = 1, n1
+      idx2 = list2(i2)
+      idx1 = list1(i1)
+      f(i1,i2) = tmp_F(idx1,idx2)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(tmp_F)
+  
+end
+#+end_src
+
+*** V
+#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
+
+  implicit none
+
+  integer, intent(in)           :: n1,n2,n3,n4
+  integer, intent(in)           :: list1(n1),list2(n2),list3(n3),list4(n4)
+  double precision, intent(out) :: v(n1,n2,n3,n4)
+
+  integer                       :: i1,i2,i3,i4,idx1,idx2,idx3,idx4
+  double precision              :: get_two_e_integral
+  
+  PROVIDE mo_two_e_integrals_in_map
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) &
+  !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)&
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do i4 = 1, n4
+    do i3 = 1, n3
+      do i2 = 1, n2
+        do i1 = 1, n1
+          idx4 = list4(i4)
+          idx3 = list3(i3)
+          idx2 = list2(i2)
+          idx1 = list1(i1)
+          v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+end
+#+end_src
+
+** Provider space
+*** V
+**** full
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)]
+
+  implicit none
+
+  integer          :: i,j,k,l
+  double precision :: get_two_e_integral
+  
+  PROVIDE mo_two_e_integrals_in_map
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) &
+  !$OMP PRIVATE(i,j,k,l) &
+  !$OMP DEFAULT(NONE)
+  
+  !$OMP DO collapse(3)
+  do l = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do i = 1, mo_num
+          cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+       
+END_PROVIDER
+#+end_src
+**** oooo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo)
+
+END_PROVIDER
+#+end_src
+
+**** vooo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo)
+
+END_PROVIDER
+#+end_src
+
+**** ovoo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo)
+
+END_PROVIDER
+#+end_src
+
+**** oovo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo)
+
+END_PROVIDER
+#+end_src
+
+**** ooov
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov)
+
+END_PROVIDER
+#+end_src
+
+**** vvoo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo)
+
+END_PROVIDER
+#+end_src
+
+**** vovo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo)
+
+END_PROVIDER
+#+end_src
+
+**** voov
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov)
+
+END_PROVIDER
+#+end_src
+
+**** ovvo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo)
+
+END_PROVIDER
+#+end_src
+
+**** ovov
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov)
+
+END_PROVIDER
+#+end_src
+
+**** oovv
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv)
+
+END_PROVIDER
+#+end_src
+
+**** vvvo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_vvvo, (cc_nVa, cc_nVa, cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nVa,cc_nVa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_vvvo)
+
+END_PROVIDER
+#+end_src
+
+**** vvov
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_vvov, (cc_nVa, cc_nVa, cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nVa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_vvov)
+
+END_PROVIDER
+#+end_src
+
+**** vovv
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_vovv, (cc_nVa, cc_nOa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_vovv)
+
+END_PROVIDER
+#+end_src
+
+**** ovvv
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_ovvv, (cc_nOa, cc_nVa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_vir, cc_space_v_ovvv)
+
+END_PROVIDER
+#+end_src
+
+**** vvvv
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_v_space(cc_nVa,cc_nVa,cc_nVa,cc_nVa, cc_list_vir,cc_list_vir,cc_list_vir,cc_list_vir, cc_space_v_vvvv)
+
+END_PROVIDER
+#+end_src
+
+**** ppqq
+#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <pp|qq> integrals for general MOs (excepted core and deleted ones)
+  END_DOC
+
+  integer                       :: p,q
+  double precision, allocatable :: tmp_v(:,:,:,:)
+
+  allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo))
+
+  call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v)
+  
+  do q = 1, cc_n_mo
+    do p = 1, cc_n_mo
+      cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q)
+    enddo
+  enddo
+
+  deallocate(tmp_v)
+
+END_PROVIDER
+#+END_SRC
+
+**** aaii
+#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_aaii, (cc_nVa,cc_nOa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <aa|ii> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do i = 1, cc_nOa
+    do a = 1, cc_nVa
+      cc_space_v_aaii(a,i) = cc_space_v_vvoo(a,a,i,i)
+    enddo
+  enddo
+
+  FREE cc_space_v_vvoo
+
+END_PROVIDER
+#+END_SRC
+
+**** iiaa
+#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_iiaa, (cc_nOa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ii|aa> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do a = 1, cc_nVa
+    do i = 1, cc_nOa
+      cc_space_v_iiaa(i,a) = cc_space_v_oovv(i,i,a,a)
+    enddo
+  enddo
+
+  FREE cc_space_v_oovv
+
+END_PROVIDER
+#+END_SRC
+
+**** iijj
+#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_iijj, (cc_nOa,cc_nOa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ii|jj> integrals
+  ! i,j: occupied MO
+  END_DOC
+
+  integer :: i,j
+
+  do j = 1, cc_nOa
+    do i = 1, cc_nOa
+      cc_space_v_iijj(i,j) = cc_space_v_oooo(i,i,j,j)
+    enddo
+  enddo
+
+  FREE cc_space_v_oooo
+
+END_PROVIDER
+#+END_SRC
+
+**** aabb
+#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <aa|bb> integrals
+  ! a,b: virtual MO
+  END_DOC
+
+  integer :: a,b
+
+  do b = 1, cc_nVa
+    do a = 1, cc_nVa
+      cc_space_v_aabb(a,b) = cc_space_v_vvvv(a,a,b,b)
+    enddo
+  enddo
+
+  FREE cc_space_v_vvvv
+  
+END_PROVIDER
+#+END_SRC
+
+**** iaia
+#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_iaia, (cc_nOa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ia|ia> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do a = 1, cc_nVa
+    do i = 1, cc_nOa
+      cc_space_v_iaia(i,a) = cc_space_v_ovov(i,a,i,a)
+    enddo
+  enddo
+
+  FREE cc_space_v_ovov
+
+END_PROVIDER
+#+END_SRC
+
+**** iaai
+#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_iaai, (cc_nOa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ia|ai> integrals
+  ! a: virtual MO
+  ! i: inactive MO
+  END_DOC
+
+  integer :: a,i
+
+  do a = 1, cc_nVa
+    do i = 1, cc_nOa
+      cc_space_v_iaai(i,a) = cc_space_v_ovvo(i,a,a,i)
+    enddo
+  enddo
+
+  FREE cc_space_v_ovvo
+
+END_PROVIDER
+#+END_SRC
+
+**** aiia
+#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_v_aiia, (cc_nVa,cc_nOa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ai|ia> integrals
+  ! a: virtual MO
+  ! i: inactive MO
+  END_DOC
+
+  integer :: a,i
+
+  do i = 1, cc_nOa
+    do a = 1, cc_nVa
+      cc_space_v_aiia(a,i) = cc_space_v_voov(a,i,i,a)
+    enddo
+  enddo
+
+  FREE cc_space_v_voov
+
+END_PROVIDER
+#+END_SRC
+
+*** W
+**** oovv
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  double precision, allocatable :: tmp_v(:,:,:,:)
+  integer :: i,j,a,b
+
+  allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa))
+  
+  call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_oovv) &
+  !$OMP PRIVATE(i,j,a,b)&
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do b = 1, cc_nVa
+    do a = 1, cc_nVa
+      do j = 1, cc_nOa
+        do i = 1, cc_nOa
+          cc_space_w_oovv(i,j,a,b) = 2d0 * tmp_v(i,j,a,b) - tmp_v(j,i,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(tmp_v)
+
+END_PROVIDER
+#+end_src
+
+**** vvoo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  double precision, allocatable :: tmp_v(:,:,:,:)
+  integer :: i,j,a,b
+
+  allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa))
+  
+  call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_vvoo) &
+  !$OMP PRIVATE(i,j,a,b)&
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do j = 1, cc_nOa
+    do i = 1, cc_nOa
+      do b = 1, cc_nVa
+        do a = 1, cc_nVa
+          cc_space_w_vvoo(a,b,i,j) = 2d0 * tmp_v(a,b,i,j) - tmp_v(b,a,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(tmp_v)
+
+END_PROVIDER
+#+end_src
+
+*** F
+**** F_oo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_f_oo, (cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nOa, cc_list_occ,cc_list_occ, cc_space_f_oo)
+
+END_PROVIDER
+#+end_src
+
+**** F_ov
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_f_ov, (cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nVa, cc_list_occ,cc_list_vir, cc_space_f_ov)
+
+END_PROVIDER
+#+end_src
+
+**** F_vo
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_f_vo, (cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nOa, cc_list_vir,cc_list_occ, cc_space_f_vo)
+
+END_PROVIDER
+#+end_src
+
+**** F_vv
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_f_vv, (cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nVa, cc_list_vir,cc_list_vir, cc_space_f_vv)
+
+END_PROVIDER
+#+end_src
+
+**** F_o
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_f_o, (cc_nOa)]
+
+  implicit none
+
+  integer :: i
+
+  do i = 1, cc_nOa
+    cc_space_f_o(i) = cc_space_f_oo(i,i)
+  enddo
+
+END_PROVIDER
+#+end_src
+
+**** F_v
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+BEGIN_PROVIDER [double precision, cc_space_f_v, (cc_nVa)]
+
+  implicit none
+
+  integer :: i
+
+  do i = 1, cc_nVa
+    cc_space_f_v(i) = cc_space_f_vv(i,i)
+  enddo
+
+END_PROVIDER
+#+end_src
+
+** Spin
+*** Shift
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine shift_idx_spin(s,n_S,shift)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Shift for the partitionning alpha/beta of the spin orbitals
+  ! n_S(1): number of spin alpha in the correspondong list
+  ! n_S(2): number of spin beta in the correspondong list
+  END_DOC
+
+  integer, intent(in)  :: s, n_S(2)
+  integer, intent(out) :: shift
+
+  if (s == 1) then
+    shift = 0
+  else
+    shift = n_S(1)
+  endif
+  
+end
+#+end_src
+
+*** F
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Fock matrix corresponding to two lists of spin orbitals.
+  ! Ex: occ/occ, occ/vir,...
+  END_DOC
+  
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  integer, intent(in)           :: n1,n2, n1_S(2), n2_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2)
+  integer, intent(in)           :: dim1, dim2
+  
+  double precision, intent(out) :: f(dim1, dim2)
+
+  double precision, allocatable :: tmp_F(:,:)
+  integer                       :: i,j, idx_i,idx_j,i_shift,j_shift
+  integer                       :: tmp_i,tmp_j
+  integer                       :: si,sj,s
+
+  allocate(tmp_F(mo_num,mo_num))
+  
+  do sj = 1, 2
+    call shift_idx_spin(sj,n2_S,j_shift)
+    do si = 1, 2
+      call shift_idx_spin(si,n1_S,i_shift)
+      s = si + sj
+
+      if (s == 2 .or. s == 4) then
+        call get_fock_matrix_spin(det,sj,tmp_F)
+      else
+        do j = 1, mo_num
+          do i = 1, mo_num
+            tmp_F(i,j) = 0d0
+          enddo
+        enddo
+      endif
+
+      do tmp_j = 1, n2_S(sj)
+        j = list2(tmp_j,sj)
+        idx_j = tmp_j + j_shift
+        do tmp_i = 1, n1_S(si)
+          i = list1(tmp_i,si)
+          idx_i = tmp_i + i_shift
+          f(idx_i,idx_j) = tmp_F(i,j)
+        enddo
+      enddo
+
+    enddo
+  enddo
+  
+  deallocate(tmp_F)
+  
+end
+#+end_src
+
+*** Get F
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine get_fock_matrix_spin(det,s,f)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Fock matrix alpha or beta of an arbitrary det
+  END_DOC
+  
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  integer, intent(in)           :: s
+  
+  double precision, intent(out) :: f(mo_num,mo_num)
+  
+  integer                       :: p,q,i,s1,s2
+  integer(bit_kind)             :: res(N_int,2)
+  logical                       :: ok
+  double precision              :: mo_two_e_integral
+
+  if (s == 1) then
+    s1 = 1
+    s2 = 2
+  else
+    s1 = 2
+    s2 = 1
+  endif
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) &
+  !$OMP PRIVATE(p,q,ok,i,res)&
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do q = 1, mo_num
+    do p = 1, mo_num
+      f(p,q) = mo_one_e_integrals(p,q)
+      do i = 1, mo_num
+        call apply_hole(det, s1, i, res, ok, N_int)
+        if (ok) then
+          f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q)
+        endif
+      enddo
+      do i = 1, mo_num
+        call apply_hole(det, s2, i, res, ok, N_int)
+        if (ok) then
+          f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i)
+        endif
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+    
+end
+#+end_src
+
+*** V
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3,dim4, v)
+
+  implicit none
+
+   BEGIN_DOC
+  ! Compute the bi electronic integrals corresponding to four lists of spin orbitals.
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4,n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3, dim4
+  double precision, intent(out) :: v(dim1,dim2,dim3,dim4)
+
+  double precision              :: mo_two_e_integral
+  integer                       :: i,j,k,l,idx_i,idx_j,idx_k,idx_l
+  integer                       :: i_shift,j_shift,k_shift,l_shift
+  integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer                       :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) &
+  !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, &
+  !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,&
+  !$OMP tmp_i,tmp_j,tmp_k,tmp_l)&
+  !$OMP DEFAULT(NONE)
+  
+  do sl = 1, 2
+    call shift_idx_spin(sl,n4_S,l_shift)
+    do sk = 1, 2
+      call shift_idx_spin(sk,n3_S,k_shift)
+      do sj = 1, 2
+        call shift_idx_spin(sj,n2_S,j_shift)
+        do si = 1, 2
+          call shift_idx_spin(si,n1_S,i_shift)
+    
+          s = si+sj+sk+sl
+          ! <aa||aa> or <bb||bb>
+          if (s == 4 .or. s == 8) then
+            !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)  
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                       !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                       v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+            
+          ! <ab||ab> or <ba||ba>
+          elseif (si == sk .and. sj == sl) then
+            !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)  
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                       !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                       v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l)
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+            
+          ! <ab||ba> or <ba||ab>
+          elseif (si == sl .and. sj == sk) then
+            !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)  
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                       !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                       v(idx_i,idx_j,idx_k,idx_l) = - cc_space_v(j,i,k,l)
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          else
+             !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)  
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                       v(idx_i,idx_j,idx_k,idx_l) = 0d0
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          endif
+          
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+  
+end
+#+end_src
+
+*** V_3idx
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_l)
+
+  implicit none
+
+   BEGIN_DOC
+  ! Compute the bi electronic integrals corresponding to four lists of spin orbitals.
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4,idx_l,n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3
+  double precision, intent(out) :: v_l(dim1,dim2,dim3)
+
+  double precision              :: mo_two_e_integral
+  integer                       :: i,j,k,l,idx_i,idx_j,idx_k
+  integer                       :: i_shift,j_shift,k_shift,l_shift
+  integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer                       :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+
+  if (idx_l <= n4_S(1)) then
+    sl = 1
+  else
+    sl = 2
+  endif
+  call shift_idx_spin(sl,n4_S,l_shift)
+  tmp_l = idx_l - l_shift
+  l = list4(tmp_l,sl)
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) &
+  !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, &
+  !$OMP i,j,k,idx_i,idx_j,idx_k,&
+  !$OMP tmp_i,tmp_j,tmp_k)&
+  !$OMP DEFAULT(NONE)
+  
+  do sk = 1, 2
+    call shift_idx_spin(sk,n3_S,k_shift)
+    do sj = 1, 2
+      call shift_idx_spin(sj,n2_S,j_shift)
+      do si = 1, 2
+        call shift_idx_spin(si,n1_S,i_shift)
+  
+        s = si+sj+sk+sl
+        ! <aa||aa> or <bb||bb>
+        if (s == 4 .or. s == 8) then
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                   v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ab> or <ba||ba>
+        elseif (si == sk .and. sj == sl) then
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                   v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ba> or <ba||ab>
+        elseif (si == sl .and. sj == sk) then
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                   v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        else
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   v_l(idx_i,idx_j,idx_k) = 0d0
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        endif
+        
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+  
+end
+#+end_src
+
+*** V_3idx_ij_l
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_k)
+
+  implicit none
+
+   BEGIN_DOC
+  ! Compute the bi electronic integrals corresponding to four lists of spin orbitals.
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4,idx_k,n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3
+  double precision, intent(out) :: v_k(dim1,dim2,dim3)
+
+  double precision              :: mo_two_e_integral
+  integer                       :: i,j,k,l,idx_i,idx_j,idx_l
+  integer                       :: i_shift,j_shift,k_shift,l_shift
+  integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer                       :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+
+  if (idx_k <= n3_S(1)) then
+    sk = 1
+  else
+    sk = 2
+  endif
+  call shift_idx_spin(sk,n3_S,k_shift)
+  tmp_k = idx_k - k_shift
+  k = list3(tmp_k,sk)
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) &
+  !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, &
+  !$OMP i,j,l,idx_i,idx_j,idx_l,&
+  !$OMP tmp_i,tmp_j,tmp_l)&
+  !$OMP DEFAULT(NONE)
+  
+  do sl = 1, 2
+    call shift_idx_spin(sl,n4_S,l_shift)
+    do sj = 1, 2
+      call shift_idx_spin(sj,n2_S,j_shift)
+      do si = 1, 2
+        call shift_idx_spin(si,n1_S,i_shift)
+  
+        s = si+sj+sk+sl
+        ! <aa||aa> or <bb||bb>
+        if (s == 4 .or. s == 8) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                   v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ab> or <ba||ba>
+        elseif (si == sk .and. sj == sl) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                   v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ba> or <ba||ab>
+        elseif (si == sl .and. sj == sk) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                   v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        else
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   v_k(idx_i,idx_j,idx_l) = 0d0
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        endif
+        
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+  
+end
+#+end_src
+
+*** V_3idx_i_kl
+#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f
+subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_j)
+
+  implicit none
+
+   BEGIN_DOC
+  ! Compute the bi electronic integrals corresponding to four lists of spin orbitals.
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4,idx_j,n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3
+  double precision, intent(out) :: v_j(dim1,dim2,dim3)
+
+  double precision              :: mo_two_e_integral
+  integer                       :: i,j,k,l,idx_i,idx_k,idx_l
+  integer                       :: i_shift,j_shift,k_shift,l_shift
+  integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer                       :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+
+  if (idx_j <= n2_S(1)) then
+    sj = 1
+  else
+    sj = 2
+  endif
+  call shift_idx_spin(sj,n2_S,j_shift)
+  tmp_j = idx_j - j_shift
+  j = list2(tmp_j,sj)
+  
+  !$OMP PARALLEL &
+  !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) &
+  !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, &
+  !$OMP i,k,l,idx_i,idx_k,idx_l,&
+  !$OMP tmp_i,tmp_k,tmp_l)&
+  !$OMP DEFAULT(NONE)
+  
+  do sl = 1, 2
+    call shift_idx_spin(sl,n4_S,l_shift)
+    do sk = 1, 2
+      call shift_idx_spin(sk,n3_S,k_shift)
+      do si = 1, 2
+        call shift_idx_spin(si,n1_S,i_shift)
+  
+        s = si+sj+sk+sl
+        ! <aa||aa> or <bb||bb>
+        if (s == 4 .or. s == 8) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                   v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ab> or <ba||ba>
+        elseif (si == sk .and. sj == sl) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                   v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          
+        ! <ab||ba> or <ba||ab>
+        elseif (si == sl .and. sj == sk) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                   v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        else
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)  
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                   v_j(idx_i,idx_k,idx_l) = 0d0
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        endif
+        
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+  
+end
+#+end_src
+
diff --git a/src/utils_cc/org/occupancy.org b/src/utils_cc/org/occupancy.org
new file mode 100644
index 00000000..246bbd5b
--- /dev/null
+++ b/src/utils_cc/org/occupancy.org
@@ -0,0 +1,341 @@
+* N spin orb
+#+begin_src f90 :comments org :tangle occupancy.irp.f
+subroutine extract_n_spin(det,n)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Returns the number of occupied alpha, occupied beta, virtual alpha, virtual beta spin orbitals
+  ! in det without counting the core and deleted orbitals in the format n(nOa,nOb,nVa,nVb)
+  END_DOC
+
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  
+  integer, intent(out)          :: n(4)
+  
+  integer(bit_kind)             :: res(N_int,2)
+  integer                       :: i, si
+  logical                       :: ok, is_core, is_del
+
+  ! Init
+  n = 0
+
+  ! Loop over the spin
+  do si = 1, 2
+    do i = 1, mo_num
+      call apply_hole(det, si, i, res, ok, N_int)
+      
+      ! in core ?
+      if (is_core(i)) cycle
+      ! in del ?
+      if (is_del(i)) cycle
+      
+      if (ok) then
+        ! particle
+        n(si) = n(si) + 1
+      else
+        ! hole
+        n(si+2) = n(si+2) + 1
+      endif
+    enddo
+  enddo
+
+  !print*,n(1),n(2),n(3),n(4)
+
+end  
+#+end_src
+
+* List_orb
+** Spin
+#+begin_src f90 :comments org :tangle occupancy.irp.f
+subroutine extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Returns the the list of occupied alpha/beta, virtual alpha/beta spin orbitals
+  ! size(nO_m,1) must be max(nOa,nOb) and size(nV_m,1) must be max(nVa,nVb)
+  END_DOC
+  
+  integer, intent(in)           :: nO_m, nV_m
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  
+  integer, intent(out)          :: list_occ(nO_m,2), list_vir(nV_m,2)
+  
+  integer(bit_kind)             :: res(N_int,2)
+  integer                       :: i, si, idx_o, idx_v, idx_i, idx_b
+  logical                       :: ok, is_core, is_del
+
+  list_occ = 0
+  list_vir = 0
+
+  ! List of occ/vir alpha/beta
+
+  ! occ alpha -> list_occ(:,1)
+  ! occ beta -> list_occ(:,2)
+  ! vir alpha -> list_vir(:,1)
+  ! vir beta -> list_vir(:,2)
+
+  ! Loop over the spin 
+  do si = 1, 2
+    ! tmp idx
+    idx_o = 1
+    idx_v = 1
+    do i = 1, mo_num
+      call apply_hole(det, si, i, res, ok, N_int)
+
+      ! in core ?
+      if (is_core(i)) cycle
+      ! in del ?
+      if (is_del(i)) cycle
+            
+      if (ok) then
+        ! particle
+        list_occ(idx_o,si) = i
+        idx_o = idx_o + 1
+      else
+        ! hole
+        list_vir(idx_v,si) = i
+        idx_v = idx_v + 1
+      endif
+    enddo
+  enddo
+
+end
+#+end_src
+
+** Space
+#+begin_src f90 :comments org :tangle occupancy.irp.f
+subroutine extract_list_orb_space(det,nO,nV,list_occ,list_vir)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Returns the the list of occupied and virtual alpha spin orbitals
+  END_DOC
+  
+  integer, intent(in)           :: nO, nV
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  
+  integer, intent(out)          :: list_occ(nO), list_vir(nV)
+  
+  integer(bit_kind)             :: res(N_int,2)
+  integer                       :: i, si, idx_o, idx_v, idx_i, idx_b
+  logical                       :: ok, is_core, is_del
+  
+  if (elec_alpha_num /= elec_beta_num) then
+    print*,'Error elec_alpha_num /= elec_beta_num, impossible to create cc_list_occ and cc_list_vir, abort'
+    call abort
+  endif
+
+  list_occ = 0
+  list_vir = 0
+
+  ! List of occ/vir alpha
+
+  ! occ alpha -> list_occ(:,1)
+  ! vir alpha -> list_vir(:,1)
+
+  ! tmp idx
+  idx_o = 1
+  idx_v = 1
+  do i = 1, mo_num
+    call apply_hole(det, 1, i, res, ok, N_int)
+
+    ! in core ?
+    if (is_core(i)) cycle
+    ! in del ?
+    if (is_del(i)) cycle
+
+    if (ok) then
+      ! particle
+      list_occ(idx_o) = i
+      idx_o = idx_o + 1
+    else
+      ! hole
+      list_vir(idx_v) = i
+      idx_v = idx_v + 1
+    endif
+  enddo
+
+end
+#+end_src
+
+** is_core
+#+begin_src f90 :comments org :tangle occupancy.irp.f
+function is_core(i)
+
+  implicit none
+
+  BEGIN_DOC
+  ! True if the orbital i is a core orbital
+  END_DOC
+
+  integer, intent(in) :: i
+  logical             :: is_core
+
+  integer             :: j
+
+  ! Init
+  is_core = .False.
+
+  ! Search
+  do j = 1, dim_list_core_orb
+    if (list_core(j) == i) then
+      is_core = .True.
+      exit
+    endif
+  enddo
+
+end
+#+end_src
+
+** is_del
+#+begin_src f90 :comments org :tangle occupancy.irp.f
+function is_del(i)
+
+  implicit none
+
+  BEGIN_DOC
+  ! True if the orbital i is a deleted orbital
+  END_DOC
+
+  integer, intent(in) :: i
+  logical             :: is_del
+
+  integer             :: j
+
+  ! Init
+  is_del = .False.
+
+  ! Search
+  do j = 1, dim_list_core_orb
+    if (list_core(j) == i) then
+      is_del = .True.
+      exit
+    endif
+  enddo
+
+end
+#+end_src
+
+* Providers
+** N orb
+#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f
+ BEGIN_PROVIDER [integer, cc_nO_m]
+&BEGIN_PROVIDER [integer, cc_nOa]
+&BEGIN_PROVIDER [integer, cc_nOb]
+&BEGIN_PROVIDER [integer, cc_nOab]
+&BEGIN_PROVIDER [integer, cc_nV_m]
+&BEGIN_PROVIDER [integer, cc_nVa]
+&BEGIN_PROVIDER [integer, cc_nVb]
+&BEGIN_PROVIDER [integer, cc_nVab]
+&BEGIN_PROVIDER [integer, cc_n_mo]
+&BEGIN_PROVIDER [integer, cc_nO_S, (2)]
+&BEGIN_PROVIDER [integer, cc_nV_S, (2)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! Number of orbitals without core and deleted ones of the cc_ref det in psi_det
+  ! a: alpha, b: beta
+  ! nO_m: max(a,b) occupied 
+  ! nOa: nb a occupied 
+  ! nOb: nb b occupied 
+  ! nOab: nb a+b occupied 
+  ! nV_m: max(a,b) virtual 
+  ! nVa: nb a virtual 
+  ! nVb: nb b virtual 
+  ! nVab: nb a+b virtual 
+  END_DOC
+
+  integer :: n_spin(4)
+
+  ! Extract number of occ/vir alpha/beta spin orbitals
+  call extract_n_spin(psi_det(1,1,cc_ref),n_spin)
+
+  cc_nOa  = n_spin(1)
+  cc_nOb  = n_spin(2)
+  cc_nOab = cc_nOa + cc_nOb    !n_spin(1) + n_spin(2)
+  cc_nO_m = max(cc_nOa,cc_nOb) !max(n_spin(1), n_spin(2))
+  cc_nVa  = n_spin(3)
+  cc_nVb  = n_spin(4)
+  cc_nVab = cc_nVa + cc_nVb    !n_spin(3) + n_spin(4)
+  cc_nV_m = max(cc_nVa,cc_nVb) !max(n_spin(3), n_spin(4))
+  cc_n_mo = cc_nVa + cc_nVb    !n_spin(1) + n_spin(3)
+  cc_nO_S = (/cc_nOa,cc_nOb/)
+  cc_nV_S = (/cc_nVa,cc_nVb/)
+
+END_PROVIDER
+#+end_src
+
+** List orb
+
+*** General
+#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f
+ BEGIN_PROVIDER [integer, cc_list_gen, (cc_n_mo)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! List of general orbitals without core and deleted ones
+  END_DOC
+
+  integer :: i,j
+  logical :: is_core, is_del
+  
+  j = 1
+  do i = 1, mo_num
+    ! in core ?
+    if (is_core(i)) cycle
+    ! in del ?
+    if (is_del(i)) cycle
+    cc_list_gen(j) = i
+    j = j+1
+  enddo
+
+END_PROVIDER
+#+end_src
+
+*** Space
+#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f
+ BEGIN_PROVIDER [integer, cc_list_occ, (cc_nOa)]
+&BEGIN_PROVIDER [integer, cc_list_vir, (cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! List of occupied and virtual spatial orbitals without core and deleted ones
+  END_DOC
+
+  call extract_list_orb_space(psi_det(1,1,cc_ref),cc_nOa,cc_nVa,cc_list_occ,cc_list_vir)
+
+END_PROVIDER
+#+end_src
+
+*** Spin
+#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f
+ BEGIN_PROVIDER [integer, cc_list_occ_spin, (cc_nO_m,2)]
+&BEGIN_PROVIDER [integer, cc_list_vir_spin, (cc_nV_m,2)]
+&BEGIN_PROVIDER [logical, cc_ref_is_open_shell]
+
+  implicit none
+
+  BEGIN_DOC
+  ! List of occupied and virtual spin orbitals without core and deleted ones
+  END_DOC
+
+  integer :: i
+
+  call extract_list_orb_spin(psi_det(1,1,cc_ref),cc_nO_m,cc_nV_m,cc_list_occ_spin,cc_list_vir_spin)
+
+  cc_ref_is_open_shell = .False.
+  do i = 1, cc_nO_m
+    if (cc_list_occ_spin(i,1) /= cc_list_occ_spin(i,2)) then
+       cc_ref_is_open_shell = .True.
+    endif
+  enddo
+
+
+END_PROVIDER
+#+end_src
diff --git a/src/utils_cc/org/phase.org b/src/utils_cc/org/phase.org
new file mode 100644
index 00000000..5f67859c
--- /dev/null
+++ b/src/utils_cc/org/phase.org
@@ -0,0 +1,178 @@
+#+begin_src f90 :comments org :notangle phase.irp.f
+program run
+  implicit none
+
+  integer :: n(2), degree1, degree2, exc(0:2,2,2)
+  integer, allocatable :: list_anni(:,:), list_crea(:,:)
+  double precision :: phase1, phase2
+  integer :: h1,h2,p1,p2,s1,s2,i,j
+
+  allocate(list_anni(N_int*bit_kind_size,2))
+  allocate(list_crea(N_int*bit_kind_size,2))
+
+  do i = 1, N_det-1
+    do j = i+1, N_det
+      !call print_det(psi_det(1,1,j),N_int)
+      call get_excitation(psi_det(1,1,i),psi_det(1,1,j),exc,degree1,phase1,N_int)
+      call decode_exc(exc,degree1,h1,p1,h2,p2,s1,s2)
+      !print*,'old',degree1,phase1
+      !print*,'h1:',h1,'h2:',h2,'s1:',s1,'s2:',s2
+      !print*,'p1:',p1,'p2:',p2
+      call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree1,N_int)
+      call get_excitation_general(psi_det(1,1,i),psi_det(1,1,j),degree2,n,list_anni,list_crea,phase2,N_int)
+      !print*,'new',degree2,phase2
+      !print*,'ha:',list_anni(1:n(1),1),'hb',list_anni(1:n(2),2)
+      !print*,'pa:',list_crea(1:n(1),1),'pb',list_crea(1:n(2),2)
+      !print*,''
+      if (degree1 /= degree2) then
+        print*,'Error degree:',degree1,degree2
+        call abort
+      endif
+      if (degree1 <= 2 .and. phase1 /= phase2) then
+        print*,'Error phase',phase1,phase2
+        call abort
+      endif
+    enddo
+  enddo
+  
+end
+#+end_src
+
+** phase
+#+begin_src f90 :comments org :tangle phase.irp.f
+subroutine get_phase_general(det1,det2,phase,degree,Nint)
+  implicit none
+
+  integer, intent(in)           :: Nint
+  integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2)
+  double precision, intent(out) :: phase
+  integer, intent(out)          :: degree
+  integer :: n(2)
+  integer, allocatable :: list_anni(:,:), list_crea(:,:)
+
+  allocate(list_anni(N_int*bit_kind_size,2))
+  allocate(list_crea(N_int*bit_kind_size,2))
+
+  call get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint)
+end
+#+end_src
+
+** Get excitation general
+#+begin_src f90 :comments org :tangle phase.irp.f
+subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint)
+
+  use bitmasks
+
+  implicit none
+
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in)  :: det1(Nint,2), det2(Nint,2)
+  double precision, intent(out)  :: phase
+  integer, intent(out)           :: list_crea(Nint*bit_kind_size,2)
+  integer, intent(out)           :: list_anni(Nint*bit_kind_size,2)
+  integer, intent(out)           :: degree, n(2)
+  
+  integer, allocatable           :: l1(:,:), l2(:,:) 
+  integer(bit_kind), allocatable :: det_crea(:,:), det_anni(:,:)
+  integer, allocatable           :: pos_anni(:,:), pos_crea(:,:)
+
+  integer :: n1(2),n2(2),n_crea(2),n_anni(2),i,j,k,d
+
+  allocate(l1(Nint*bit_kind_size,2))
+  allocate(l2(Nint*bit_kind_size,2))
+  allocate(det_crea(Nint,2),det_anni(Nint,2))
+
+  ! 1      111010
+  ! 2      110101
+  !
+  !not 1-> 000101
+  !    2   110101
+  !and     000101 -> crea
+  !
+  !    1   111010
+  !not 2-> 001010
+  !        001010 -> anni
+
+  do j = 1, 2
+    do i = 1, Nint
+      det_crea(i,j) = iand(not(det1(i,j)),det2(i,j))
+    enddo
+  enddo
+  
+  do j = 1, 2
+    do i = 1, Nint
+      det_anni(i,j) = iand(det1(i,j),not(det2(i,j)))
+    enddo
+  enddo
+
+  call bitstring_to_list_ab(det1,l1,n1,Nint)
+  call bitstring_to_list_ab(det2,l2,n2,Nint)
+  call bitstring_to_list_ab(det_crea,list_crea,n_crea,Nint)
+  call bitstring_to_list_ab(det_anni,list_anni,n_anni,Nint)
+
+  do i = 1, 2
+    if (n_crea(i) /= n_anni(i)) then
+      print*,'Well, it seems we have a problem here...'
+      call abort
+    endif
+  enddo
+
+  !1    11110011001  1 2 3 4 7 8  11
+  !pos               1 2 3 4 5 6  7 
+  !2    11100101011  1 2 3 6 8 10 11
+  !anni 00010010000  4 7
+  !pos               4 5
+  !crea 00000100010  6 10
+  !pos               4 6
+  !4 -> 6  pos(4 -> 4)
+  !7 -> 10 pos(5 -> 6)
+
+  n = n_anni
+  degree = n_anni(1) + n_anni(2)
+
+  allocate(pos_anni(max(n(1),n(2)),2))
+  allocate(pos_crea(max(n(1),n(2)),2))
+  
+  ! Search pos anni
+  do j = 1, 2
+    k = 1
+    do i = 1, n1(j)
+       if (l1(i,j) /= list_anni(k,j)) cycle
+       pos_anni(k,j) = i
+       k = k + 1
+    enddo
+  enddo
+
+  ! Search pos crea
+  do j = 1, 2
+    k = 1
+    do i = 1, n2(j)
+       if (l2(i,j) /= list_crea(k,j)) cycle
+       pos_crea(k,j) = i
+       k = k + 1
+    enddo
+  enddo
+
+  ! Distance between the ith anni and the ith crea op
+  ! By doing so there is no crossing between the different pairs of anni/crea
+  ! and the phase is determined by the sum of the distances
+  ! -> (-1)^{sum of the distances}
+  d = 0
+  do j = 1, 2
+    do i = 1, n(j)
+      d = d + abs(pos_anni(i,j) - pos_crea(i,j))
+    enddo
+  enddo
+  
+  phase = dble((-1)**d)
+
+  ! Debug
+  !print*,l2(1:n2(1),1)
+  !print*,l2(1:n2(2),2)
+  !!call print_det(det1,Nint)
+  !!call print_det(det2,Nint)
+  !print*,phase
+  !print*,''
+end
+#+end_src
+
diff --git a/src/utils_cc/org/print_wf_qp_edit.org b/src/utils_cc/org/print_wf_qp_edit.org
new file mode 100644
index 00000000..0f19ac76
--- /dev/null
+++ b/src/utils_cc/org/print_wf_qp_edit.org
@@ -0,0 +1,33 @@
+#+begin_src f90 :comments org :tangle print_wf_qp_edit.irp.f
+program run
+
+  implicit none
+
+  read_wf = .true.
+  touch read_wf
+
+  call print_wf_qp_edit()
+
+end 
+#+end_src
+
+#+begin_src f90 :comments org :tangle print_wf_qp_edit.irp.f
+subroutine print_wf_qp_edit()
+  
+  implicit none
+
+  BEGIN_DOC
+  ! Print the psi_det wave function up to n_det_qp_edit
+  END_DOC
+  
+  integer :: i
+
+  do i = 1, n_det_qp_edit
+    print*,i
+    write(*,'(100(1pE12.4))') psi_coef(i,:)
+    call print_det(psi_det(1,1,i),N_int)
+    print*,''
+  enddo
+
+end
+#+end_src
diff --git a/src/utils_cc/org/update_t.org b/src/utils_cc/org/update_t.org
new file mode 100644
index 00000000..c0207b22
--- /dev/null
+++ b/src/utils_cc/org/update_t.org
@@ -0,0 +1,76 @@
+* T1
+#+begin_src f90 :comments org :tangle update_t.irp.f
+subroutine update_t1(nO,nV,f_o,f_v,r1,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Update the T1 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)             :: nO, nV
+  double precision, intent(in)    :: f_o(nO), f_v(nV), r1(nO, nV)
+
+  ! inout
+  double precision, intent(inout) :: t1(nO, nV)
+
+  ! internal
+  integer                         :: i,a
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) &
+  !$OMP PRIVATE(i,a) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do a = 1, nV
+    do i = 1, nO
+      t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+end  
+#+end_src
+
+* T2
+#+begin_src f90 :comments org :tangle update_t.irp.f
+subroutine update_t2(nO,nV,f_o,f_v,r2,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Update the T2 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)             :: nO, nV
+  double precision, intent(in)    :: f_o(nO), f_v(nV), r2(nO, nO, nV, nV)
+
+  ! inout
+  double precision, intent(inout) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer                         :: i,j,a,b
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          t2(i,j,a,b) = t2(i,j,a,b) - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+end  
+#+end_src
+
diff --git a/src/utils_cc/phase.irp.f b/src/utils_cc/phase.irp.f
new file mode 100644
index 00000000..01b41f49
--- /dev/null
+++ b/src/utils_cc/phase.irp.f
@@ -0,0 +1,135 @@
+! phase
+
+subroutine get_phase_general(det1,det2,phase,degree,Nint)
+  implicit none
+
+  integer, intent(in)           :: Nint
+  integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2)
+  double precision, intent(out) :: phase
+  integer, intent(out)          :: degree
+  integer :: n(2)
+  integer, allocatable :: list_anni(:,:), list_crea(:,:)
+
+  allocate(list_anni(N_int*bit_kind_size,2))
+  allocate(list_crea(N_int*bit_kind_size,2))
+
+  call get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint)
+end
+
+! Get excitation general
+
+subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint)
+
+  use bitmasks
+
+  implicit none
+
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in)  :: det1(Nint,2), det2(Nint,2)
+  double precision, intent(out)  :: phase
+  integer, intent(out)           :: list_crea(Nint*bit_kind_size,2)
+  integer, intent(out)           :: list_anni(Nint*bit_kind_size,2)
+  integer, intent(out)           :: degree, n(2)
+  
+  integer, allocatable           :: l1(:,:), l2(:,:) 
+  integer(bit_kind), allocatable :: det_crea(:,:), det_anni(:,:)
+  integer, allocatable           :: pos_anni(:,:), pos_crea(:,:)
+
+  integer :: n1(2),n2(2),n_crea(2),n_anni(2),i,j,k,d
+
+  allocate(l1(Nint*bit_kind_size,2))
+  allocate(l2(Nint*bit_kind_size,2))
+  allocate(det_crea(Nint,2),det_anni(Nint,2))
+
+  ! 1      111010
+  ! 2      110101
+  !
+  !not 1-> 000101
+  !    2   110101
+  !and     000101 -> crea
+  !
+  !    1   111010
+  !not 2-> 001010
+  !        001010 -> anni
+
+  do j = 1, 2
+    do i = 1, Nint
+      det_crea(i,j) = iand(not(det1(i,j)),det2(i,j))
+    enddo
+  enddo
+  
+  do j = 1, 2
+    do i = 1, Nint
+      det_anni(i,j) = iand(det1(i,j),not(det2(i,j)))
+    enddo
+  enddo
+
+  call bitstring_to_list_ab(det1,l1,n1,Nint)
+  call bitstring_to_list_ab(det2,l2,n2,Nint)
+  call bitstring_to_list_ab(det_crea,list_crea,n_crea,Nint)
+  call bitstring_to_list_ab(det_anni,list_anni,n_anni,Nint)
+
+  do i = 1, 2
+    if (n_crea(i) /= n_anni(i)) then
+      print*,'Well, it seems we have a problem here...'
+      call abort
+    endif
+  enddo
+
+  !1    11110011001  1 2 3 4 7 8  11
+  !pos               1 2 3 4 5 6  7 
+  !2    11100101011  1 2 3 6 8 10 11
+  !anni 00010010000  4 7
+  !pos               4 5
+  !crea 00000100010  6 10
+  !pos               4 6
+  !4 -> 6  pos(4 -> 4)
+  !7 -> 10 pos(5 -> 6)
+
+  n = n_anni
+  degree = n_anni(1) + n_anni(2)
+
+  allocate(pos_anni(max(n(1),n(2)),2))
+  allocate(pos_crea(max(n(1),n(2)),2))
+  
+  ! Search pos anni
+  do j = 1, 2
+    k = 1
+    do i = 1, n1(j)
+       if (l1(i,j) /= list_anni(k,j)) cycle
+       pos_anni(k,j) = i
+       k = k + 1
+    enddo
+  enddo
+
+  ! Search pos crea
+  do j = 1, 2
+    k = 1
+    do i = 1, n2(j)
+       if (l2(i,j) /= list_crea(k,j)) cycle
+       pos_crea(k,j) = i
+       k = k + 1
+    enddo
+  enddo
+
+  ! Distance between the ith anni and the ith crea op
+  ! By doing so there is no crossing between the different pairs of anni/crea
+  ! and the phase is determined by the sum of the distances
+  ! -> (-1)^{sum of the distances}
+  d = 0
+  do j = 1, 2
+    do i = 1, n(j)
+      d = d + abs(pos_anni(i,j) - pos_crea(i,j))
+    enddo
+  enddo
+  
+  phase = dble((-1)**d)
+
+  ! Debug
+  !print*,l2(1:n2(1),1)
+  !print*,l2(1:n2(2),2)
+  !!call print_det(det1,Nint)
+  !!call print_det(det2,Nint)
+  !print*,phase
+  !print*,''
+end
diff --git a/src/utils_cc/print_wf_qp_edit.irp.f b/src/utils_cc/print_wf_qp_edit.irp.f
new file mode 100644
index 00000000..1337621d
--- /dev/null
+++ b/src/utils_cc/print_wf_qp_edit.irp.f
@@ -0,0 +1,29 @@
+program run
+
+  implicit none
+
+  read_wf = .true.
+  touch read_wf
+
+  call print_wf_qp_edit()
+
+end
+
+subroutine print_wf_qp_edit()
+  
+  implicit none
+
+  BEGIN_DOC
+  ! Print the psi_det wave function up to n_det_qp_edit
+  END_DOC
+  
+  integer :: i
+
+  do i = 1, n_det_qp_edit
+    print*,i
+    write(*,'(100(1pE12.4))') psi_coef(i,:)
+    call print_det(psi_det(1,1,i),N_int)
+    print*,''
+  enddo
+
+end
diff --git a/src/utils_cc/update_t.irp.f b/src/utils_cc/update_t.irp.f
new file mode 100644
index 00000000..dbd4f4bd
--- /dev/null
+++ b/src/utils_cc/update_t.irp.f
@@ -0,0 +1,73 @@
+! T1
+
+subroutine update_t1(nO,nV,f_o,f_v,r1,t1)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Update the T1 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)             :: nO, nV
+  double precision, intent(in)    :: f_o(nO), f_v(nV), r1(nO, nV)
+
+  ! inout
+  double precision, intent(inout) :: t1(nO, nV)
+
+  ! internal
+  integer                         :: i,a
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) &
+  !$OMP PRIVATE(i,a) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do a = 1, nV
+    do i = 1, nO
+      t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+end
+
+! T2
+
+subroutine update_t2(nO,nV,f_o,f_v,r2,t2)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Update the T2 amplitudes for CC
+  END_DOC
+
+  ! in
+  integer, intent(in)             :: nO, nV
+  double precision, intent(in)    :: f_o(nO), f_v(nV), r2(nO, nO, nV, nV)
+
+  ! inout
+  double precision, intent(inout) :: t2(nO, nO, nV, nV)
+
+  ! internal
+  integer                         :: i,j,a,b
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(3)
+  do b = 1, nV
+    do a = 1, nV
+      do j = 1, nO
+        do i = 1, nO
+          t2(i,j,a,b) = t2(i,j,a,b) - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  
+end
diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg
new file mode 100644
index 00000000..9c9f6248
--- /dev/null
+++ b/src/utils_trust_region/EZFIO.cfg
@@ -0,0 +1,89 @@
+[thresh_delta]
+type: double precision
+doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
+[thresh_rho]
+type: double precision
+doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho)
+interface: ezfio,provider,ocaml
+default: 0.1
+
+[thresh_eig]
+type: double precision
+doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[thresh_model]
+type: double precision
+doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[absolute_eig]
+type: logical
+doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region)
+interface: ezfio,provider,ocaml
+default: false
+
+[thresh_wtg]
+type: double precision
+doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_wtg2]
+type: double precision
+doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[avoid_saddle]
+type: logical
+doc: Test to avoid saddle point, active if true
+interface: ezfio,provider,ocaml
+default: false
+
+[version_avoid_saddle]
+type: integer
+doc: cf. trust region, not stable
+interface: ezfio,provider,ocaml
+default: 3
+
+[thresh_rho_2]
+type: double precision
+doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected
+interface: ezfio,provider,ocaml
+default: 0.1
+
+[thresh_cc]
+type: double precision
+doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc)
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_model_2]
+type: double precision
+doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[version_lambda_search]
+type: integer
+doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 ->  1/||x||^2 - 1/delta^2 .eq. 0
+interface: ezfio,provider,ocaml
+default: 2
+
+[nb_it_max_lambda]
+type: integer
+doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 100
+
+[nb_it_max_pre_search]
+type: integer
+doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 40
diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED
new file mode 100644
index 00000000..1a65ce38
--- /dev/null
+++ b/src/utils_trust_region/NEED
@@ -0,0 +1 @@
+hartree_fock
diff --git a/src/utils_trust_region/README.rst b/src/utils_trust_region/README.rst
new file mode 100644
index 00000000..6a0689b6
--- /dev/null
+++ b/src/utils_trust_region/README.rst
@@ -0,0 +1,5 @@
+============
+trust_region
+============
+
+The documentation can be found in the org files.
diff --git a/src/utils_trust_region/TANGLE_org_mode.sh b/src/utils_trust_region/TANGLE_org_mode.sh
new file mode 100755
index 00000000..059cbe7d
--- /dev/null
+++ b/src/utils_trust_region/TANGLE_org_mode.sh
@@ -0,0 +1,7 @@
+#!/bin/sh   
+
+list='ls *.org'
+for element in $list    
+do   
+		emacs --batch $element -f org-babel-tangle
+done
diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f
new file mode 100644
index 00000000..eac17275
--- /dev/null
+++ b/src/utils_trust_region/algo_trust.irp.f
@@ -0,0 +1,248 @@
+! Algorithm for the trust region
+
+! step_in_trust_region:
+! Computes the step in the trust region (delta)
+! (automatically sets at the iteration 0 and which evolves during the
+! process in function of the evolution of rho). The step is computing by
+! constraining its norm with a lagrange multiplier.
+! Since the calculation of the step is based on the Newton method, an
+! estimation of the gain in energy is given using the Taylors series
+! truncated at the second order (criterion_model).
+! If (DABS(criterion-criterion_model) < 1d-12) then 
+!   must_exit = .True.
+! else 
+!   must_exit = .False.
+
+! This estimation of the gain in energy is used by
+! is_step_cancel_trust_region to say if the step is accepted or cancelled. 
+
+! If the step must be cancelled, the calculation restart from the same
+! hessian and gradient and recomputes the step but in a smaller trust
+! region and so on until the step is accepted. If the step is accepted
+! the hessian and the gradient are recomputed to produce a new step.
+
+! Example:
+
+
+!  !### Initialization ###
+!  delta = 0d0
+!  nb_iter = 0 ! Must start at 0 !!!
+!  rho = 0.5d0
+!  not_converged = .True.
+!
+!  ! ### TODO ###
+!  ! Compute the criterion before the loop
+!  call #your_criterion(prev_criterion)
+!
+!  do while (not_converged)
+!    ! ### TODO ## 
+!    ! Call your gradient
+!    ! Call you hessian
+!    call #your_gradient(v_grad) (1D array)
+!    call #your_hessian(H) (2D array) 
+!
+!    ! ### TODO ###
+!    ! Diagonalization of the hessian 
+!    call diagonalization_hessian(n,H,e_val,w)
+!
+!    cancel_step = .True. ! To enter in the loop just after 
+!    ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+!    do while (cancel_step)
+!
+!      ! Hessian,gradient,Criterion -> x 
+!      call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit)
+!
+!      if (must_exit) then
+!        ! ### Message ###
+!        ! if step_in_trust_region sets must_exit on true for numerical reasons
+!        print*,'algo_trust1 sends the message : Exit'
+!        !### exit ###
+!      endif
+!
+!      !### TODO ###  
+!      ! Compute x -> m_x
+!      ! Compute m_x -> R
+!      ! Apply R and keep the previous MOs...
+!      ! Update/touch 
+!      ! Compute the new criterion/energy -> criterion
+!
+!      call #your_routine_1D_to_2D_antisymmetric_array(x,m_x)
+!      call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R)
+!      call #your_routine_to_apply_the_rotation_matrix(R,prev_mos)
+!
+!      TOUCH #your_variables
+!      
+!      call #your_criterion(criterion)
+!
+!      ! Criterion -> step accepted or rejected 
+!      call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+!
+!      ! ### TODO ###
+!      !if (cancel_step) then
+!      ! Cancel the previous step (mo_coef = prev_mos if you keep them...)
+!      !endif
+!      #if (cancel_step) then
+!        #mo_coef = prev_mos
+!      #endif
+!
+!    enddo
+!
+!    !call save_mos() !### depend of the time for 1 iteration
+!
+!    ! To exit the external loop if must_exit = .True.
+!    if (must_exit) then
+!      !### exit ###
+!    endif 
+!
+!    ! Step accepted, nb iteration + 1
+!    nb_iter = nb_iter + 1
+!
+!    ! ### TODO ###
+!    !if (###Conditions###) then
+!    ! no_converged = .False.
+!    !endif
+!    #if (#your_conditions) then
+!    #  not_converged = .False.
+!    #endif
+!
+!  enddo
+
+
+
+! Variables:
+
+! Input:
+! | n              | integer          | m*(m-1)/2                                                                      |
+! | m              | integer          | number of mo in the mo_class                                                   |
+! | H(n,n)         | double precision | Hessian                                                                        |
+! | v_grad(n)      | double precision | Gradient                                                                       |
+! | W(n,n)         | double precision | Eigenvectors of the hessian                                                    |
+! | e_val(n)       | double precision | Eigenvalues of the hessian                                                     |
+! | criterion      | double precision | Actual criterion                                                               |
+! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration |
+! | rho            | double precision | Given by is_step_cancel_trus_region                                            |
+! |                |                  | Agreement between the real function and the Taylor series (2nd order)          |
+! | nb_iter        | integer          | Actual number of iterations                                                    |
+
+! Input/output:
+! | delta           | double precision | Radius of the trust region                                                      |
+
+! Output:
+! | criterion_model | double precision | Predicted criterion after the rotation                                          |
+! | x(n)            | double precision | Step                                                                            |
+! | must_exit       | logical          | If the program must exit the loop                                               |
+
+
+subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the step and the expected criterion/energy after the step
+  END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)             :: n, nb_iter
+  double precision, intent(in)    :: H(n,n), W(n,n), v_grad(n)
+  double precision, intent(in)    :: rho, prev_criterion
+
+  ! inout
+  double precision, intent(inout) :: delta, e_val(n)
+
+  ! out
+  double precision, intent(out)   :: criterion_model, x(n)
+  logical, intent(out)            :: must_exit
+
+  ! internal
+  integer :: info
+
+  must_exit = .False.
+  
+  call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta)
+
+  call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model)
+
+  ! exit if DABS(prev_criterion - criterion_model) < 1d-12
+  if (DABS(prev_criterion - criterion_model) < thresh_model) then
+    print*,''
+    print*,'###############################################################################'
+    print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region'
+    print*,'###############################################################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  if (delta < thresh_delta) then
+    print*,''
+    print*,'##############################################'
+    print*,'Delta <', thresh_delta, 'stop the trust region'
+    print*,'##############################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  ! Add after the call to this subroutine, a statement:
+  ! "if (must_exit) then
+  !   exit
+  ! endif"
+  ! in order to exit the optimization loop
+
+end subroutine
+
+
+
+! Variables:
+
+! Input:
+! | nb_iter         | integer          | actual number of iterations                    |
+! | prev_criterion  | double precision | criterion before the application of the step x |
+! | criterion       | double precision | criterion after the application of the step x  |
+! | criterion_model | double precision | predicted criterion after the application of x |
+
+! Output:
+! | rho         | double precision | Agreement between the predicted criterion and the real new criterion |
+! | cancel_step | logical          | If the step must be cancelled                                        |
+
+
+subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute if the step should be cancelled
+  END_DOC
+
+  implicit none
+ 
+  ! in
+  double precision, intent(in)  :: prev_criterion, criterion, criterion_model
+  
+  ! inout
+  integer, intent(inout)        :: nb_iter
+
+  ! out
+  logical, intent(out)          :: cancel_step
+  double precision, intent(out) :: rho
+
+  ! Computes rho
+  call trust_region_rho(prev_criterion,criterion,criterion_model,rho)
+  
+  if (nb_iter == 0) then
+    nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled  
+  endif
+
+  ! If rho < thresh_rho -> give something in output to cancel the step
+  if (rho >= thresh_rho) then !0.1d0) then
+     ! The step is accepted
+     cancel_step = .False.
+  else
+     ! The step is rejected
+     cancel_step = .True.
+     print*, '***********************'
+     print*, 'Step cancel : rho <', thresh_rho
+     print*, '***********************'
+  endif
+  
+end subroutine
diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/algo_trust.org
new file mode 100644
index 00000000..aa836f98
--- /dev/null
+++ b/src/utils_trust_region/algo_trust.org
@@ -0,0 +1,593 @@
+* Algorithm for the trust region
+
+step_in_trust_region:
+Computes the step in the trust region (delta)
+(automatically sets at the iteration 0 and which evolves during the
+process in function of the evolution of rho). The step is computing by
+constraining its norm with a lagrange multiplier.
+Since the calculation of the step is based on the Newton method, an
+estimation of the gain in energy is given using the Taylors series
+truncated at the second order (criterion_model).
+If (DABS(criterion-criterion_model) < 1d-12) then 
+  must_exit = .True.
+else 
+  must_exit = .False.
+
+This estimation of the gain in energy is used by
+is_step_cancel_trust_region to say if the step is accepted or cancelled. 
+
+If the step must be cancelled, the calculation restart from the same
+hessian and gradient and recomputes the step but in a smaller trust
+region and so on until the step is accepted. If the step is accepted
+the hessian and the gradient are recomputed to produce a new step.
+
+Example:
+
+#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
+!  !### Initialization ###
+!  delta = 0d0
+!  nb_iter = 0 ! Must start at 0 !!!
+!  rho = 0.5d0
+!  not_converged = .True.
+!
+!  ! ### TODO ###
+!  ! Compute the criterion before the loop
+!  call #your_criterion(prev_criterion)
+!
+!  do while (not_converged)
+!    ! ### TODO ## 
+!    ! Call your gradient
+!    ! Call you hessian
+!    call #your_gradient(v_grad) (1D array)
+!    call #your_hessian(H) (2D array) 
+!
+!    ! ### TODO ###
+!    ! Diagonalization of the hessian 
+!    call diagonalization_hessian(n,H,e_val,w)
+!
+!    cancel_step = .True. ! To enter in the loop just after 
+!    ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+!    do while (cancel_step)
+!
+!      ! Hessian,gradient,Criterion -> x 
+!      call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit)
+!
+!      if (must_exit) then
+!        ! ### Message ###
+!        ! if step_in_trust_region sets must_exit on true for numerical reasons
+!        print*,'algo_trust1 sends the message : Exit'
+!        !### exit ###
+!      endif
+!
+!      !### TODO ###  
+!      ! Compute x -> m_x
+!      ! Compute m_x -> R
+!      ! Apply R and keep the previous MOs...
+!      ! Update/touch 
+!      ! Compute the new criterion/energy -> criterion
+!
+!      call #your_routine_1D_to_2D_antisymmetric_array(x,m_x)
+!      call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R)
+!      call #your_routine_to_apply_the_rotation_matrix(R,prev_mos)
+!
+!      TOUCH #your_variables
+!      
+!      call #your_criterion(criterion)
+!
+!      ! Criterion -> step accepted or rejected 
+!      call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+!
+!      ! ### TODO ###
+!      !if (cancel_step) then
+!      ! Cancel the previous step (mo_coef = prev_mos if you keep them...)
+!      !endif
+!      #if (cancel_step) then
+!        #mo_coef = prev_mos
+!      #endif
+!
+!    enddo
+!
+!    !call save_mos() !### depend of the time for 1 iteration
+!
+!    ! To exit the external loop if must_exit = .True.
+!    if (must_exit) then
+!      !### exit ###
+!    endif 
+!
+!    ! Step accepted, nb iteration + 1
+!    nb_iter = nb_iter + 1
+!
+!    ! ### TODO ###
+!    !if (###Conditions###) then
+!    ! no_converged = .False.
+!    !endif
+!    #if (#your_conditions) then
+!    #  not_converged = .False.
+!    #endif
+!
+!  enddo
+#+END_SRC
+
+Variables:
+
+Input:
+| n              | integer          | m*(m-1)/2                                                                      |
+| m              | integer          | number of mo in the mo_class                                                   |
+| H(n,n)         | double precision | Hessian                                                                        |
+| v_grad(n)      | double precision | Gradient                                                                       |
+| W(n,n)         | double precision | Eigenvectors of the hessian                                                    |
+| e_val(n)       | double precision | Eigenvalues of the hessian                                                     |
+| criterion      | double precision | Actual criterion                                                               |
+| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration |
+| rho            | double precision | Given by is_step_cancel_trus_region                                            |
+|                |                  | Agreement between the real function and the Taylor series (2nd order)          |
+| nb_iter        | integer          | Actual number of iterations                                                    |
+
+Input/output:
+| delta           | double precision | Radius of the trust region                                                      |
+
+Output:
+| criterion_model | double precision | Predicted criterion after the rotation                                          |
+| x(n)            | double precision | Step                                                                            |
+| must_exit       | logical          | If the program must exit the loop                                               |
+
+#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
+subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the step and the expected criterion/energy after the step
+  END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)             :: n, nb_iter
+  double precision, intent(in)    :: H(n,n), W(n,n), v_grad(n)
+  double precision, intent(in)    :: rho, prev_criterion
+
+  ! inout
+  double precision, intent(inout) :: delta, e_val(n)
+
+  ! out
+  double precision, intent(out)   :: criterion_model, x(n)
+  logical, intent(out)            :: must_exit
+
+  ! internal
+  integer :: info
+
+  must_exit = .False.
+  
+  call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta)
+
+  call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model)
+
+  ! exit if DABS(prev_criterion - criterion_model) < 1d-12
+  if (DABS(prev_criterion - criterion_model) < thresh_model) then
+    print*,''
+    print*,'###############################################################################'
+    print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region'
+    print*,'###############################################################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  if (delta < thresh_delta) then
+    print*,''
+    print*,'##############################################'
+    print*,'Delta <', thresh_delta, 'stop the trust region'
+    print*,'##############################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  ! Add after the call to this subroutine, a statement:
+  ! "if (must_exit) then
+  !   exit
+  ! endif"
+  ! in order to exit the optimization loop
+
+end subroutine
+#+END_SRC
+
+Variables:
+
+Input:
+| nb_iter         | integer          | actual number of iterations                    |
+| prev_criterion  | double precision | criterion before the application of the step x |
+| criterion       | double precision | criterion after the application of the step x  |
+| criterion_model | double precision | predicted criterion after the application of x |
+
+Output:
+| rho         | double precision | Agreement between the predicted criterion and the real new criterion |
+| cancel_step | logical          | If the step must be cancelled                                        |
+
+#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
+subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute if the step should be cancelled
+  END_DOC
+
+  implicit none
+ 
+  ! in
+  double precision, intent(in)  :: prev_criterion, criterion, criterion_model
+  
+  ! inout
+  integer, intent(inout)        :: nb_iter
+
+  ! out
+  logical, intent(out)          :: cancel_step
+  double precision, intent(out) :: rho
+
+  ! Computes rho
+  call trust_region_rho(prev_criterion,criterion,criterion_model,rho)
+  
+  if (nb_iter == 0) then
+    nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled  
+  endif
+
+  ! If rho < thresh_rho -> give something in output to cancel the step
+  if (rho >= thresh_rho) then !0.1d0) then
+     ! The step is accepted
+     cancel_step = .False.
+  else
+     ! The step is rejected
+     cancel_step = .True.
+     print*, '***********************'
+     print*, 'Step cancel : rho <', thresh_rho
+     print*, '***********************'
+  endif
+  
+end subroutine
+#+END_SRC
+
+** Template for MOs
+#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt
+subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list)
+
+  implicit none
+
+  ! Variables
+
+  ! In
+  integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+
+  ! Out
+  ! Rien ou un truc pour savoir si ça c'est bien passé
+  
+  ! Internal
+  double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:)
+  double precision, allocatable :: prev_mos(:,:)
+  double precision              :: criterion, prev_criterion, criterion_model
+  double precision              :: delta, rho
+  logical                       :: not_converged, cancel_step, must_exit, enforce_step_cancellation
+  integer                       :: nb_iter, info, nb_sub_iter
+  integer                       :: i,j,tmp_i,tmp_j
+
+  allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size))
+  allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num))
+  allocate(prev_mos(ao_num, mo_num))
+
+  ! Provide the criterion, but unnecessary because it's done 
+  ! automatically
+  PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+
+  ! Initialization
+  delta = 0d0 
+  nb_iter = 0 ! Must start at 0 !!!
+  rho = 0.5d0 ! Must start at 0.5 
+  not_converged = .True. ! Must be true
+
+  ! Compute the criterion before the loop
+  prev_criterion = C_PROVIDER
+
+  do while (not_converged)
+
+      print*,''
+      print*,'******************'
+      print*,'Iteration', nb_iter
+      print*,'******************'
+      print*,''
+
+      ! The new hessian and gradient are computed at the end of the previous iteration
+      ! Diagonalization of the hessian 
+      call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
+
+      cancel_step = .True. ! To enter in the loop just after
+      nb_sub_iter = 0
+
+      ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+      do while (cancel_step)
+
+          print*,'-----------------------------'
+          print*,'Iteration:', nb_iter
+          print*,'Sub iteration:', nb_sub_iter
+          print*,'-----------------------------'
+
+          ! Hessian,gradient,Criterion -> x 
+          call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, &
+               prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
+
+          if (must_exit) then
+              ! if step_in_trust_region sets must_exit on true for numerical reasons
+              print*,'trust_region_step_w_expected_e sent the message : Exit'
+              exit
+          endif
+
+          ! 1D tmp -> 2D tmp 
+          call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x)
+
+          ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size)
+          call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation)
+
+          if (enforce_step_cancellation) then
+            print*, 'Forces the step cancellation, too large error in the rotation matrix'
+            rho = 0d0
+            cycle
+          endif
+
+          ! tmp_R to R, subspace to full space
+          call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R)
+
+          ! Rotation of the MOs
+          call apply_mo_rotation(R, prev_mos)
+
+          ! touch mo_coef
+          call clear_mo_map ! Only if you are using the bi-electronic integrals
+          ! mo_coef becomes valid
+          ! And avoid the recomputation of the providers which depend of mo_coef
+          TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+
+          ! To update the other parameters if needed
+          call #update_parameters()
+
+          ! To enforce the program to provide new criterion after the update
+          ! of the parameters
+          FREE C_PROVIDER
+          PROVIDE C_PROVIDER
+          criterion = C_PROVIDER
+
+          ! Criterion -> step accepted or rejected 
+          call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step)
+
+          ! Cancellation of the step ?
+          if (cancel_step) then
+              ! Replacement by the previous MOs
+              mo_coef = prev_mos
+              ! call save_mos() ! depends of the time for 1 iteration
+
+              ! No need to clear_mo_map since we don't recompute the gradient and the hessian
+              ! mo_coef becomes valid
+              ! Avoid the recomputation of the providers which depend of mo_coef
+              TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER
+          else
+              ! The step is accepted:
+              ! criterion -> prev criterion
+
+              ! The replacement "criterion -> prev criterion" is already done
+              ! in trust_region_rho, so if the criterion does not have a reason
+              ! to change, it will change nothing for the criterion and will 
+              ! force the program to provide the new hessian, gradient and 
+              ! convergence criterion for the next iteration.
+              ! But in the case of orbital optimization we diagonalize the CI 
+              ! matrix after the "FREE" statement, so the criterion will change
+      
+              FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+              PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+              prev_criterion = C_PROVIDER
+              
+          endif      
+
+          nb_sub_iter = nb_sub_iter + 1
+      enddo
+
+      ! call save_mos() ! depends of the time for 1 iteration
+
+      ! To exit the external loop if must_exit = .True.
+      if (must_exit) then
+          exit
+      endif 
+
+      ! Step accepted, nb iteration + 1
+      nb_iter = nb_iter + 1
+
+      ! Provide the convergence criterion
+      ! Provide the gradient and the hessian for the next iteration
+      PROVIDE cc_PROVIDER
+
+      ! To exit
+      if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then
+        not_converged = .False.
+      endif
+
+      if (nb_iter > optimization_max_nb_iter) then
+        not_converged = .False.
+      endif
+
+      if (delta < thresh_delta) then
+        not_converged = .False.
+      endif
+
+  enddo
+
+  ! Save the final MOs
+  call save_mos()
+
+  ! Diagonalization of the hessian 
+  ! (To see the eigenvalues at the end of the optimization)
+  call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
+  
+  deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos)
+
+end
+#+END_SRC
+
+** Cartesian version
+#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt
+subroutine algo_trust_cartesian_template(tmp_n)
+
+  implicit none
+
+  ! Variables
+
+  ! In
+  integer, intent(in) :: tmp_n
+  
+  ! Out
+  ! Rien ou un truc pour savoir si ça c'est bien passé
+  
+  ! Internal
+  double precision, allocatable :: e_val(:), W(:,:), tmp_x(:)
+  double precision              :: criterion, prev_criterion, criterion_model
+  double precision              :: delta, rho
+  logical                       :: not_converged, cancel_step, must_exit
+  integer                       :: nb_iter, nb_sub_iter
+  integer                       :: i,j
+
+  allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n))
+
+  PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER
+
+  ! Initialization
+  delta = 0d0 
+  nb_iter = 0 ! Must start at 0 !!!
+  rho = 0.5d0 ! Must start at 0.5 
+  not_converged = .True. ! Must be true
+
+  ! Compute the criterion before the loop
+  prev_criterion = C_PROVIDER
+
+  do while (not_converged)
+
+      print*,''
+      print*,'******************'
+      print*,'Iteration', nb_iter
+      print*,'******************'
+      print*,''
+
+      if (nb_iter > 0) then
+        PROVIDE H_PROVIDER g_PROVIDER
+      endif
+
+      ! Diagonalization of the hessian 
+      call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
+
+      cancel_step = .True. ! To enter in the loop just after
+      nb_sub_iter = 0
+
+      ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+      do while (cancel_step)
+
+          print*,'-----------------------------'
+          print*,'Iteration:', nb_iter
+          print*,'Sub iteration:', nb_sub_iter
+          print*,'-----------------------------'
+
+          ! Hessian,gradient,Criterion -> x 
+          call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, &
+               prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
+
+          if (must_exit) then
+              ! if step_in_trust_region sets must_exit on true for numerical reasons
+              print*,'trust_region_step_w_expected_e sent the message : Exit'
+              exit
+          endif
+
+          ! New coordinates, check the sign 
+          X_PROVIDER = X_PROVIDER - tmp_x
+
+          ! touch X_PROVIDER
+          TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER 
+
+          ! To update the other parameters if needed
+          call #update_parameters()
+
+          ! New criterion
+          PROVIDE C_PROVIDER ! Unnecessary
+          criterion = C_PROVIDER
+
+          ! Criterion -> step accepted or rejected 
+          call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step)
+
+          ! Cancel the previous step
+          if (cancel_step) then
+              ! Replacement by the previous coordinates, check the sign 
+              X_PROVIDER = X_PROVIDER + tmp_x
+
+              ! Avoid the recomputation of the hessian and the gradient
+              TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER
+          endif      
+
+          nb_sub_iter = nb_sub_iter + 1
+      enddo
+
+      ! To exit the external loop if must_exit = .True.
+      if (must_exit) then
+          exit
+      endif 
+
+      ! Step accepted, nb iteration + 1
+      nb_iter = nb_iter + 1
+
+      PROVIDE cc_PROVIDER
+
+      ! To exit
+      if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then
+        not_converged = .False.
+      endif
+
+      if (nb_iter > optimization_max_nb_iter) then
+        not_converged = .False.
+      endif
+
+      if (delta < thresh_delta) then
+        not_converged = .False.
+      endif
+
+  enddo
+  
+ deallocate(e_val, W, tmp_x)
+
+end
+#+END_SRC
+
+** Script template
+#+BEGIN_SRC bash :tangle script_template_mos.sh
+#!/bin/bash
+
+your_file=
+
+your_C_PROVIDER=
+your_H_PROVIDER=
+your_g_PROVIDER=
+your_cc_PROVIDER=
+
+sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file
+sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file
+sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file
+sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file
+#+END_SRC
+
+#+BEGIN_SRC bash :tangle script_template_xyz.sh
+#!/bin/bash
+
+your_file=
+
+your_C_PROVIDER=
+your_X_PROVIDER=
+your_H_PROVIDER=
+your_g_PROVIDER=
+your_cc_PROVIDER=
+
+sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file
+sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file
+sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file
+sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file
+sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file
+#+END_SRC
+
diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f
new file mode 100644
index 00000000..e274ec11
--- /dev/null
+++ b/src/utils_trust_region/apply_mo_rotation.irp.f
@@ -0,0 +1,85 @@
+! Apply MO rotation
+! Subroutine to apply the rotation matrix to the coefficients of the
+! MOs.
+
+! New MOs = Old MOs . Rotation matrix
+
+! *Compute the new MOs with the previous MOs and a rotation matrix*
+
+! Provided:
+! | mo_num                 | integer          | number of MOs           |
+! | ao_num                 | integer          | number of AOs           |
+! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs |
+
+! Intent in:
+! | R(mo_num,mo_num) | double precision | rotation matrix |
+
+! Intent out:
+! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation |
+
+! Internal:
+! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation |
+! | i,j                    | integer          | indexes                      |
+
+subroutine apply_mo_rotation(R,prev_mos)
+  
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the new MOs knowing the rotation matrix
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  double precision, intent(in)  :: R(mo_num,mo_num)
+
+  ! out 
+  double precision, intent(out) :: prev_mos(ao_num,mo_num)
+  
+  ! internal
+  double precision, allocatable :: new_mos(:,:)
+  integer                       :: i,j
+  double precision              :: t1,t2,t3
+
+  print*,''
+  print*,'---apply_mo_rotation---'
+
+  call wall_time(t1)  
+
+  ! Allocation
+  allocate(new_mos(ao_num,mo_num))
+  
+  ! Calculation
+
+  ! Product of old MOs (mo_coef) by Rotation matrix (R) 
+  call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1))
+
+  prev_mos = mo_coef
+  mo_coef = new_mos
+
+  !if (debug) then  
+  !  print*,'New mo_coef : '
+  !  do i = 1, mo_num
+  !    write(*,'(100(F10.5))') mo_coef(i,:)
+  !  enddo
+  !endif
+
+  ! Save the new MOs and change the label
+  mo_label = 'MCSCF'
+  !call save_mos
+  call ezfio_set_determinants_mo_label(mo_label)
+  
+  !print*,'Done, MOs saved'
+
+  ! Deallocation, end
+  deallocate(new_mos)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in apply mo rotation:', t3
+  print*,'---End apply_mo_rotation---'
+
+end subroutine
diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/apply_mo_rotation.org
new file mode 100644
index 00000000..918581b7
--- /dev/null
+++ b/src/utils_trust_region/apply_mo_rotation.org
@@ -0,0 +1,86 @@
+* Apply MO rotation
+Subroutine to apply the rotation matrix to the coefficients of the
+MOs.
+
+New MOs = Old MOs . Rotation matrix
+
+*Compute the new MOs with the previous MOs and a rotation matrix*
+
+Provided:
+| mo_num                 | integer          | number of MOs           |
+| ao_num                 | integer          | number of AOs           |
+| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs |
+
+Intent in:
+| R(mo_num,mo_num) | double precision | rotation matrix |
+
+Intent out:
+| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation |
+
+Internal:
+| new_mos(ao_num,mo_num) | double precision | MOs after the rotation |
+| i,j                    | integer          | indexes                      |
+#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f
+subroutine apply_mo_rotation(R,prev_mos)
+  
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the new MOs knowing the rotation matrix
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  double precision, intent(in)  :: R(mo_num,mo_num)
+
+  ! out 
+  double precision, intent(out) :: prev_mos(ao_num,mo_num)
+  
+  ! internal
+  double precision, allocatable :: new_mos(:,:)
+  integer                       :: i,j
+  double precision              :: t1,t2,t3
+
+  print*,''
+  print*,'---apply_mo_rotation---'
+
+  call wall_time(t1)  
+
+  ! Allocation
+  allocate(new_mos(ao_num,mo_num))
+  
+  ! Calculation
+
+  ! Product of old MOs (mo_coef) by Rotation matrix (R) 
+  call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1))
+
+  prev_mos = mo_coef
+  mo_coef = new_mos
+
+  !if (debug) then  
+  !  print*,'New mo_coef : '
+  !  do i = 1, mo_num
+  !    write(*,'(100(F10.5))') mo_coef(i,:)
+  !  enddo
+  !endif
+
+  ! Save the new MOs and change the label
+  mo_label = 'MCSCF'
+  !call save_mos
+  call ezfio_set_determinants_mo_label(mo_label)
+  
+  !print*,'Done, MOs saved'
+
+  ! Deallocation, end
+  deallocate(new_mos)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in apply mo rotation:', t3
+  print*,'---End apply_mo_rotation---'
+
+end subroutine
+#+END_SRC
diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f
new file mode 100644
index 00000000..35e12232
--- /dev/null
+++ b/src/utils_trust_region/mat_to_vec_index.irp.f
@@ -0,0 +1,61 @@
+! Matrix to vector index
+
+! *Compute the index i of a vector element from the indexes p,q of a
+! matrix element*
+
+! Lower diagonal matrix (p,q), p > q -> vector (i)
+
+! If a matrix is antisymmetric it can be reshaped as a vector. And the
+! vector can be reshaped as an antisymmetric matrix
+
+! \begin{align*}
+! \begin{pmatrix}
+! 0 & -1 & -2 & -4 \\
+! 1 & 0  & -3 & -5 \\
+! 2 & 3 & 0  & -6  \\
+! 4 & 5 & 6 & 0
+! \end{pmatrix}
+! \Leftrightarrow
+! \begin{pmatrix}
+! 1 & 2 & 3 & 4 & 5 & 6
+! \end{pmatrix}
+! \end{align*}
+
+! !!! Here the algorithm only work for the lower diagonal !!!
+
+! Input:
+! | p,q | integer | indexes of a matrix element in the lower diagonal |
+! |     |         | p > q, q -> column                                |
+! |     |         | p -> row,                                         |
+! |     |         | q -> column                                       |
+
+! Input:
+! | i | integer | corresponding index in the vector |
+
+
+subroutine mat_to_vec_index(p,q,i)
+
+  include 'pi.h'
+
+  implicit none
+  
+  ! Variables
+  
+  ! in
+  integer, intent(in) :: p,q
+  
+  ! out
+  integer, intent(out) :: i 
+
+  ! internal
+  integer :: a,b
+  double precision :: da
+
+  ! Calculation
+ 
+  a = p-1
+  b = a*(a-1)/2
+  
+  i = q+b
+
+end subroutine
diff --git a/src/utils_trust_region/mat_to_vec_index.org b/src/utils_trust_region/mat_to_vec_index.org
new file mode 100644
index 00000000..50840584
--- /dev/null
+++ b/src/utils_trust_region/mat_to_vec_index.org
@@ -0,0 +1,63 @@
+* Matrix to vector index
+
+*Compute the index i of a vector element from the indexes p,q of a
+matrix element*
+
+Lower diagonal matrix (p,q), p > q -> vector (i)
+
+If a matrix is antisymmetric it can be reshaped as a vector. And the
+vector can be reshaped as an antisymmetric matrix
+
+\begin{align*}
+\begin{pmatrix}
+0 & -1 & -2 & -4 \\
+1 & 0  & -3 & -5 \\
+2 & 3 & 0  & -6  \\
+4 & 5 & 6 & 0
+\end{pmatrix}
+\Leftrightarrow
+\begin{pmatrix}
+1 & 2 & 3 & 4 & 5 & 6
+\end{pmatrix}
+\end{align*}
+
+!!! Here the algorithm only work for the lower diagonal !!!
+
+Input:
+| p,q | integer | indexes of a matrix element in the lower diagonal |
+|     |         | p > q, q -> column                                |
+|     |         | p -> row,                                         |
+|     |         | q -> column                                       |
+
+Input:
+| i | integer | corresponding index in the vector |
+
+#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f
+subroutine mat_to_vec_index(p,q,i)
+
+  include 'pi.h'
+
+  implicit none
+  
+  ! Variables
+  
+  ! in
+  integer, intent(in) :: p,q
+  
+  ! out
+  integer, intent(out) :: i 
+
+  ! internal
+  integer :: a,b
+  double precision :: da
+
+  ! Calculation
+ 
+  a = p-1
+  b = a*(a-1)/2
+  
+  i = q+b
+
+end subroutine
+#+END_SRC
+
diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h
new file mode 100644
index 00000000..bbfabfec
--- /dev/null
+++ b/src/utils_trust_region/pi.h
@@ -0,0 +1,2 @@
+  !logical, parameter :: debug=.False.
+  double precision, parameter :: pi = 3.1415926535897932d0
diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f
new file mode 100644
index 00000000..4738fd67
--- /dev/null
+++ b/src/utils_trust_region/rotation_matrix.irp.f
@@ -0,0 +1,443 @@
+! Rotation matrix
+
+! *Build a rotation matrix from an antisymmetric matrix*
+
+! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as :
+! $$
+! \textbf{R}=\exp(\textbf{A})
+! $$
+
+! So :
+! \begin{align*}
+! \textbf{R}=& \exp(\textbf{A}) \\
+! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\
+! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A}
+! \end{align*}
+
+! With :
+! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$
+! $\tau$ : $\sqrt{-x}$
+! $x$ : eigenvalues of $\textbf{A}^2$
+
+! Input:
+! | A(n,n) | double precision | antisymmetric matrix                                            |
+! | n      | integer          | number of columns of the A matrix                               |
+! | LDA    | integer          | specifies the leading dimension of A, must be at least max(1,n) |
+! | LDR    | integer          | specifies the leading dimension of R, must be at least max(1,n) |
+
+! Output:
+! | R(n,n) | double precision | Rotation matrix                                      |
+! | info   | integer          | if info = 0, the execution is successful             |
+! |        |                  | if info = k, the k-th parameter has an illegal value |
+! |        |                  | if info = -k, the algorithm failed                   |
+
+! Internal:
+! | B(n,n)        | double precision | B = A.A                                                       |
+! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork)                 |
+! | lwork         | integer          | dimension of the syev work array >= max(1, 3n-1)              |
+! | W(n,n)        | double precision | eigenvectors of B                                             |
+! | e_val(n)      | double precision | eigenvalues of B                                              |
+! | m_diag(n,n)   | double precision | diagonal matrix with the eigenvalues of B                     |
+! | cos_tau(n,n)  | double precision | diagonal matrix with cos(tau) values                          |
+! | sin_tau(n,n)  | double precision | diagonal matrix with sin cos(tau) values                      |
+! | tau_m1(n,n)   | double precision | diagonal matrix with (tau)^-1 values                          |
+! | part_1(n,n)   | double precision | matrix W.cos_tau.W^t                                          |
+! | part_1a(n,n)  | double precision | matrix cos_tau.W^t                                            |
+! | part_2(n,n)   | double precision | matrix W.tau_m1.sin_tau.W^t.A                                 |
+! | part_2a(n,n)  | double precision | matrix W^t.A                                                  |
+! | part_2b(n,n)  | double precision | matrix sin_tau.W^t.A                                          |
+! | part_2c(n,n)  | double precision | matrix tau_m1.sin_tau.W^t.A                                   |
+! | RR_t(n,n)     | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 |
+! | norm          | integer          | norm of R.R^t-1, must be equal to 0                           |
+! | i,j           | integer          | indexes                                                       |
+
+! Functions:
+! | dnrm2  | double precision | Lapack function, compute the norm of a matrix |
+! | disnan | logical          | Lapack function, check if an element is NaN   |
+
+
+
+subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Rotation matrix to rotate the molecular orbitals.
+  ! If the rotation is too large the transformation is not unitary and must be cancelled.
+  END_DOC
+
+  include 'pi.h'
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n,LDA,LDR
+  double precision, intent(inout) :: A(LDA,n)
+
+  ! out
+  double precision, intent(out)   :: R(LDR,n)
+  integer, intent(out)            :: info
+  logical, intent(out)            :: enforce_step_cancellation
+
+  ! internal
+  double precision, allocatable   :: B(:,:) 
+  double precision, allocatable   :: work(:,:) 
+  double precision, allocatable   :: W(:,:), e_val(:)
+  double precision, allocatable   :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:)
+  double precision, allocatable   :: part_1(:,:),part_1a(:,:)
+  double precision, allocatable   :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:)
+  double precision, allocatable   :: RR_t(:,:)
+  integer                         :: i,j
+  integer                         :: info2, lwork ! for dsyev
+  double precision                :: norm, max_elem, max_elem_A, t1,t2,t3
+
+  ! function
+  double precision              :: dnrm2
+  logical                       :: disnan
+
+  print*,''
+  print*,'---rotation_matrix---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(B(n,n))
+  allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n))
+  allocate(W(n,n),e_val(n))
+  allocate(part_1(n,n),part_1a(n,n))
+  allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n))
+  allocate(RR_t(n,n))
+
+! Pre-conditions
+
+! Initialization
+info=0
+enforce_step_cancellation = .False.
+
+! Size of matrix A must be at least 1 by 1
+if (n<1) then
+  info = 3
+  print*, 'WARNING: invalid parameter 5'
+  print*, 'n<1'
+  return
+endif
+
+! Leading dimension of A must be >= n
+if (LDA < n) then
+  info = 25
+  print*, 'WARNING: invalid parameter 2 or 5'
+  print*, 'LDA < n'
+  return
+endif
+
+! Leading dimension of A must be >= n
+if (LDR < n) then
+  info = 4
+  print*, 'WARNING: invalid parameter 4'
+  print*, 'LDR < n'
+  return
+endif
+
+! Matrix elements of A must by non-NaN
+do j = 1, n
+  do i = 1, n
+    if (disnan(A(i,j))) then
+      info=1
+      print*, 'WARNING: invalid parameter 1'
+      print*, 'NaN element in A matrix'
+      return
+    endif
+  enddo
+enddo
+
+do i = 1, n
+  if (A(i,i) /= 0d0) then
+    print*, 'WARNING: matrix A is not antisymmetric'
+    print*, 'Non 0 element on the diagonal', i, A(i,i)
+    call ABORT
+  endif
+enddo
+
+do j = 1, n
+  do i = 1, n
+    if (A(i,j)+A(j,i)>1d-16) then
+      print*, 'WANRING: matrix A is not antisymmetric'
+      print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i)
+      print*, 'diff:', A(i,j)+A(j,i)
+      call ABORT 
+    endif
+  enddo
+enddo
+
+! Fix for too big elements ! bad idea better to cancel if the error is too big
+!do j = 1, n
+!  do i = 1, n
+!    A(i,j) = mod(A(i,j),2d0*pi)
+!    if (dabs(A(i,j)) > pi) then
+!      A(i,j) = 0d0
+!    endif
+!  enddo
+!enddo
+
+max_elem_A = 0d0
+do j = 1, n
+  do i = 1, n
+    if (ABS(A(i,j)) > ABS(max_elem_A)) then
+      max_elem_A = A(i,j)
+    endif
+  enddo
+enddo
+print*,'max element in A', max_elem_A
+
+if (ABS(max_elem_A) > 2 * pi) then
+   print*,''
+   print*,'WARNING: ABS(max_elem_A) > 2 pi '
+   print*,''
+endif
+
+! B=A.A
+!     - Calculation of the matrix $\textbf{B} = \textbf{A}^2$
+!     - Diagonalization of $\textbf{B}$ 
+!     W, the eigenvectors
+!     e_val, the eigenvalues
+
+
+! Compute B=A.A
+
+call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1))
+
+! Copy B in W, diagonalization will put the eigenvectors in W
+W=B
+
+! Diagonalization of B
+! Eigenvalues -> e_val
+! Eigenvectors -> W
+lwork = 3*n-1
+allocate(work(lwork,n))
+
+print*,'Starting diagonalization ...'
+
+call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2)
+
+deallocate(work)
+
+if (info2 == 0) then
+   print*, 'Diagonalization : Done'
+elseif (info2 < 0) then
+   print*, 'WARNING: error in the diagonalization'
+   print*, 'Illegal value of the ', info2,'-th parameter'
+else
+   print*, "WARNING: Diagonalization failed to converge"
+endif
+
+! Tau^-1, cos(tau), sin(tau)
+!     $$\tau = \sqrt{-x}$$
+!     - Calculation of $\cos(\tau)$  $\Leftrightarrow$ $\cos(\sqrt{-x})$
+!     - Calculation of $\sin(\tau)$  $\Leftrightarrow$ $\sin(\sqrt{-x})$
+!     - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$
+!     These matrices are diagonals
+
+! Diagonal matrix m_diag
+do j = 1, n
+   if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems
+      e_val(j) = 0.d0
+   else
+      e_val(j) = - e_val(j)
+   endif
+enddo
+
+m_diag = 0.d0
+do i = 1, n
+   m_diag(i,i) = e_val(i)
+enddo
+
+! cos_tau
+do j = 1, n
+   do i = 1, n
+      if (i==j) then
+         cos_tau(i,j) = dcos(dsqrt(e_val(i)))
+      else
+         cos_tau(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+! sin_tau
+do j = 1, n
+   do i = 1, n
+      if (i==j) then
+         sin_tau(i,j) = dsin(dsqrt(e_val(i)))
+      else
+         sin_tau(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+! Debug, display the cos_tau and sin_tau matrix
+!if (debug) then
+!   print*, 'cos_tau'
+!   do i = 1, n
+!      print*, cos_tau(i,:)
+!   enddo
+!   print*, 'sin_tau'
+!   do i = 1, n
+!      print*, sin_tau(i,:)
+!   enddo
+!endif
+
+! tau^-1
+do j = 1, n
+   do i = 1, n
+      if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small
+         tau_m1(i,j) = 1d0/(dsqrt(e_val(i)))
+      else
+         tau_m1(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+max_elem = 0d0
+do i = 1, n
+   if (ABS(tau_m1(i,i)) > ABS(max_elem)) then
+      max_elem = tau_m1(i,i)
+   endif
+enddo
+print*,'max elem tau^-1:', max_elem
+
+! Debug
+!print*,'eigenvalues:'
+!do i = 1, n 
+!  print*, e_val(i)
+!enddo
+
+!Debug, display tau^-1
+!if (debug) then
+!   print*, 'tau^-1'
+!   do i = 1, n
+!      print*,tau_m1(i,:)
+!   enddo
+!endif
+
+! Rotation matrix 
+!     \begin{align*}
+!     \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+!     \end{align*}
+!     \begin{align*}
+!     \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger}
+!     \end{align*}
+!     \begin{align*}
+!     \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+!     \end{align*}
+
+!     First:
+!     part_1 = dgemm(W, dgemm(cos_tau, W^t))
+!     part_1a = dgemm(cos_tau, W^t)
+!     part_1 = dgemm(W, part_1a)
+!     And:
+!     part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A))))
+!     part_2a = dgemm(W^t, A)
+!     part_2b = dgemm(sin_tau, part_2a)
+!     part_2c = dgemm(tau_m1, part_2b)
+!     part_2 = dgemm(W, part_2c)
+!     Finally:
+!     Rotation matrix, R = part_1+part_2
+
+!     If $R$ is a rotation matrix:
+!     $R.R^T=R^T.R=\textbf{1}$
+
+! part_1
+call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1))
+call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1))
+
+! part_2
+call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1))
+call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1))
+call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1))
+call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1))
+
+! Rotation matrix R
+R = part_1 + part_2
+
+! Matrix check
+! R.R^t and R^t.R must be equal to identity matrix
+do j = 1, n
+   do i=1,n
+      if (i==j) then
+         RR_t(i,j) = 1d0
+      else
+         RR_t(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1))
+
+norm = dnrm2(n*n,RR_t,1)
+print*, 'Rotation matrix check, norm R.R^T = ', norm 
+
+! Debug
+!if (debug) then
+!   print*, 'RR_t'
+!   do i = 1, n
+!      print*, RR_t(i,:)
+!   enddo
+!endif
+
+! Post conditions
+
+! Check if R.R^T=1
+max_elem = 0d0
+do j = 1, n
+   do i = 1, n 
+      if (ABS(RR_t(i,j)) > ABS(max_elem)) then
+         max_elem = RR_t(i,j)
+      endif
+   enddo
+enddo
+
+print*, 'Max error in R.R^T:', max_elem
+print*, 'e_val(1):', e_val(1)
+print*, 'e_val(n):', e_val(n)
+print*, 'max elem in A:', max_elem_A
+
+if (ABS(max_elem) > 1d-12) then
+  print*, 'WARNING: max error in R.R^T > 1d-12'
+  print*, 'Enforce the step cancellation'
+  enforce_step_cancellation = .True.
+endif
+
+! Matrix elements of R must by non-NaN
+do j = 1,n
+   do i = 1,LDR
+      if (disnan(R(i,j))) then
+         info = 666
+         print*, 'NaN in rotation matrix'
+         call ABORT
+      endif
+   enddo
+enddo
+
+! Display
+!if (debug) then
+!   print*,'Rotation matrix :'
+!   do i = 1, n
+!      write(*,'(100(F10.5))') R(i,:)
+!   enddo
+!endif
+
+! Deallocation, end
+
+deallocate(B)
+  deallocate(m_diag,cos_tau,sin_tau,tau_m1)
+  deallocate(W,e_val)
+  deallocate(part_1,part_1a)
+  deallocate(part_2,part_2a,part_2b,part_2c)
+  deallocate(RR_t)
+
+  call wall_time(t2)
+  t3 = t2-t1
+  print*,'Time in rotation matrix:', t3
+
+  print*,'---End rotation_matrix---'
+
+end subroutine
diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/rotation_matrix.org
new file mode 100644
index 00000000..73ba0298
--- /dev/null
+++ b/src/utils_trust_region/rotation_matrix.org
@@ -0,0 +1,454 @@
+* Rotation matrix
+
+*Build a rotation matrix from an antisymmetric matrix*
+
+Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as :
+$$
+\textbf{R}=\exp(\textbf{A})
+$$
+
+So :
+\begin{align*}
+\textbf{R}=& \exp(\textbf{A}) \\
+=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\
+=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A}
+\end{align*}
+
+With :
+$\textbf{W}$ : eigenvectors of $\textbf{A}^2$
+$\tau$ : $\sqrt{-x}$
+$x$ : eigenvalues of $\textbf{A}^2$
+
+Input:
+| A(n,n) | double precision | antisymmetric matrix                                            |
+| n      | integer          | number of columns of the A matrix                               |
+| LDA    | integer          | specifies the leading dimension of A, must be at least max(1,n) |
+| LDR    | integer          | specifies the leading dimension of R, must be at least max(1,n) |
+
+Output:
+| R(n,n) | double precision | Rotation matrix                                      |
+| info   | integer          | if info = 0, the execution is successful             |
+|        |                  | if info = k, the k-th parameter has an illegal value |
+|        |                  | if info = -k, the algorithm failed                   |
+
+Internal:
+| B(n,n)        | double precision | B = A.A                                                       |
+| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork)                 |
+| lwork         | integer          | dimension of the syev work array >= max(1, 3n-1)              |
+| W(n,n)        | double precision | eigenvectors of B                                             |
+| e_val(n)      | double precision | eigenvalues of B                                              |
+| m_diag(n,n)   | double precision | diagonal matrix with the eigenvalues of B                     |
+| cos_tau(n,n)  | double precision | diagonal matrix with cos(tau) values                          |
+| sin_tau(n,n)  | double precision | diagonal matrix with sin cos(tau) values                      |
+| tau_m1(n,n)   | double precision | diagonal matrix with (tau)^-1 values                          |
+| part_1(n,n)   | double precision | matrix W.cos_tau.W^t                                          |
+| part_1a(n,n)  | double precision | matrix cos_tau.W^t                                            |
+| part_2(n,n)   | double precision | matrix W.tau_m1.sin_tau.W^t.A                                 |
+| part_2a(n,n)  | double precision | matrix W^t.A                                                  |
+| part_2b(n,n)  | double precision | matrix sin_tau.W^t.A                                          |
+| part_2c(n,n)  | double precision | matrix tau_m1.sin_tau.W^t.A                                   |
+| RR_t(n,n)     | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 |
+| norm          | integer          | norm of R.R^t-1, must be equal to 0                           |
+| i,j           | integer          | indexes                                                       |
+
+Functions:
+| dnrm2  | double precision | Lapack function, compute the norm of a matrix |
+| disnan | logical          | Lapack function, check if an element is NaN   |
+
+
+#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Rotation matrix to rotate the molecular orbitals.
+  ! If the rotation is too large the transformation is not unitary and must be cancelled.
+  END_DOC
+
+  include 'pi.h'
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n,LDA,LDR
+  double precision, intent(inout) :: A(LDA,n)
+
+  ! out
+  double precision, intent(out)   :: R(LDR,n)
+  integer, intent(out)            :: info
+  logical, intent(out)            :: enforce_step_cancellation
+
+  ! internal
+  double precision, allocatable   :: B(:,:) 
+  double precision, allocatable   :: work(:,:) 
+  double precision, allocatable   :: W(:,:), e_val(:)
+  double precision, allocatable   :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:)
+  double precision, allocatable   :: part_1(:,:),part_1a(:,:)
+  double precision, allocatable   :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:)
+  double precision, allocatable   :: RR_t(:,:)
+  integer                         :: i,j
+  integer                         :: info2, lwork ! for dsyev
+  double precision                :: norm, max_elem, max_elem_A, t1,t2,t3
+
+  ! function
+  double precision              :: dnrm2
+  logical                       :: disnan
+
+  print*,''
+  print*,'---rotation_matrix---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(B(n,n))
+  allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n))
+  allocate(W(n,n),e_val(n))
+  allocate(part_1(n,n),part_1a(n,n))
+  allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n))
+  allocate(RR_t(n,n))
+#+END_SRC
+
+** Pre-conditions
+#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Initialization
+  info=0
+  enforce_step_cancellation = .False.
+
+  ! Size of matrix A must be at least 1 by 1
+  if (n<1) then
+    info = 3
+    print*, 'WARNING: invalid parameter 5'
+    print*, 'n<1'
+    return
+  endif
+
+  ! Leading dimension of A must be >= n
+  if (LDA < n) then
+    info = 25
+    print*, 'WARNING: invalid parameter 2 or 5'
+    print*, 'LDA < n'
+    return
+  endif
+
+  ! Leading dimension of A must be >= n
+  if (LDR < n) then
+    info = 4
+    print*, 'WARNING: invalid parameter 4'
+    print*, 'LDR < n'
+    return
+  endif
+
+  ! Matrix elements of A must by non-NaN
+  do j = 1, n
+    do i = 1, n
+      if (disnan(A(i,j))) then
+        info=1
+        print*, 'WARNING: invalid parameter 1'
+        print*, 'NaN element in A matrix'
+        return
+      endif
+    enddo
+  enddo
+
+  do i = 1, n
+    if (A(i,i) /= 0d0) then
+      print*, 'WARNING: matrix A is not antisymmetric'
+      print*, 'Non 0 element on the diagonal', i, A(i,i)
+      call ABORT
+    endif
+  enddo
+
+  do j = 1, n
+    do i = 1, n
+      if (A(i,j)+A(j,i)>1d-16) then
+        print*, 'WANRING: matrix A is not antisymmetric'
+        print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i)
+        print*, 'diff:', A(i,j)+A(j,i)
+        call ABORT 
+      endif
+    enddo
+  enddo
+
+  ! Fix for too big elements ! bad idea better to cancel if the error is too big
+  !do j = 1, n
+  !  do i = 1, n
+  !    A(i,j) = mod(A(i,j),2d0*pi)
+  !    if (dabs(A(i,j)) > pi) then
+  !      A(i,j) = 0d0
+  !    endif
+  !  enddo
+  !enddo
+
+  max_elem_A = 0d0
+  do j = 1, n
+    do i = 1, n
+      if (ABS(A(i,j)) > ABS(max_elem_A)) then
+        max_elem_A = A(i,j)
+      endif
+    enddo
+  enddo
+  print*,'max element in A', max_elem_A
+
+  if (ABS(max_elem_A) > 2 * pi) then
+     print*,''
+     print*,'WARNING: ABS(max_elem_A) > 2 pi '
+     print*,''
+  endif
+
+#+END_SRC
+
+** Calculations
+
+*** B=A.A
+    - Calculation of the matrix $\textbf{B} = \textbf{A}^2$
+    - Diagonalization of $\textbf{B}$ 
+    W, the eigenvectors
+    e_val, the eigenvalues
+
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Compute B=A.A
+
+  call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1))
+
+  ! Copy B in W, diagonalization will put the eigenvectors in W
+  W=B
+
+  ! Diagonalization of B
+  ! Eigenvalues -> e_val
+  ! Eigenvectors -> W
+  lwork = 3*n-1
+  allocate(work(lwork,n))
+
+  print*,'Starting diagonalization ...'
+
+  call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2)
+
+  deallocate(work)
+
+  if (info2 == 0) then
+     print*, 'Diagonalization : Done'
+  elseif (info2 < 0) then
+     print*, 'WARNING: error in the diagonalization'
+     print*, 'Illegal value of the ', info2,'-th parameter'
+  else
+     print*, "WARNING: Diagonalization failed to converge"
+  endif
+    #+END_SRC
+
+*** Tau^-1, cos(tau), sin(tau)
+    $$\tau = \sqrt{-x}$$
+    - Calculation of $\cos(\tau)$  $\Leftrightarrow$ $\cos(\sqrt{-x})$
+    - Calculation of $\sin(\tau)$  $\Leftrightarrow$ $\sin(\sqrt{-x})$
+    - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$
+    These matrices are diagonals
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Diagonal matrix m_diag
+  do j = 1, n
+     if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems
+        e_val(j) = 0.d0
+     else
+        e_val(j) = - e_val(j)
+     endif
+  enddo
+
+  m_diag = 0.d0
+  do i = 1, n
+     m_diag(i,i) = e_val(i)
+  enddo
+
+  ! cos_tau
+  do j = 1, n
+     do i = 1, n
+        if (i==j) then
+           cos_tau(i,j) = dcos(dsqrt(e_val(i)))
+        else
+           cos_tau(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  ! sin_tau
+  do j = 1, n
+     do i = 1, n
+        if (i==j) then
+           sin_tau(i,j) = dsin(dsqrt(e_val(i)))
+        else
+           sin_tau(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  ! Debug, display the cos_tau and sin_tau matrix
+  !if (debug) then
+  !   print*, 'cos_tau'
+  !   do i = 1, n
+  !      print*, cos_tau(i,:)
+  !   enddo
+  !   print*, 'sin_tau'
+  !   do i = 1, n
+  !      print*, sin_tau(i,:)
+  !   enddo
+  !endif
+
+  ! tau^-1
+  do j = 1, n
+     do i = 1, n
+        if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small
+           tau_m1(i,j) = 1d0/(dsqrt(e_val(i)))
+        else
+           tau_m1(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  max_elem = 0d0
+  do i = 1, n
+     if (ABS(tau_m1(i,i)) > ABS(max_elem)) then
+        max_elem = tau_m1(i,i)
+     endif
+  enddo
+  print*,'max elem tau^-1:', max_elem
+
+  ! Debug
+  !print*,'eigenvalues:'
+  !do i = 1, n 
+  !  print*, e_val(i)
+  !enddo
+
+  !Debug, display tau^-1
+  !if (debug) then
+  !   print*, 'tau^-1'
+  !   do i = 1, n
+  !      print*,tau_m1(i,:)
+  !   enddo
+  !endif
+    #+END_SRC
+
+*** Rotation matrix 
+    \begin{align*}
+    \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+    \end{align*}
+    \begin{align*}
+    \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger}
+    \end{align*}
+    \begin{align*}
+    \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+    \end{align*}
+
+    First:
+    part_1 = dgemm(W, dgemm(cos_tau, W^t))
+    part_1a = dgemm(cos_tau, W^t)
+    part_1 = dgemm(W, part_1a)
+    And:
+    part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A))))
+    part_2a = dgemm(W^t, A)
+    part_2b = dgemm(sin_tau, part_2a)
+    part_2c = dgemm(tau_m1, part_2b)
+    part_2 = dgemm(W, part_2c)
+    Finally:
+    Rotation matrix, R = part_1+part_2
+
+    If $R$ is a rotation matrix:
+    $R.R^T=R^T.R=\textbf{1}$
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! part_1
+  call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1))
+  call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1))
+
+  ! part_2
+  call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1))
+  call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1))
+  call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1))
+  call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1))
+
+  ! Rotation matrix R
+  R = part_1 + part_2
+
+  ! Matrix check
+  ! R.R^t and R^t.R must be equal to identity matrix
+  do j = 1, n
+     do i=1,n
+        if (i==j) then
+           RR_t(i,j) = 1d0
+        else
+           RR_t(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1))
+
+  norm = dnrm2(n*n,RR_t,1)
+  print*, 'Rotation matrix check, norm R.R^T = ', norm 
+
+  ! Debug
+  !if (debug) then
+  !   print*, 'RR_t'
+  !   do i = 1, n
+  !      print*, RR_t(i,:)
+  !   enddo
+  !endif
+    #+END_SRC
+
+*** Post conditions
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Check if R.R^T=1
+  max_elem = 0d0
+  do j = 1, n
+     do i = 1, n 
+        if (ABS(RR_t(i,j)) > ABS(max_elem)) then
+           max_elem = RR_t(i,j)
+        endif
+     enddo
+  enddo
+
+  print*, 'Max error in R.R^T:', max_elem
+  print*, 'e_val(1):', e_val(1)
+  print*, 'e_val(n):', e_val(n)
+  print*, 'max elem in A:', max_elem_A
+
+  if (ABS(max_elem) > 1d-12) then
+    print*, 'WARNING: max error in R.R^T > 1d-12'
+    print*, 'Enforce the step cancellation'
+    enforce_step_cancellation = .True.
+  endif
+
+  ! Matrix elements of R must by non-NaN
+  do j = 1,n
+     do i = 1,LDR
+        if (disnan(R(i,j))) then
+           info = 666
+           print*, 'NaN in rotation matrix'
+           call ABORT
+        endif
+     enddo
+  enddo
+
+  ! Display
+  !if (debug) then
+  !   print*,'Rotation matrix :'
+  !   do i = 1, n
+  !      write(*,'(100(F10.5))') R(i,:)
+  !   enddo
+  !endif
+    #+END_SRC
+
+** Deallocation, end
+   #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  deallocate(B)
+  deallocate(m_diag,cos_tau,sin_tau,tau_m1)
+  deallocate(W,e_val)
+  deallocate(part_1,part_1a)
+  deallocate(part_2,part_2a,part_2b,part_2c)
+  deallocate(RR_t)
+
+  call wall_time(t2)
+  t3 = t2-t1
+  print*,'Time in rotation matrix:', t3
+
+  print*,'---End rotation_matrix---'
+
+end subroutine
+   #+END_SRC
+
diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f
new file mode 100644
index 00000000..bdd1f6ba
--- /dev/null
+++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f
@@ -0,0 +1,64 @@
+! Rotation matrix in a subspace to rotation matrix in the full space
+
+! Usually, we are using a list of MOs, for exemple the active ones. When
+! we compute a rotation matrix to rotate the MOs, we just compute a
+! rotation matrix for these MOs in order to reduce the size of the
+! matrix which has to be computed. Since the computation of a rotation
+! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to
+! reuce the number of MOs involved.
+! After that we replace the rotation matrix in the full space by
+! building the elements of the rotation matrix in the full space from
+! the elements of the rotation matrix in the subspace and adding some 0
+! on the extradiagonal elements and some 1 on the diagonal elements,
+! for the MOs that are not involved in the rotation.
+
+! Provided:
+! | mo_num      | integer | Number of MOs                 |
+
+! Input:
+! | m           | integer          | Size of tmp_list, m <= mo_num   |
+! | tmp_list(m) | integer          | List of MOs                     |
+! | tmp_R(m,m)  | double precision | Rotation matrix in the space of |
+! |             |                  | the MOs containing by tmp_list  |
+
+! Output:
+! | R(mo_num,mo_num | double precision | Rotation matrix in the space |
+! |                 |                  | of all the MOs               |
+
+! Internal:
+! | i,j         | integer | indexes in the full space |
+! | tmp_i,tmp_j | integer | indexes in the subspace   |
+
+
+subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R)
+
+  BEGIN_DOC
+  ! Compute the full rotation matrix from a smaller one
+  END_DOC
+  
+  implicit none
+
+  ! in
+  integer, intent(in)           :: m, tmp_list(m)
+  double precision, intent(in)  :: tmp_R(m,m)
+  
+  ! out
+  double precision, intent(out) :: R(mo_num,mo_num)
+ 
+  ! internal
+  integer                       :: i,j,tmp_i,tmp_j
+
+  ! tmp_R to R, subspace to full space
+  R = 0d0
+  do i = 1, mo_num
+    R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital
+  enddo
+  do tmp_j = 1, m
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, m
+      i = tmp_list(tmp_i)
+      R(i,j) = tmp_R(tmp_i,tmp_j)
+    enddo
+  enddo
+ 
+end
diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/sub_to_full_rotation_matrix.org
new file mode 100644
index 00000000..16434dc8
--- /dev/null
+++ b/src/utils_trust_region/sub_to_full_rotation_matrix.org
@@ -0,0 +1,65 @@
+* Rotation matrix in a subspace to rotation matrix in the full space
+
+Usually, we are using a list of MOs, for exemple the active ones. When
+we compute a rotation matrix to rotate the MOs, we just compute a
+rotation matrix for these MOs in order to reduce the size of the
+matrix which has to be computed. Since the computation of a rotation
+matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to
+reuce the number of MOs involved.
+After that we replace the rotation matrix in the full space by
+building the elements of the rotation matrix in the full space from
+the elements of the rotation matrix in the subspace and adding some 0
+on the extradiagonal elements and some 1 on the diagonal elements,
+for the MOs that are not involved in the rotation.
+
+Provided:
+| mo_num      | integer | Number of MOs                 |
+
+Input:
+| m           | integer          | Size of tmp_list, m <= mo_num   |
+| tmp_list(m) | integer          | List of MOs                     |
+| tmp_R(m,m)  | double precision | Rotation matrix in the space of |
+|             |                  | the MOs containing by tmp_list  |
+
+Output:
+| R(mo_num,mo_num | double precision | Rotation matrix in the space |
+|                 |                  | of all the MOs               |
+
+Internal:
+| i,j         | integer | indexes in the full space |
+| tmp_i,tmp_j | integer | indexes in the subspace   |
+
+#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f 
+subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R)
+
+  BEGIN_DOC
+  ! Compute the full rotation matrix from a smaller one
+  END_DOC
+  
+  implicit none
+
+  ! in
+  integer, intent(in)           :: m, tmp_list(m)
+  double precision, intent(in)  :: tmp_R(m,m)
+  
+  ! out
+  double precision, intent(out) :: R(mo_num,mo_num)
+ 
+  ! internal
+  integer                       :: i,j,tmp_i,tmp_j
+
+  ! tmp_R to R, subspace to full space
+  R = 0d0
+  do i = 1, mo_num
+    R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital
+  enddo
+  do tmp_j = 1, m
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, m
+      i = tmp_list(tmp_i)
+      R(i,j) = tmp_R(tmp_i,tmp_j)
+    enddo
+  enddo
+ 
+end
+#+END_SRC
diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f
new file mode 100644
index 00000000..b7d849d1
--- /dev/null
+++ b/src/utils_trust_region/trust_region_expected_e.irp.f
@@ -0,0 +1,119 @@
+! Predicted energy : e_model
+
+! *Compute the energy predicted by the Taylor series*
+
+! The energy is predicted using a Taylor expansion truncated at te 2nd
+! order :
+
+! \begin{align*}
+! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2)
+! \end{align*}
+
+! Input:
+! | n           | integer          | m*(m-1)/2                |
+! | v_grad(n)   | double precision | gradient                 |
+! | H(n,n)      | double precision | hessian                  |
+! | x(n)        | double precision | Step in the trust region |
+! | prev_energy | double precision | previous energy          |
+
+! Output:
+! | e_model | double precision | predicted energy after the rotation of the MOs |
+
+! Internal:
+! | part_1  | double precision | v_grad^T.x    |
+! | part_2  | double precision | 1/2 . x^T.H.x |
+! | part_2a | double precision | H.x           |
+! | i,j     | integer          | indexes       |
+
+! Function:
+! | ddot | double precision | dot product (Lapack) |
+
+
+subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model)
+   
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the expected criterion/energy after the application of the step x
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: v_grad(n),H(n,n),x(n)
+  double precision, intent(in)  :: prev_energy
+
+  ! out
+  double precision, intent(out) :: e_model
+
+  ! internal
+  double precision              :: part_1, part_2, t1,t2,t3
+  double precision, allocatable :: part_2a(:)
+
+  integer                       :: i,j
+
+  !Function
+  double precision              :: ddot
+
+  print*,''
+  print*,'---Trust_e_model---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(part_2a(n))
+
+! Calculations
+
+! part_1 corresponds to the product g.x
+! part_2a corresponds to the product H.x
+! part_2 corresponds to the product 0.5*(x^T.H.x)
+
+! TODO: remove the dot products
+
+
+! Product v_grad.x
+  part_1 = ddot(n,v_grad,1,x,1)
+ 
+  !if (debug) then
+    print*,'g.x : ', part_1
+  !endif  
+
+  ! Product H.x
+  call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1)
+
+  ! Product 1/2 . x^T.H.x
+  part_2 = 0.5d0 * ddot(n,x,1,part_2a,1)
+
+  !if (debug) then
+    print*,'1/2*x^T.H.x : ', part_2 
+  !endif
+
+  print*,'prev_energy', prev_energy
+
+  ! Sum
+  e_model = prev_energy + part_1 + part_2
+
+  ! Writing the predicted energy
+  print*, 'Predicted energy after the rotation : ', e_model
+  print*, 'Previous energy - predicted energy:', prev_energy - e_model
+  
+  ! Can be deleted, already in another subroutine
+  if (DABS(prev_energy - e_model) < 1d-12 ) then 
+    print*,'WARNING: ABS(prev_energy - e_model) < 1d-12'
+  endif
+
+  ! Deallocation
+  deallocate(part_2a)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust e model:', t3
+
+  print*,'---End trust_e_model---'
+  print*,''
+ 
+end subroutine
diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/trust_region_expected_e.org
new file mode 100644
index 00000000..58c8f804
--- /dev/null
+++ b/src/utils_trust_region/trust_region_expected_e.org
@@ -0,0 +1,121 @@
+* Predicted energy : e_model
+
+*Compute the energy predicted by the Taylor series*
+
+The energy is predicted using a Taylor expansion truncated at te 2nd
+order :
+
+\begin{align*}
+E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2)
+\end{align*}
+
+Input:
+| n           | integer          | m*(m-1)/2                |
+| v_grad(n)   | double precision | gradient                 |
+| H(n,n)      | double precision | hessian                  |
+| x(n)        | double precision | Step in the trust region |
+| prev_energy | double precision | previous energy          |
+
+Output:
+| e_model | double precision | predicted energy after the rotation of the MOs |
+
+Internal:
+| part_1  | double precision | v_grad^T.x    |
+| part_2  | double precision | 1/2 . x^T.H.x |
+| part_2a | double precision | H.x           |
+| i,j     | integer          | indexes       |
+
+Function:
+| ddot | double precision | dot product (Lapack) |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f
+subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model)
+   
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the expected criterion/energy after the application of the step x
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: v_grad(n),H(n,n),x(n)
+  double precision, intent(in)  :: prev_energy
+
+  ! out
+  double precision, intent(out) :: e_model
+
+  ! internal
+  double precision              :: part_1, part_2, t1,t2,t3
+  double precision, allocatable :: part_2a(:)
+
+  integer                       :: i,j
+
+  !Function
+  double precision              :: ddot
+
+  print*,''
+  print*,'---Trust_e_model---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(part_2a(n))
+#+END_SRC
+
+** Calculations
+
+part_1 corresponds to the product g.x
+part_2a corresponds to the product H.x
+part_2 corresponds to the product 0.5*(x^T.H.x)
+
+TODO: remove the dot products
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f
+  ! Product v_grad.x
+  part_1 = ddot(n,v_grad,1,x,1)
+ 
+  !if (debug) then
+    print*,'g.x : ', part_1
+  !endif  
+
+  ! Product H.x
+  call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1)
+
+  ! Product 1/2 . x^T.H.x
+  part_2 = 0.5d0 * ddot(n,x,1,part_2a,1)
+
+  !if (debug) then
+    print*,'1/2*x^T.H.x : ', part_2 
+  !endif
+
+  print*,'prev_energy', prev_energy
+
+  ! Sum
+  e_model = prev_energy + part_1 + part_2
+
+  ! Writing the predicted energy
+  print*, 'Predicted energy after the rotation : ', e_model
+  print*, 'Previous energy - predicted energy:', prev_energy - e_model
+  
+  ! Can be deleted, already in another subroutine
+  if (DABS(prev_energy - e_model) < 1d-12 ) then 
+    print*,'WARNING: ABS(prev_energy - e_model) < 1d-12'
+  endif
+
+  ! Deallocation
+  deallocate(part_2a)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust e model:', t3
+
+  print*,'---End trust_e_model---'
+  print*,''
+ 
+end subroutine 
+#+END_SRC 
diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f
new file mode 100644
index 00000000..f71bb405
--- /dev/null
+++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f
@@ -0,0 +1,1655 @@
+! Newton's method to find the optimal lambda
+
+! *Compute the lambda value for the trust region*
+
+! This subroutine uses the Newton method in order to find the optimal
+! lambda. This constant is added on the diagonal of the hessian to shift
+! the eiganvalues. It has a double role:
+! - ensure that the resulting hessian is positive definite for the
+!   Newton method
+! - constrain the step in the trust region, i.e.,
+!   $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius
+!   of the trust region.
+! We search $\lambda$ which minimizes
+! \begin{align*}
+!   f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2
+! \end{align*}
+! or
+! \begin{align*}
+!   \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2
+! \end{align*}
+! and gives obviously 0 in both cases. \newline
+
+! There are several cases:
+! - If $\textbf{H}$ is positive definite the interval containing the
+!   solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). 
+! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot
+!   \textbf{g} \neq 0$ then the interval containing
+!   the solution is  $\lambda \in (-h_1, \infty)$.
+! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot
+!   \textbf{g} = 0$ then the interval containing the solution is
+!   $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| <
+!   10^{-12}$ are not computed, so the term where $i = 1$ is
+!   automatically removed and this case becomes similar to the previous one.
+
+! So to avoid numerical problems (cf. trust_region) we start the
+! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$,
+! with $\epsilon$ a little constant.
+! The research must be restricted to the interval containing the
+! solution. For that reason a little trust region in 1D is used.  
+
+! The Newton method to find the optimal $\lambda$ is :
+! \begin{align*}
+!   \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\
+! \end{align*}
+! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to
+! $\lambda$ at the l-th iteration,
+! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to
+! $\lambda$ at the l-th iteration.\newline
+
+! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1}
+! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as 
+! \begin{align*}
+!   y \leq \alpha
+! \end{align*}
+! with $\alpha$ a scalar representing the trust length (trust region in
+! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the
+! Taylor series truncated at the second order. Thus, if $y > \alpha$,
+! the constraint is applied as  
+! \begin{align*}
+!   y^* = \alpha \frac{y}{|y|}
+! \end{align*}
+! with $y^*$ the solution in the trust region. 
+
+! The size of the trust region evolves in function of $\rho$ as for the
+! trust region seen previously cf. trust_region, rho_model.
+! The prediction of the value of $f$ or $\tilde{f}$ is done using the
+! Taylor series truncated at the second order cf. "trust_region",
+! "trust_e_model". 
+
+! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 -
+! \Delta^2)^2$ with respect to $\lambda$ are:
+! \begin{align*}
+!   \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+!   = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
+!   \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+! \end{align*}
+! \begin{align*}
+! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+! \end{align*}
+
+! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 -
+! 1/\Delta^2)^2$ with respect to $\lambda$ are:
+! \begin{align*}
+!   \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+!        {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+!      - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+!        {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+!   &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+!        \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!       - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+! \end{align*}
+
+! \begin{align*}
+!   \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}
+!    {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+!   - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}
+!    {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+!   &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}
+!    {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!   - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}
+!    {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+! \end{align*}
+
+! Provided in qp_edit:
+! | thresh_rho_2          |
+! | thresh_cc             |
+! | nb_it_max_lambda      |
+! | version_lambda_search |
+! | nb_it_max_pre_search  |
+! see qp_edit for more details
+
+! Input:
+! | n          | integer          | m*(m-1)/2                  |
+! | e_val(n)   | double precision | eigenvalues of the hessian |
+! | tmp_wtg(n) | double precision | w_i^T.v_grad(i)            |
+! | delta      | double precision | delta for the trust region |
+
+! Output:
+! | lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step |
+! |        |                  | lambda > 0                                                               |
+
+! Internal:
+! | d1_N        | double precision | value of d1_norm_trust_region                                        |
+! | d2_N        | double precision | value of d2_norm_trust_region                                        |
+! | f_N         | double precision | value of f_norm_trust_region                                         |
+! | prev_f_N    | double precision | previous value of f_norm_trust_region                                |
+! | f_R         | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2               |
+! | prev_f_R    | double precision | previous value of f_R                                                |
+! | model       | double precision | predicted value of f_R from prev_f_R and y                           |
+! | d_1         | double precision | value of the first derivative                                        |
+! | d_2         | double precision | value of the second derivative                                       |
+! | y           | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda               |
+! | prev_lambda | double precision | previous value of lambda                                             |
+! | t1,t2,t3    | double precision | wall time                                                            |
+! | i           | integer          | index                                                                |
+! | epsilon     | double precision | little constant to avoid numerical problem                           |
+! | rho_2       | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R |
+! | version     | integer          | version of the root finding method                                   |
+
+! Function:
+! | d1_norm_trust_region         | double precision | first derivative with respect to lambda of  (norm(x)^2 - Delta^2)^2     |
+! | d2_norm_trust_region         | double precision | first derivative with respect to lambda of  (norm(x)^2 - Delta^2)^2     |
+! | d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of  (1/norm(x)^2 - 1/Delta^2)^2 |
+! | d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 |
+! | f_norm_trust_region          | double precision | value of norm(x)^2                                                      |
+
+
+
+subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Research the optimal lambda to constrain the step size in the trust region
+  END_DOC
+
+  implicit none
+  
+  ! Variables
+  
+  ! in
+  integer, intent(in)             :: n
+  double precision, intent(inout) :: e_val(n)
+  double precision, intent(in)    :: delta
+  double precision, intent(in)    :: tmp_wtg(n)
+
+  ! out
+  double precision, intent(out)   :: lambda
+
+  ! Internal
+  double precision                :: d1_N, d2_N, f_N, prev_f_N
+  double precision                :: prev_f_R, f_R
+  double precision                :: model
+  double precision                :: d_1, d_2
+  double precision                :: t1,t2,t3
+  integer                         :: i
+  double precision                :: epsilon
+  double precision                :: y
+  double precision                :: prev_lambda
+  double precision                :: rho_2
+  double precision                :: alpha
+  integer                         :: version
+
+  ! Functions
+  double precision                :: d1_norm_trust_region,d1_norm_trust_region_omp
+  double precision                :: d2_norm_trust_region, d2_norm_trust_region_omp
+  double precision                :: f_norm_trust_region, f_norm_trust_region_omp
+  double precision                :: d1_norm_inverse_trust_region
+  double precision                :: d2_norm_inverse_trust_region
+  double precision                :: d1_norm_inverse_trust_region_omp
+  double precision                :: d2_norm_inverse_trust_region_omp
+
+  print*,''
+  print*,'---Trust_newton---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! version_lambda_search
+  ! 1 -> ||x||^2 - delta^2 = 0,
+  ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better)
+  if (version_lambda_search == 1) then
+    print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0'
+  else
+    print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0'
+  endif
+  ! Version 2 is normally better
+
+
+
+! Resolution with the Newton method:
+
+
+! Initialization
+  epsilon = 1d-4
+  lambda =MAX(0d0, -e_val(1))
+  
+  ! Pre research of lambda to start near the optimal lambda
+  ! by adding a constant epsilon and changing the constant to
+  ! have ||x(lambda + epsilon)|| ~ delta, before setting
+  ! lambda = lambda + epsilon 
+  print*, 'Pre research of lambda:'
+  print*,'Initial lambda =', lambda
+  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+  print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta 
+  i = 1
+  
+  ! To increase lambda
+  if (f_N > delta**2) then
+    print*,'Increasing lambda...'
+    do while (f_N > delta**2 .and. i <= nb_it_max_pre_search)
+
+      ! Update the previous norm
+      prev_f_N = f_N
+      ! New epsilon
+      epsilon = epsilon * 2d0
+      ! New norm
+      f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+
+      print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta
+      
+      ! Security
+      if (prev_f_N < f_N) then
+        print*,'WARNING, error: prev_f_N < f_N, exit'
+        epsilon = epsilon * 0.5d0
+        i = nb_it_max_pre_search + 1
+      endif
+
+      i = i + 1
+    enddo
+  
+  ! To reduce lambda
+  else
+     print*,'Reducing lambda...'
+     do while (f_N < delta**2 .and. i <= nb_it_max_pre_search)
+
+       ! Update the previous norm
+       prev_f_N = f_N  
+       ! New epsilon
+       epsilon = epsilon * 0.5d0
+       ! New norm
+       f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+
+       print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta
+
+       ! Security
+       if (prev_f_N > f_N) then
+         print*,'WARNING, error: prev_f_N > f_N, exit'
+         epsilon = epsilon * 2d0
+         i = nb_it_max_pre_search + 1
+      endif
+
+      i = i + 1
+    enddo
+  endif
+
+  print*,'End of the pre research of lambda'
+  
+  ! New value of lambda
+  lambda = lambda + epsilon
+
+  print*, 'e_val(1):', e_val(1)
+  print*, 'Staring point, lambda =', lambda
+  
+  ! thresh_cc, threshold for the research of the optimal lambda
+  ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc
+  ! thresh_rho_2, threshold to cancel the step in the research
+  ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2
+  print*,'Threshold for the CC:', thresh_cc
+  print*,'Threshold for rho_2:', thresh_rho_2  
+
+  print*, 'w_1^T . g =', tmp_wtg(1)
+
+  ! Debug
+  !if (debug) then
+  !    print*, 'Iteration    rho_2    lambda    delta  ||x||  |1-(||x||^2/delta^2)|'
+  !endif
+
+  ! Initialization  
+  i = 1
+  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2
+  model = 0d0           ! predicted value of (||x||^2 - delta^2)^2
+  prev_f_N = 0d0    ! previous value of ||x||^2
+  prev_f_R = 0d0    ! previous value of (||x||^2 - delta^2)^2
+  f_R = 0d0         ! value of (||x||^2 - delta^2)^2
+  rho_2 = 0d0       ! (prev_f_R - f_R)/(prev_f_R - m)
+  y = 0d0           ! step size
+  prev_lambda = 0d0 ! previous lambda
+
+    ! Derivatives
+    if (version_lambda_search == 1) then
+      d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2
+      d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2
+    else
+      d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+      d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+    endif
+
+    ! Trust length
+    alpha = DABS((1d0/d_2)*d_1)
+
+    ! Newton's method
+    do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc)
+      print*,'--------------------------------------'
+      print*,'Research of lambda, iteration:', i
+      print*,'--------------------------------------'
+
+      ! Update of f_N, f_R and the derivatives
+      prev_f_N = f_N 
+      if (version_lambda_search == 1) then
+        prev_f_R = (prev_f_N - delta**2)**2
+        d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2
+        d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2
+      else
+        prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2
+        d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+        d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+      endif
+      write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2  
+
+      ! Newton's step
+      y = -(1d0/DABS(d_2))*d_1
+
+      ! Constraint on y (the newton step)
+      if (DABS(y) > alpha) then
+        y = alpha * (y/DABS(y)) ! preservation of the sign of y
+      endif
+      write(*,'(a,E12.5)') ' Step length: ', y
+
+      ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series
+      model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2    
+
+      ! Updates lambda
+      prev_lambda = lambda
+      lambda = prev_lambda + y
+      print*,'prev lambda:', prev_lambda
+      print*,'new lambda:', lambda
+
+      ! Checks if lambda is in (-h_1, \infty)
+      if (lambda > MAX(0d0, -e_val(1))) then
+        ! New value of ||x(lambda)||^2
+        f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+
+        ! New f_R
+        if (version_lambda_search == 1) then
+          f_R = (f_N - delta**2)**2          ! new value of (||x(lambda)||^2 - delta^2)^2
+        else
+          f_R = (1d0/f_N - 1d0/delta**2)**2  ! new value of (1/||x(lambda)||^2 -1/delta^2)^2
+        endif
+        
+        if (version_lambda_search == 1) then
+          print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R
+          print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R
+          print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model
+        else
+          print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R
+          print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R
+          print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model
+        endif
+
+        print*,'previous - actual:', prev_f_R - f_R
+        print*,'previous - model:', prev_f_R - model
+
+        ! Check the gain
+        if (DABS(prev_f_R - model) < thresh_model_2) then
+          print*,''
+          print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity'
+          print*,''
+        endif        
+
+        ! Will be deleted
+        !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then
+        !  print*,''
+        !  print*,'WARNING: ABS(previous - model) <= 1d-16, exit'
+        !  print*,''
+        !  exit
+        !endif
+
+        ! Computes rho_2
+        rho_2 = (prev_f_R - f_R)/(prev_f_R - model)
+        print*,'rho_2:', rho_2               
+      else
+        rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty)
+        print*,'lambda < -e_val(1) ===> rho_2 = 0'
+      endif
+
+      ! Evolution of the trust length, alpha
+      if (rho_2 >= 0.75d0) then
+        alpha = 2d0 * alpha
+      elseif (rho_2 >= 0.5d0) then
+        alpha = alpha
+      elseif (rho_2 >= 0.25d0) then
+        alpha = 0.5d0 * alpha
+      else 
+        alpha = 0.25d0 * alpha
+      endif
+      write(*,'(a,E12.5)') ' New trust length alpha: ', alpha
+
+      ! cancellaion of the step if rho < 0.1
+      if (rho_2 < thresh_rho_2) then !0.1d0) then
+        lambda = prev_lambda
+        f_N = prev_f_N
+        print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda'
+      endif
+
+      print*,''
+      print*,'lambda, ||x||, delta:'
+      print*, lambda, dsqrt(f_N), delta
+      print*,'CC:', DABS(1d0 - f_N/delta**2)
+      print*,''
+      
+      i = i + 1
+    enddo
+
+  ! if trust newton failed
+  if (i > nb_it_max_lambda) then
+    print*,''
+    print*,'######################################################'
+    print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton'
+    print*,'The research of the optimal lambda has failed'
+    print*,'######################################################'
+    print*,''
+  endif
+
+  print*,'Number of iterations :', i
+  print*,'Value of lambda :', lambda
+  print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2
+  print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2
+  print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2
+
+  ! Time
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust_newton:', t3
+
+  print*,'' 
+  print*,'---End trust_newton---'
+  print*,''
+
+end subroutine
+
+! OMP: First derivative of (||x||^2 - Delta^2)^2
+
+! *Function to compute the first derivative of (||x||^2 - Delta^2)^2*
+
+! This function computes the first derivative of (||x||^2 - Delta^2)^2
+! with respect to lambda.
+
+! \begin{align*}
+! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right)
+! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+! \end{align*}
+
+! \begin{align*}
+!   \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\
+!   \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+! \end{align*}
+
+! Provided:
+! | mo_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | accu1      | double precision | first sum of the formula           |
+! | accu2      | double precision | second sum of the formula          |
+! | tmp_accu1  | double precision | temporary array for the first sum  |
+! | tmp_accu2  | double precision | temporary array for the second sum |
+! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad     |
+! | i,j        | integer          | indexes                            |
+
+! Function:
+! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+
+
+function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+  
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+   
+  ! Internal
+  double precision              :: wtg,accu1,accu2
+  integer                       :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
+
+  ! Functions
+  double precision              :: d1_norm_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2)  &
+      !$OMP DEFAULT(NONE)
+
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+ 
+  !$OMP MASTER
+  do i = 1, n 
+    accu1 = accu1 + tmp_accu1(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig) then
+      tmp_accu2(i) =  tmp_wtg(i)**2 / (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2)
+
+  deallocate(tmp_accu1, tmp_accu2)
+
+end function
+
+! OMP: Second derivative of (||x||^2 - Delta^2)^2
+
+! *Function to compute the second derivative of (||x||^2 - Delta^2)^2*
+
+! This function computes the second derivative of (||x||^2 - Delta^2)^2
+! with respect to lambda.
+! \begin{align*}
+! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+! \end{align*}
+
+! \begin{align*}
+!   \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+!   \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+!   \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} 
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | accu1      | double precision | first sum of the formula           |
+! | accu2      | double precision | second sum of the formula          |
+! | accu3      | double precision | third sum of the formula           |
+! | tmp_accu1  | double precision | temporary array for the first sum  |
+! | tmp_accu2  | double precision | temporary array for the second sum |
+! | tmp_accu2  | double precision | temporary array for the third sum  |
+! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad     |
+! | i,j        | integer          | indexes                            |
+
+! Function:
+! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+
+
+function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+  
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+
+  ! Functions
+  double precision              :: d2_norm_trust_region_omp
+  double precision              :: ddot
+
+  ! Internal
+  double precision              :: accu1,accu2,accu3
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
+  integer :: i, j
+  
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))
+
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg,    &
+      !$OMP accu1, accu2, accu3)                         &
+      !$OMP DEFAULT(NONE)
+
+  ! Initialization
+
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0 
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n 
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+  !$OMP DO
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+  !$OMP DO
+  do i = 1, n
+    tmp_accu3(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  ! Calculations
+
+  ! accu1
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo
+  !$OMP END MASTER
+
+  ! accu2
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3
+    endif
+  enddo
+  !$OMP END DO
+ 
+  ! accu3
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu3(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**4
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu3 = accu3 + tmp_accu3(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2)
+
+  deallocate(tmp_accu1, tmp_accu2, tmp_accu3)
+
+end function
+
+! OMP: Function value of ||x||^2
+
+! *Compute the value of ||x||^2*
+
+! This function computes the value of ||x(lambda)||^2
+
+! \begin{align*}
+! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+
+! Internal:
+! | tmp_wtg(n) | double precision | temporary array for W^T.v_grad   |
+! | tmp_fN     | double precision | temporary array for the function |
+! | i,j        | integer          | indexes                          |
+
+
+function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+
+  use omp_lib
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute ||x(lambda)||^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+ 
+  ! functions
+  double precision              :: f_norm_trust_region_omp
+ 
+  ! internal
+  double precision, allocatable :: tmp_fN(:)
+  integer                       :: i,j
+
+  ! Allocation
+  allocate(tmp_fN(n))
+
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp)    &
+      !$OMP DEFAULT(NONE)
+
+  ! Initialization
+
+  !$OMP MASTER
+  f_norm_trust_region_omp = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    tmp_fN(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  ! Calculations 
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+       tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  !$OMP END DO
+  
+  !$OMP MASTER
+  do i = 1, n
+    f_norm_trust_region_omp =  f_norm_trust_region_omp + tmp_fN(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  deallocate(tmp_fN)
+
+end function
+
+! First derivative of (||x||^2 - Delta^2)^2
+! Version without omp
+
+! *Function to compute the first derivative of ||x||^2 - Delta*
+
+! This function computes the first derivative of (||x||^2 - Delta^2)^2
+! with respect to lambda.
+
+! \begin{align*}
+! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
+! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+! \end{align*}
+
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | accu1 | double precision | first sum of the formula               |
+! | accu2 | double precision | second sum of the formula              |
+! | wtg   | double precision | temporary variable to store W^T.v_grad |
+! | i,j   | integer          | indexes                                |
+
+! Function:
+! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+! | ddot                 | double precision | blas dot product                                                   |
+
+
+function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2
+  integer                      :: i, j
+
+  ! Functions
+  double precision             :: d1_norm_trust_region
+  double precision             :: ddot
+
+  ! Initialization
+  accu1 = 0d0
+  accu2 = 0d0
+
+  do i = 1, n
+    wtg = 0d0
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 
+    endif
+  enddo
+
+  do i = 1, n
+    wtg = 0d0
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 
+    endif
+  enddo
+
+  d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2)
+
+end function
+
+! Second derivative of (||x||^2 - Delta^2)^2
+! Version without OMP
+
+! *Function to compute the second derivative of ||x||^2 - Delta*
+
+
+! \begin{equation}
+! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+! \end{equation}
+
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+! \end{align*}
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | accu1 | double precision | first sum of the formula               |
+! | accu2 | double precision | second sum of the formula              |
+! | accu3 | double precision | third sum of the formula                |
+! | wtg   | double precision | temporary variable to store W^T.v_grad |
+! | i,j   | integer          | indexes                                |
+
+! Function:
+! | d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2       |
+! | ddot                 | double precision | blas dot product                                               |
+
+
+function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in) :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Functions
+  double precision :: d2_norm_trust_region
+  double precision :: ddot
+
+  ! Internal
+  double precision :: wtg,accu1,accu2,accu3
+  integer :: i, j
+
+  ! Initialization
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3
+    endif
+  enddo
+
+  d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2)
+
+end function
+
+! Function value of ||x||^2
+! Version without OMP
+
+! *Compute the value of ||x||^2*
+
+! This function computes the value of ||x(lambda)||^2
+
+! \begin{align*}
+! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | wtg   | double precision | temporary variable to store W^T.v_grad |
+! | i,j   | integer          | indexes                                |
+
+! Function:
+! | f_norm_trust_region | double precision | value of norm(x)^2 |
+! | ddot                | double precision | blas dot product   |
+
+
+
+function f_norm_trust_region(n,e_val,tmp_wtg,lambda)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute ||x(lambda)||^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  
+  ! function
+  double precision             :: f_norm_trust_region
+  double precision             :: ddot
+
+  ! internal
+  integer                      :: i,j
+
+  ! Initialization
+  f_norm_trust_region = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then    
+      f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+
+end function
+
+! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2
+! Version with OMP
+
+! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+! \begin{align*}
+!   \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+!        {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+!      - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+!        {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+!   &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+!        \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!       - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+! \end{align*}
+
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | wtg        | double precision | temporary variable to store W^T.v_grad |
+! | tmp_accu1  | double precision | temporary array for the first sum      |
+! | tmp_accu2  | double precision | temporary array for the second sum     |
+! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad         |
+! | i,j        | integer          | indexes                                |
+
+! Function:
+! | d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+
+function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+
+  ! Internal
+  double precision              :: accu1, accu2
+  integer                       :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
+
+  ! Functions
+  double precision              :: d1_norm_inverse_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) &
+      !$OMP DEFAULT(NONE)
+  
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+!  !$OMP MASTER
+!  do i = 1, n
+!    if (ABS(e_val(i)+lambda) > 1d-12) then
+!      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+!    endif
+!  enddo
+!  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo  
+  !$OMP END MASTER
+
+!  !$OMP MASTER
+!  do i = 1, n
+!    if (ABS(e_val(i)+lambda) > 1d-12) then
+!      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
+!    endif
+!  enddo
+!  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo  
+  !$OMP END MASTER
+  
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))
+
+  deallocate(tmp_accu1, tmp_accu2)
+ 
+end
+
+! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2
+! Version with OMP
+
+! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+! \begin{align*}
+!   \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+!   - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+!   &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!   - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+! \end{align*}
+
+
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | wtg        | double precision | temporary variable to store W^T.v_grad |
+! | tmp_accu1  | double precision | temporary array for the first sum      |
+! | tmp_accu2  | double precision | temporary array for the second sum     |
+! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad         |
+! | i,j        | integer          | indexes                                |
+
+! Function:
+! | d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+
+function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision :: accu1, accu2, accu3
+  integer          :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
+
+  ! Functions
+  double precision :: d2_norm_inverse_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg,    &
+      !$OMP accu1, accu2, accu3)                         &
+      !$OMP DEFAULT(NONE)
+  
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu3(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo  
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo  
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu3(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**4
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu3 = accu3 + tmp_accu3(i)
+  enddo  
+  !$OMP END MASTER
+  
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+   d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
+    - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)
+
+  deallocate(tmp_accu1,tmp_accu2,tmp_accu3)
+ 
+end
+
+! First derivative of (1/||x||^2 - 1/Delta^2)^2
+! Version without OMP
+
+! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+! \begin{align*}
+!   \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+!        {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+!      - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+!        {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+!   &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+!        \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!       - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+! \end{align*}
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} 
+! \end{align*}
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | wtg   | double precision | temporary variable to store W^T.v_grad |
+! | i,j   | integer          | indexes                                |
+
+! Function:
+! | d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+
+function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2
+  integer                      :: i,j
+
+  ! Functions
+  double precision             :: d1_norm_inverse_trust_region
+  
+  accu1 = 0d0
+  accu2 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3
+    endif
+  enddo
+
+  d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))
+ 
+end
+
+! Second derivative of (1/||x||^2 - 1/Delta^2)^2
+! Version without OMP
+
+! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+! \begin{align*}
+!   \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+!   - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+!   &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!   - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+! \end{align*}
+
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | wtg   | double precision | temporary variable to store W^T.v_grad |
+! | i,j   | integer          | indexes                                |
+
+! Function:
+! | d2_norm_inverse_trust_region | double precision | value of the first derivative |
+
+
+function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2, accu3
+  integer                      :: i,j
+
+  ! Functions
+  double precision             :: d2_norm_inverse_trust_region
+  
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4
+    endif
+  enddo
+
+  d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
+    - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)
+  
+end
diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/trust_region_optimal_lambda.org
new file mode 100644
index 00000000..b39c9a10
--- /dev/null
+++ b/src/utils_trust_region/trust_region_optimal_lambda.org
@@ -0,0 +1,1665 @@
+* Newton's method to find the optimal lambda
+
+*Compute the lambda value for the trust region*
+
+This subroutine uses the Newton method in order to find the optimal
+lambda. This constant is added on the diagonal of the hessian to shift
+the eiganvalues. It has a double role:
+- ensure that the resulting hessian is positive definite for the
+  Newton method
+- constrain the step in the trust region, i.e.,
+  $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius
+  of the trust region.
+We search $\lambda$ which minimizes
+\begin{align*}
+  f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2
+\end{align*}
+or
+\begin{align*}
+  \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2
+\end{align*}
+and gives obviously 0 in both cases. \newline
+
+There are several cases:
+- If $\textbf{H}$ is positive definite the interval containing the
+  solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). 
+- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot
+  \textbf{g} \neq 0$ then the interval containing
+  the solution is  $\lambda \in (-h_1, \infty)$.
+- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot
+  \textbf{g} = 0$ then the interval containing the solution is
+  $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| <
+  10^{-12}$ are not computed, so the term where $i = 1$ is
+  automatically removed and this case becomes similar to the previous one.
+
+So to avoid numerical problems (cf. trust_region) we start the
+algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$,
+with $\epsilon$ a little constant.
+The research must be restricted to the interval containing the
+solution. For that reason a little trust region in 1D is used.  
+
+The Newton method to find the optimal $\lambda$ is :
+\begin{align*}
+  \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\
+\end{align*}
+$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to
+$\lambda$ at the l-th iteration,
+$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to
+$\lambda$ at the l-th iteration.\newline
+
+Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1}
+f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as 
+\begin{align*}
+  y \leq \alpha
+\end{align*}
+with $\alpha$ a scalar representing the trust length (trust region in
+1D) where the function $f$ or $\tilde{f}$ is correctly describe by the
+Taylor series truncated at the second order. Thus, if $y > \alpha$,
+the constraint is applied as  
+\begin{align*}
+  y^* = \alpha \frac{y}{|y|}
+\end{align*}
+with $y^*$ the solution in the trust region. 
+
+The size of the trust region evolves in function of $\rho$ as for the
+trust region seen previously cf. trust_region, rho_model.
+The prediction of the value of $f$ or $\tilde{f}$ is done using the
+Taylor series truncated at the second order cf. "trust_region",
+"trust_e_model". 
+
+The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 -
+\Delta^2)^2$ with respect to $\lambda$ are:
+\begin{align*}
+  \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+  = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
+  \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+\end{align*}
+\begin{align*}
+\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+\end{align*}
+
+The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 -
+1/\Delta^2)^2$ with respect to $\lambda$ are:
+\begin{align*}
+  \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+       {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+     - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+       {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+  &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+       \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+      - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+\end{align*}
+
+\begin{align*}
+  \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}
+   {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+  - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}
+   {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+  &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}
+   {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}
+   {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+\end{align*}
+
+Provided in qp_edit:
+| thresh_rho_2          |
+| thresh_cc             |
+| nb_it_max_lambda      |
+| version_lambda_search |
+| nb_it_max_pre_search  |
+see qp_edit for more details
+
+Input:
+| n          | integer          | m*(m-1)/2                  |
+| e_val(n)   | double precision | eigenvalues of the hessian |
+| tmp_wtg(n) | double precision | w_i^T.v_grad(i)            |
+| delta      | double precision | delta for the trust region |
+
+Output:
+| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step |
+|        |                  | lambda > 0                                                               |
+
+Internal:
+| d1_N        | double precision | value of d1_norm_trust_region                                        |
+| d2_N        | double precision | value of d2_norm_trust_region                                        |
+| f_N         | double precision | value of f_norm_trust_region                                         |
+| prev_f_N    | double precision | previous value of f_norm_trust_region                                |
+| f_R         | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2               |
+| prev_f_R    | double precision | previous value of f_R                                                |
+| model       | double precision | predicted value of f_R from prev_f_R and y                           |
+| d_1         | double precision | value of the first derivative                                        |
+| d_2         | double precision | value of the second derivative                                       |
+| y           | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda               |
+| prev_lambda | double precision | previous value of lambda                                             |
+| t1,t2,t3    | double precision | wall time                                                            |
+| i           | integer          | index                                                                |
+| epsilon     | double precision | little constant to avoid numerical problem                           |
+| rho_2       | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R |
+| version     | integer          | version of the root finding method                                   |
+
+Function:
+| d1_norm_trust_region         | double precision | first derivative with respect to lambda of  (norm(x)^2 - Delta^2)^2     |
+| d2_norm_trust_region         | double precision | first derivative with respect to lambda of  (norm(x)^2 - Delta^2)^2     |
+| d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of  (1/norm(x)^2 - 1/Delta^2)^2 |
+| d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 |
+| f_norm_trust_region          | double precision | value of norm(x)^2                                                      |
+
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Research the optimal lambda to constrain the step size in the trust region
+  END_DOC
+
+  implicit none
+  
+  ! Variables
+  
+  ! in
+  integer, intent(in)             :: n
+  double precision, intent(inout) :: e_val(n)
+  double precision, intent(in)    :: delta
+  double precision, intent(in)    :: tmp_wtg(n)
+
+  ! out
+  double precision, intent(out)   :: lambda
+
+  ! Internal
+  double precision                :: d1_N, d2_N, f_N, prev_f_N
+  double precision                :: prev_f_R, f_R
+  double precision                :: model
+  double precision                :: d_1, d_2
+  double precision                :: t1,t2,t3
+  integer                         :: i
+  double precision                :: epsilon
+  double precision                :: y
+  double precision                :: prev_lambda
+  double precision                :: rho_2
+  double precision                :: alpha
+  integer                         :: version
+
+  ! Functions
+  double precision                :: d1_norm_trust_region,d1_norm_trust_region_omp
+  double precision                :: d2_norm_trust_region, d2_norm_trust_region_omp
+  double precision                :: f_norm_trust_region, f_norm_trust_region_omp
+  double precision                :: d1_norm_inverse_trust_region
+  double precision                :: d2_norm_inverse_trust_region
+  double precision                :: d1_norm_inverse_trust_region_omp
+  double precision                :: d2_norm_inverse_trust_region_omp
+
+  print*,''
+  print*,'---Trust_newton---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! version_lambda_search
+  ! 1 -> ||x||^2 - delta^2 = 0,
+  ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better)
+  if (version_lambda_search == 1) then
+    print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0'
+  else
+    print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0'
+  endif
+  ! Version 2 is normally better
+#+END_SRC
+
+Resolution with the Newton method:
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+  ! Initialization
+  epsilon = 1d-4
+  lambda =MAX(0d0, -e_val(1))
+  
+  ! Pre research of lambda to start near the optimal lambda
+  ! by adding a constant epsilon and changing the constant to
+  ! have ||x(lambda + epsilon)|| ~ delta, before setting
+  ! lambda = lambda + epsilon 
+  print*, 'Pre research of lambda:'
+  print*,'Initial lambda =', lambda
+  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+  print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta 
+  i = 1
+  
+  ! To increase lambda
+  if (f_N > delta**2) then
+    print*,'Increasing lambda...'
+    do while (f_N > delta**2 .and. i <= nb_it_max_pre_search)
+
+      ! Update the previous norm
+      prev_f_N = f_N
+      ! New epsilon
+      epsilon = epsilon * 2d0
+      ! New norm
+      f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+
+      print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta
+      
+      ! Security
+      if (prev_f_N < f_N) then
+        print*,'WARNING, error: prev_f_N < f_N, exit'
+        epsilon = epsilon * 0.5d0
+        i = nb_it_max_pre_search + 1
+      endif
+
+      i = i + 1
+    enddo
+  
+  ! To reduce lambda
+  else
+     print*,'Reducing lambda...'
+     do while (f_N < delta**2 .and. i <= nb_it_max_pre_search)
+
+       ! Update the previous norm
+       prev_f_N = f_N  
+       ! New epsilon
+       epsilon = epsilon * 0.5d0
+       ! New norm
+       f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+
+       print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta
+
+       ! Security
+       if (prev_f_N > f_N) then
+         print*,'WARNING, error: prev_f_N > f_N, exit'
+         epsilon = epsilon * 2d0
+         i = nb_it_max_pre_search + 1
+      endif
+
+      i = i + 1
+    enddo
+  endif
+
+  print*,'End of the pre research of lambda'
+  
+  ! New value of lambda
+  lambda = lambda + epsilon
+
+  print*, 'e_val(1):', e_val(1)
+  print*, 'Staring point, lambda =', lambda
+  
+  ! thresh_cc, threshold for the research of the optimal lambda
+  ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc
+  ! thresh_rho_2, threshold to cancel the step in the research
+  ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2
+  print*,'Threshold for the CC:', thresh_cc
+  print*,'Threshold for rho_2:', thresh_rho_2  
+
+  print*, 'w_1^T . g =', tmp_wtg(1)
+
+  ! Debug
+  !if (debug) then
+  !    print*, 'Iteration    rho_2    lambda    delta  ||x||  |1-(||x||^2/delta^2)|'
+  !endif
+
+  ! Initialization  
+  i = 1
+  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2
+  model = 0d0           ! predicted value of (||x||^2 - delta^2)^2
+  prev_f_N = 0d0    ! previous value of ||x||^2
+  prev_f_R = 0d0    ! previous value of (||x||^2 - delta^2)^2
+  f_R = 0d0         ! value of (||x||^2 - delta^2)^2
+  rho_2 = 0d0       ! (prev_f_R - f_R)/(prev_f_R - m)
+  y = 0d0           ! step size
+  prev_lambda = 0d0 ! previous lambda
+
+    ! Derivatives
+    if (version_lambda_search == 1) then
+      d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2
+      d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2
+    else
+      d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+      d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+    endif
+
+    ! Trust length
+    alpha = DABS((1d0/d_2)*d_1)
+
+    ! Newton's method
+    do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc)
+      print*,'--------------------------------------'
+      print*,'Research of lambda, iteration:', i
+      print*,'--------------------------------------'
+
+      ! Update of f_N, f_R and the derivatives
+      prev_f_N = f_N 
+      if (version_lambda_search == 1) then
+        prev_f_R = (prev_f_N - delta**2)**2
+        d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2
+        d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2
+      else
+        prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2
+        d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+        d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+      endif
+      write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2  
+
+      ! Newton's step
+      y = -(1d0/DABS(d_2))*d_1
+
+      ! Constraint on y (the newton step)
+      if (DABS(y) > alpha) then
+        y = alpha * (y/DABS(y)) ! preservation of the sign of y
+      endif
+      write(*,'(a,E12.5)') ' Step length: ', y
+
+      ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series
+      model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2    
+
+      ! Updates lambda
+      prev_lambda = lambda
+      lambda = prev_lambda + y
+      print*,'prev lambda:', prev_lambda
+      print*,'new lambda:', lambda
+
+      ! Checks if lambda is in (-h_1, \infty)
+      if (lambda > MAX(0d0, -e_val(1))) then
+        ! New value of ||x(lambda)||^2
+        f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+
+        ! New f_R
+        if (version_lambda_search == 1) then
+          f_R = (f_N - delta**2)**2          ! new value of (||x(lambda)||^2 - delta^2)^2
+        else
+          f_R = (1d0/f_N - 1d0/delta**2)**2  ! new value of (1/||x(lambda)||^2 -1/delta^2)^2
+        endif
+        
+        if (version_lambda_search == 1) then
+          print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R
+          print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R
+          print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model
+        else
+          print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R
+          print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R
+          print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model
+        endif
+
+        print*,'previous - actual:', prev_f_R - f_R
+        print*,'previous - model:', prev_f_R - model
+
+        ! Check the gain
+        if (DABS(prev_f_R - model) < thresh_model_2) then
+          print*,''
+          print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity'
+          print*,''
+        endif        
+
+        ! Will be deleted
+        !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then
+        !  print*,''
+        !  print*,'WARNING: ABS(previous - model) <= 1d-16, exit'
+        !  print*,''
+        !  exit
+        !endif
+
+        ! Computes rho_2
+        rho_2 = (prev_f_R - f_R)/(prev_f_R - model)
+        print*,'rho_2:', rho_2               
+      else
+        rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty)
+        print*,'lambda < -e_val(1) ===> rho_2 = 0'
+      endif
+
+      ! Evolution of the trust length, alpha
+      if (rho_2 >= 0.75d0) then
+        alpha = 2d0 * alpha
+      elseif (rho_2 >= 0.5d0) then
+        alpha = alpha
+      elseif (rho_2 >= 0.25d0) then
+        alpha = 0.5d0 * alpha
+      else 
+        alpha = 0.25d0 * alpha
+      endif
+      write(*,'(a,E12.5)') ' New trust length alpha: ', alpha
+
+      ! cancellaion of the step if rho < 0.1
+      if (rho_2 < thresh_rho_2) then !0.1d0) then
+        lambda = prev_lambda
+        f_N = prev_f_N
+        print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda'
+      endif
+
+      print*,''
+      print*,'lambda, ||x||, delta:'
+      print*, lambda, dsqrt(f_N), delta
+      print*,'CC:', DABS(1d0 - f_N/delta**2)
+      print*,''
+      
+      i = i + 1
+    enddo
+
+  ! if trust newton failed
+  if (i > nb_it_max_lambda) then
+    print*,''
+    print*,'######################################################'
+    print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton'
+    print*,'The research of the optimal lambda has failed'
+    print*,'######################################################'
+    print*,''
+  endif
+
+  print*,'Number of iterations :', i
+  print*,'Value of lambda :', lambda
+  print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2
+  print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2
+  print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2
+
+  ! Time
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust_newton:', t3
+
+  print*,'' 
+  print*,'---End trust_newton---'
+  print*,''
+
+end subroutine
+#+END_SRC
+
+* OMP: First derivative of (||x||^2 - Delta^2)^2
+
+*Function to compute the first derivative of (||x||^2 - Delta^2)^2*
+
+This function computes the first derivative of (||x||^2 - Delta^2)^2
+with respect to lambda.
+
+\begin{align*}
+\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+= -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right)
+\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+\end{align*}
+
+\begin{align*}
+  \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\
+  \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+\end{align*}
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| accu1      | double precision | first sum of the formula           |
+| accu2      | double precision | second sum of the formula          |
+| tmp_accu1  | double precision | temporary array for the first sum  |
+| tmp_accu2  | double precision | temporary array for the second sum |
+| tmp_wtg(n) | double precision | temporary array for W^t.v_grad     |
+| i,j        | integer          | indexes                            |
+
+Function:
+| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+  
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+   
+  ! Internal
+  double precision              :: wtg,accu1,accu2
+  integer                       :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
+
+  ! Functions
+  double precision              :: d1_norm_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2)  &
+      !$OMP DEFAULT(NONE)
+
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+ 
+  !$OMP MASTER
+  do i = 1, n 
+    accu1 = accu1 + tmp_accu1(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig) then
+      tmp_accu2(i) =  tmp_wtg(i)**2 / (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2)
+
+  deallocate(tmp_accu1, tmp_accu2)
+
+end function
+#+END_SRC
+
+* OMP: Second derivative of (||x||^2 - Delta^2)^2
+
+*Function to compute the second derivative of (||x||^2 - Delta^2)^2*
+
+This function computes the second derivative of (||x||^2 - Delta^2)^2
+with respect to lambda.
+\begin{align*}
+\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+\end{align*}
+
+\begin{align*}
+  \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+  \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+  \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} 
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| accu1      | double precision | first sum of the formula           |
+| accu2      | double precision | second sum of the formula          |
+| accu3      | double precision | third sum of the formula           |
+| tmp_accu1  | double precision | temporary array for the first sum  |
+| tmp_accu2  | double precision | temporary array for the second sum |
+| tmp_accu2  | double precision | temporary array for the third sum  |
+| tmp_wtg(n) | double precision | temporary array for W^t.v_grad     |
+| i,j        | integer          | indexes                            |
+
+Function:
+| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+  
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+
+  ! Functions
+  double precision              :: d2_norm_trust_region_omp
+  double precision              :: ddot
+
+  ! Internal
+  double precision              :: accu1,accu2,accu3
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
+  integer :: i, j
+  
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))
+
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg,    &
+      !$OMP accu1, accu2, accu3)                         &
+      !$OMP DEFAULT(NONE)
+
+  ! Initialization
+
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0 
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n 
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+  !$OMP DO
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+  !$OMP DO
+  do i = 1, n
+    tmp_accu3(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  ! Calculations
+
+  ! accu1
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo
+  !$OMP END MASTER
+
+  ! accu2
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3
+    endif
+  enddo
+  !$OMP END DO
+ 
+  ! accu3
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu3(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**4
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu3 = accu3 + tmp_accu3(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2)
+
+  deallocate(tmp_accu1, tmp_accu2, tmp_accu3)
+
+end function
+#+END_SRC
+
+* OMP: Function value of ||x||^2
+
+*Compute the value of ||x||^2*
+
+This function computes the value of ||x(lambda)||^2
+
+\begin{align*}
+||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+
+Internal:
+| tmp_wtg(n) | double precision | temporary array for W^T.v_grad   |
+| tmp_fN     | double precision | temporary array for the function |
+| i,j        | integer          | indexes                          |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+
+  use omp_lib
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute ||x(lambda)||^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+ 
+  ! functions
+  double precision              :: f_norm_trust_region_omp
+ 
+  ! internal
+  double precision, allocatable :: tmp_fN(:)
+  integer                       :: i,j
+
+  ! Allocation
+  allocate(tmp_fN(n))
+
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp)    &
+      !$OMP DEFAULT(NONE)
+
+  ! Initialization
+
+  !$OMP MASTER
+  f_norm_trust_region_omp = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    tmp_fN(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  ! Calculations 
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+       tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  !$OMP END DO
+  
+  !$OMP MASTER
+  do i = 1, n
+    f_norm_trust_region_omp =  f_norm_trust_region_omp + tmp_fN(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  deallocate(tmp_fN)
+
+end function
+#+END_SRC
+
+* First derivative of (||x||^2 - Delta^2)^2
+Version without omp
+
+*Function to compute the first derivative of ||x||^2 - Delta*
+
+This function computes the first derivative of (||x||^2 - Delta^2)^2
+with respect to lambda.
+
+\begin{align*}
+\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+= 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
+\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+\end{align*}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| accu1 | double precision | first sum of the formula               |
+| accu2 | double precision | second sum of the formula              |
+| wtg   | double precision | temporary variable to store W^T.v_grad |
+| i,j   | integer          | indexes                                |
+
+Function:
+| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+| ddot                 | double precision | blas dot product                                                   |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2
+  integer                      :: i, j
+
+  ! Functions
+  double precision             :: d1_norm_trust_region
+  double precision             :: ddot
+
+  ! Initialization
+  accu1 = 0d0
+  accu2 = 0d0
+
+  do i = 1, n
+    wtg = 0d0
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 
+    endif
+  enddo
+
+  do i = 1, n
+    wtg = 0d0
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 
+    endif
+  enddo
+
+  d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2)
+
+end function
+#+END_SRC
+
+* Second derivative of (||x||^2 - Delta^2)^2
+Version without OMP
+
+*Function to compute the second derivative of ||x||^2 - Delta*
+
+
+\begin{equation}
+\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+\end{equation}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+\end{align*}
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| accu1 | double precision | first sum of the formula               |
+| accu2 | double precision | second sum of the formula              |
+| accu3 | double precision | third sum of the formula                |
+| wtg   | double precision | temporary variable to store W^T.v_grad |
+| i,j   | integer          | indexes                                |
+
+Function:
+| d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2       |
+| ddot                 | double precision | blas dot product                                               |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in) :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Functions
+  double precision :: d2_norm_trust_region
+  double precision :: ddot
+
+  ! Internal
+  double precision :: wtg,accu1,accu2,accu3
+  integer :: i, j
+
+  ! Initialization
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3
+    endif
+  enddo
+
+  d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2)
+
+end function
+#+END_SRC
+
+* Function value of ||x||^2
+Version without OMP
+
+*Compute the value of ||x||^2*
+
+This function computes the value of ||x(lambda)||^2
+
+\begin{align*}
+||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| wtg   | double precision | temporary variable to store W^T.v_grad |
+| i,j   | integer          | indexes                                |
+
+Function:
+| f_norm_trust_region | double precision | value of norm(x)^2 |
+| ddot                | double precision | blas dot product   |
+
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function f_norm_trust_region(n,e_val,tmp_wtg,lambda)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute ||x(lambda)||^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  
+  ! function
+  double precision             :: f_norm_trust_region
+  double precision             :: ddot
+
+  ! internal
+  integer                      :: i,j
+
+  ! Initialization
+  f_norm_trust_region = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then    
+      f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+
+end function
+#+END_SRC
+
+* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2
+Version with OMP
+
+*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+\begin{align*}
+  \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+       {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+     - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+       {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+  &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+       \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+      - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+\end{align*}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| wtg        | double precision | temporary variable to store W^T.v_grad |
+| tmp_accu1  | double precision | temporary array for the first sum      |
+| tmp_accu2  | double precision | temporary array for the second sum     |
+| tmp_wtg(n) | double precision | temporary array for W^t.v_grad         |
+| i,j        | integer          | indexes                                |
+
+Function:
+| d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+
+  ! Internal
+  double precision              :: accu1, accu2
+  integer                       :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
+
+  ! Functions
+  double precision              :: d1_norm_inverse_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) &
+      !$OMP DEFAULT(NONE)
+  
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+!  !$OMP MASTER
+!  do i = 1, n
+!    if (ABS(e_val(i)+lambda) > 1d-12) then
+!      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+!    endif
+!  enddo
+!  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo  
+  !$OMP END MASTER
+
+!  !$OMP MASTER
+!  do i = 1, n
+!    if (ABS(e_val(i)+lambda) > 1d-12) then
+!      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
+!    endif
+!  enddo
+!  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo  
+  !$OMP END MASTER
+  
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))
+
+  deallocate(tmp_accu1, tmp_accu2)
+ 
+end
+#+END_SRC
+
+* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2
+Version with OMP
+
+*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+\begin{align*}
+  \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+  &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+\end{align*}
+
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| wtg        | double precision | temporary variable to store W^T.v_grad |
+| tmp_accu1  | double precision | temporary array for the first sum      |
+| tmp_accu2  | double precision | temporary array for the second sum     |
+| tmp_wtg(n) | double precision | temporary array for W^t.v_grad         |
+| i,j        | integer          | indexes                                |
+
+Function:
+| d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision :: accu1, accu2, accu3
+  integer          :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
+
+  ! Functions
+  double precision :: d2_norm_inverse_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg,    &
+      !$OMP accu1, accu2, accu3)                         &
+      !$OMP DEFAULT(NONE)
+  
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu3(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo  
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo  
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu3(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**4
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu3 = accu3 + tmp_accu3(i)
+  enddo  
+  !$OMP END MASTER
+  
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+   d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
+    - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)
+
+  deallocate(tmp_accu1,tmp_accu2,tmp_accu3)
+ 
+end
+#+END_SRC
+
+* First derivative of (1/||x||^2 - 1/Delta^2)^2
+Version without OMP
+
+*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+\begin{align*}
+  \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+       {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+     - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+       {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+  &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+       \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+      - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+\end{align*}
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} 
+\end{align*}
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| wtg   | double precision | temporary variable to store W^T.v_grad |
+| i,j   | integer          | indexes                                |
+
+Function:
+| d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2
+  integer                      :: i,j
+
+  ! Functions
+  double precision             :: d1_norm_inverse_trust_region
+  
+  accu1 = 0d0
+  accu2 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3
+    endif
+  enddo
+
+  d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))
+ 
+end
+#+END_SRC
+
+* Second derivative of (1/||x||^2 - 1/Delta^2)^2
+Version without OMP
+
+*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+\begin{align*}
+  \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+  &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+\end{align*}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| wtg   | double precision | temporary variable to store W^T.v_grad |
+| i,j   | integer          | indexes                                |
+
+Function:
+| d2_norm_inverse_trust_region | double precision | value of the first derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2, accu3
+  integer                      :: i,j
+
+  ! Functions
+  double precision             :: d2_norm_inverse_trust_region
+  
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4
+    endif
+  enddo
+
+  d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
+    - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)
+  
+end
+#+END_SRC
diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f
new file mode 100644
index 00000000..45738736
--- /dev/null
+++ b/src/utils_trust_region/trust_region_rho.irp.f
@@ -0,0 +1,121 @@
+! Agreement with the model: Rho
+
+! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)*
+
+! Rho represents the agreement between the model (the predicted energy
+! by the Taylor expansion truncated at the 2nd order) and the real
+! energy : 
+
+! \begin{equation}
+! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+! \end{equation}
+! With :
+! $E^{k}$ the energy at the previous iteration
+! $E^{k+1}$ the energy at the actual iteration
+! $m^{k+1}$ the predicted energy for the actual iteration
+! (cf. trust_e_model)
+
+! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$.
+! If $\rho \leq 0$ the previous energy is lower than the actual 
+! energy. We have to cancel the last step and use a smaller trust
+! region.
+! Here we cancel the last step if $\rho < 0.1$, because even if
+! the energy decreases, the agreement is bad, i.e., the Taylor expansion
+! truncated at the second order doesn't represent correctly the energy
+! landscape. So it's better to cancel the step and restart with a
+! smaller trust region.
+
+! Provided in qp_edit:
+! | thresh_rho |
+
+! Input:
+! | prev_energy | double precision | previous energy (energy before the rotation) |
+! | e_model     | double precision | predicted energy after the rotation          |
+
+! Output:
+! | rho         | double precision | the agreement between the model (predicted) and the real energy |
+! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy     |
+! |             |                  | else the previous energy doesn't change                         |
+
+! Internal:
+! | energy | double precision | energy (real) after the rotation |
+! | i      | integer          | index                            |
+! | t*     | double precision | time                             |
+
+
+subroutine trust_region_rho(prev_energy, energy,e_model,rho)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute rho, the agreement between the predicted criterion/energy and the real one
+  END_DOC
+
+  implicit none
+   
+  ! Variables
+
+  ! In
+  double precision, intent(inout) :: prev_energy
+  double precision, intent(in)    :: e_model, energy
+  
+  ! Out
+  double precision, intent(out)   :: rho
+
+  ! Internal
+  double precision                :: t1, t2, t3
+  integer                         :: i
+
+  print*,''
+  print*,'---Rho_model---'
+  
+  call wall_time(t1)
+
+! Rho
+! \begin{equation}
+! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+! \end{equation}
+
+! In function of $\rho$ th step can be accepted or cancelled.
+
+! If we cancel the last step (k+1), the previous energy (k) doesn't
+! change!
+! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) 
+
+
+! Already done in an other subroutine
+  !if (ABS(prev_energy - e_model) < 1d-12) then
+  !  print*,'WARNING: prev_energy - e_model < 1d-12'
+  !  print*,'=> rho will tend toward infinity'
+  !  print*,'Check you convergence criterion !'
+  !endif
+
+  rho = (prev_energy - energy) / (prev_energy - e_model)
+
+  print*, 'previous energy, prev_energy :', prev_energy
+  print*, 'predicted energy, e_model :', e_model
+  print*, 'real energy, energy :', energy
+  print*, 'prev_energy - energy :', prev_energy - energy
+  print*, 'prev_energy - e_model :', prev_energy - e_model
+  print*, 'Rho :', rho
+  print*, 'Threshold for rho:', thresh_rho
+
+  ! Modification of prev_energy in function of rho
+  if (rho < thresh_rho) then !0.1) then
+    ! the step is cancelled  
+    print*, 'Rho <', thresh_rho,', the previous energy does not changed'
+    print*, 'prev_energy :', prev_energy  
+  else
+    ! the step is accepted
+    prev_energy = energy
+    print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy
+  endif
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in rho model:', t3
+
+  print*,'---End rho_model---'
+  print*,''
+
+end subroutine
diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/trust_region_rho.org
new file mode 100644
index 00000000..9b25ee29
--- /dev/null
+++ b/src/utils_trust_region/trust_region_rho.org
@@ -0,0 +1,123 @@
+* Agreement with the model: Rho
+
+*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)*
+
+Rho represents the agreement between the model (the predicted energy
+by the Taylor expansion truncated at the 2nd order) and the real
+energy : 
+
+\begin{equation}
+\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+\end{equation}
+With :
+$E^{k}$ the energy at the previous iteration
+$E^{k+1}$ the energy at the actual iteration
+$m^{k+1}$ the predicted energy for the actual iteration
+(cf. trust_e_model)
+
+If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$.
+If $\rho \leq 0$ the previous energy is lower than the actual 
+energy. We have to cancel the last step and use a smaller trust
+region.
+Here we cancel the last step if $\rho < 0.1$, because even if
+the energy decreases, the agreement is bad, i.e., the Taylor expansion
+truncated at the second order doesn't represent correctly the energy
+landscape. So it's better to cancel the step and restart with a
+smaller trust region.
+
+Provided in qp_edit:
+| thresh_rho |
+
+Input:
+| prev_energy | double precision | previous energy (energy before the rotation) |
+| e_model     | double precision | predicted energy after the rotation          |
+
+Output:
+| rho         | double precision | the agreement between the model (predicted) and the real energy |
+| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy     |
+|             |                  | else the previous energy doesn't change                         |
+
+Internal:
+| energy | double precision | energy (real) after the rotation |
+| i      | integer          | index                            |
+| t*     | double precision | time                             |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f
+subroutine trust_region_rho(prev_energy, energy,e_model,rho)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute rho, the agreement between the predicted criterion/energy and the real one
+  END_DOC
+
+  implicit none
+   
+  ! Variables
+
+  ! In
+  double precision, intent(inout) :: prev_energy
+  double precision, intent(in)    :: e_model, energy
+  
+  ! Out
+  double precision, intent(out)   :: rho
+
+  ! Internal
+  double precision                :: t1, t2, t3
+  integer                         :: i
+
+  print*,''
+  print*,'---Rho_model---'
+  
+  call wall_time(t1)
+#+END_SRC
+
+** Rho
+\begin{equation}
+\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+\end{equation}
+
+In function of $\rho$ th step can be accepted or cancelled.
+
+If we cancel the last step (k+1), the previous energy (k) doesn't
+change!
+If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) 
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f
+  ! Already done in an other subroutine
+  !if (ABS(prev_energy - e_model) < 1d-12) then
+  !  print*,'WARNING: prev_energy - e_model < 1d-12'
+  !  print*,'=> rho will tend toward infinity'
+  !  print*,'Check you convergence criterion !'
+  !endif
+
+  rho = (prev_energy - energy) / (prev_energy - e_model)
+
+  print*, 'previous energy, prev_energy :', prev_energy
+  print*, 'predicted energy, e_model :', e_model
+  print*, 'real energy, energy :', energy
+  print*, 'prev_energy - energy :', prev_energy - energy
+  print*, 'prev_energy - e_model :', prev_energy - e_model
+  print*, 'Rho :', rho
+  print*, 'Threshold for rho:', thresh_rho
+
+  ! Modification of prev_energy in function of rho
+  if (rho < thresh_rho) then !0.1) then
+    ! the step is cancelled  
+    print*, 'Rho <', thresh_rho,', the previous energy does not changed'
+    print*, 'prev_energy :', prev_energy  
+  else
+    ! the step is accepted
+    prev_energy = energy
+    print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy
+  endif
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in rho model:', t3
+
+  print*,'---End rho_model---'
+  print*,''
+
+end subroutine
+#+END_SRC
diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f
new file mode 100644
index 00000000..42aa6ed4
--- /dev/null
+++ b/src/utils_trust_region/trust_region_step.irp.f
@@ -0,0 +1,716 @@
+! Trust region
+
+! *Compute the next step with the trust region algorithm*
+
+! The Newton method is an iterative method to find a minimum of a given
+! function. It uses a Taylor series truncated at the second order of the
+! targeted function and gives its minimizer. The minimizer is taken as
+! the new position and the same thing is done. And by doing so
+! iteratively the method find a minimum, a local or global one depending
+! of the starting point and the convexity/nonconvexity of the targeted
+! function.  
+
+! The goal of the trust region is to constrain the step size of the
+! Newton method in a certain area around the actual position, where the 
+! Taylor series is a good approximation of the targeted function. This
+! area is called the "trust region".
+
+! In addition, in function of the agreement between the Taylor
+! development of the energy and the real energy, the size of the trust
+! region will be updated at each iteration. By doing so, the step sizes
+! are not too larges. In addition, since we add a criterion to cancel the
+! step if the energy increases (more precisely if rho < 0.1), so it's
+! impossible to diverge. \newline
+
+! References: \newline
+! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline
+! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline
+! ISBN: 978-0-387-40065-5 \newline
+
+! By using the first and the second derivatives, the Newton method gives
+! a step:
+! \begin{align*}
+!   \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot 
+!   \textbf{g}_{(k)}
+! \end{align*}
+! which leads to the minimizer of the Taylor series.
+! !!! Warning: the Newton method gives the minimizer if and only if
+! $\textbf{H}$ is positive definite, else it leads to a saddle point !!!
+! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm:
+! \begin{align*}
+!   ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)}
+! \end{align*}
+! which is equivalent to 
+! \begin{align*}
+!   \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2
+! \end{align*}
+
+! with: \newline
+! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of
+! size n) \newline
+! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n
+! matrix) \newline
+! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of
+! size n) \newline
+! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration
+! \newline
+
+! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a
+! hypersphere of radius $\Delta_{(k+1)}$.\newline
+
+! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and
+! $\textbf{H}$ is positive definite, the
+! solution is the step given by the Newton method
+! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$.
+! Else we have to constrain the step size. For simplicity we will remove
+! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have
+! to put a constraint on $\textbf{x}$ with a Lagrange multiplier.
+! Starting from the Taylor series of a function E (here, the energy)
+! truncated at the 2nd order, we have:
+! \begin{align*}
+!   E(\textbf{x}) =  E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2}
+!   \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +
+!   \mathcal{O}(\textbf{x}^2)
+! \end{align*} 
+
+! With the constraint on the norm of $\textbf{x}$ we can write the
+! Lagrangian
+! \begin{align*}
+!   \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x}
+!   + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} 
+!   + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2)
+! \end{align*}
+! Where: \newline
+! $\lambda$ is the Lagrange multiplier \newline
+! $E$ is the energy at the k-th iteration $\Leftrightarrow
+! E(\textbf{x} = \textbf{0})$ \newline
+
+! To solve this equation, we search a stationary point where the first
+! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e.
+! \begin{align*}
+!   \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0
+! \end{align*}
+
+! The derivative is:
+! \begin{align*}
+!   \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+!   = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+! \end{align*}
+
+! So, we search $\textbf{x}$ such as:
+! \begin{align*}
+! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0
+! \end{align*}
+
+! We can rewrite that as:
+! \begin{align*}
+!   \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+!   = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0
+! \end{align*}
+! with $\textbf{I}$ is the identity matrix. 
+
+! By doing so, the solution is:
+! \begin{align*}
+!   (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g}
+! \end{align*}
+! \begin{align*}
+!   \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+! \end{align*}
+! with $\textbf{x}^T \textbf{x} = \Delta^2$.
+
+! We have to solve this previous equation to find this $\textbf{x}$ in the
+! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is
+! just a one dimension problem because we can express $\textbf{x}$ as a
+! function of $\lambda$: 
+! \begin{align*}
+!   \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+! \end{align*}
+
+! We start from the fact that the hessian is diagonalizable. So we have: 
+! \begin{align*}
+!   \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T
+! \end{align*}
+! with: \newline
+! $\textbf{H}$, the hessian matrix \newline
+! $\textbf{W}$, the matrix containing the eigenvectors \newline
+! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline
+! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline
+! $h_i$, the i-th eigenvalue in ascending order \newline
+
+! Now we use the fact that adding a constant on the diagonal just shifts
+! the eigenvalues:
+! \begin{align*}
+!   \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} 
+!   +\textbf{I} \lambda) \cdot \textbf{W}^T
+! \end{align*}
+
+! By doing so we can express $\textbf{x}$ as a function of $\lambda$
+! \begin{align*}
+!   \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot 
+!   \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+! \end{align*}
+! with $\lambda \neq - h_i$.
+
+! An interesting thing in our case is the norm of $\textbf{x}$,
+! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of
+! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have:
+! \begin{align*}
+!   ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot 
+!   \textbf{g})^2}{(h_i + \lambda)^2}
+! \end{align*}
+
+! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. 
+! And if we study the properties of this function we see that: 
+! \begin{align*}
+!   \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0
+! \end{align*}
+! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: 
+! \begin{align*}
+!   \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty
+! \end{align*}
+
+! From these limits and knowing that $h_1$ is the lowest eigenvalue, we
+! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and
+! strictly decreasing function on the interval $\lambda \in
+! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which
+! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one
+! solution. 
+
+! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot
+! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly,
+! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the
+! Newton method is only defined for a positive definite hessian matrix,
+! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive
+! definite. Consequently, in the case where $\textbf{H}$ is not positive
+! definite, to ensure the positive definiteness, $\lambda$ must be
+! greater than $- h_1$.
+! \begin{align*}
+!   \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1
+! \end{align*} 
+
+! From that there are five cases:
+! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$
+! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot
+!   \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I}
+!   \lambda)$ 
+!   must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$
+! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+!   \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing
+!   $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+!   positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$)
+! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+!   \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing
+!   $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+!   positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is
+!   similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda =
+!   0)|| \leq \Delta$
+!   but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+!   time a constant to ensure the condition $||\textbf{x}(\lambda =
+!   -h_1)|| = \Delta$ and escape from the saddle point
+
+! Thus to find the solution, we can write: 
+! \begin{align*}
+!   ||\textbf{x}(\lambda)|| = \Delta
+! \end{align*}
+! \begin{align*}
+!   ||\textbf{x}(\lambda)|| - \Delta = 0
+! \end{align*}
+
+! Taking the square of this equation
+! \begin{align*}
+!   (||\textbf{x}(\lambda)|| - \Delta)^2 = 0
+! \end{align*}
+! we have a function with one minimum for the optimal $\lambda$.
+! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve
+! \begin{align*}
+!   (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+! \end{align*}
+
+! But in practice, it is more effective to solve:
+! \begin{align*}
+!   (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+! \end{align*}
+
+! To do that, we just use the Newton method with "trust_newton" using
+! first and second derivative of $(||\textbf{x}(\lambda)||^2 -
+! \Delta^2)^2$ with respect to $\textbf{x}$.
+! This will give the optimal $\lambda$ to compute the
+! solution $\textbf{x}$ with the formula seen previously:
+! \begin{align*}
+!   \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+!   \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+! \end{align*}
+
+! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our
+! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$.
+
+
+
+
+! Evolution of the trust region
+
+! We initialize the trust region at the first iteration using a radius
+! \begin{align*}
+!   \Delta = ||\textbf{x}(\lambda=0)||
+! \end{align*}
+
+! And for the next iteration the trust region will evolves depending of
+! the agreement of the energy prediction based on the Taylor series
+! truncated at the 2nd order and the real energy. If the Taylor series
+! truncated at the 2nd order represents correctly the energy landscape
+! the trust region will be extent else it will be reduced. In order to
+! mesure this agreement we use the ratio rho cf. "rho_model" and
+! "trust_e_model". From that we use the following values:
+! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$,
+! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, 
+! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, 
+! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$.
+
+! In addition, if $\rho < 0.1$ the iteration is cancelled, so it
+! restarts with a smaller trust region until the energy decreases.
+
+
+
+
+! Summary
+
+! To summarize, knowing the hessian (eigenvectors and eigenvalues), the
+! gradient and the radius of the trust region we can compute the norm of
+! the Newton step  
+! \begin{align*}
+!   ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n 
+!   \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0
+! \end{align*}
+
+! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and
+!   $\textbf{x}(\lambda=0)$ is in the trust region and it is not
+!   necessary to put a constraint on $\textbf{x}$, the solution is the
+!   unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$.
+! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+!   $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in
+!   the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda =
+!   -h_1)$, similarly to the previous case.
+!   But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+!   time a constant to ensure the condition $||\textbf{x}(\lambda =
+!   -h_1)|| = \Delta$ and escape from the saddle point
+! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we
+!   have to search $\lambda \in (-h_1, \infty)$ such as
+!   $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method 
+!   \begin{align*}
+!     (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+!   \end{align*}
+!   or
+!   \begin{align*}
+!     (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+!   \end{align*}
+!   which is numerically more stable. And finally compute 
+!   \begin{align*}
+!     \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+!     \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+!   \end{align*}
+! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we
+!   do exactly the same thing that the previous case but we search
+!   $\lambda \in (0, \infty)$ 
+! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+!   $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the
+!   sum), again we do exactly the same thing that the previous case
+!   searching $\lambda \in (-h_1, \infty)$.
+  
+
+! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not
+! necessary in fact to remove the $j = 1$ in the sum since the term
+! where $h_i - \lambda < 10^{-6}$ are not computed.
+
+! After that, we take this vector $\textbf{x}^*$, called "x", and we do
+! the transformation to an antisymmetric matrix $\textbf{X}$, called
+! m_x. This matrix $\textbf{X}$ will be used to compute a rotation
+! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix".
+
+! NB: 
+! An improvement can be done using a elleptical trust region.
+
+
+
+
+! Code
+
+! Provided:
+! | mo_num | integer | number of MOs |
+
+! Cf. qp_edit in orbital optimization section, for some constants/thresholds
+
+! Input:
+! | m         | integer          | number of MOs                                |
+! | n         | integer          | m*(m-1)/2                                       |
+! | H(n, n)   | double precision | hessian                                         |
+! | v_grad(n) | double precision | gradient                                        |
+! | e_val(n)  | double precision | eigenvalues of the hessian                      |
+! | W(n, n)   | double precision | eigenvectors of the hessian                     |
+! | rho       | double precision | agreement between the model and the reality,    |
+! |           |                  | represents the quality of the energy prediction |
+! | nb_iter   | integer          | number of iteration                             |
+
+! Input/Ouput:
+! | delta | double precision | radius of the trust region |
+
+! Output:
+! | x(n)      | double precision | vector containing the step |
+
+! Internal:
+! | accu          | double precision | temporary variable to compute the step       |
+! | lambda        | double precision | lagrange multiplier                          |
+! | trust_radius2 | double precision | square of the radius of the trust region     |
+! | norm2_x       | double precision | norm^2 of the vector x                       |
+! | norm2_g       | double precision | norm^2 of the vector containing the gradient |
+! | tmp_wtg(n)    | double precision | tmp_wtg(i) = w_i^T . g                       |
+! | i, j, k       | integer          | indexes                                      |
+
+! Function:
+! | dnrm2                   | double precision | Blas function computing the norm       |
+! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) |
+
+
+subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compuet the step in the trust region
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n
+  double precision, intent(in)    :: v_grad(n), rho
+  integer, intent(inout)          :: nb_iter
+  double precision, intent(in)    :: e_val(n), w(n,n)
+
+  ! inout
+  double precision, intent(inout) :: delta
+
+  ! out
+  double precision, intent(out)   :: x(n)
+
+  ! Internal
+  double precision                :: accu, lambda, trust_radius2
+  double precision                :: norm2_x, norm2_g
+  double precision, allocatable   :: tmp_wtg(:)
+  integer                         :: i,j,k
+  double precision                :: t1,t2,t3
+  integer                         :: n_neg_eval
+
+
+  ! Functions
+  double precision                :: ddot, dnrm2
+  double precision                :: f_norm_trust_region_omp
+
+  print*,''
+  print*,'=================='
+  print*,'---Trust_region---'
+  print*,'=================='
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(tmp_wtg(n))
+
+! Initialization and norm
+
+! The norm of the step size will be useful for the trust region
+! algorithm. We start from a first guess and the radius of the trust
+! region will evolve during the optimization.
+
+! avoid_saddle is actually a test to avoid saddle points
+
+
+! Initialization of the Lagrange multiplier
+lambda = 0d0
+
+! List of w^T.g, to avoid the recomputation
+tmp_wtg = 0d0
+do j = 1, n
+  do i = 1, n
+    tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i)
+  enddo
+enddo
+
+! Replacement of the small tmp_wtg corresponding to a negative eigenvalue
+! in the case of avoid_saddle
+if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+  i = 2
+  ! Number of negative eigenvalues
+  do while (e_val(i) < - thresh_eig)
+    if (tmp_wtg(i) < thresh_wtg2) then
+      if (version_avoid_saddle == 1) then  
+        tmp_wtg(i) = 1d0
+      elseif  (version_avoid_saddle == 2) then  
+        tmp_wtg(i) = DABS(e_val(i))
+      elseif  (version_avoid_saddle == 3) then  
+        tmp_wtg(i) = dsqrt(DABS(e_val(i)))
+      else
+        tmp_wtg(i) = thresh_wtg2
+      endif
+    endif
+    i = i + 1
+  enddo
+
+  ! For the fist one it's a little bit different
+  if (tmp_wtg(1) < thresh_wtg2) then 
+    tmp_wtg(1) = 0d0
+  endif
+
+endif 
+
+! Norm^2 of x, ||x||^2
+norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta
+! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm
+! Anyway if the step is too big it will be reduced
+print*,'||x||^2 :', norm2_x
+
+! Norm^2 of the gradient, ||v_grad||^2
+norm2_g = (dnrm2(n,v_grad,1))**2
+print*,'||grad||^2 :', norm2_g
+
+! Trust radius initialization
+
+!     At the first iteration (nb_iter = 0) we initialize the trust region
+!     with the norm of the step generate by the Newton's method ($\textbf{x}_1 =
+!     (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$,
+!     we compute this norm using f_norm_trust_region_omp as explain just
+!     below) 
+
+
+! trust radius
+if (nb_iter == 0) then
+   trust_radius2 = norm2_x 
+   ! To avoid infinite loop of cancellation of this first step
+   ! without changing delta
+   nb_iter = 1
+
+   ! Compute delta, delta = sqrt(trust_radius)
+   delta = dsqrt(trust_radius2)
+endif
+
+! Modification of the trust radius
+
+! In function of rho (which represents the agreement between the model
+! and the reality, cf. rho_model) the trust region evolves. We update
+! delta (the radius of the trust region).
+
+! To avoid too big trust region we put a maximum size.
+
+
+! Modification of the trust radius in function of rho
+if (rho >= 0.75d0) then
+   delta = 2d0 * delta
+elseif (rho >= 0.5d0) then
+   delta = delta
+elseif (rho >= 0.25d0) then
+   delta = 0.5d0 * delta
+else
+   delta = 0.25d0 * delta
+endif
+
+! Maximum size of the trust region
+!if (delta > 0.5d0 * n * pi) then
+!  delta = 0.5d0 * n * pi
+!  print*,'Delta > delta_max, delta = 0.5d0 * n * pi'
+!endif
+
+if (delta > 1d10) then
+  delta = 1d10
+endif
+
+print*, 'Delta :', delta
+
+! Calculation of the optimal lambda
+
+! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$
+! - If $||\textbf{x}|| > \Delta$  or $h_1 < 0$ we have to add a constant
+!   $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$
+! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the
+!   unconstrained one, $\lambda = 0$
+
+! You will find more details at the beginning
+
+
+! By giving delta, we search (||x||^2 - delta^2)^2 = 0
+! and not (||x||^2 - delta)^2 = 0
+
+! Research of lambda to solve ||x(lambda)|| = Delta 
+
+! Display
+print*, 'e_val(1) = ', e_val(1)
+print*, 'w_1^T.g =', tmp_wtg(1)
+
+! H positive definite 
+if (e_val(1) > - thresh_eig) then
+  norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+  print*, '||x(0)||=', dsqrt(norm2_x)
+  print*, 'Delta=', delta
+
+  ! H positive definite, ||x(lambda = 0)|| <= Delta
+  if (dsqrt(norm2_x) <= delta) then 
+    print*, 'H positive definite, ||x(lambda = 0)|| <= Delta'
+    print*, 'lambda = 0, no lambda optimization'
+    lambda = 0d0
+
+  ! H positive definite, ||x(lambda = 0)|| > Delta
+  else
+    ! Constraint solution
+    print*, 'H positive definite, ||x(lambda = 0)|| > Delta' 
+    print*,'Computation of the optimal lambda...'
+    call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+  endif
+
+! H indefinite
+else
+  if (DABS(tmp_wtg(1)) < thresh_wtg) then
+    norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1))
+    print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) 
+  endif
+
+  ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta 
+  if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then
+    ! Add e_val(1) in order to have (H - e_val(1) I) positive definite
+    print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta'
+    print*, 'lambda = -e_val(1), no lambda optimization'
+    lambda = - e_val(1)
+
+  ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta
+  ! and
+  ! H indefinite, w_1^T.g =/= 0
+  else
+    ! Constraint solution/ add lambda
+    if (DABS(tmp_wtg(1)) < thresh_wtg) then
+       print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta'
+    else
+       print*, 'H indefinite, w_1^T.g =/= 0'
+    endif
+    print*, 'Computation of the optimal lambda...'
+    call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+    endif
+
+endif
+
+! Recomputation of the norm^2 of the step x
+norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+print*,''
+print*,'Summary after the trust region:'
+print*,'lambda:', lambda
+print*,'||x||:', dsqrt(norm2_x)
+print*,'delta:', delta
+
+! Calculation of the step x
+
+! x refers to $\textbf{x}^*$
+! We compute x in function of lambda using its formula :
+! \begin{align*}
+! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i 
+! + \lambda} \cdot \textbf{w}_i
+! \end{align*}
+
+
+! Initialisation
+x = 0d0
+
+! Calculation of the step x
+
+! Normal version
+if (.not. absolute_eig) then
+
+  do i = 1, n 
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda)
+      enddo
+    endif
+  enddo
+
+! Version to use the absolute value of the eigenvalues
+else
+
+  do i = 1, n 
+    if (DABS(e_val(i)) > thresh_eig) then
+      do j = 1, n
+        x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda)
+      enddo
+    endif
+  enddo
+
+endif
+
+double precision :: beta, norm_x
+
+! Test
+! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1)
+! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first 
+! eigenvectors multiply by a constant to ensure the condition
+! ||x(lambda=-e_val(1))|| = delta and escape the saddle point
+if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+  if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then
+
+    ! norm of x
+    norm_x = dnrm2(n,x,1)
+
+    ! Computes the coefficient for the w_1 
+    beta = delta**2 - norm_x**2
+
+    ! Updates the step x
+    x = x + W(:,1) * dsqrt(beta)
+
+    ! Recomputes the norm to check
+    norm_x = dnrm2(n,x,1)
+
+    print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):'
+    print*, '||x||', norm_x 
+  endif
+endif
+
+! Transformation of x
+
+! x is a vector of size n, so it can be write as a m by m
+! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index".
+
+
+!  ! Step transformation vector -> matrix
+!  ! Vector with n element -> mo_num by mo_num matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i>j) then
+!           call mat_to_vec_index(i,j,k)
+!           m_x(i,j) = x(k)
+!        else
+!           m_x(i,j) = 0d0
+!        endif
+!     enddo
+!  enddo
+!
+!  ! Antisymmetrization of the previous matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i<j) then
+!           m_x(i,j) = - m_x(j,i)
+!        endif
+!     enddo
+!  enddo
+
+! Deallocation, end
+
+
+deallocate(tmp_wtg)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust_region:', t3
+  print*,'======================'
+  print*,'---End trust_region---'
+  print*,'======================'
+  print*,''
+
+end
diff --git a/src/utils_trust_region/trust_region_step.org b/src/utils_trust_region/trust_region_step.org
new file mode 100644
index 00000000..331453a3
--- /dev/null
+++ b/src/utils_trust_region/trust_region_step.org
@@ -0,0 +1,726 @@
+* Trust region
+
+*Compute the next step with the trust region algorithm*
+
+The Newton method is an iterative method to find a minimum of a given
+function. It uses a Taylor series truncated at the second order of the
+targeted function and gives its minimizer. The minimizer is taken as
+the new position and the same thing is done. And by doing so
+iteratively the method find a minimum, a local or global one depending
+of the starting point and the convexity/nonconvexity of the targeted
+function.  
+
+The goal of the trust region is to constrain the step size of the
+Newton method in a certain area around the actual position, where the 
+Taylor series is a good approximation of the targeted function. This
+area is called the "trust region".
+
+In addition, in function of the agreement between the Taylor
+development of the energy and the real energy, the size of the trust
+region will be updated at each iteration. By doing so, the step sizes
+are not too larges. In addition, since we add a criterion to cancel the
+step if the energy increases (more precisely if rho < 0.1), so it's
+impossible to diverge. \newline
+
+References: \newline
+Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline
+https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline
+ISBN: 978-0-387-40065-5 \newline
+
+By using the first and the second derivatives, the Newton method gives
+a step:
+\begin{align*}
+  \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot 
+  \textbf{g}_{(k)}
+\end{align*}
+which leads to the minimizer of the Taylor series.
+!!! Warning: the Newton method gives the minimizer if and only if
+$\textbf{H}$ is positive definite, else it leads to a saddle point !!!
+But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm:
+\begin{align*}
+  ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)}
+\end{align*}
+which is equivalent to 
+\begin{align*}
+  \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2
+\end{align*}
+
+with: \newline
+$\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of
+size n) \newline
+$\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n
+matrix) \newline
+$\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of
+size n) \newline
+$\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration
+\newline
+
+Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a
+hypersphere of radius $\Delta_{(k+1)}$.\newline
+
+So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and
+$\textbf{H}$ is positive definite, the
+solution is the step given by the Newton method
+$\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$.
+Else we have to constrain the step size. For simplicity we will remove
+the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have
+to put a constraint on $\textbf{x}$ with a Lagrange multiplier.
+Starting from the Taylor series of a function E (here, the energy)
+truncated at the 2nd order, we have:
+\begin{align*}
+  E(\textbf{x}) =  E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2}
+  \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +
+  \mathcal{O}(\textbf{x}^2)
+\end{align*} 
+
+With the constraint on the norm of $\textbf{x}$ we can write the
+Lagrangian
+\begin{align*}
+  \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x}
+  + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} 
+  + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2)
+\end{align*}
+Where: \newline
+$\lambda$ is the Lagrange multiplier \newline
+$E$ is the energy at the k-th iteration $\Leftrightarrow
+E(\textbf{x} = \textbf{0})$ \newline
+
+To solve this equation, we search a stationary point where the first
+derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e.
+\begin{align*}
+  \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0
+\end{align*}
+
+The derivative is:
+\begin{align*}
+  \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+  = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+\end{align*}
+
+So, we search $\textbf{x}$ such as:
+\begin{align*}
+\frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+= \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0
+\end{align*}
+
+We can rewrite that as:
+\begin{align*}
+  \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+  = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0
+\end{align*}
+with $\textbf{I}$ is the identity matrix. 
+
+By doing so, the solution is:
+\begin{align*}
+  (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g}
+\end{align*}
+\begin{align*}
+  \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+\end{align*}
+with $\textbf{x}^T \textbf{x} = \Delta^2$.
+
+We have to solve this previous equation to find this $\textbf{x}$ in the
+trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is
+just a one dimension problem because we can express $\textbf{x}$ as a
+function of $\lambda$: 
+\begin{align*}
+  \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+\end{align*}
+
+We start from the fact that the hessian is diagonalizable. So we have: 
+\begin{align*}
+  \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T
+\end{align*}
+with: \newline
+$\textbf{H}$, the hessian matrix \newline
+$\textbf{W}$, the matrix containing the eigenvectors \newline
+$\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline
+$\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline
+$h_i$, the i-th eigenvalue in ascending order \newline
+
+Now we use the fact that adding a constant on the diagonal just shifts
+the eigenvalues:
+\begin{align*}
+  \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} 
+  +\textbf{I} \lambda) \cdot \textbf{W}^T
+\end{align*}
+
+By doing so we can express $\textbf{x}$ as a function of $\lambda$
+\begin{align*}
+  \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot 
+  \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+\end{align*}
+with $\lambda \neq - h_i$.
+
+An interesting thing in our case is the norm of $\textbf{x}$,
+because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of
+the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have:
+\begin{align*}
+  ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot 
+  \textbf{g})^2}{(h_i + \lambda)^2}
+\end{align*}
+
+So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. 
+And if we study the properties of this function we see that: 
+\begin{align*}
+  \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0
+\end{align*}
+and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: 
+\begin{align*}
+  \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty
+\end{align*}
+
+From these limits and knowing that $h_1$ is the lowest eigenvalue, we
+can conclude that $||\textbf{x}(\lambda)||$ is a continuous and
+strictly decreasing function on the interval $\lambda \in
+(-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which
+gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one
+solution. 
+
+Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot
+\textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly,
+$\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the
+Newton method is only defined for a positive definite hessian matrix,
+so $(\textbf{H} + \textbf{I} \lambda)$ must be positive
+definite. Consequently, in the case where $\textbf{H}$ is not positive
+definite, to ensure the positive definiteness, $\lambda$ must be
+greater than $- h_1$.
+\begin{align*}
+  \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1
+\end{align*} 
+
+From that there are five cases:
+- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$
+- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot
+  \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I}
+  \lambda)$ 
+  must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$
+- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+  \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing
+  $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+  positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$)
+- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+  \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing
+  $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+  positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is
+  similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda =
+  0)|| \leq \Delta$
+  but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+  time a constant to ensure the condition $||\textbf{x}(\lambda =
+  -h_1)|| = \Delta$ and escape from the saddle point
+
+Thus to find the solution, we can write: 
+\begin{align*}
+  ||\textbf{x}(\lambda)|| = \Delta
+\end{align*}
+\begin{align*}
+  ||\textbf{x}(\lambda)|| - \Delta = 0
+\end{align*}
+
+Taking the square of this equation
+\begin{align*}
+  (||\textbf{x}(\lambda)|| - \Delta)^2 = 0
+\end{align*}
+we have a function with one minimum for the optimal $\lambda$.
+Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve
+\begin{align*}
+  (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+\end{align*}
+
+But in practice, it is more effective to solve:
+\begin{align*}
+  (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+\end{align*}
+
+To do that, we just use the Newton method with "trust_newton" using
+first and second derivative of $(||\textbf{x}(\lambda)||^2 -
+\Delta^2)^2$ with respect to $\textbf{x}$.
+This will give the optimal $\lambda$ to compute the
+solution $\textbf{x}$ with the formula seen previously:
+\begin{align*}
+  \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+  \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+\end{align*}
+
+The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our
+step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f 
+#+END_SRC
+
+** Evolution of the trust region
+
+We initialize the trust region at the first iteration using a radius
+\begin{align*}
+  \Delta = ||\textbf{x}(\lambda=0)||
+\end{align*}
+
+And for the next iteration the trust region will evolves depending of
+the agreement of the energy prediction based on the Taylor series
+truncated at the 2nd order and the real energy. If the Taylor series
+truncated at the 2nd order represents correctly the energy landscape
+the trust region will be extent else it will be reduced. In order to
+mesure this agreement we use the ratio rho cf. "rho_model" and
+"trust_e_model". From that we use the following values:
+- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$,
+- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, 
+- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, 
+- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$.
+
+In addition, if $\rho < 0.1$ the iteration is cancelled, so it
+restarts with a smaller trust region until the energy decreases.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f 
+#+END_SRC
+
+** Summary
+
+To summarize, knowing the hessian (eigenvectors and eigenvalues), the
+gradient and the radius of the trust region we can compute the norm of
+the Newton step  
+\begin{align*}
+  ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n 
+  \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0
+\end{align*}
+
+- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and
+  $\textbf{x}(\lambda=0)$ is in the trust region and it is not
+  necessary to put a constraint on $\textbf{x}$, the solution is the
+  unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$.
+- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+  $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in
+  the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda =
+  -h_1)$, similarly to the previous case.
+  But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+  time a constant to ensure the condition $||\textbf{x}(\lambda =
+  -h_1)|| = \Delta$ and escape from the saddle point
+- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we
+  have to search $\lambda \in (-h_1, \infty)$ such as
+  $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method 
+  \begin{align*}
+    (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+  \end{align*}
+  or
+  \begin{align*}
+    (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+  \end{align*}
+  which is numerically more stable. And finally compute 
+  \begin{align*}
+    \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+    \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+  \end{align*}
+- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we
+  do exactly the same thing that the previous case but we search
+  $\lambda \in (0, \infty)$ 
+- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+  $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the
+  sum), again we do exactly the same thing that the previous case
+  searching $\lambda \in (-h_1, \infty)$.
+  
+
+For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not
+necessary in fact to remove the $j = 1$ in the sum since the term
+where $h_i - \lambda < 10^{-6}$ are not computed.
+
+After that, we take this vector $\textbf{x}^*$, called "x", and we do
+the transformation to an antisymmetric matrix $\textbf{X}$, called
+m_x. This matrix $\textbf{X}$ will be used to compute a rotation
+matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix".
+
+NB: 
+An improvement can be done using a elleptical trust region.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f 
+#+END_SRC
+
+** Code
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Cf. qp_edit in orbital optimization section, for some constants/thresholds
+
+Input:
+| m         | integer          | number of MOs                                |
+| n         | integer          | m*(m-1)/2                                       |
+| H(n, n)   | double precision | hessian                                         |
+| v_grad(n) | double precision | gradient                                        |
+| e_val(n)  | double precision | eigenvalues of the hessian                      |
+| W(n, n)   | double precision | eigenvectors of the hessian                     |
+| rho       | double precision | agreement between the model and the reality,    |
+|           |                  | represents the quality of the energy prediction |
+| nb_iter   | integer          | number of iteration                             |
+
+Input/Ouput:
+| delta | double precision | radius of the trust region |
+
+Output:
+| x(n)      | double precision | vector containing the step |
+
+Internal:
+| accu          | double precision | temporary variable to compute the step       |
+| lambda        | double precision | lagrange multiplier                          |
+| trust_radius2 | double precision | square of the radius of the trust region     |
+| norm2_x       | double precision | norm^2 of the vector x                       |
+| norm2_g       | double precision | norm^2 of the vector containing the gradient |
+| tmp_wtg(n)    | double precision | tmp_wtg(i) = w_i^T . g                       |
+| i, j, k       | integer          | indexes                                      |
+
+Function:
+| dnrm2                   | double precision | Blas function computing the norm       |
+| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compuet the step in the trust region
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n
+  double precision, intent(in)    :: v_grad(n), rho
+  integer, intent(inout)          :: nb_iter
+  double precision, intent(in)    :: e_val(n), w(n,n)
+
+  ! inout
+  double precision, intent(inout) :: delta
+
+  ! out
+  double precision, intent(out)   :: x(n)
+
+  ! Internal
+  double precision                :: accu, lambda, trust_radius2
+  double precision                :: norm2_x, norm2_g
+  double precision, allocatable   :: tmp_wtg(:)
+  integer                         :: i,j,k
+  double precision                :: t1,t2,t3
+  integer                         :: n_neg_eval
+
+
+  ! Functions
+  double precision                :: ddot, dnrm2
+  double precision                :: f_norm_trust_region_omp
+
+  print*,''
+  print*,'=================='
+  print*,'---Trust_region---'
+  print*,'=================='
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(tmp_wtg(n))
+#+END_SRC
+
+
+*** Initialization and norm
+
+The norm of the step size will be useful for the trust region
+algorithm. We start from a first guess and the radius of the trust
+region will evolve during the optimization.
+
+avoid_saddle is actually a test to avoid saddle points
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f                                                                                                                                                                                            
+  ! Initialization of the Lagrange multiplier
+  lambda = 0d0
+
+  ! List of w^T.g, to avoid the recomputation
+  tmp_wtg = 0d0
+  do j = 1, n
+    do i = 1, n
+      tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i)
+    enddo
+  enddo
+
+  ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue
+  ! in the case of avoid_saddle
+  if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+    i = 2
+    ! Number of negative eigenvalues
+    do while (e_val(i) < - thresh_eig)
+      if (tmp_wtg(i) < thresh_wtg2) then
+        if (version_avoid_saddle == 1) then  
+          tmp_wtg(i) = 1d0
+        elseif  (version_avoid_saddle == 2) then  
+          tmp_wtg(i) = DABS(e_val(i))
+        elseif  (version_avoid_saddle == 3) then  
+          tmp_wtg(i) = dsqrt(DABS(e_val(i)))
+        else
+          tmp_wtg(i) = thresh_wtg2
+        endif
+      endif
+      i = i + 1
+    enddo
+
+    ! For the fist one it's a little bit different
+    if (tmp_wtg(1) < thresh_wtg2) then 
+      tmp_wtg(1) = 0d0
+    endif
+
+  endif 
+
+  ! Norm^2 of x, ||x||^2
+  norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+  ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta
+  ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm
+  ! Anyway if the step is too big it will be reduced
+  print*,'||x||^2 :', norm2_x
+
+  ! Norm^2 of the gradient, ||v_grad||^2
+  norm2_g = (dnrm2(n,v_grad,1))**2
+  print*,'||grad||^2 :', norm2_g
+#+END_SRC
+
+*** Trust radius initialization
+
+    At the first iteration (nb_iter = 0) we initialize the trust region
+    with the norm of the step generate by the Newton's method ($\textbf{x}_1 =
+    (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$,
+    we compute this norm using f_norm_trust_region_omp as explain just
+    below) 
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! trust radius
+  if (nb_iter == 0) then
+     trust_radius2 = norm2_x 
+     ! To avoid infinite loop of cancellation of this first step
+     ! without changing delta
+     nb_iter = 1
+
+     ! Compute delta, delta = sqrt(trust_radius)
+     delta = dsqrt(trust_radius2)
+  endif
+#+END_SRC
+
+*** Modification of the trust radius
+
+In function of rho (which represents the agreement between the model
+and the reality, cf. rho_model) the trust region evolves. We update
+delta (the radius of the trust region).
+
+To avoid too big trust region we put a maximum size.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! Modification of the trust radius in function of rho
+  if (rho >= 0.75d0) then
+     delta = 2d0 * delta
+  elseif (rho >= 0.5d0) then
+     delta = delta
+  elseif (rho >= 0.25d0) then
+     delta = 0.5d0 * delta
+  else
+     delta = 0.25d0 * delta
+  endif
+
+  ! Maximum size of the trust region
+  !if (delta > 0.5d0 * n * pi) then
+  !  delta = 0.5d0 * n * pi
+  !  print*,'Delta > delta_max, delta = 0.5d0 * n * pi'
+  !endif
+
+  if (delta > 1d10) then
+    delta = 1d10
+  endif
+
+  print*, 'Delta :', delta
+#+END_SRC 
+  
+*** Calculation of the optimal lambda
+
+We search the solution of $(||x||^2 - \Delta^2)^2 = 0$
+- If $||\textbf{x}|| > \Delta$  or $h_1 < 0$ we have to add a constant
+  $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$
+- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the
+  unconstrained one, $\lambda = 0$
+
+You will find more details at the beginning
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! By giving delta, we search (||x||^2 - delta^2)^2 = 0
+  ! and not (||x||^2 - delta)^2 = 0
+  
+  ! Research of lambda to solve ||x(lambda)|| = Delta 
+
+  ! Display
+  print*, 'e_val(1) = ', e_val(1)
+  print*, 'w_1^T.g =', tmp_wtg(1)
+  
+  ! H positive definite 
+  if (e_val(1) > - thresh_eig) then
+    norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+    print*, '||x(0)||=', dsqrt(norm2_x)
+    print*, 'Delta=', delta
+
+    ! H positive definite, ||x(lambda = 0)|| <= Delta
+    if (dsqrt(norm2_x) <= delta) then 
+      print*, 'H positive definite, ||x(lambda = 0)|| <= Delta'
+      print*, 'lambda = 0, no lambda optimization'
+      lambda = 0d0
+
+    ! H positive definite, ||x(lambda = 0)|| > Delta
+    else
+      ! Constraint solution
+      print*, 'H positive definite, ||x(lambda = 0)|| > Delta' 
+      print*,'Computation of the optimal lambda...'
+      call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+    endif
+
+  ! H indefinite
+  else
+    if (DABS(tmp_wtg(1)) < thresh_wtg) then
+      norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1))
+      print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) 
+    endif
+
+    ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta 
+    if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then
+      ! Add e_val(1) in order to have (H - e_val(1) I) positive definite
+      print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta'
+      print*, 'lambda = -e_val(1), no lambda optimization'
+      lambda = - e_val(1)
+
+    ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta
+    ! and
+    ! H indefinite, w_1^T.g =/= 0
+    else
+      ! Constraint solution/ add lambda
+      if (DABS(tmp_wtg(1)) < thresh_wtg) then
+         print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta'
+      else
+         print*, 'H indefinite, w_1^T.g =/= 0'
+      endif
+      print*, 'Computation of the optimal lambda...'
+      call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+      endif
+     
+  endif
+  
+  ! Recomputation of the norm^2 of the step x
+  norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+  print*,''
+  print*,'Summary after the trust region:'
+  print*,'lambda:', lambda
+  print*,'||x||:', dsqrt(norm2_x)
+  print*,'delta:', delta
+#+END_SRC
+
+*** Calculation of the step x
+
+x refers to $\textbf{x}^*$
+We compute x in function of lambda using its formula :
+\begin{align*}
+\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i 
++ \lambda} \cdot \textbf{w}_i
+\end{align*}
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! Initialisation
+  x = 0d0
+
+  ! Calculation of the step x
+
+  ! Normal version
+  if (.not. absolute_eig) then
+
+    do i = 1, n 
+      if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+        do j = 1, n
+          x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda)
+        enddo
+      endif
+    enddo
+
+  ! Version to use the absolute value of the eigenvalues
+  else
+
+    do i = 1, n 
+      if (DABS(e_val(i)) > thresh_eig) then
+        do j = 1, n
+          x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda)
+        enddo
+      endif
+    enddo
+
+  endif
+
+  double precision :: beta, norm_x
+ 
+  ! Test
+  ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1)
+  ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first 
+  ! eigenvectors multiply by a constant to ensure the condition
+  ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point
+  if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+    if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then
+
+      ! norm of x
+      norm_x = dnrm2(n,x,1)
+
+      ! Computes the coefficient for the w_1 
+      beta = delta**2 - norm_x**2
+
+      ! Updates the step x
+      x = x + W(:,1) * dsqrt(beta)
+
+      ! Recomputes the norm to check
+      norm_x = dnrm2(n,x,1)
+
+      print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):'
+      print*, '||x||', norm_x 
+    endif
+  endif
+#+END_SRC
+
+*** Transformation of x
+
+x is a vector of size n, so it can be write as a m by m
+antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index".
+
+    #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+!  ! Step transformation vector -> matrix
+!  ! Vector with n element -> mo_num by mo_num matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i>j) then
+!           call mat_to_vec_index(i,j,k)
+!           m_x(i,j) = x(k)
+!        else
+!           m_x(i,j) = 0d0
+!        endif
+!     enddo
+!  enddo
+!
+!  ! Antisymmetrization of the previous matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i<j) then
+!           m_x(i,j) = - m_x(j,i)
+!        endif
+!     enddo
+!  enddo
+    #+END_SRC
+
+*** Deallocation, end
+
+    #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  deallocate(tmp_wtg)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust_region:', t3
+  print*,'======================'
+  print*,'---End trust_region---'
+  print*,'======================'
+  print*,''
+
+end
+    #+END_SRC
+ 
diff --git a/src/utils_trust_region/vec_to_mat_index.irp.f b/src/utils_trust_region/vec_to_mat_index.irp.f
new file mode 100644
index 00000000..5d68748b
--- /dev/null
+++ b/src/utils_trust_region/vec_to_mat_index.irp.f
@@ -0,0 +1,71 @@
+! Vector to matrix indexes
+  
+! *Compute the indexes p,q of a matrix element with the vector index i*
+
+! Vector (i) -> lower diagonal matrix (p,q), p > q
+
+! If a matrix is antisymmetric it can be reshaped as a vector. And the
+! vector can be reshaped as an antisymmetric matrix
+
+! \begin{align*}
+! \begin{pmatrix}
+! 0 & -1 & -2 & -4 \\
+! 1 & 0  & -3 & -5 \\
+! 2 & 3 & 0  & -6  \\
+! 4 & 5 & 6 & 0
+! \end{pmatrix}
+! \Leftrightarrow
+! \begin{pmatrix}
+! 1 & 2 & 3 & 4 & 5 & 6
+! \end{pmatrix}
+! \end{align*}
+
+! !!! Here the algorithm only work for the lower diagonal !!!
+
+! Input:
+! | i | integer | index in the vector |
+
+! Ouput:
+! | p,q | integer | corresponding indexes in the lower diagonal of a matrix |
+! |     |         | p > q,                                                  |
+! |     |         | p -> row,                                               |
+! |     |         | q -> column                                             |
+
+
+subroutine vec_to_mat_index(i,p,q)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing
+  ! its index i a vector
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer,intent(in)   :: i
+  
+  ! out
+  integer, intent(out) :: p,q
+  
+  ! internal 
+  integer              :: a,b
+  double precision     :: da
+
+  da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i)))
+  a = INT(da) 
+  if ((a*(a-1))/2==i) then
+    p = a-1
+  else
+    p = a
+  endif
+  b = p*(p-1)/2
+ 
+  ! Matrix element indexes
+  p = p + 1
+  q = i - b 
+
+end subroutine
diff --git a/src/utils_trust_region/vec_to_mat_index.org b/src/utils_trust_region/vec_to_mat_index.org
new file mode 100644
index 00000000..0a09fa86
--- /dev/null
+++ b/src/utils_trust_region/vec_to_mat_index.org
@@ -0,0 +1,72 @@
+* Vector to matrix indexes
+  
+*Compute the indexes p,q of a matrix element with the vector index i*
+
+Vector (i) -> lower diagonal matrix (p,q), p > q
+
+If a matrix is antisymmetric it can be reshaped as a vector. And the
+vector can be reshaped as an antisymmetric matrix
+
+\begin{align*}
+\begin{pmatrix}
+0 & -1 & -2 & -4 \\
+1 & 0  & -3 & -5 \\
+2 & 3 & 0  & -6  \\
+4 & 5 & 6 & 0
+\end{pmatrix}
+\Leftrightarrow
+\begin{pmatrix}
+1 & 2 & 3 & 4 & 5 & 6
+\end{pmatrix}
+\end{align*}
+
+!!! Here the algorithm only work for the lower diagonal !!!
+
+Input:
+| i | integer | index in the vector |
+
+Ouput:
+| p,q | integer | corresponding indexes in the lower diagonal of a matrix |
+|     |         | p > q,                                                  |
+|     |         | p -> row,                                               |
+|     |         | q -> column                                             |
+
+#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f
+subroutine vec_to_mat_index(i,p,q)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing
+  ! its index i a vector
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer,intent(in)   :: i
+  
+  ! out
+  integer, intent(out) :: p,q
+  
+  ! internal 
+  integer              :: a,b
+  double precision     :: da
+
+  da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i)))
+  a = INT(da) 
+  if ((a*(a-1))/2==i) then
+    p = a-1
+  else
+    p = a
+  endif
+  b = p*(p-1)/2
+ 
+  ! Matrix element indexes
+  p = p + 1
+  q = i - b 
+
+end subroutine
+#+END_SRC
diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f
new file mode 100644
index 00000000..9140b8d3
--- /dev/null
+++ b/src/utils_trust_region/vec_to_mat_v2.irp.f
@@ -0,0 +1,39 @@
+! Vect to antisymmetric matrix using mat_to_vec_index
+
+! Vector to antisymmetric matrix transformation using mat_to_vec_index
+! subroutine.
+
+! Can be done in OMP (for the first part and with omp critical for the second)
+
+
+subroutine vec_to_mat_v2(n,m,v_x,m_x)
+
+  BEGIN_DOC
+  ! Vector to antisymmetric matrix
+  END_DOC
+  
+  implicit none
+  
+  integer, intent(in) :: n,m
+  double precision, intent(in) :: v_x(n)
+  double precision, intent(out) :: m_x(m,m)
+
+  integer :: i,j,k
+
+  ! 1D -> 2D lower diagonal
+  m_x = 0d0
+  do j = 1, m - 1
+    do i = j + 1, m
+      call mat_to_vec_index(i,j,k)
+      m_x(i,j) = v_x(k)
+    enddo
+  enddo
+  
+  ! Antisym
+  do i = 1, m - 1
+    do j = i + 1, m
+      m_x(i,j) = - m_x(j,i) 
+    enddo
+  enddo
+
+end
diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/vec_to_mat_v2.org
new file mode 100644
index 00000000..4e358a88
--- /dev/null
+++ b/src/utils_trust_region/vec_to_mat_v2.org
@@ -0,0 +1,40 @@
+* Vect to antisymmetric matrix using mat_to_vec_index
+
+Vector to antisymmetric matrix transformation using mat_to_vec_index
+subroutine.
+
+Can be done in OMP (for the first part and with omp critical for the second)
+
+#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f 
+subroutine vec_to_mat_v2(n,m,v_x,m_x)
+
+  BEGIN_DOC
+  ! Vector to antisymmetric matrix
+  END_DOC
+  
+  implicit none
+  
+  integer, intent(in) :: n,m
+  double precision, intent(in) :: v_x(n)
+  double precision, intent(out) :: m_x(m,m)
+
+  integer :: i,j,k
+
+  ! 1D -> 2D lower diagonal
+  m_x = 0d0
+  do j = 1, m - 1
+    do i = j + 1, m
+      call mat_to_vec_index(i,j,k)
+      m_x(i,j) = v_x(k)
+    enddo
+  enddo
+  
+  ! Antisym
+  do i = 1, m - 1
+    do j = i + 1, m
+      m_x(i,j) = - m_x(j,i) 
+    enddo
+  enddo
+
+end
+#+END_SRC
diff --git a/src/zmq/f77_zmq_free.h b/src/zmq/f77_zmq_free.h
deleted file mode 120000
index ac5e33cd..00000000
--- a/src/zmq/f77_zmq_free.h
+++ /dev/null
@@ -1 +0,0 @@
-../../include/f77_zmq_free.h
\ No newline at end of file
diff --git a/src/zmq/f77_zmq_module.f90 b/src/zmq/f77_zmq_module.F90
similarity index 50%
rename from src/zmq/f77_zmq_module.f90
rename to src/zmq/f77_zmq_module.F90
index 1e4a5af3..9171f11c 100644
--- a/src/zmq/f77_zmq_module.f90
+++ b/src/zmq/f77_zmq_module.F90
@@ -1,4 +1,4 @@
 module f77_zmq
- include 'f77_zmq_free.h'
+#include "f77_zmq_free.h"
 end module
 
diff --git a/tests/input/ch2.xyz b/tests/input/ch2.xyz
new file mode 100644
index 00000000..f008490c
--- /dev/null
+++ b/tests/input/ch2.xyz
@@ -0,0 +1,5 @@
+3
+
+C                    6.000000     0.000000     0.000000     0.173480
+H                    1.000000     0.000000    -0.861500    -0.520430
+H                    1.000000     0.000000     0.861500    -0.520430