diff --git a/configure b/configure
index 66bc9419..48e6fd12 100755
--- a/configure
+++ b/configure
@@ -215,7 +215,6 @@ EOF
               cd trexio-${VERSION}
               ./configure --prefix=\${QP_ROOT} --without-hdf5
               make -j 8 && make -j 8 check && make -j 8 install
-              cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files
               tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz 
               mv ninja "\${QP_ROOT}"/bin/
 EOF
@@ -229,7 +228,6 @@ EOF
               cd trexio-${VERSION}
               ./configure --prefix=\${QP_ROOT}
               make -j 8 && make -j 8 check && make -j 8 install
-              cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files
 EOF
  
 
diff --git a/ocaml/Input_ao_basis.ml b/ocaml/Input_ao_basis.ml
index 841089ea..506cf069 100644
--- a/ocaml/Input_ao_basis.ml
+++ b/ocaml/Input_ao_basis.ml
@@ -44,8 +44,12 @@ end = struct
   let get_default = Qpackage.get_ezfio_default "ao_basis";;
 
   let read_ao_basis () =
-    Ezfio.get_ao_basis_ao_basis ()
-    |> AO_basis_name.of_string
+    let result =
+      Ezfio.get_ao_basis_ao_basis ()
+    in
+    if result <> "None" then
+      AO_basis_name.of_string result
+    else failwith "No basis"
   ;;
 
   let read_ao_num () =
@@ -192,7 +196,7 @@ end = struct
          ao_expo         ;
          ao_cartesian    ;
          ao_normalized   ;
-         primitives_normalized ; 
+         primitives_normalized ;
        } = b
      in
      write_md5 b ;
@@ -207,7 +211,7 @@ end = struct
      Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list
        ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ;
 
-     let ao_nucl = 
+     let ao_nucl =
        Array.to_list ao_nucl
        |> list_map Nucl_number.to_int
      in
@@ -215,7 +219,7 @@ end = struct
        ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ;
 
      let ao_power =
-       let l = Array.to_list ao_power in 
+       let l = Array.to_list ao_power in
        List.concat [
          (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.x) l) ;
          (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.y) l) ;
@@ -227,7 +231,7 @@ end = struct
      Ezfio.set_ao_basis_ao_cartesian(ao_cartesian);
      Ezfio.set_ao_basis_ao_normalized(ao_normalized);
      Ezfio.set_ao_basis_primitives_normalized(primitives_normalized);
-     
+
      let ao_coef =
       Array.to_list ao_coef
       |> list_map AO_coef.to_float
@@ -267,7 +271,10 @@ end = struct
         |> Ezfio.set_ao_basis_ao_md5 ;
         Some result
       with
-      | _ -> (Ezfio.set_ao_basis_ao_md5 "None" ; None)
+      | _ -> ( "None"
+               |> Digest.string
+               |> Digest.to_hex
+               |> Ezfio.set_ao_basis_ao_md5 ; None)
   ;;
 
 
@@ -276,7 +283,7 @@ end = struct
       to_basis b
       |> Long_basis.of_basis
       |> Array.of_list
-    and unordered_basis = 
+    and unordered_basis =
       to_long_basis b
       |> Array.of_list
     in
@@ -289,15 +296,15 @@ end = struct
             (a.(i) <- None ; i)
           else
             find x a (i+1)
-      and find2 (s,g,n) a i = 
+      and find2 (s,g,n) a i =
         if i = Array.length a then -1
         else
-            match a.(i) with 
+            match a.(i) with
                 | None -> find2 (s,g,n) a (i+1)
                 | Some (s', g', n')  ->
                    if s <> s' || n <> n' then find2 (s,g,n) a (i+1)
                    else
-                   let lc  = list_map (fun (prim, _) -> prim) g.Gto.lc 
+                   let lc  = list_map (fun (prim, _) -> prim) g.Gto.lc
                    and lc' = list_map (fun (prim, _) -> prim) g'.Gto.lc
                    in
                    if lc <> lc' then find2 (s,g,n) a (i+1) else (a.(i) <- None ; i)
@@ -313,13 +320,13 @@ end = struct
       let ao_num = List.length long_basis |> AO_number.of_int in
       let ao_prim_num =
         list_map (fun (_,g,_) -> List.length g.Gto.lc
-          |> AO_prim_number.of_int ) long_basis 
+          |> AO_prim_number.of_int ) long_basis
         |> Array.of_list
       and ao_nucl =
-        list_map (fun (_,_,n) -> n) long_basis 
+        list_map (fun (_,_,n) -> n) long_basis
         |> Array.of_list
       and ao_power =
-        list_map (fun (x,_,_) -> x) long_basis 
+        list_map (fun (x,_,_) -> x) long_basis
         |> Array.of_list
       in
       let ao_prim_num_max = Array.fold_left (fun s x ->
@@ -329,16 +336,16 @@ end = struct
       in
 
       let gtos =
-        list_map (fun (_,x,_) -> x) long_basis 
+        list_map (fun (_,x,_) -> x) long_basis
       in
       let create_expo_coef ec =
           let coefs =
             begin match ec with
             | `Coefs -> list_map (fun x->
-              list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos 
+              list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos
             | `Expos -> list_map (fun x->
               list_map (fun (prim,_) -> AO_expo.to_float
-              prim.GaussianPrimitive.expo) x.Gto.lc ) gtos 
+              prim.GaussianPrimitive.expo) x.Gto.lc ) gtos
             end
           in
           let rec get_n n accu = function
@@ -360,7 +367,7 @@ end = struct
       let ao_coef = create_expo_coef `Coefs
       |> Array.of_list
       |> Array.map AO_coef.of_float
-      and ao_expo = create_expo_coef `Expos 
+      and ao_expo = create_expo_coef `Expos
       |> Array.of_list
       |> Array.map AO_expo.of_float
       in
@@ -372,7 +379,7 @@ end = struct
         }
   ;;
 
-  let reorder b = 
+  let reorder b =
     let order = ordering b in
     let f a = Array.init (Array.length a) (fun i -> a.(order.(i))) in
     let ao_prim_num_max = AO_prim_number.to_int b.ao_prim_num_max
@@ -464,7 +471,7 @@ Basis set (read-only) ::
     | line :: tail ->
       let line = String.trim line in
       if line = "Basis set (read-only) ::" then
-        String.concat "\n" tail 
+        String.concat "\n" tail
       else
         extract_basis tail
     in
diff --git a/ocaml/Input_mo_basis.ml b/ocaml/Input_mo_basis.ml
index a4e6176a..832b464e 100644
--- a/ocaml/Input_mo_basis.ml
+++ b/ocaml/Input_mo_basis.ml
@@ -56,7 +56,10 @@ end = struct
   let read_ao_md5 () =
     let ao_md5 =
       match (Input_ao_basis.Ao_basis.read ()) with
-      | None -> failwith "Unable to read AO basis"
+      | None -> ("None"
+                 |> Digest.string
+                 |> Digest.to_hex
+                 |> MD5.of_string)
       | Some result -> Input_ao_basis.Ao_basis.to_md5 result
     in
     let result =
diff --git a/src/trexio/qp_import_trexio.py b/scripts/qp_import_trexio.py
similarity index 69%
rename from src/trexio/qp_import_trexio.py
rename to scripts/qp_import_trexio.py
index de8d1269..89096387 100755
--- a/src/trexio/qp_import_trexio.py
+++ b/scripts/qp_import_trexio.py
@@ -13,12 +13,17 @@ Options:
 
 import sys
 import os
-import trexio
 import numpy as np
 from functools import reduce
 from ezfio import ezfio
 from docopt import docopt
 
+try:
+  import trexio
+except ImportError:
+    print("Error: trexio python module is not found. Try python3 -m pip install trexio")
+    sys.exit(1)
+
 
 try:
     QP_ROOT = os.environ["QP_ROOT"]
@@ -90,14 +95,15 @@ def write_ezfio(trexio_filename, filename):
         p = re.compile(r'(\d*)$')
         label = [p.sub("", x).capitalize() for x in label]
         ezfio.set_nuclei_nucl_label(label)
+        print("OK")
 
     else:
         ezfio.set_nuclei_nucl_num(1)
         ezfio.set_nuclei_nucl_charge([0.])
         ezfio.set_nuclei_nucl_coord([0.,0.,0.])
         ezfio.set_nuclei_nucl_label(["X"])
+        print("None")
 
-    print("OK")
 
 
     print("Electrons\t...\t", end=' ')
@@ -105,12 +111,12 @@ def write_ezfio(trexio_filename, filename):
     try:
         num_beta = trexio.read_electron_dn_num(trexio_file)
     except:
-        num_beta = sum(charge)//2
+        num_beta = int(sum(charge))//2
 
     try:
         num_alpha = trexio.read_electron_up_num(trexio_file)
     except:
-        num_alpha = sum(charge) - num_beta
+        num_alpha = int(sum(charge)) - num_beta
 
     if num_alpha == 0:
         print("\n\nError: There are zero electrons in the TREXIO file.\n\n")
@@ -118,7 +124,7 @@ def write_ezfio(trexio_filename, filename):
     ezfio.set_electrons_elec_alpha_num(num_alpha)
     ezfio.set_electrons_elec_beta_num(num_beta)
 
-    print("OK")
+    print(f"{num_alpha} {num_beta}")
 
     print("Basis\t\t...\t", end=' ')
 
@@ -126,60 +132,113 @@ def write_ezfio(trexio_filename, filename):
     try:
         basis_type = trexio.read_basis_type(trexio_file)
 
-        if basis_type.lower() not in ["gaussian", "slater"]:
-            raise TypeError
+        if basis_type.lower() in ["gaussian", "slater"]:
+            shell_num   = trexio.read_basis_shell_num(trexio_file)
+            prim_num    = trexio.read_basis_prim_num(trexio_file)
+            ang_mom     = trexio.read_basis_shell_ang_mom(trexio_file)
+            nucl_index  = trexio.read_basis_nucleus_index(trexio_file)
+            exponent    = trexio.read_basis_exponent(trexio_file)
+            coefficient = trexio.read_basis_coefficient(trexio_file)
+            shell_index = trexio.read_basis_shell_index(trexio_file)
+            ao_shell    = trexio.read_ao_shell(trexio_file)
 
-        shell_num   = trexio.read_basis_shell_num(trexio_file)
-        prim_num    = trexio.read_basis_prim_num(trexio_file)
-        ang_mom     = trexio.read_basis_shell_ang_mom(trexio_file)
-        nucl_index  = trexio.read_basis_nucleus_index(trexio_file)
-        exponent    = trexio.read_basis_exponent(trexio_file)
-        coefficient = trexio.read_basis_coefficient(trexio_file)
-        shell_index = trexio.read_basis_shell_index(trexio_file)
-        ao_shell    = trexio.read_ao_shell(trexio_file)
+            ezfio.set_basis_basis("Read from TREXIO")
+            ezfio.set_ao_basis_ao_basis("Read from TREXIO")
+            ezfio.set_basis_shell_num(shell_num)
+            ezfio.set_basis_prim_num(prim_num)
+            ezfio.set_basis_shell_ang_mom(ang_mom)
+            ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ])
+            ezfio.set_basis_prim_expo(exponent)
+            ezfio.set_basis_prim_coef(coefficient)
 
-        ezfio.set_basis_basis("Read from TREXIO")
-        ezfio.set_basis_shell_num(shell_num)
-        ezfio.set_basis_prim_num(prim_num)
-        ezfio.set_basis_shell_ang_mom(ang_mom)
-        ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ])
-        ezfio.set_basis_prim_expo(exponent)
-        ezfio.set_basis_prim_coef(coefficient)
+            nucl_shell_num = []
+            prev = None
+            m = 0
+            for i in ao_shell:
+                if i != prev:
+                   m += 1
+                   if prev is None or nucl_index[i] != nucl_index[prev]:
+                        nucl_shell_num.append(m)
+                        m = 0
+                prev = i
+            assert (len(nucl_shell_num) == nucl_num)
 
-        nucl_shell_num = []
-        prev = None
-        m = 0
-        for i in ao_shell:
-            if i != prev:
-               m += 1
-               if prev is None or nucl_index[i] != nucl_index[prev]:
-                    nucl_shell_num.append(m)
-                    m = 0
-            prev = i
-        assert (len(nucl_shell_num) == nucl_num)
+            shell_prim_num = []
+            prev = shell_index[0]
+            count = 0
+            for i in shell_index:
+                if i != prev:
+                   shell_prim_num.append(count)
+                   count = 0
+                count += 1
+                prev = i
+            shell_prim_num.append(count)
 
-        shell_prim_num = []
-        prev = shell_index[0]
-        count = 0
-        for i in shell_index:
-            if i != prev:
-               shell_prim_num.append(count)
-               count = 0
-            count += 1
-            prev = i
-        shell_prim_num.append(count)
+            assert (len(shell_prim_num) == shell_num)
 
-        assert (len(shell_prim_num) == shell_num)
-
-        ezfio.set_basis_shell_prim_num(shell_prim_num)
-        ezfio.set_basis_shell_index([x+1 for x in shell_index])
-        ezfio.set_basis_nucleus_shell_num(nucl_shell_num)
+            ezfio.set_basis_shell_prim_num(shell_prim_num)
+            ezfio.set_basis_shell_index([x+1 for x in shell_index])
+            ezfio.set_basis_nucleus_shell_num(nucl_shell_num)
 
 
-        shell_factor = trexio.read_basis_shell_factor(trexio_file)
-        prim_factor  = trexio.read_basis_prim_factor(trexio_file)
+            shell_factor = trexio.read_basis_shell_factor(trexio_file)
+            prim_factor  = trexio.read_basis_prim_factor(trexio_file)
 
-        print("OK")
+        elif basis_type.lower() == "numerical":
+
+            shell_num   = trexio.read_basis_shell_num(trexio_file)
+            prim_num    = shell_num
+            ang_mom     = trexio.read_basis_shell_ang_mom(trexio_file)
+            nucl_index  = trexio.read_basis_nucleus_index(trexio_file)
+            exponent    = [1.]*prim_num
+            coefficient = [1.]*prim_num
+            shell_index = [i for i in range(shell_num)]
+            ao_shell    = trexio.read_ao_shell(trexio_file)
+
+            ezfio.set_basis_basis("None")
+            ezfio.set_ao_basis_ao_basis("None")
+            ezfio.set_basis_shell_num(shell_num)
+            ezfio.set_basis_prim_num(prim_num)
+            ezfio.set_basis_shell_ang_mom(ang_mom)
+            ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ])
+            ezfio.set_basis_prim_expo(exponent)
+            ezfio.set_basis_prim_coef(coefficient)
+
+            nucl_shell_num = []
+            prev = None
+            m = 0
+            for i in ao_shell:
+                if i != prev:
+                   m += 1
+                   if prev is None or nucl_index[i] != nucl_index[prev]:
+                        nucl_shell_num.append(m)
+                        m = 0
+                prev = i
+            assert (len(nucl_shell_num) == nucl_num)
+
+            shell_prim_num = []
+            prev = shell_index[0]
+            count = 0
+            for i in shell_index:
+                if i != prev:
+                   shell_prim_num.append(count)
+                   count = 0
+                count += 1
+                prev = i
+            shell_prim_num.append(count)
+
+            assert (len(shell_prim_num) == shell_num)
+
+            ezfio.set_basis_shell_prim_num(shell_prim_num)
+            ezfio.set_basis_shell_index([x+1 for x in shell_index])
+            ezfio.set_basis_nucleus_shell_num(nucl_shell_num)
+
+            shell_factor = trexio.read_basis_shell_factor(trexio_file)
+            prim_factor  = [1.]*prim_num
+        else:
+           raise TypeError
+
+        print(basis_type)
     except:
         print("None")
         ezfio.set_ao_basis_ao_cartesian(True)
@@ -256,9 +315,11 @@ def write_ezfio(trexio_filename, filename):
 #        ezfio.set_ao_basis_ao_prim_num_max(prim_num_max)
         ezfio.set_ao_basis_ao_coef(coef)
         ezfio.set_ao_basis_ao_expo(expo)
-        ezfio.set_ao_basis_ao_basis("Read from TREXIO")
 
-    print("OK")
+        print("OK")
+
+    else:
+        print("None")
 
 
     #                _
@@ -279,6 +340,7 @@ def write_ezfio(trexio_filename, filename):
     except:
       label = "None"
     ezfio.set_mo_basis_mo_label(label)
+    ezfio.set_determinants_mo_label(label)
 
     try:
       clss = trexio.read_mo_class(trexio_file)
@@ -303,10 +365,10 @@ def write_ezfio(trexio_filename, filename):
       for i in range(num_beta):
          mo_occ[i] += 1.
       ezfio.set_mo_basis_mo_occ(mo_occ)
+      print("OK")
     except:
-      pass
+      print("None")
 
-    print("OK")
 
 
     print("Pseudos\t\t...\t", end=' ')
@@ -386,9 +448,10 @@ def write_ezfio(trexio_filename, filename):
         ezfio.set_pseudo_pseudo_n_kl(pseudo_n_kl)
         ezfio.set_pseudo_pseudo_v_kl(pseudo_v_kl)
         ezfio.set_pseudo_pseudo_dz_kl(pseudo_dz_kl)
+        print("OK")
 
-
-    print("OK")
+    else:
+        print("None")
 
 
 
diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg
index 51d726da..6ad9b998 100644
--- a/src/ao_basis/EZFIO.cfg
+++ b/src/ao_basis/EZFIO.cfg
@@ -67,3 +67,15 @@ doc: Use normalized primitive functions
 interface: ezfio, provider
 default: true
 
+[ao_expoim_cosgtos]
+type: double precision
+doc: imag part for Exponents for each primitive of each cosGTOs |AO|
+size: (ao_basis.ao_num,ao_basis.ao_prim_num_max)
+interface: ezfio, provider
+
+[use_cosgtos]
+type: logical
+doc: If true, use cosgtos for AO integrals
+interface: ezfio
+default: False
+
diff --git a/src/ao_basis/cosgtos.irp.f b/src/ao_basis/cosgtos.irp.f
new file mode 100644
index 00000000..721a3e57
--- /dev/null
+++ b/src/ao_basis/cosgtos.irp.f
@@ -0,0 +1,33 @@
+BEGIN_PROVIDER [ logical, use_cosgtos  ]
+  implicit none
+  BEGIN_DOC
+! If true, use cosgtos for AO integrals
+  END_DOC
+
+  logical                        :: has
+  PROVIDE ezfio_filename
+  if (mpi_master) then
+    call ezfio_has_ao_basis_use_cosgtos(has)
+    if (has) then
+!      write(6,'(A)') '.. >>>>> [ IO READ: use_cosgtos ] <<<<< ..'
+      call ezfio_get_ao_basis_use_cosgtos(use_cosgtos)
+    else
+      use_cosgtos = .False.
+    endif
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST( use_cosgtos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read use_cosgtos with MPI'
+    endif
+  IRP_ENDIF
+
+!  call write_time(6)
+
+END_PROVIDER
diff --git a/src/ao_one_e_ints/NEED b/src/ao_one_e_ints/NEED
index b9caaf5d..61d23b1e 100644
--- a/src/ao_one_e_ints/NEED
+++ b/src/ao_one_e_ints/NEED
@@ -1,3 +1,2 @@
 ao_basis
 pseudo
-cosgtos_ao_int
diff --git a/src/cosgtos_ao_int/aos_cosgtos.irp.f b/src/ao_one_e_ints/aos_cosgtos.irp.f
similarity index 100%
rename from src/cosgtos_ao_int/aos_cosgtos.irp.f
rename to src/ao_one_e_ints/aos_cosgtos.irp.f
diff --git a/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f b/src/ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f
similarity index 100%
rename from src/cosgtos_ao_int/one_e_Coul_integrals.irp.f
rename to src/ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f
diff --git a/src/cosgtos_ao_int/one_e_kin_integrals.irp.f b/src/ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f
similarity index 100%
rename from src/cosgtos_ao_int/one_e_kin_integrals.irp.f
rename to src/ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f
diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f
index 928053ad..446bf730 100644
--- a/src/ao_one_e_ints/pot_ao_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_ints.irp.f
@@ -455,10 +455,12 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in)
     do ix=0,nx
       X(ix) *= dble(c)
     enddo
-    call multiply_poly(X,nx,R2x,2,d,nd)
+!    call multiply_poly(X,nx,R2x,2,d,nd)
+    call multiply_poly_c2(X,nx,R2x,d,nd)
     ny=0
     call I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,Y,ny,n_pt_in)
-    call multiply_poly(Y,ny,R1x,2,d,nd)
+!    call multiply_poly(Y,ny,R1x,2,d,nd)
+    call multiply_poly_c2(Y,ny,R1x,d,nd)
   else
     do ix=0,n_pt_in
       X(ix) = 0.d0
@@ -469,7 +471,8 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in)
     do ix=0,nx
       X(ix) *= dble(a-1)
     enddo
-    call multiply_poly(X,nx,R2x,2,d,nd)
+!    call multiply_poly(X,nx,R2x,2,d,nd)
+    call multiply_poly_c2(X,nx,R2x,d,nd)
 
     nx = nd
     do ix=0,n_pt_in
@@ -479,10 +482,12 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in)
     do ix=0,nx
       X(ix) *= dble(c)
     enddo
-    call multiply_poly(X,nx,R2x,2,d,nd)
+!    call multiply_poly(X,nx,R2x,2,d,nd)
+    call multiply_poly_c2(X,nx,R2x,d,nd)
     ny=0
     call I_x1_pol_mult_one_e(a-1,c,R1x,R1xp,R2x,Y,ny,n_pt_in)
-    call multiply_poly(Y,ny,R1x,2,d,nd)
+!    call multiply_poly(Y,ny,R1x,2,d,nd)
+    call multiply_poly_c2(Y,ny,R1x,d,nd)
   endif
 end
 
@@ -519,7 +524,8 @@ recursive subroutine I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,d,nd,dim)
     do ix=0,nx
       X(ix) *= dble(c-1)
     enddo
-    call multiply_poly(X,nx,R2x,2,d,nd)
+!    call multiply_poly(X,nx,R2x,2,d,nd)
+    call multiply_poly_c2(X,nx,R2x,d,nd)
     ny = 0
     do ix=0,dim
       Y(ix) = 0.d0
@@ -527,7 +533,8 @@ recursive subroutine I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,d,nd,dim)
 
     call I_x1_pol_mult_one_e(0,c-1,R1x,R1xp,R2x,Y,ny,dim)
     if(ny.ge.0)then
-      call multiply_poly(Y,ny,R1xp,2,d,nd)
+!      call multiply_poly(Y,ny,R1xp,2,d,nd)
+      call multiply_poly_c2(Y,ny,R1xp,d,nd)
     endif
   endif
 end
diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg
index d4c995e6..9f523fca 100644
--- a/src/ao_two_e_ints/EZFIO.cfg
+++ b/src/ao_two_e_ints/EZFIO.cfg
@@ -4,6 +4,19 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ]
 interface: ezfio,provider,ocaml
 default: None
 
+[ao_integrals_threshold]
+type: Threshold
+doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero
+interface: ezfio,provider,ocaml
+default: 1.e-15
+ezfio_name: threshold_ao
+
+[ao_cholesky_threshold]
+type: Threshold
+doc: If | (ii|jj) | < `ao_cholesky_threshold` then (ii|jj) is zero
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
 [do_direct_integrals]
 type: logical
 doc: Compute integrals on the fly (very slow, only for debugging)
diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f
index d4c201aa..bb81b141 100644
--- a/src/ao_two_e_ints/cholesky.irp.f
+++ b/src/ao_two_e_ints/cholesky.irp.f
@@ -4,29 +4,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ]
  ! Number of Cholesky vectors in AO basis
  END_DOC
 
- integer :: i,j,k,l
- double precision :: xnorm0, x, integral
- double precision, external :: ao_two_e_integral
-
- cholesky_ao_num_guess = 0
- xnorm0 = 0.d0
- x = 0.d0
- do j=1,ao_num
-   do i=1,ao_num
-     integral = ao_two_e_integral(i,i,j,j)
-     if (integral > ao_integrals_threshold) then
-       cholesky_ao_num_guess += 1
-     else
-       x += integral
-     endif
-   enddo
- enddo
- print *, 'Cholesky decomposition of AO integrals'
- print *, '--------------------------------------'
- print *, ''
- print *, 'Estimated Error: ', x
- print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)'
-
+ cholesky_ao_num_guess = ao_num*ao_num / 2
 END_PROVIDER
 
  BEGIN_PROVIDER [ integer, cholesky_ao_num ]
@@ -39,7 +17,7 @@ END_PROVIDER
  END_DOC
 
  type(c_ptr) :: ptr
- integer :: fd, i,j,k,l, rank
+ integer :: fd, i,j,k,l,m,rank
  double precision, pointer :: ao_integrals(:,:,:,:)
  double precision, external :: ao_two_e_integral
 
@@ -49,28 +27,90 @@ END_PROVIDER
    8, fd, .False., ptr)
  call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/))
 
- double precision :: integral
+ print*, 'Providing the AO integrals (Cholesky)'
+ call wall_time(wall_1)
+ call cpu_time(cpu_1)
+
+ ao_integrals = 0.d0
+
+ double precision :: integral, cpu_1, cpu_2, wall_1, wall_2
  logical, external :: ao_two_e_integral_zero
- !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic)
- do l=1,ao_num
-  do j=1,l
-   do k=1,ao_num
-    do i=1,k
-     if (ao_two_e_integral_zero(i,j,k,l)) cycle
-     integral = ao_two_e_integral(i,k,j,l)
-     ao_integrals(i,k,j,l) = integral
-     ao_integrals(k,i,j,l) = integral
-     ao_integrals(i,k,l,j) = integral
-     ao_integrals(k,i,l,j) = integral
-    enddo
+  double precision, external :: get_ao_two_e_integral
+
+ if (read_ao_two_e_integrals) then
+   PROVIDE ao_two_e_integrals_in_map
+
+   !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2)
+   do m=0,9
+     do l=1+m,ao_num,10
+       !$OMP DO SCHEDULE(dynamic)
+       do j=1,l
+         do k=1,ao_num
+           do i=1,min(k,j)
+             if (ao_two_e_integral_zero(i,j,k,l)) cycle
+             integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map)
+             ao_integrals(i,k,j,l) = integral
+             ao_integrals(k,i,j,l) = integral
+             ao_integrals(i,k,l,j) = integral
+             ao_integrals(k,i,l,j) = integral
+             ao_integrals(j,l,i,k) = integral
+             ao_integrals(j,l,k,i) = integral
+             ao_integrals(l,j,i,k) = integral
+             ao_integrals(l,j,k,i) = integral
+           enddo
+         enddo
+       enddo
+       !$OMP END DO NOWAIT
+     enddo
+     !$OMP MASTER
+     call wall_time(wall_2)
+     print '(I10,'' %  in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1
+     !$OMP END MASTER
    enddo
-  enddo
- enddo
- !$OMP END PARALLEL DO
+   !$OMP END PARALLEL
+
+ else
+
+   !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2)
+   do m=0,9
+     do l=1+m,ao_num,10
+       !$OMP DO SCHEDULE(dynamic)
+       do j=1,l
+         do k=1,ao_num
+           do i=1,min(k,j)
+             if (ao_two_e_integral_zero(i,j,k,l)) cycle
+             integral = ao_two_e_integral(i,k,j,l)
+             ao_integrals(i,k,j,l) = integral
+             ao_integrals(k,i,j,l) = integral
+             ao_integrals(i,k,l,j) = integral
+             ao_integrals(k,i,l,j) = integral
+             ao_integrals(j,l,i,k) = integral
+             ao_integrals(j,l,k,i) = integral
+             ao_integrals(l,j,i,k) = integral
+             ao_integrals(l,j,k,i) = integral
+           enddo
+         enddo
+       enddo
+       !$OMP END DO NOWAIT
+     enddo
+     !$OMP MASTER
+     call wall_time(wall_2)
+     print '(I10,'' %  in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1
+     !$OMP END MASTER
+   enddo
+   !$OMP END PARALLEL
+
+   call wall_time(wall_2)
+   call cpu_time(cpu_2)
+   print*, 'AO integrals provided:'
+   print*, ' cpu  time :',cpu_2 - cpu_1, 's'
+   print*, ' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )'
+
+ endif
 
  ! Call Lapack
  cholesky_ao_num = cholesky_ao_num_guess
- call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao)
+ call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao)
  print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)'
 
  ! Remove mmap
diff --git a/src/cosgtos_ao_int/gauss_legendre.irp.f b/src/ao_two_e_ints/gauss_legendre.irp.f
similarity index 100%
rename from src/cosgtos_ao_int/gauss_legendre.irp.f
rename to src/ao_two_e_ints/gauss_legendre.irp.f
diff --git a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f
similarity index 100%
rename from src/cosgtos_ao_int/two_e_Coul_integrals.irp.f
rename to src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f
diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f
index 82ffbc90..85ff5bcf 100644
--- a/src/ao_two_e_ints/two_e_integrals.irp.f
+++ b/src/ao_two_e_ints/two_e_integrals.irp.f
@@ -590,8 +590,20 @@ double precision function general_primitive_integral(dim,            &
     d_poly(i)=0.d0
   enddo
 
-  !DIR$ FORCEINLINE
-  call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp)
+!  call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp)
+  integer :: ib, ic
+  if (ior(n_Ix,n_Iy) >= 0) then
+    do ib=0,n_Ix
+      do ic = 0,n_Iy
+        d_poly(ib+ic) = d_poly(ib+ic) + Iy_pol(ic) * Ix_pol(ib)
+      enddo
+    enddo
+
+    do n_pt_tmp = n_Ix+n_Iy, 0, -1
+      if (d_poly(n_pt_tmp) /= 0.d0) exit
+    enddo
+  endif
+
   if (n_pt_tmp == -1) then
     return
   endif
@@ -600,8 +612,21 @@ double precision function general_primitive_integral(dim,            &
     d1(i)=0.d0
   enddo
 
-  !DIR$ FORCEINLINE
-  call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out)
+!  call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out)
+  if (ior(n_pt_tmp,n_Iz) >= 0) then
+    ! Bottleneck here
+    do ib=0,n_pt_tmp
+      do ic = 0,n_Iz
+        d1(ib+ic) = d1(ib+ic) + Iz_pol(ic) * d_poly(ib)
+      enddo
+    enddo
+
+    do n_pt_out = n_pt_tmp+n_Iz, 0, -1
+      if (d1(n_pt_out) /= 0.d0) exit
+    enddo
+  endif
+
+
   double precision               :: rint_sum
   accu = accu + rint_sum(n_pt_out,const,d1)
 
@@ -948,8 +973,9 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
     X(ix) *= dble(a-1)
   enddo
 
-  !DIR$ FORCEINLINE
-  call multiply_poly(X,nx,B_10,2,d,nd)
+!  !DIR$ FORCEINLINE
+!  call multiply_poly(X,nx,B_10,2,d,nd)
+  call multiply_poly_c2(X,nx,B_10,d,nd)
 
   nx = nd
   !DIR$ LOOP COUNT(8)
@@ -970,8 +996,9 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
         X(ix) *= c
       enddo
     endif
-    !DIR$ FORCEINLINE
-    call multiply_poly(X,nx,B_00,2,d,nd)
+!    !DIR$ FORCEINLINE
+!    call multiply_poly(X,nx,B_00,2,d,nd)
+    call multiply_poly_c2(X,nx,B_00,d,nd)
   endif
 
   ny=0
@@ -988,9 +1015,9 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
     call I_x1_pol_mult_recurs(a-1,c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
   endif
 
-  !DIR$ FORCEINLINE
-  call multiply_poly(Y,ny,C_00,2,d,nd)
-
+!  !DIR$ FORCEINLINE
+!  call multiply_poly(Y,ny,C_00,2,d,nd)
+  call multiply_poly_c2(Y,ny,C_00,d,nd)
 end
 
 recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
@@ -1028,8 +1055,9 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
     enddo
   endif
 
-  !DIR$ FORCEINLINE
-  call multiply_poly(X,nx,B_00,2,d,nd)
+!  !DIR$ FORCEINLINE
+!  call multiply_poly(X,nx,B_00,2,d,nd)
+  call multiply_poly_c2(X,nx,B_00,d,nd)
 
   ny=0
 
@@ -1039,8 +1067,9 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
   enddo
   call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
 
-  !DIR$ FORCEINLINE
-  call multiply_poly(Y,ny,C_00,2,d,nd)
+!  !DIR$ FORCEINLINE
+!  call multiply_poly(Y,ny,C_00,2,d,nd)
+  call multiply_poly_c2(Y,ny,C_00,d,nd)
 
 end
 
@@ -1067,8 +1096,9 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
   nx = 0
   call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
 
-  !DIR$ FORCEINLINE
-  call multiply_poly(X,nx,B_10,2,d,nd)
+!  !DIR$ FORCEINLINE
+!  call multiply_poly(X,nx,B_10,2,d,nd)
+  call multiply_poly_c2(X,nx,B_10,d,nd)
 
   nx = nd
   !DIR$ LOOP COUNT(8)
@@ -1086,8 +1116,9 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
     enddo
   endif
 
-  !DIR$ FORCEINLINE
-  call multiply_poly(X,nx,B_00,2,d,nd)
+!  !DIR$ FORCEINLINE
+!  call multiply_poly(X,nx,B_00,2,d,nd)
+  call multiply_poly_c2(X,nx,B_00,d,nd)
 
   ny=0
   !DIR$ LOOP COUNT(8)
@@ -1097,9 +1128,9 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
   !DIR$ FORCEINLINE
   call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
 
-  !DIR$ FORCEINLINE
-  call multiply_poly(Y,ny,C_00,2,d,nd)
-
+!  !DIR$ FORCEINLINE
+!  call multiply_poly(Y,ny,C_00,2,d,nd)
+  call multiply_poly_c2(Y,ny,C_00,d,nd)
 end
 
 recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
@@ -1146,8 +1177,10 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
       Y(1) = D_00(1)
       Y(2) = D_00(2)
 
-      !DIR$ FORCEINLINE
-      call multiply_poly(Y,ny,D_00,2,d,nd)
+!      !DIR$ FORCEINLINE
+!      call multiply_poly(Y,ny,D_00,2,d,nd)
+      call multiply_poly_c2(Y,ny,D_00,d,nd)
+
       return
 
       case default
@@ -1164,8 +1197,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
         X(ix) *= dble(c-1)
       enddo
 
-      !DIR$ FORCEINLINE
-      call multiply_poly(X,nx,B_01,2,d,nd)
+!      !DIR$ FORCEINLINE
+!      call multiply_poly(X,nx,B_01,2,d,nd)
+      call multiply_poly_c2(X,nx,B_01,d,nd)
 
       ny = 0
       !DIR$ LOOP COUNT(6)
@@ -1174,8 +1208,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
       enddo
       call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim)
 
-      !DIR$ FORCEINLINE
-      call multiply_poly(Y,ny,D_00,2,d,nd)
+!      !DIR$ FORCEINLINE
+!      call multiply_poly(Y,ny,D_00,2,d,nd)
+      call multiply_poly_c2(Y,ny,D_00,d,nd)
 
   end select
 end
@@ -1233,3 +1268,34 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value)
   enddo
 
 end
+
+
+subroutine multiply_poly_local(b,nb,c,nc,d,nd)
+  implicit none
+  BEGIN_DOC
+  ! Multiply two polynomials
+  ! D(t) += B(t)*C(t)
+  END_DOC
+
+  integer, intent(in)            :: nb, nc
+  integer, intent(out)           :: nd
+  double precision, intent(in)   :: b(0:nb), c(0:nc)
+  double precision, intent(inout) :: d(0:nb+nc)
+
+  integer                        :: ndtmp
+  integer                        :: ib, ic, id, k
+  if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0
+
+  do ib=0,nb
+    do ic = 0,nc
+      d(ib+ic) = d(ib+ic) + c(ic) * b(ib)
+    enddo
+  enddo
+
+  do nd = nb+nc,0,-1
+    if (d(nd) /= 0.d0) exit
+  enddo
+
+end
+
+
diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f
index ca50dd56..f7a42f37 100644
--- a/src/bi_ort_ints/bi_ort_ints.irp.f
+++ b/src/bi_ort_ints/bi_ort_ints.irp.f
@@ -7,7 +7,13 @@ program bi_ort_ints
   my_n_pt_r_grid = 10
   my_n_pt_a_grid = 14
   touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
- call test_3e
+! call test_3e
+ call test_5idx
+!  call test_5idx2
+end
+
+subroutine test_5idx2
+  PROVIDE three_e_5_idx_cycle_2_bi_ort
 end
 
 subroutine test_3e
@@ -16,11 +22,12 @@ subroutine test_3e
  double precision :: accu, contrib,new,ref
  i = 1
  k = 1
+ n = 0
  accu = 0.d0
  do i = 1, mo_num
-  do k = 1, mo_num 
+  do k = 1, mo_num
    do j = 1, mo_num
-    do l = 1, mo_num 
+    do l = 1, mo_num
      do m = 1, mo_num
       do n = 1, mo_num
         call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new)
@@ -31,6 +38,7 @@ subroutine test_3e
          print*,'pb !!'
          print*,i,k,j,l,m,n
          print*,ref,new,contrib
+         stop
         endif
       enddo
      enddo
@@ -42,3 +50,93 @@ subroutine test_3e
 
 
 end
+
+subroutine test_5idx
+ implicit none
+ integer :: i,k,j,l,m,n,ipoint
+ double precision :: accu, contrib,new,ref
+ i = 1
+ k = 1
+ n = 0
+ accu = 0.d0
+ do i = 1, mo_num
+  do k = 1, mo_num
+   do j = 1, mo_num
+    do l = 1, mo_num
+     do m = 1, mo_num
+
+      new = three_e_5_idx_direct_bi_ort(m,l,j,k,i)
+      ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i)
+      contrib = dabs(new - ref)
+      accu += contrib
+      if(contrib .gt. 1.d-10)then
+       print*,'direct'
+       print*,i,k,j,l,m
+       print*,ref,new,contrib
+       stop
+      endif
+
+      new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i)
+      ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i)
+      contrib = dabs(new - ref)
+      accu += contrib
+      if(contrib .gt. 1.d-10)then
+       print*,'exch12'
+       print*,i,k,j,l,m
+       print*,ref,new,contrib
+       stop
+      endif
+!
+      new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i)
+      ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i)
+      contrib = dabs(new - ref)
+      accu += contrib
+      if(contrib .gt. 1.d-10)then
+       print*,'cycle1'
+       print*,i,k,j,l,m
+       print*,ref,new,contrib
+       stop
+      endif
+
+      new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i)
+      ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i)
+      contrib = dabs(new - ref)
+      accu += contrib
+      if(contrib .gt. 1.d-10)then
+       print*,'cycle2'
+       print*,i,k,j,l,m
+       print*,ref,new,contrib
+       stop
+      endif
+
+      new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i)
+      ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i)
+      contrib = dabs(new - ref)
+      accu += contrib
+      if(contrib .gt. 1.d-10)then
+       print*,'exch23'
+       print*,i,k,j,l,m
+       print*,ref,new,contrib
+       stop
+      endif
+
+      new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i)
+      ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i)
+      contrib = dabs(new - ref)
+      accu += contrib
+      if(contrib .gt. 1.d-10)then
+       print*,'exch13'
+       print*,i,k,j,l,m
+       print*,ref,new,contrib
+       stop
+      endif
+
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ print*,'accu = ',accu/dble(mo_num)**5
+
+
+end
diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f
index 507408e5..1b2e777e 100644
--- a/src/bi_ort_ints/three_body_ijmkl.irp.f
+++ b/src/bi_ort_ints/three_body_ijmkl.irp.f
@@ -1,7 +1,11 @@
-
 ! ---
 
-BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
 
   BEGIN_DOC
   !
@@ -14,289 +18,221 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num,
 
   implicit none
   integer          :: i, j, k, m, l
-  double precision :: integral, wall1, wall0
-
-  three_e_5_idx_direct_bi_ort = 0.d0
-  print *, ' Providing the three_e_5_idx_direct_bi_ort ...'
-  call wall_time(wall0)
+  double precision :: wall1, wall0
+  integer          :: ipoint
+  double precision, allocatable :: grad_mli(:,:,:), orb_mat(:,:,:)
+  double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:)
+  double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:)
+  double precision, allocatable :: tmp_mat(:,:,:,:)
+  allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num))
 
   provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t
 
+  print *, ' Providing the three_e_5_idx_bi_ort ...'
+  call wall_time(wall0)
+
+ do m = 1, mo_num
+
+  allocate(grad_mli(n_points_final_grid,mo_num,mo_num))
+  allocate(orb_mat(n_points_final_grid,mo_num,mo_num))
  !$OMP PARALLEL                     &
  !$OMP DEFAULT (NONE)               &
- !$OMP PRIVATE (i,j,k,m,l,integral) & 
- !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort)
- !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+ !$OMP PRIVATE (i,l,ipoint) &
+ !$OMP SHARED (m,mo_num,n_points_final_grid, &
+ !$OMP         mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
+ !$OMP         int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
+ !$OMP         grad_mli, orb_mat)
+ !$OMP DO COLLAPSE(2)
+  do i=1,mo_num
+    do l=1,mo_num
+       do ipoint=1, n_points_final_grid
+
+         grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( &
+               int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + &
+               int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + &
+               int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) )
+
+         orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i)
+
+       enddo
+    enddo
+  enddo
+
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+
+  call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, &
+      orb_mat, n_points_final_grid,  &
+      grad_mli, n_points_final_grid,  0.d0, &
+      tmp_mat, mo_num*mo_num)
+
+  !$OMP PARALLEL DO PRIVATE(i,j,k,l)
   do i = 1, mo_num
     do k = 1, mo_num
       do j = 1, mo_num
         do l = 1, mo_num
-          do m = 1, mo_num
-            call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral)
-            three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral 
-          enddo
+            three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j)
+            three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,k,j) - tmp_mat(k,j,l,i)
         enddo
       enddo
     enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
+  !$OMP END PARALLEL DO
 
-  call wall_time(wall1)
-  print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0
-  call print_memory_usage()
+  deallocate(orb_mat,grad_mli)
 
-END_PROVIDER 
 
-! ---
-
-BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
-
-  BEGIN_DOC
-  !
-  ! matrix element of the -L  three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
-  !
-  ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = <mlk|-L|jim> ::: notice that i is the RIGHT MO and k is the LEFT MO
-  !
-  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
-  !
-  END_DOC
-
-  implicit none
-  integer          :: i, j, k, m, l
-  double precision :: integral, wall1, wall0
-
-  three_e_5_idx_cycle_1_bi_ort = 0.d0
-  print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...'
-  call wall_time(wall0)
-
-  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num))
+  allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num))
+  allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num))
 
  !$OMP PARALLEL                     &
  !$OMP DEFAULT (NONE)               &
- !$OMP PRIVATE (i,j,k,m,l,integral) & 
- !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort)
- !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+ !$OMP PRIVATE (i,l,ipoint) &
+ !$OMP SHARED (m,mo_num,n_points_final_grid, &
+ !$OMP         mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
+ !$OMP         int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
+ !$OMP         rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi)
+ !$OMP DO COLLAPSE(2)
+  do i=1,mo_num
+    do l=1,mo_num
+       do ipoint=1, n_points_final_grid
+
+         lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint)
+         lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint)
+         lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint)
+
+         rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i)
+         rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i)
+         rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i)
+
+         rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m)
+         rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m)
+         rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m)
+
+       enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, &
+      lm_grad_ik, 3*n_points_final_grid,  &
+      rm_grad_ik, 3*n_points_final_grid,  0.d0, &
+      tmp_mat, mo_num*mo_num)
+
+  !$OMP PARALLEL DO PRIVATE(i,j,k,l)
   do i = 1, mo_num
     do k = 1, mo_num
       do j = 1, mo_num
         do l = 1, mo_num
-          do m = 1, mo_num
-            call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral)
-            three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral 
-          enddo
+            three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i)
+            three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j)
         enddo
       enddo
     enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
+  !$OMP END PARALLEL DO
 
-  call wall_time(wall1)
-  print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0
-  call print_memory_usage()
 
-END_PROVIDER 
+  call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, &
+      lm_grad_ik, 3*n_points_final_grid,  &
+      rk_grad_im, 3*n_points_final_grid,  0.d0, &
+      tmp_mat, mo_num*mo_num)
 
-! ---
-
-BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
-
-  BEGIN_DOC
-  !
-  ! matrix element of the -L  three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
-  !
-  ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = <mlk|-L|imj> ::: notice that i is the RIGHT MO and k is the LEFT MO
-  !
-  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
-  !
-  END_DOC
-
-  implicit none
-  integer          :: i, j, k, m, l
-  double precision :: integral, wall1, wall0
-
-  three_e_5_idx_cycle_2_bi_ort = 0.d0
-  print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...'
-  call wall_time(wall0)
-
-  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
-
- !$OMP PARALLEL                     &
- !$OMP DEFAULT (NONE)               &
- !$OMP PRIVATE (i,j,k,m,l,integral) & 
- !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort)
- !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
-  do i = 1, mo_num
-    do k = 1, mo_num
-      do j = 1, mo_num
-        do m = 1, mo_num
-          do l = 1, mo_num
-            call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral)
-            three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral 
-          enddo
-        enddo
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  call wall_time(wall1)
-  print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0
-  call print_memory_usage()
-
-END_PROVIDER 
-
-! ---
-
-BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
-
-  BEGIN_DOC
-  !
-  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
-  !
-  ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = <mlk|-L|jmi> ::: notice that i is the RIGHT MO and k is the LEFT MO
-  !
-  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
-  !
-  END_DOC
-
-  implicit none
-  integer          :: i, j, k, m, l
-  double precision :: integral, wall1, wall0
-
-  three_e_5_idx_exch23_bi_ort = 0.d0
-  print *, ' Providing the three_e_5_idx_exch23_bi_ort ...'
-  call wall_time(wall0)
-
-  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
-
- !$OMP PARALLEL                     &
- !$OMP DEFAULT (NONE)               &
- !$OMP PRIVATE (i,j,k,m,l,integral) & 
- !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort)
- !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+  !$OMP PARALLEL DO PRIVATE(i,j,k,l)
   do i = 1, mo_num
     do k = 1, mo_num
       do j = 1, mo_num
         do l = 1, mo_num
-          do m = 1, mo_num
-            call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral)
-            three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral 
-          enddo
+            three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,j,k)
+            three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = - tmp_mat(k,j,i,l)
+            three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = - tmp_mat(k,i,j,l)
+            three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = - tmp_mat(l,j,i,k)
         enddo
       enddo
     enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
+  !$OMP END PARALLEL DO
 
-  call wall_time(wall1)
-  print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0
-  call print_memory_usage()
 
-END_PROVIDER 
+  deallocate(lm_grad_ik)
 
-! ---
-
-BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
-
-  BEGIN_DOC
-  !
-  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
-  !
-  ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = <mlk|-L|ijm> ::: notice that i is the RIGHT MO and k is the LEFT MO
-  !
-  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
-  !
-  END_DOC
-
-  implicit none
-  integer          :: i, j, k, m, l
-  double precision :: integral, wall1, wall0
-
-  three_e_5_idx_exch13_bi_ort = 0.d0
-  print *, ' Providing the three_e_5_idx_exch13_bi_ort ...'
-  call wall_time(wall0)
-
-  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num))
 
  !$OMP PARALLEL                     &
  !$OMP DEFAULT (NONE)               &
- !$OMP PRIVATE (i,j,k,m,l,integral) & 
- !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort)
- !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+ !$OMP PRIVATE (i,l,ipoint) &
+ !$OMP SHARED (m,mo_num,n_points_final_grid, &
+ !$OMP         mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
+ !$OMP         int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
+ !$OMP         lk_grad_mi)
+ !$OMP DO COLLAPSE(2)
+  do i=1,mo_num
+    do l=1,mo_num
+       do ipoint=1, n_points_final_grid
+
+         lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint)
+         lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint)
+         lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint)
+
+       enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, &
+      lk_grad_mi, 3*n_points_final_grid,  &
+      rm_grad_ik, 3*n_points_final_grid,  0.d0, &
+      tmp_mat, mo_num*mo_num)
+
+  !$OMP PARALLEL DO PRIVATE(i,j,k,l)
   do i = 1, mo_num
     do k = 1, mo_num
       do j = 1, mo_num
         do l = 1, mo_num
-          do m = 1, mo_num
-            call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral)
-            three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral 
-          enddo
+            three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l,i)
+            three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j)
+            three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k,i)
+            three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(k,i,l,j)
         enddo
       enddo
     enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
+  !$OMP END PARALLEL DO
 
-  call wall_time(wall1)
-  print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0
-  call print_memory_usage()
 
-END_PROVIDER 
+  call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, &
+      lk_grad_mi, 3*n_points_final_grid,  &
+      rk_grad_im, 3*n_points_final_grid,  0.d0, &
+      tmp_mat, mo_num*mo_num)
 
-! ---
-
-BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
-
-  BEGIN_DOC
-  !
-  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
-  !
-  ! three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = <mlk|-L|mij> ::: notice that i is the RIGHT MO and k is the LEFT MO
-  !
-  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
-  !
-  END_DOC
-
-  implicit none
-  integer          :: i, j, k, m, l
-  double precision :: integral, wall1, wall0
-
-  three_e_5_idx_exch12_bi_ort = 0.d0
-  print *, ' Providing the three_e_5_idx_exch12_bi_ort ...'
-  call wall_time(wall0)
-
-  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
-
- !$OMP PARALLEL                     &
- !$OMP DEFAULT (NONE)               &
- !$OMP PRIVATE (i,j,k,m,l,integral) & 
- !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort)
- !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+  !$OMP PARALLEL DO PRIVATE(i,j,k,l)
   do i = 1, mo_num
     do k = 1, mo_num
       do j = 1, mo_num
         do l = 1, mo_num
-          do m = 1, mo_num
-            call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral)
-            three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = -1.d0 * integral 
-          enddo
+            three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,j,i,k)
+            three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,i,j,l)
+            three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,j,i,l)
+            three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,i,j,k)
         enddo
       enddo
     enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
+  !$OMP END PARALLEL DO
+
+  deallocate(lk_grad_mi)
+  deallocate(rm_grad_ik)
+  deallocate(rk_grad_im)
+  enddo
+
 
   call wall_time(wall1)
-  print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0
+  print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0
   call print_memory_usage()
 
-END_PROVIDER 
-
-! ---
+END_PROVIDER
 
diff --git a/src/bi_ort_ints/three_body_ijmkl_old.irp.f b/src/bi_ort_ints/three_body_ijmkl_old.irp.f
new file mode 100644
index 00000000..105cd179
--- /dev/null
+++ b/src/bi_ort_ints/three_body_ijmkl_old.irp.f
@@ -0,0 +1,295 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) = <mlk|-L|mji> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_direct_bi_ort_old = 0.d0
+  print *, ' Providing the three_e_5_idx_direct_bi_ort_old ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) &
+ !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort_old)
+ !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral)
+            three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) = -1.d0 * integral
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_direct_bi_ort_old', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) = <mlk|-L|jim> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_cycle_1_bi_ort_old = 0.d0
+  print *, ' Providing the three_e_5_idx_cycle_1_bi_ort_old ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) &
+ !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort_old)
+ !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral)
+            three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) = -1.d0 * integral
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_cycle_1_bi_ort_old', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) = <mlk|-L|imj> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_cycle_2_bi_ort_old = 0.d0
+  print *, ' Providing the three_e_5_idx_cycle_2_bi_ort_old ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) &
+ !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort_old)
+ !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          do l = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral)
+            three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) = -1.d0 * integral
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_cycle_2_bi_ort_old', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) = <mlk|-L|jmi> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_exch23_bi_ort_old = 0.d0
+  print *, ' Providing the three_e_5_idx_exch23_bi_ort_old ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) &
+ !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort_old)
+ !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral)
+            three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) = -1.d0 * integral
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_exch23_bi_ort_old', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) = <mlk|-L|ijm> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_exch13_bi_ort_old = 0.d0
+  print *, ' Providing the three_e_5_idx_exch13_bi_ort_old ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) &
+ !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort_old)
+ !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral)
+            three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) = -1.d0 * integral
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_exch13_bi_ort_old', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = <mlk|-L|mij> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t
+
+  three_e_5_idx_exch12_bi_ort_old = 0.d0
+  print *, ' Providing the three_e_5_idx_exch12_bi_ort_old ...'
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) &
+ !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort_old)
+ !$OMP DO SCHEDULE (dynamic) COLLAPSE(2)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral)
+            three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_exch12_bi_ort_old', wall1 - wall0
+
+END_PROVIDER
+
diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
index f82e8725..d8145c3e 100644
--- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
+++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
@@ -4,7 +4,7 @@
 BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num, mo_num)]
 
  BEGIN_DOC
-! matrix element of the -L  three-body operator 
+! matrix element of the -L  three-body operator
 !
 ! notice the -1 sign: in this way three_body_ints_bi_ort can be directly used to compute Slater rules :)
  END_DOC
@@ -12,7 +12,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n
  implicit none
  integer          :: i, j, k, l, m, n
  double precision :: integral, wall1, wall0
- character*(128)  :: name_file 
+ character*(128)  :: name_file
 
   three_body_ints_bi_ort = 0.d0
   print *, ' Providing the three_body_ints_bi_ort ...'
@@ -27,12 +27,12 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n
 !   call read_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file)
 !  else
 
-  !provide x_W_ki_bi_ortho_erf_rk 
+  !provide x_W_ki_bi_ortho_erf_rk
   provide mos_r_in_r_array_transp mos_l_in_r_array_transp
 
  !$OMP PARALLEL                       &
  !$OMP DEFAULT (NONE)                 &
- !$OMP PRIVATE (i,j,k,l,m,n,integral) & 
+ !$OMP PRIVATE (i,j,k,l,m,n,integral) &
  !$OMP SHARED (mo_num,three_body_ints_bi_ort)
  !$OMP DO SCHEDULE (dynamic)
   do i = 1, mo_num
@@ -43,7 +43,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n
             do n = 1, mo_num
               call give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
 
-              three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral 
+              three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral
             enddo
           enddo
         enddo
@@ -64,7 +64,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n
 !  call ezfio_set_three_body_ints_bi_ort_io_three_body_ints_bi_ort("Read")
 ! endif
 
-END_PROVIDER 
+END_PROVIDER
 
 ! ---
 
@@ -72,7 +72,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS
   !
   END_DOC
 
@@ -86,22 +86,25 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
   PROVIDE int2_grad1_u12_bimo_t
 
   integral = 0.d0
+  ! (n, l, k, m, j, i)
   do ipoint = 1, n_points_final_grid
-    weight = final_weight_at_r_vector(ipoint)                                                                          
 
-    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & 
+    tmp =     mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) &
               * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j)    &
                 + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j)    &
                 + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) )
-    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & 
+
+    tmp = tmp + mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) &
               * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i)    &
                 + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i)    &
                 + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) )
-    integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) &
+
+    tmp = tmp + mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) &
               * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i)    &
                 + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i)    &
                 + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) )
 
+    integral = integral + tmp * final_weight_at_r_vector(ipoint)
   enddo
 
 end subroutine give_integrals_3_body_bi_ort
@@ -112,7 +115,7 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS
   !
   END_DOC
 
@@ -124,13 +127,13 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral)
 
   integral = 0.d0
   do ipoint = 1, n_points_final_grid
-    weight = final_weight_at_r_vector(ipoint)                                                                          
+    weight = final_weight_at_r_vector(ipoint)
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & 
+!    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) &
 !              * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j)  &
 !                + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j)  &
 !                + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j)  )
-!    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & 
+!    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) &
 !              * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i)  &
 !                + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i)  &
 !                + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i)  )
@@ -139,11 +142,11 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral)
 !                + x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i)  &
 !                + x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i)  )
 
-!    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & 
+!    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) &
 !              * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,l,j,ipoint)        &
 !                + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,l,j,ipoint)        &
 !                + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,l,j,ipoint)        )
-!    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & 
+!    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) &
 !              * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint)        &
 !                + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint)        &
 !                + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint)        )
@@ -152,13 +155,13 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral)
 !                + int2_grad1_u12_bimo(2,l,j,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint)        &
 !                + int2_grad1_u12_bimo(3,l,j,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint)        )
 
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
-    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i)        & 
+    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i)        &
               * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(l,j,1,ipoint) &
                 + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(l,j,2,ipoint) &
                 + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(l,j,3,ipoint) )
-    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j)        & 
+    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j)        &
               * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(k,i,1,ipoint) &
                 + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(k,i,2,ipoint) &
                 + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(k,i,3,ipoint) )
@@ -177,7 +180,7 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS
   !
   END_DOC
 
@@ -189,13 +192,13 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral)
 
   integral = 0.d0
   do ipoint = 1, n_points_final_grid
-    weight = final_weight_at_r_vector(ipoint)                                                                          
+    weight = final_weight_at_r_vector(ipoint)
 
-    integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & 
+    integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) &
               * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j)    &
                 + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j)    &
                 + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) )
-    integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & 
+    integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) &
               * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
                 + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
                 + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f
index b63375cf..1467d9a4 100644
--- a/src/ccsd/ccsd_space_orb_sub.irp.f
+++ b/src/ccsd/ccsd_space_orb_sub.irp.f
@@ -1,5 +1,5 @@
 subroutine run_ccsd_space_orb
-  
+
   implicit none
 
   integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d
@@ -12,34 +12,30 @@ subroutine run_ccsd_space_orb
   double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:)
   double precision, allocatable :: t1(:,:), r1(:,:)
   double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:)
-  
+
   double precision, allocatable :: all_err(:,:), all_t(:,:)
   integer, allocatable          :: list_occ(:), list_vir(:)
   integer(bit_kind)             :: det(N_int,2)
-  integer                       :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4)
-  
-  PROVIDE mo_two_e_integrals_in_map
+  integer                       :: nO, nV, nOa, nVa
+
+!  PROVIDE mo_two_e_integrals_in_map
 
   det = psi_det(:,:,cc_ref)
   print*,'Reference determinant:'
   call print_det(det,N_int)
 
-  ! Extract number of occ/vir alpha/beta spin orbitals
-  !call extract_n_spin(det,n_spin)
-  nOa = cc_nOa !n_spin(1)
-  nOb = cc_nOb !n_spin(2)
-  nVa = cc_nVa !n_spin(3)
-  nVb = cc_nVb !n_spin(4)
+  nOa = cc_nOa
+  nVa = cc_nVa
 
   ! Check that the reference is a closed shell determinant
   if (cc_ref_is_open_shell) then
     call abort
   endif
-  
+
   ! Number of occ/vir spatial orb
   nO = nOa
   nV = nVa
-  
+
   allocate(list_occ(nO),list_vir(nV))
   list_occ = cc_list_occ
   list_vir = cc_list_vir
@@ -47,7 +43,7 @@ subroutine run_ccsd_space_orb
   !call extract_list_orb_space(det,nO,nV,list_occ,list_vir)
   !print*,'occ',list_occ
   !print*,'vir',list_vir
-  
+
   allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV))
   allocate(tau(nO,nO,nV,nV))
   allocate(t1(nO,nV), r1(nO,nV))
@@ -76,7 +72,7 @@ subroutine run_ccsd_space_orb
   print*,'Det energy', uncorr_energy
   call ccsd_energy_space(nO,nV,tau,t1,energy)
   print*,'Guess energy', uncorr_energy+energy, energy
-  
+
   nb_iter = 0
   not_converged = .True.
   max_r1 = 0d0
@@ -86,9 +82,9 @@ subroutine run_ccsd_space_orb
   write(*,'(A77)') ' |   It.  |       E(CCSD) (Ha) | Correlation (Ha) |  Conv. T1  |  Conv. T2  |'
   write(*,'(A77)') ' -----------------------------------------------------------------------------'
   call wall_time(ta)
-  
+
   do while (not_converged)
-    
+
     call compute_H_oo(nO,nV,t1,t2,tau,H_oo)
     call compute_H_vv(nO,nV,t1,t2,tau,H_vv)
     call compute_H_vo(nO,nV,t1,t2,H_vo)
@@ -97,7 +93,7 @@ subroutine run_ccsd_space_orb
     call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
     call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
     max_r = max(max_r1,max_r2)
-    
+
     ! Update
     if (cc_update_method == 'diis') then
       !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
@@ -109,7 +105,7 @@ subroutine run_ccsd_space_orb
       call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1)
       call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2)
     else
-      print*,'Unkonw cc_method_method: '//cc_update_method
+      print*,'Unkown cc_method_method: '//cc_update_method
     endif
 
     call update_tau_space(nO,nV,t1,t2,tau)
@@ -122,7 +118,7 @@ subroutine run_ccsd_space_orb
     if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then
       not_converged = .False.
     endif
-    
+
   enddo
   write(*,'(A77)') ' -----------------------------------------------------------------------------'
   call wall_time(tb)
@@ -141,18 +137,18 @@ subroutine run_ccsd_space_orb
 
   call write_t1(nO,nV,t1)
   call write_t2(nO,nV,t2)
-  
+
   ! Deallocation
   if (cc_update_method == 'diis') then
     deallocate(all_err,all_t)
   endif
 
   deallocate(H_vv,H_oo,H_vo,r1,r2,tau)
-  
+
   ! CCSD(T)
   double precision :: e_t
 
-  if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then 
+  if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then
 
     ! Dumb way
     !call wall_time(ta)
@@ -169,8 +165,13 @@ subroutine run_ccsd_space_orb
     ! New
     print*,'Computing (T) correction...'
     call wall_time(ta)
-    call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
+!    call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
+!         ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
+
+    e_t = uncorr_energy + energy ! For print in next call
+    call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
          ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
+
     call wall_time(tb)
     print*,'Time: ',tb-ta, ' s'
 
@@ -180,7 +181,7 @@ subroutine run_ccsd_space_orb
     write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
     print*,''
   endif
-  
+
   print*,'Reference determinant:'
   call print_det(det,N_int)
 
@@ -211,8 +212,8 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy)
   !$omp default(none)
   e = 0d0
   !$omp do
-  do i = 1, nO
-    do a = 1, nV
+  do a = 1, nV
+    do i = 1, nO
       e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
     enddo
   enddo
@@ -232,7 +233,7 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy)
   energy = energy + e
   !$omp end critical
   !$omp end parallel
-  
+
 end
 
 ! Tau
@@ -250,12 +251,12 @@ subroutine update_tau_space(nO,nV,t1,t2,tau)
 
   ! internal
   integer                       :: i,j,a,b
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(nO,nV,tau,t2,t1) &
   !$OMP PRIVATE(i,j,a,b) &
   !$OMP DEFAULT(NONE)
-  !$OMP DO collapse(3)
+  !$OMP DO
   do b = 1, nV
     do a = 1, nV
       do j = 1, nO
@@ -267,7 +268,7 @@ subroutine update_tau_space(nO,nV,t1,t2,tau)
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-    
+
 end
 
 ! R1
@@ -283,7 +284,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
 
   ! out
   double precision, intent(out) :: r1(nO,nV), max_r1
-  
+
   ! internal
   integer                       :: u,i,j,beta,a,b
 
@@ -304,7 +305,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
   ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO)
   ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV)
   ! cc_space_f_vo(a,i) * t1(u,a)    -> X1(nO,nO), O(nO*nO*nV)
-  ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV)  
+  ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV)
   !do beta = 1, nV
   !  do u = 1, nO
   !    do i = 1, nO
@@ -324,7 +325,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
   call dgemm('T','N', nO, nV, nO, &
              1d0, X_oo, size(X_oo,2), &
                   t1  , size(t1,1), &
-             1d0, r1  , size(r1,1)) 
+             1d0, r1  , size(r1,1))
   deallocate(X_oo)
 
   ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a)
@@ -373,7 +374,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
   !$omp shared(nO,nV,X_voov,t2,t1) &
   !$omp private(u,beta,i,a) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do beta = 1, nV
     do u = 1, nO
       do i = 1, nO
@@ -385,16 +386,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   call dgemv('T', nV*nO, nO*nV, &
              1d0, X_voov, size(X_voov,1) * size(X_voov,2), &
                   H_vo  , 1, &
              1d0, r1    , 1)
-  
+
   deallocate(X_voov)
 
   ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a)
-  ! <=> 
+  ! <=>
   ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta)
   !do beta = 1, nV
   !  do u = 1, nO
@@ -412,7 +413,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
   !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) &
   !$omp private(u,beta,i,a) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do beta = 1, nV
     do u = 1, nO
       do a = 1, nv
@@ -429,17 +430,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
              1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
                   t1     , 1, &
              1d0, r1     , 1)
-  
+
   deallocate(X_ovov)
 
-  ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b)  
-  ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) 
+  ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b)
+  ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i)
   !do beta = 1, nV
   !  do u = 1, nO
   !    do i = 1, nO
   !      do a = 1, nV
   !        do b = 1, nV
-  !          r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b)  
+  !          r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b)
   !        enddo
   !      enddo
   !    enddo
@@ -452,24 +453,24 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
   !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) &
   !$omp private(b,beta,i,a) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do beta = 1, nV
     do i = 1, nO
       do b = 1, nV
         do a = 1, nV
-          W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)  
+          W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)
         enddo
       enddo
     enddo
   enddo
   !$omp end do nowait
 
-  !$omp do collapse(3)
-  do i = 1, nO
-    do b = 1, nV
-      do a = 1, nV
-        do u = 1, nO
-          T_vvoo(a,b,i,u) = tau(i,u,a,b)  
+  !$omp do
+  do u = 1, nO
+    do i = 1, nO
+      do b = 1, nV
+        do a = 1, nV
+          T_vvoo(a,b,i,u) = tau(i,u,a,b)
         enddo
       enddo
     enddo
@@ -481,17 +482,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
              1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), &
                   W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), &
              1d0, r1    , size(r1,1))
-  
+
   deallocate(W_vvov,T_vvoo)
 
-  ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) 
-  ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) 
+  ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta)
+  ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta)
   !do beta = 1, nV
   !  do u = 1, nO
   !    do i = 1, nO
   !      do j = 1, nO
   !        do a = 1, nV
-  !          r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) 
+  !          r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta)
   !        enddo
   !      enddo
   !    enddo
@@ -504,8 +505,8 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
   !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) &
   !$omp private(u,a,i,j) &
   !$omp default(none)
-  !$omp do collapse(3)
   do u = 1, nO
+    !$omp do
     do a = 1, nV
       do j = 1, nO
         do i = 1, nO
@@ -513,23 +514,21 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do
   !$omp end parallel
 
   call dgemm('T','N', nO, nV, nO*nO*nV, &
              -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), &
                    tau   , size(tau,1) * size(tau,2) * size(tau,3), &
               1d0, r1    , size(r1,1))
-  
+
   deallocate(W_oovo)
 
   max_r1 = 0d0
   do a = 1, nV
     do i = 1, nO
-      if (dabs(r1(i,a)) > max_r1) then
-        max_r1 = dabs(r1(i,a))
-      endif
+      max_r1 = max(dabs(r1(i,a)), max_r1)
     enddo
   enddo
 
@@ -538,7 +537,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
   !$omp shared(nO,nV,r1) &
   !$omp private(a,i) &
   !$omp default(none)
-  !$omp do 
+  !$omp do
   do a = 1, nV
     do i = 1, nO
       r1(i,a) = -r1(i,a)
@@ -546,7 +545,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
 end
 
 ! H_oo
@@ -578,7 +577,7 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo)
   !        enddo
   !      enddo
   !    enddo
-  !    
+  !
   !  enddo
   !enddo
 
@@ -601,8 +600,8 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo)
   call dgemm('N','T', nO, nO, nO*nV*nV,       &
              1d0, tau     , size(tau,1),      &
                   cc_space_w_oovv, size(cc_space_w_oovv,1), &
-             1d0, H_oo    , size(H_oo,1))     
-  
+             1d0, H_oo    , size(H_oo,1))
+
 end
 
 ! H_vv
@@ -633,7 +632,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv)
   !        enddo
   !      enddo
   !    enddo
-  !    
+  !
   !  enddo
   !enddo
 
@@ -656,13 +655,13 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv)
 
   ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b)
   ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta)
-  
-  !$omp do collapse(3)
+
+  !$omp do
   do beta = 1, nV
     do j = 1, nO
       do i = 1, nO
         do b = 1, nV
-          tmp_tau(b,i,j,beta) = tau(i,j,beta,b) 
+          tmp_tau(b,i,j,beta) = tau(i,j,beta,b)
         enddo
       enddo
     enddo
@@ -676,7 +675,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv)
               1d0, H_vv    , size(H_vv,1))
 
   deallocate(tmp_tau)
-  
+
 end
 
 ! H_vo
@@ -704,7 +703,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo)
   !        H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b)
   !      enddo
   !    enddo
-  !    
+  !
   !  enddo
   !enddo
 
@@ -727,7 +726,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo)
   ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b)
   ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b)
 
-  !$omp do collapse(3)
+  !$omp do
   do b = 1, nV
     do j = 1, nO
       do i = 1, nO
@@ -746,7 +745,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo)
              1d0, H_vo, 1)
 
   deallocate(w)
-  
+
 end
 
 ! R2
@@ -765,13 +764,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
 
   ! internal
   double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:)
-  double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:)
+  double precision, allocatable :: A1(:,:,:,:), B1_gam(:,:,:)
   integer                       :: u,v,i,j,beta,gam,a,b
 
   allocate(g_occ(nO,nO), g_vir(nV,nV))
   allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV))
   allocate(A1(nO,nO,nO,nO))
-  
+
   call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ)
   call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir)
   call compute_A1(nO,nV,t1,t2,tau,A1)
@@ -787,7 +786,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp shared(nO,nV,r2,cc_space_v_oovv) &
   !$omp private(u,v,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do v = 1, nO
@@ -835,13 +834,18 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !  enddo
   !enddo
 
-  allocate(B1(nV,nV,nV,nV))
-  call compute_B1(nO,nV,t1,t2,B1)
-  call dgemm('N','N',nO*nO,nV*nV,nV*nV, &
-             1d0, tau, size(tau,1) * size(tau,2), &
-                  B1 , size(B1,1) * size(B1,2), &
-             1d0, r2, size(r2,1) * size(r2,2))
-  deallocate(B1)
+!  allocate(B1(nV,nV,nV,nV))
+!  call compute_B1(nO,nV,t1,t2,B1)
+  allocate(B1_gam(nV,nV,nV))
+  do gam=1,nV
+    call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam)
+    call dgemm('N','N',nO*nO,nV,nV*nV, &
+                1d0, tau, size(tau,1) * size(tau,2), &
+                     B1_gam        , size(B1_gam,1) * size(B1_gam,2), &
+                1d0, r2(1,1,1,gam), size(r2,1) * size(r2,2))
+  enddo
+  deallocate(B1_gam)
+
 
   !do gam = 1, nV
   !  do beta = 1, nV
@@ -863,7 +867,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp shared(nO,nV,t2,X_oovv) &
   !$omp private(u,v,gam,a) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do a = 1, nV
     do gam = 1, nV
       do v = 1, nO
@@ -875,7 +879,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   call dgemm('N','N',nO*nO*nV,nV,nV, &
              1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), &
                   g_vir, size(g_vir,1), &
@@ -885,7 +889,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp shared(nO,nV,r2,Y_oovv) &
   !$omp private(u,v,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do v = 1, nO
@@ -921,7 +925,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp shared(nO,nV,r2,X_oovv) &
   !$omp private(u,v,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do v = 1, nO
@@ -957,7 +961,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) &
   !$omp private(u,a,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do u = 1, nO
@@ -979,7 +983,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp shared(nO,nV,r2,Y_oovv) &
   !$omp private(u,v,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do v = 1, nO
@@ -991,7 +995,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   !do gam = 1, nV
   !  do beta = 1, nV
   !    do v = 1, nO
@@ -1009,13 +1013,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !enddo
   double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:)
   allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV))
-  
+
   !$omp parallel &
   !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) &
   !$omp private(u,v,gam,i) &
   !$omp default(none)
-  !$omp do collapse(3)
   do i = 1, nO
+    !$omp do
     do gam = 1, nV
       do u = 1, nO
         do a = 1, nV
@@ -1023,8 +1027,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do
   !$omp end parallel
 
   call dgemm('N','N',nV*nO*nV,nV,nO, &
@@ -1036,12 +1040,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
              1d0, t1, size(t1,1), &
                   Y_vovv, size(Y_vovv,1), &
              0d0, X_oovv, size(X_oovv,1))
-  
+
   !$omp parallel &
   !$omp shared(nO,nV,r2,X_oovv) &
   !$omp private(u,v,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do v = 1, nO
@@ -1055,7 +1059,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp end parallel
 
   deallocate(X_vovo,Y_vovv)
-  
+
   !do gam = 1, nV
   !  do beta = 1, nV
   !    do v = 1, nO
@@ -1079,7 +1083,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp shared(nO,nV,r2,X_oovv) &
   !$omp private(u,v,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do v = 1, nO
@@ -1092,7 +1096,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp end do
   !$omp end parallel
 
-  
+
   !do gam = 1, nV
   !  do beta = 1, nV
   !    do v = 1, nO
@@ -1111,13 +1115,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
 
   double precision, allocatable :: Y_oovo(:,:,:,:)
   allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO))
-  
+
   !$omp parallel &
   !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) &
   !$omp private(a,v,gam,i) &
   !$omp default(none)
-  !$omp do collapse(3)
   do i = 1, nO
+    !$omp do
     do gam = 1, nV
       do v = 1, nO
         do a = 1, nV
@@ -1125,8 +1129,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do
   !$omp end parallel
 
   call dgemm('N','N',nO,nO*nV*nO,nV, &
@@ -1138,12 +1142,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
              1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), &
                   t1    , size(t1,1), &
              0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
-  
+
   !$omp parallel &
   !$omp shared(nO,nV,r2,X_oovv) &
   !$omp private(u,v,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do v = 1, nO
@@ -1155,7 +1159,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   deallocate(X_vovo,Y_oovo)
 
   !do gam = 1, nV
@@ -1182,19 +1186,19 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) &
   !$omp private(u,v,gam,beta,i,a) &
   !$omp default(none)
-  !$omp do collapse(3)
   do i = 1, nO
+    !$omp do
     do a = 1, nV
       do beta = 1, nV
         do u = 1, nO
-          X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta))
+          X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta))
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do nowait
 
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do v = 1, nO
       do i = 1, nO
@@ -1206,17 +1210,17 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   call dgemm('N','N', nO*nV,nO*nV,nV*nO, &
              1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), &
                   Y_voov, size(Y_voov,1) * size(Y_voov,2), &
              0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2))
-  
+
   !$omp parallel &
   !$omp shared(nO,nV,r2,Z_ovov) &
   !$omp private(u,v,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do v = 1, nO
@@ -1228,9 +1232,9 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   deallocate(X_ovvo,Y_voov)
-  
+
   !do gam = 1, nV
   !  do beta = 1, nV
   !    do v = 1, nO
@@ -1252,7 +1256,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) &
   !$omp private(u,a,i,beta,gam) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do beta = 1, nV
     do u = 1, nO
       do a = 1, nV
@@ -1264,7 +1268,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   enddo
   !$omp end do nowait
 
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do v = 1, nO
       do a = 1, nV
@@ -1281,12 +1285,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
              1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
                   Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
              0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2))
-  
+
   !$omp parallel &
   !$omp shared(nO,nV,r2,Z_ovov) &
   !$omp private(u,v,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do v = 1, nO
@@ -1298,7 +1302,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   !do gam = 1, nV
   !  do beta = 1, nV
   !    do v = 1, nO
@@ -1319,7 +1323,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) &
   !$omp private(u,v,gam,beta,i,a) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do a = 1, nV
     do i = 1, nO
       do gam = 1, nV
@@ -1331,7 +1335,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   enddo
   !$omp end do nowait
 
-  !$omp do collapse(3)
+  !$omp do
   do beta = 1, nV
     do v = 1, nO
       do a = 1, nV
@@ -1343,17 +1347,17 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   call dgemm('N','N',nO*nV,nO*nV,nO*nV, &
              1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
                   Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), &
              0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2))
-  
+
   !$omp parallel &
   !$omp shared(nO,nV,r2,Z_ovov) &
   !$omp private(u,v,gam,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do v = 1, nO
@@ -1367,13 +1371,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   !$omp end parallel
 
   deallocate(X_ovov,Y_ovov,Z_ovov)
-  
+
   ! Change the sign for consistency with the code in spin orbitals
   !$omp parallel &
   !$omp shared(nO,nV,r2) &
   !$omp private(i,j,a,b) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do b = 1, nV
     do a = 1, nV
       do j = 1, nO
@@ -1385,22 +1389,20 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   max_r2 = 0d0
   do b = 1, nV
     do a = 1, nV
       do j = 1, nO
         do i = 1, nO
-          if (dabs(r2(i,j,a,b)) > max_r2) then
-            max_r2 = dabs(r2(i,j,a,b))
-          endif
+          max_r2 = max(r2(i,j,a,b), max_r2)
         enddo
       enddo
     enddo
   enddo
 
   deallocate(g_occ,g_vir,J1,K1,A1)
-  
+
 end
 
 ! A1
@@ -1429,12 +1431,12 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1)
   !          A1(u,v,i,j) = A1(u,v,i,j) &
   !          + cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
   !          + cc_space_v_vooo(a,v,i,j) * t1(u,a)
-  !          
+  !
   !          do b = 1, nV
   !            A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
-  !          enddo  
+  !          enddo
   !        enddo
-  !        
+  !
   !      enddo
   !    enddo
   !  enddo
@@ -1442,13 +1444,13 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1)
 
   double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:)
   allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO))
-  
+
   ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
   !$omp parallel &
   !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) &
   !$omp private(u,v,i,j) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do collapse(2)
   do j = 1, nO
     do i = 1, nO
       do v = 1, nO
@@ -1462,7 +1464,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1)
 
   ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
 
-  !$omp do collapse(3)
+  !$omp do collapse(2)
   do j = 1, nO
     do i = 1, nO
       do u = 1, nO
@@ -1484,7 +1486,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1)
   !$omp shared(nO,nV,A1,Y_oooo) &
   !$omp private(u,v,i,j) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do collapse(2)
   do j = 1, nO
     do i = 1, nO
       do v = 1, nO
@@ -1496,7 +1498,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   deallocate(X_vooo,Y_oooo)
 
   ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a)
@@ -1510,11 +1512,95 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1)
              1d0, tau     , size(tau,1) * size(tau,2), &
                   cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
              1d0, A1      , size(A1,1) * size(A1,2))
-   
+
 end
 
 ! B1
 
+subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV,gam
+  double precision, intent(in)  :: t1(nO, nV)
+  double precision, intent(in)  :: t2(nO, nO, nV, nV)
+  double precision, intent(out) :: B1(nV, nV, nV)
+
+  integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta
+
+!  do beta = 1, nV
+!    do b = 1, nV
+!      do a = 1, nV
+!        B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam)
+!
+!        do i = 1, nO
+!          B1(a,b,beta) = B1(a,b,beta) &
+!          - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) &
+!          - cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
+!        enddo
+!
+!      enddo
+!    enddo
+!  enddo
+
+  double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:)
+  allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV))
+!  ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam)
+  !$omp parallel &
+  !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) &
+  !$omp private(a,b,beta) &
+  !$omp default(none)
+  !$omp do
+    do beta = 1, nV
+      do b = 1, nV
+        do a = 1, nV
+          B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam)
+        enddo
+      enddo
+    enddo
+  !$omp end do nowait
+  do i = 1, nO
+    !$omp do
+      do b = 1, nV
+        do a = 1, nV
+          X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam)
+        enddo
+      enddo
+    !$omp end do nowait
+  enddo
+  !$omp end parallel
+
+!  ! B1(a,b,beta) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) &
+  call dgemm('N','N', nV*nV*nV, 1, nO, &
+             -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), &
+                   t1(1,gam), size(t1,1), &
+              1d0, B1      , size(B1,1) * size(B1,2) * size(B1,3))
+
+  ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
+  call dgemm('N','N', nV*nV, nV, nO, &
+             -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2), &
+                   t1    , size(t1,1), &
+              0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2))
+
+  !$omp parallel &
+  !$omp shared(nV,B1,Y_vvvv,gam) &
+  !$omp private(a,b,beta) &
+  !$omp default(none)
+  !$omp do
+  do beta = 1, nV
+    do b = 1, nV
+      do a = 1, nV
+        B1(a,b,beta) = B1(a,b,beta) + Y_vvvv(a,b,beta)
+      enddo
+    enddo
+  enddo
+  !$omp end do
+  !$omp end parallel
+
+  deallocate(X_vvvo,Y_vvvv)
+
+end
+
 subroutine compute_B1(nO,nV,t1,t2,B1)
 
   implicit none
@@ -1532,28 +1618,28 @@ subroutine compute_B1(nO,nV,t1,t2,B1)
   !  do beta = 1, nV
   !    do b = 1, nV
   !      do a = 1, nV
-  !        B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) 
+  !        B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam)
 
   !        do i = 1, nO
   !          B1(a,b,beta,gam) = B1(a,b,beta,gam) &
   !          - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) &
   !          - cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
   !        enddo
-  !        
+  !
   !      enddo
   !    enddo
   !  enddo
   !enddo
-  
+
   double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:)
   allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV))
 
-  ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) 
+  ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam)
   !$omp parallel &
   !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) &
   !$omp private(a,b,beta,gam) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do b = 1, nV
@@ -1564,8 +1650,8 @@ subroutine compute_B1(nO,nV,t1,t2,B1)
     enddo
   enddo
   !$omp end do nowait
-  !$omp do collapse(3)
   do i = 1, nO
+    !$omp do
     do gam = 1, nV
       do b = 1, nV
         do a = 1, nV
@@ -1573,17 +1659,17 @@ subroutine compute_B1(nO,nV,t1,t2,B1)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do
   !$omp end parallel
-  
+
   ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) &
   call dgemm('N','N', nV*nV*nV, nV, nO, &
              -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), &
                    t1      , size(t1,1), &
               1d0, B1      , size(B1,1) * size(B1,2) * size(B1,3))
 
-  
+
   ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta)
   call dgemm('N','N', nV*nV*nV, nV, nO, &
              -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), &
@@ -1594,7 +1680,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1)
   !$omp shared(nV,B1,Y_vvvv) &
   !$omp private(a,b,beta,gam) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do gam = 1, nV
     do beta = 1, nV
       do b = 1, nV
@@ -1606,9 +1692,9 @@ subroutine compute_B1(nO,nV,t1,t2,B1)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   deallocate(X_vvvo,Y_vvvv)
-  
+
 end
 
 ! g_occ
@@ -1629,14 +1715,14 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ)
   !do i = 1, nO
   !  do u = 1, nO
   !    g_occ(u,i) = H_oo(u,i)
-  !    
+  !
   !    do a = 1, nV
   !      g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a)
-  !      
+  !
   !      do j = 1, nO
   !        g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a)
   !      enddo
-  !         
+  !
   !    enddo
   !  enddo
   !enddo
@@ -1657,8 +1743,8 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ)
     enddo
   enddo
   !$omp end do
-  
-  !$omp do collapse(1)
+
+  !$omp do
   do i = 1, nO
     do j = 1, nO
       do a = 1, nV
@@ -1670,7 +1756,7 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
 end
 
 ! g_vir
@@ -1691,23 +1777,23 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir)
   !do beta = 1, nV
   !  do a = 1, nV
   !    g_vir(a,beta) = H_vv(a,beta)
-  !    
+  !
   !    do i = 1, nO
   !      g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta)
-  !      
+  !
   !      do b = 1, nV
   !        g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b)
   !      enddo
-  !         
+  !
   !    enddo
   !  enddo
   !enddo
-  
+
   call dgemm('N','N',nV,nV,nO, &
              -1d0, cc_space_f_vo , size(cc_space_f_vo,1), &
                    t1   , size(t1,1), &
               0d0, g_vir, size(g_vir,1))
-      
+
   !$omp parallel &
   !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) &
   !$omp private(i,b,a,beta) &
@@ -1720,7 +1806,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir)
   enddo
   !$omp end do
 
-  !$omp do collapse(1)
+  !$omp do
   do beta = 1, nV
     do i = 1, nO
       do b = 1, nV
@@ -1732,7 +1818,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
 end
 
 ! J1
@@ -1765,7 +1851,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
 
   !        do b = 1, nV
   !          J1(u,a,beta,i) = J1(u,a,beta,i) &
-  !          + cc_space_v_vvvo(b,a,beta,i) * t1(u,b)    
+  !          + cc_space_v_vvvo(b,a,beta,i) * t1(u,b)
   !        enddo
 
   !        do j = 1, nO
@@ -1775,7 +1861,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
   !           + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b)
   !          enddo
   !        enddo
-  !        
+  !
   !      enddo
   !    enddo
   !  enddo
@@ -1783,13 +1869,13 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
 
   double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:)
   allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV))
-  
+
   !$omp parallel &
   !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) &
   !$omp private(i,j,a,u,beta) &
   !$omp default(none)
-  !$omp do collapse(3)
   do i = 1, nO
+    !$omp do
     do beta = 1, nV
       do a = 1, nV
         do u = 1, nO
@@ -1797,10 +1883,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do nowait
 
-  !$omp do collapse(3)
+  !$omp do collapse(2)
   do j = 1, nO
     do i = 1, nO
       do a = 1, nV
@@ -1812,7 +1898,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
   enddo
   !$omp end do
   !$omp end parallel
-  
+
   call dgemm('N','N',nO*nV*nO,nV,nO, &
             -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), &
                   t1    , size(t1,1), &
@@ -1822,8 +1908,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
   !$omp shared(nO,nV,J1,Y_ovov) &
   !$omp private(i,beta,a,u) &
   !$omp default(none)
-  !$omp do collapse(3)
   do i = 1, nO
+    !$omp do
     do beta = 1, nV
       do a = 1, nV
         do u = 1, nO
@@ -1831,8 +1917,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do
   !$omp end parallel
   deallocate(X_ovoo)
 
@@ -1849,7 +1935,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
   !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) &
   !$omp private(i,beta,a,u,b,j) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do b = 1, nV
     do j = 1, nO
       do beta = 1, nV
@@ -1861,7 +1947,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
   enddo
   !$omp end do nowait
 
-  !$omp do collapse(3)
+  !$omp do
   do b = 1, nV
     do j = 1, nO
       do i = 1, nO
@@ -1886,8 +1972,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
   !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) &
   !$omp private(i,beta,a,u,j,b) &
   !$omp default(none)
-  !$omp do collapse(3)
   do i = 1, nO
+    !$omp do
     do beta = 1, nV
       do a = 1, nV
         do u = 1, nO
@@ -1895,12 +1981,12 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do nowait
-  
+
   !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b)
-  !$omp do collapse(3)
   do j = 1, nO
+    !$omp do
     do b = 1, nV
       do i = 1, nO
         do a = 1, nV
@@ -1908,11 +1994,11 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do nowait
-  
-  !$omp do collapse(3)
+
   do j = 1, nO
+    !$omp do
     do b = 1, nV
       do beta = 1, nV
         do u = 1, nO
@@ -1920,10 +2006,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do
   !$omp end parallel
-  
+
   call dgemm('N','T',nO*nV,nV*nO,nV*nO, &
              1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), &
                   Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), &
@@ -1933,8 +2019,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
   !$omp shared(nO,nV,J1,Z_ovvo) &
   !$omp private(i,beta,a,u) &
   !$omp default(none)
-  !$omp do collapse(3)
   do i = 1, nO
+    !$omp do
     do beta = 1, nV
       do a = 1, nV
         do u = 1, nO
@@ -1942,12 +2028,12 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do
   !$omp end parallel
 
-  deallocate(X_ovvo,Z_ovvo,Y_ovov)  
-  
+  deallocate(X_ovvo,Z_ovvo,Y_ovov)
+
 end
 
 ! K1
@@ -1982,7 +2068,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1)
 
   !        do b = 1, nV
   !          K1(u,a,i,beta) = K1(u,a,i,beta) &
-  !          + cc_space_v_vvov(b,a,i,beta) * t1(u,b)    
+  !          + cc_space_v_vvov(b,a,i,beta) * t1(u,b)
   !        enddo
 
   !        do j = 1, nO
@@ -1991,19 +2077,19 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1)
   !           - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta))
   !          enddo
   !        enddo
-  !        
+  !
   !      enddo
   !    enddo
   !  enddo
   !enddo
 
   allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO))
-  
+
   !$omp parallel &
   !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) &
   !$omp private(i,beta,a,u,j,b) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
   do beta = 1, nV
     do i = 1, nO
       do a = 1, nV
@@ -2015,8 +2101,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1)
   enddo
   !$omp end do nowait
 
-  !$omp do collapse(3)
   do i = 1, nO
+    !$omp do
     do a = 1, nV
       do j = 1, nO
         do b = 1, nV
@@ -2024,11 +2110,11 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1)
         enddo
       enddo
     enddo
+    !$omp end do nowait
   enddo
-  !$omp end do nowait
 
-  !$omp do collapse(3)
   do j = 1, nO
+    !$omp do
     do b = 1, nV
       do beta = 1, nV
         do u = 1, nO
@@ -2036,8 +2122,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1)
         enddo
       enddo
     enddo
+    !$omp end do
   enddo
-  !$omp end do
   !$omp end parallel
 
   call dgemm('N','N',nO*nV*nO,nV,nO, &
@@ -2060,7 +2146,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1)
   !$omp shared(nO,nV,K1,Z) &
   !$omp private(i,beta,a,u) &
   !$omp default(none)
-  !$omp do collapse(3)
+  !$omp do
    do beta = 1, nV
     do i = 1, nO
       do a = 1, nV
@@ -2074,5 +2160,5 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1)
   !$omp end parallel
 
   deallocate(X,Y,Z)
-  
+
 end
diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f
index 3b762a06..1aab6bd7 100644
--- a/src/ccsd/ccsd_t_space_orb_abc.irp.f
+++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f
@@ -10,51 +10,43 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
   double precision, intent(in)  :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO)
   double precision, intent(out) :: energy
 
-  double precision, allocatable :: W(:,:,:,:,:,:)
-  double precision, allocatable :: V(:,:,:,:,:,:)
-  double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:)
-  double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:)
-  double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:)
-  double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:)
-  double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:)
+  double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:)
+  double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:)
   integer                       :: i,j,k,l,a,b,c,d
-  double precision              :: e,ta,tb, delta, delta_abc
+  double precision              :: e,ta,tb
 
-  !allocate(W(nV,nV,nV,nO,nO,nO))
-  !allocate(V(nV,nV,nV,nO,nO,nO))
-  allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO))
-  allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO))
-  allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO))
-  allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO))
+  call set_multiple_levels_omp(.False.)
+
+  allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV))
+  allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV))
 
-  ! Temporary arrays
   !$OMP PARALLEL &
-  !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, &
+  !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, &
   !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) &
   !$OMP PRIVATE(a,b,c,d,i,j,k,l) &
   !$OMP DEFAULT(NONE)
 
   !v_vvvo(b,a,d,i) * t2(k,j,c,d) &
-  !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j)
+  !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k)
 
-  !$OMP DO collapse(3)
-  do i = 1, nO
-    do a = 1, nV
-      do b = 1, nV
+  !$OMP DO
+  do a = 1, nV
+    do b = 1, nV
+      do i = 1, nO
         do d = 1, nV
-          X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i)
+          X_vovv(d,i,b,a) = v_vvvo(b,a,d,i)
         enddo
       enddo
     enddo
   enddo
   !$OMP END DO nowait
 
-  !$OMP DO collapse(3)
-  do j = 1, nO
-    do k = 1, nO
-      do c = 1, nV
+  !$OMP DO
+  do c = 1, nV
+    do j = 1, nO
+      do k = 1, nO
         do d = 1, nV
-          T_vvoo(d,c,k,j) = t2(k,j,c,d)
+          T_voov(d,k,j,c) = t2(k,j,c,d)
         enddo
       enddo
     enddo
@@ -62,191 +54,399 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
   !$OMP END DO nowait
 
   !v_vooo(c,j,k,l) * t2(i,l,a,b) &
-  !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) &
+  !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) &
 
-  !$OMP DO collapse(3)
-  do k = 1, nO
-    do j = 1, nO
-      do c = 1, nV
-        do l = 1, nO
-           X_ovoo(l,c,j,k) = v_vooo(c,j,k,l)
-        enddo
-      enddo
-    enddo
-  enddo
-  !$OMP END DO nowait
-
-  !$OMP DO collapse(3)
-  do i = 1, nO
-    do b = 1, nV
-      do a = 1, nV
-        do l = 1, nO
-          T_ovvo(l,a,b,i) = t2(i,l,a,b)
-        enddo
-      enddo
-    enddo
-  enddo
-  !$OMP END DO nowait
-
-  !v_vvoo(b,c,j,k) * t1(i,a) &
-  !X_vvoo(b,c,k,j) * T1_vo(a,i) &
-
-  !$OMP DO collapse(3)
-  do j = 1, nO
+  !$OMP DO
+  do c = 1, nV
     do k = 1, nO
-      do c = 1, nV
-        do b = 1, nV
-          X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k)
+      do j = 1, nO
+        do l = 1, nO
+           X_ooov(l,j,k,c) = v_vooo(c,j,k,l)
         enddo
       enddo
     enddo
   enddo
   !$OMP END DO nowait
 
-  !$OMP DO collapse(1)
-  do i = 1, nO
+  !$OMP DO
+  do b = 1, nV
     do a = 1, nV
-      T_vo(a,i) = t1(i,a)
+      do i = 1, nO
+        do l = 1, nO
+          T_oovv(l,i,a,b) = t2(i,l,a,b)
+        enddo
+      enddo
     enddo
   enddo
-  !$OMP END DO
-  !$OMP END PARALLEL
+  !$OMP END DO nowait
 
-  call wall_time(ta)
-  energy = 0d0
+  !X_oovv(j,k,b,c) * T1_vo(a,i) &
+
+  !$OMP DO
   do c = 1, nV
     do b = 1, nV
-      do a = 1, nV
-        delta_abc = f_v(a) + f_v(b) + f_v(c)
-        call form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc)
-        call form_w_abc(nO,nV,b,c,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_bca)
-        call form_w_abc(nO,nV,c,a,b,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cab)
-        call form_w_abc(nO,nV,c,b,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cba)
-
-        call form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W_abc,V_abc)
-        call form_v_abc(nO,nV,c,b,a,T_vo,X_vvoo,W_cba,V_cba)
-        !$OMP PARALLEL                                               &
-            !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)&
-            !$OMP PRIVATE(i,j,k,e,delta)                             &
-            !$OMP DEFAULT(NONE)
-        e = 0d0
-        !$OMP DO
-        do i = 1, nO
-          do j = 1, nO
-            do k = 1, nO
-              delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc)
-              !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c))  !delta_ooovvv(i,j,k,a,b,c)
-              e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))&
-                  * (V_abc(i,j,k) - V_cba(i,j,k)) * delta
-            enddo
-          enddo
+      do k = 1, nO
+        do j = 1, nO
+          X_oovv(j,k,b,c) = v_vvoo(b,c,j,k)
         enddo
-        !$OMP END DO NOWAIT
-        !$OMP CRITICAL
-        energy = energy + e
-        !$OMP END CRITICAL
-        !$OMP END PARALLEL
       enddo
     enddo
-    call wall_time(tb)
-    write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s'
   enddo
+  !$OMP END DO nowait
 
-  energy = energy / 3d0
+  !$OMP END PARALLEL
 
-  deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo)
-  !deallocate(V,W)
+  double precision, external :: ccsd_t_task_aba
+  double precision, external :: ccsd_t_task_abc
+
+  !$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED)
+  e = 0d0
+  !$OMP DO SCHEDULE(dynamic)
+  do a = 1, nV
+    do b = a+1, nV
+      do c = b+1, nV
+        e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, &
+                        X_ooov,X_oovv,X_vovv,f_o,f_v)
+      enddo
+
+      e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, &
+                      X_ooov,X_oovv,X_vovv,f_o,f_v)
+
+      e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov, &
+                      X_ooov,X_oovv,X_vovv,f_o,f_v)
+
+    enddo
+  enddo
+  !$OMP END DO NOWAIT
+
+  !$OMP CRITICAL
+  energy = energy + e
+  !$OMP END CRITICAL
+
+  !$OMP END PARALLEL
+
+  energy = energy / 3.d0
+
+  deallocate(X_vovv,X_ooov,T_voov,T_oovv)
 end
 
 
-subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc)
+double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,&
+      X_ooov,X_oovv,X_vovv,f_o,f_v) result(e)
+  implicit none
+  integer, intent(in)              :: nO,nV,a,b,c
+  double precision, intent(in)     :: t1(nO,nV), f_o(nO), f_v(nV)
+  double precision, intent(in)     :: X_oovv(nO,nO,nV,nV)
+  double precision, intent(in)     :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV)
+  double precision, intent(in)     :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV)
+
+  double precision :: delta, delta_abc
+  integer  :: i,j,k
+
+  double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:)
+  double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:)
+  double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:)
+  double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:)
+
+  allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), &
+            W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), &
+            V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), &
+            V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) )
+
+  call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb)
+
+  call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb)
+
+  delta_abc = f_v(a) + f_v(b) + f_v(c)
+  e = 0.d0
+
+  do k = 1, nO
+    do j = 1, nO
+      do i = 1, nO
+        delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc)
+        e = e + delta * (                                    &
+            (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) +           &
+            W_bca(i,j,k) - W_bac(i,j,k)  +                   &
+            W_cab(i,j,k) - W_acb(i,j,k)  ) * (V_abc(i,j,k) - V_cba(i,j,k)) +&
+            (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) +           &
+            W_cba(i,j,k) - W_cab(i,j,k)  +                   &
+            W_bac(i,j,k) - W_abc(i,j,k)  ) * (V_acb(i,j,k) - V_bca(i,j,k)) +&
+            (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) +           &
+            W_acb(i,j,k) - W_abc(i,j,k)  +                   &
+            W_cba(i,j,k) - W_bca(i,j,k)  ) * (V_bac(i,j,k) - V_cab(i,j,k)) )
+      enddo
+    enddo
+  enddo
+
+  deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, &
+             V_abc, V_cab, V_bca, V_bac, V_cba, V_acb )
+
+end
+
+double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,&
+      X_ooov,X_oovv,X_vovv,f_o,f_v) result(e)
+  implicit none
+  integer, intent(in)              :: nO,nV,a,b
+  double precision, intent(in)     :: t1(nO,nV), f_o(nO), f_v(nV)
+  double precision, intent(in)     :: X_oovv(nO,nO,nV,nV)
+  double precision, intent(in)     :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV)
+  double precision, intent(in)     :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV)
+
+  double precision :: delta, delta_abc
+  integer  :: i,j,k
+
+  double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:)
+  double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:)
+  double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:)
+  double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:)
+
+  allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), &
+            W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), &
+            V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), &
+            V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) )
+
+  call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb)
+
+  call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb)
+
+  delta_abc = f_v(a) + f_v(b) + f_v(a)
+  e = 0.d0
+
+  do k = 1, nO
+    do j = 1, nO
+      do i = 1, nO
+        delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc)
+        e = e + delta * (                                    &
+               (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + &
+               (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + &
+               (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) )
+
+      enddo
+    enddo
+  enddo
+
+  deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, &
+             V_abc, V_cab, V_bca, V_bac, V_cba, V_acb )
+
+end
+
+subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb)
 
   implicit none
 
   integer, intent(in)           :: nO,nV,a,b,c
-  !double precision, intent(in) :: t2(nO,nO,nV,nV)
-  double precision, intent(in)  :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO)
-  double precision, intent(in)  :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO)
+  double precision, intent(in)  :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV)
+  double precision, intent(in)  :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV)
   double precision, intent(out) :: W_abc(nO,nO,nO)
+  double precision, intent(out) :: W_cba(nO,nO,nO)
+  double precision, intent(out) :: W_bca(nO,nO,nO)
+  double precision, intent(out) :: W_cab(nO,nO,nO)
+  double precision, intent(out) :: W_bac(nO,nO,nO)
+  double precision, intent(out) :: W_acb(nO,nO,nO)
 
   integer :: l,i,j,k,d
+  double precision, allocatable, dimension(:,:,:,:) :: W_ikj
+  double precision, allocatable :: X(:,:,:,:)
 
+  allocate(W_ikj(nO,nO,nO,6))
+  allocate(X(nV,nO,nO,3))
 
-  !$OMP PARALLEL &
-  !$OMP SHARED(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) &
-  !$OMP PRIVATE(i,j,k,d,l) &
-  !$OMP DEFAULT(NONE)
-
-  !$OMP DO collapse(3)
-  do k = 1, nO
-    do j = 1, nO
-      do i = 1, nO
-        W_abc(i,j,k) = 0.d0
-
-        do d = 1, nV
-          W_abc(i,j,k) = W_abc(i,j,k) &
-                 + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) &
-                 + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) &
-                 + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) &
-                 + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) &
-                 + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) &
-                 + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i)
-
-        enddo
-
-        do l = 1, nO
-          W_abc(i,j,k) = W_abc(i,j,k) &
-              - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) &
-              - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj
-              - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik
-              - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij
-              - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj
-              - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik
-        enddo
-
+  do k=1,nO
+    do i=1,nO
+      do d=1,nV
+        X(d,i,k,1) = T_voov(d,k,i,a)
+        X(d,i,k,2) = T_voov(d,k,i,b)
+        X(d,i,k,3) = T_voov(d,k,i,c)
       enddo
     enddo
   enddo
-  !$OMP END DO
-  !$OMP END PARALLEL
+
+!   X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk
+
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO)
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO)
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO)
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO)
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO)
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,c), nV, 0.d0, W_acb, nO)
+
+!   T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k
+
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_bac, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_cba, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_bca, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_cab, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_acb, nO*nO)
 
 
+!   X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji
+
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 1.d0, W_bac, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 1.d0, W_cba, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 1.d0, W_acb, nO*nO)
+
+!   X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj
+
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,3), nV, 1.d0, W_abc, nO)
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, X(1,1,1,3), nV, 1.d0, W_bac, nO)
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,1), nV, 1.d0, W_cba, nO)
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,1), nV, 1.d0, W_bca, nO)
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, X(1,1,1,2), nV, 1.d0, W_cab, nO)
+  call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, X(1,1,1,2), nV, 1.d0, W_acb, nO)
+
+!  T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j
+
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 0.d0, W_ikj(1,1,1,2), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,3), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,4), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,5), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 0.d0, W_ikj(1,1,1,6), nO*nO)
+
+!   T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj(1,1,1,1), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_ikj(1,1,1,2), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,3), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,4), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,5), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_ikj(1,1,1,6), nO*nO)
+
+  deallocate(X)
+
+  allocate(X(nO,nO,nO,3))
+
+  do k=1,nO
+    do j=1,nO
+      do l=1,nO
+        X(l,j,k,1) = X_ooov(l,k,j,a)
+        X(l,j,k,2) = X_ooov(l,k,j,b)
+        X(l,j,k,3) = X_ooov(l,k,j,c)
+      enddo
+    enddo
+  enddo
+
+
+!   - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO)
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X_ooov(1,1,1,c), nO, 1.d0, W_bac, nO)
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X_ooov(1,1,1,a), nO, 1.d0, W_cba, nO)
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X_ooov(1,1,1,a), nO, 1.d0, W_bca, nO)
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X_ooov(1,1,1,b), nO, 1.d0, W_cab, nO)
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X_ooov(1,1,1,b), nO, 1.d0, W_acb, nO)
+
+!   - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,2), nO, 1.d0, W_abc, nO)
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X(1,1,1,1), nO, 1.d0, W_bac, nO)
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,2), nO, 1.d0, W_cba, nO)
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X(1,1,1,3), nO, 1.d0, W_bca, nO)
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,1), nO, 1.d0, W_cab, nO)
+  call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X(1,1,1,3), nO, 1.d0, W_acb, nO)
+
+!   - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_bac, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_cba, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_bca, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_cab, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_acb, nO*nO)
+
+!   - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_bac, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_cba, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_cab, nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_acb, nO*nO)
+
+!   - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,4), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,5), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,6), nO*nO)
+
+!   - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,1), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,2), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,3), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,5), nO*nO)
+  call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,6), nO*nO)
+
+  do k=1,nO
+    do j=1,nO
+      do i=1,nO
+        W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j,1)
+        W_bac(i,j,k) = W_bac(i,j,k) + W_ikj(i,k,j,2)
+        W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,3)
+        W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,4)
+        W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,5)
+        W_acb(i,j,k) = W_acb(i,j,k) + W_ikj(i,k,j,6)
+      enddo
+    enddo
+  enddo
+
+  deallocate(X,W_ikj)
 end
 
 
 ! V_abc
 
-subroutine form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W,V)
+subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb)
 
 implicit none
 
   integer, intent(in)           :: nO,nV,a,b,c
-  !double precision, intent(in)  :: t1(nO,nV)
-  double precision, intent(in)  :: T_vo(nV,nO)
-  double precision, intent(in)  :: X_vvoo(nV,nV,nO,nO)
-  double precision, intent(in)  :: W(nO,nO,nO)
-  double precision, intent(out) :: V(nO,nO,nO)
+  double precision, intent(in)  :: T_ov(nO,nV)
+  double precision, intent(in)  :: X_oovv(nO,nO,nV,nV)
+  double precision, intent(in)  :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO)
+  double precision, intent(in)  :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO)
+  double precision, intent(out) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO)
+  double precision, intent(out) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO)
 
   integer :: i,j,k
 
-  !$OMP PARALLEL &
-  !$OMP SHARED(nO,nV,a,b,c,T_vo,X_vvoo,W,V) &
-  !$OMP PRIVATE(i,j,k) &
-  !$OMP DEFAULT(NONE)
-  !$OMP DO collapse(2)
   do k = 1, nO
     do j = 1, nO
       do i = 1, nO
-        !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) &
-        V(i,j,k) = W(i,j,k) &
-           + X_vvoo(b,c,k,j) * T_vo(a,i) &
-           + X_vvoo(a,c,k,i) * T_vo(b,j) &
-           + X_vvoo(a,b,j,i) * T_vo(c,k)
+        V_abc(i,j,k) = W_abc(i,j,k) &
+           + X_oovv(j,k,b,c) * T_ov(i,a) &
+           + X_oovv(i,k,a,c) * T_ov(j,b) &
+           + X_oovv(i,j,a,b) * T_ov(k,c)
+
+        V_cba(i,j,k) = W_cba(i,j,k) &
+           + X_oovv(j,k,b,a) * T_ov(i,c) &
+           + X_oovv(i,k,c,a) * T_ov(j,b) &
+           + X_oovv(i,j,c,b) * T_ov(k,a)
+
+        V_bca(i,j,k) = W_bca(i,j,k) &
+           + X_oovv(j,k,c,a) * T_ov(i,b) &
+           + X_oovv(i,k,b,a) * T_ov(j,c) &
+           + X_oovv(i,j,b,c) * T_ov(k,a)
+
+        V_cab(i,j,k) = W_cab(i,j,k) &
+           + X_oovv(j,k,a,b) * T_ov(i,c) &
+           + X_oovv(i,k,c,b) * T_ov(j,a) &
+           + X_oovv(i,j,c,a) * T_ov(k,b)
+
+        V_bac(i,j,k) = W_bac(i,j,k) &
+           + X_oovv(j,k,a,c) * T_ov(i,b) &
+           + X_oovv(i,k,b,c) * T_ov(j,a) &
+           + X_oovv(i,j,b,a) * T_ov(k,c)
+
+        V_acb(i,j,k) = W_acb(i,j,k) &
+           + X_oovv(j,k,c,b) * T_ov(i,a) &
+           + X_oovv(i,k,a,b) * T_ov(j,c) &
+           + X_oovv(i,j,a,c) * T_ov(k,b)
+
       enddo
     enddo
   enddo
-  !$OMP END DO
-  !$OMP END PARALLEL
 
 end
 
diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f
new file mode 100644
index 00000000..b669025e
--- /dev/null
+++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f
@@ -0,0 +1,363 @@
+! Main
+subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
+
+  implicit none
+
+  integer, intent(in)           :: nO,nV
+  double precision, intent(in)  :: t1(nO,nV), f_o(nO), f_v(nV)
+  double precision, intent(in)  :: t2(nO,nO,nV,nV)
+  double precision, intent(in)  :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO)
+  double precision, intent(inout) :: energy
+
+  double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:)
+  double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:)
+  integer                       :: i,j,k,l,a,b,c,d
+  double precision              :: e,ta,tb,eccsd
+
+  eccsd = energy
+  call set_multiple_levels_omp(.False.)
+
+  allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV))
+  allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV))
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, &
+  !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) &
+  !$OMP PRIVATE(a,b,c,d,i,j,k,l) &
+  !$OMP DEFAULT(NONE)
+
+  !v_vvvo(b,a,d,i) * t2(k,j,c,d) &
+  !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k)
+
+  !$OMP DO
+  do a = 1, nV
+    do b = 1, nV
+      do i = 1, nO
+        do d = 1, nV
+          X_vovv(d,i,b,a) = v_vvvo(b,a,d,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP DO
+  do c = 1, nV
+    do j = 1, nO
+      do k = 1, nO
+        do d = 1, nV
+          T_voov(d,k,j,c) = t2(k,j,c,d)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !v_vooo(c,j,k,l) * t2(i,l,a,b) &
+  !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) &
+
+  !$OMP DO
+  do c = 1, nV
+    do k = 1, nO
+      do j = 1, nO
+        do l = 1, nO
+           X_ooov(l,j,k,c) = v_vooo(c,j,k,l)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP DO
+  do b = 1, nV
+    do a = 1, nV
+      do i = 1, nO
+        do l = 1, nO
+          T_oovv(l,i,a,b) = t2(i,l,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !X_oovv(j,k,b,c) * T1_vo(a,i) &
+
+  !$OMP DO
+  do c = 1, nV
+    do b = 1, nV
+      do k = 1, nO
+        do j = 1, nO
+          X_oovv(j,k,b,c) = v_vvoo(b,c,j,k)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO nowait
+
+  !$OMP END PARALLEL
+
+  double precision, external :: ccsd_t_task_aba
+  double precision, external :: ccsd_t_task_abc
+!  logical, external :: omp_test_lock
+
+  double precision, allocatable :: memo(:), Pabc(:), waccu(:)
+  integer*8, allocatable :: sampled(:)
+!  integer(omp_lock_kind), allocatable :: lock(:)
+  integer*2       , allocatable :: abc(:,:)
+  integer*8                     :: Nabc, i8
+  integer*8, allocatable :: iorder(:)
+  double precision :: eocc
+  double precision :: norm
+  integer :: kiter, isample
+
+
+  ! Prepare table of triplets (a,b,c)
+
+  Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV
+  allocate (memo(Nabc), sampled(Nabc), Pabc(Nabc), waccu(Nabc))
+  allocate (abc(4,Nabc), iorder(Nabc)) !, lock(Nabc))
+
+!  eocc = 3.d0/dble(nO) * sum(f_o(1:nO))
+  Nabc = 0_8
+  do a = 1, nV
+    do b = a+1, nV
+      do c = b+1, nV
+        Nabc = Nabc + 1_8
+        Pabc(Nabc) = -1.d0/(f_v(a) + f_v(b) + f_v(c))
+        abc(1,Nabc) = a
+        abc(2,Nabc) = b
+        abc(3,Nabc) = c
+      enddo
+
+      Nabc = Nabc + 1_8
+      abc(1,Nabc) = a
+      abc(2,Nabc) = b
+      abc(3,Nabc) = a
+      Pabc(Nabc) = -1.d0/(2.d0*f_v(a) + f_v(b))
+
+      Nabc = Nabc + 1_8
+      abc(1,Nabc) = b
+      abc(2,Nabc) = a
+      abc(3,Nabc) = b
+      Pabc(Nabc) = -1.d0/(f_v(a) + 2.d0*f_v(b))
+    enddo
+  enddo
+
+  do i8=1,Nabc
+   iorder(i8) = i8
+  enddo
+
+  ! Sort triplets in decreasing Pabc
+  call dsort_big(Pabc, iorder, Nabc)
+
+  ! Normalize
+  norm = 0.d0
+  do i8=Nabc,1,-1
+    norm = norm + Pabc(i8)
+  enddo
+  norm = 1.d0/norm
+  do i8=1,Nabc
+    Pabc(i8) = Pabc(i8) * norm
+  enddo
+
+  call i8set_order_big(abc, iorder, Nabc)
+
+
+  ! Cumulative distribution for sampling
+  waccu(Nabc) = 0.d0
+  do i8=Nabc-1,1,-1
+   waccu(i8) = waccu(i8+1) - Pabc(i8+1)
+  enddo
+  waccu(:) = waccu(:) + 1.d0
+
+  logical :: converged, do_comp
+  double precision :: eta, variance, error, sample
+  double precision :: t00, t01
+  integer*8 :: ieta, Ncomputed
+  integer*8, external :: binary_search
+
+  integer :: nbuckets
+  nbuckets = 100
+
+  double precision, allocatable :: wsum(:)
+  allocate(wsum(nbuckets))
+
+  converged = .False.
+  Ncomputed = 0_8
+
+  energy = 0.d0
+  variance = 0.d0
+  memo(:) = 0.d0
+  sampled(:) = -1_8
+
+  integer*8 :: ileft, iright, imin
+  ileft = 1_8
+  iright = Nabc
+  integer*8, allocatable :: bounds(:,:)
+
+  allocate (bounds(2,nbuckets))
+  do isample=1,nbuckets
+    eta = 1.d0/dble(nbuckets) * dble(isample)
+    ieta = binary_search(waccu,eta,Nabc)
+    bounds(1,isample) = ileft
+    bounds(2,isample) = ieta
+    ileft = ieta+1
+    wsum(isample) = sum( Pabc(bounds(1,isample):bounds(2,isample) ) )
+  enddo
+
+  Pabc(:) = 1.d0/Pabc(:)
+
+  print '(A)', ''
+  print '(A)', ' +----------------------+--------------+----------+'
+  print '(A)', ' |      E(CCSD(T))      |   Error      |     %    |'
+  print '(A)', ' +----------------------+--------------+----------+'
+
+
+  call wall_time(t00)
+  imin = 1_8
+  !$OMP PARALLEL                                                     &
+      !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample)                    &
+      !$OMP DEFAULT(SHARED)
+
+  do kiter=1,Nabc
+
+    !$OMP MASTER
+    do while ((imin <= Nabc).and.(sampled(imin)>-1_8))
+      imin = imin+1
+    enddo
+
+    ! Deterministic part
+    if (imin < Nabc) then
+      ieta=imin
+      sampled(ieta) = 0_8
+      a = abc(1,ieta)
+      b = abc(2,ieta)
+      c = abc(3,ieta)
+      Ncomputed += 1_8
+      !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta)
+      if (a/=c) then
+        memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, &
+            X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
+      else
+        memo(ieta) =  ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,  &
+            X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
+      endif
+      !$OMP END TASK
+    endif
+
+    ! Stochastic part
+    call random_number(eta)
+    do isample=1,nbuckets
+      if (imin >= bounds(2,isample)) then
+        cycle
+      endif
+      ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc)
+
+      if (sampled(ieta) == -1_8) then
+        sampled(ieta) = 0_8
+        a = abc(1,ieta)
+        b = abc(2,ieta)
+        c = abc(3,ieta)
+        Ncomputed += 1_8
+        !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta)
+        if (a/=c) then
+          memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, &
+              X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
+        else
+          memo(ieta) =  ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,  &
+              X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0
+        endif
+        !$OMP END TASK
+      endif
+      sampled(ieta) = sampled(ieta)+1_8
+
+    enddo
+
+    call wall_time(t01)
+    if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then
+      t00 = t01
+
+      !$OMP TASKWAIT
+
+      double precision :: ET, ET2
+      double precision :: energy_stoch, energy_det
+      double precision :: scale
+      double precision :: w
+      double precision :: tmp
+      energy_stoch = 0.d0
+      energy_det   = 0.d0
+      norm = 0.d0
+      scale = 1.d0
+      ET = 0.d0
+      ET2 = 0.d0
+
+
+      do isample=1,nbuckets
+        if (imin >= bounds(2,isample)) then
+          energy_det = energy_det + sum(memo(bounds(1,isample):bounds(2,isample)))
+          scale = scale - wsum(isample)
+        else
+          exit
+        endif
+      enddo
+
+      do ieta=bounds(1,isample), Nabc
+          w = dble(max(sampled(ieta),0_8))
+          tmp = w * memo(ieta) * Pabc(ieta)
+          ET = ET + tmp
+          ET2 = ET2 + tmp * memo(ieta) * Pabc(ieta)
+          norm = norm + w
+      enddo
+      norm = norm/scale
+      if (norm > 0.d0) then
+        energy_stoch = ET / norm
+        variance = ET2 / norm - energy_stoch*energy_stoch
+      endif
+
+      energy = energy_det + energy_stoch
+
+      print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc)
+    endif
+    !$OMP END MASTER
+    if (imin >= Nabc) exit
+  enddo
+
+  !$OMP END PARALLEL
+  print '(A)', ' +----------------------+--------------+----------+'
+  print '(A)', ''
+
+  deallocate(X_vovv,X_ooov,T_voov,T_oovv)
+end
+
+
+
+integer*8 function binary_search(arr, key, size)
+    implicit none
+    BEGIN_DOC
+! Searches the key in array arr(1:size) between l_in and r_in, and returns its index
+    END_DOC
+    integer*8 :: size, i, j, mid, l_in, r_in
+    double precision, dimension(size) :: arr(1:size)
+    double precision :: key
+
+    i = 1_8
+    j = size
+
+    do while (j >= i)
+        mid = i + (j - i) / 2
+        if (arr(mid) >= key) then
+            if (mid > 1 .and. arr(mid - 1) < key) then
+                binary_search = mid
+                return
+            end if
+            j = mid - 1
+        else if (arr(mid) < key) then
+            i = mid + 1
+        else
+            binary_search = mid + 1
+            return
+        end if
+    end do
+    binary_search = i
+end function binary_search
+
diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f
index 6f40a809..0705d103 100644
--- a/src/cipsi/selection.irp.f
+++ b/src/cipsi/selection.irp.f
@@ -76,6 +76,8 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset)
 
   double precision, allocatable  :: fock_diag_tmp(:,:)
 
+  if (csubset == 0) return
+
   allocate(fock_diag_tmp(2,mo_num+1))
 
   call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int)
@@ -177,6 +179,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   monoAdo = .true.
   monoBdo = .true.
 
+  if (csubset == 0) return
 
   do k=1,N_int
     hole    (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1))
diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f
index 4c271a4b..77377554 100644
--- a/src/cipsi_tc_bi_ortho/selection.irp.f
+++ b/src/cipsi_tc_bi_ortho/selection.irp.f
@@ -868,7 +868,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
         ! <det|H(j)|psi_0> and transpose
         ! -------------------------------------------
 
-!        call htilde_mu_mat_bi_ortho_tot(det, det, N_int, Hii)
         double precision               :: hmono, htwoe, hthree
         call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii)
         do istate = 1,N_states
@@ -878,8 +877,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
             psi_h_alpha = 0.d0
             alpha_h_psi = 0.d0
             do iii = 1, N_det_selectors
-              call htilde_mu_mat_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha)
-              call htilde_mu_mat_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i)
+              call htilde_mu_mat_bi_ortho_tot_slow(psi_selectors(1,1,iii), det, N_int, i_h_alpha)
+              call htilde_mu_mat_bi_ortho_tot_slow(det, psi_selectors(1,1,iii), N_int, alpha_h_i)
               call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int)
               if(degree == 0)then
                print*,'problem !!!'
diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg
deleted file mode 100644
index 8edeecd0..00000000
--- a/src/cosgtos_ao_int/EZFIO.cfg
+++ /dev/null
@@ -1,19 +0,0 @@
-[ao_expoim_cosgtos]
-type: double precision
-doc: imag part for Exponents for each primitive of each cosGTOs |AO|
-size: (ao_basis.ao_num,ao_basis.ao_prim_num_max)
-interface: ezfio, provider
-
-[use_cosgtos]
-type: logical
-doc: If true, use cosgtos for AO integrals
-interface: ezfio,provider,ocaml
-default: False
-
-[ao_integrals_threshold]
-type: Threshold
-doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero
-interface: ezfio,provider,ocaml
-default: 1.e-15
-ezfio_name: threshold_ao
-
diff --git a/src/cosgtos_ao_int/NEED b/src/cosgtos_ao_int/NEED
deleted file mode 100644
index 932f88a3..00000000
--- a/src/cosgtos_ao_int/NEED
+++ /dev/null
@@ -1,2 +0,0 @@
-ezfio_files
-ao_basis
diff --git a/src/cosgtos_ao_int/README.rst b/src/cosgtos_ao_int/README.rst
deleted file mode 100644
index 01f25d6d..00000000
--- a/src/cosgtos_ao_int/README.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-==============
-cosgtos_ao_int
-==============
-
diff --git a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f
deleted file mode 100644
index d65dfba5..00000000
--- a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f
+++ /dev/null
@@ -1,7 +0,0 @@
-program cosgtos_ao_int
-  implicit none
-  BEGIN_DOC
-! TODO : Put the documentation of the program here
-  END_DOC
-  print *, 'Hello world'
-end
diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f
index b6ec073f..df753449 100644
--- a/src/fci_tc_bi/diagonalize_ci.irp.f
+++ b/src/fci_tc_bi/diagonalize_ci.irp.f
@@ -39,7 +39,7 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
    write(*,'(A28,X,I10,X,100(F16.8,X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc  + pt2_tmp/norm,E_tc  + rpt2_tmp/norm,abs_pt2
    print*,'*****'
   endif
-  psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states)
+  psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) - nuclear_repulsion
   psi_s2(1:N_states) = s2_eigvec_tc_bi_orth(1:N_states)
 
   E_tc  = eigval_right_tc_bi_orth(1)
diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f
index 14d3c696..8b1e6e1c 100644
--- a/src/mo_two_e_ints/cholesky.irp.f
+++ b/src/mo_two_e_ints/cholesky.irp.f
@@ -6,11 +6,42 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num
 
  integer :: k
 
+ call set_multiple_levels_omp(.False.)
+ print *, 'AO->MO Transformation of Cholesky vectors'
  !$OMP PARALLEL DO PRIVATE(k)
  do k=1,cholesky_ao_num
   call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num)
  enddo
  !$OMP END PARALLEL DO
+ print *, ''
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Cholesky vectors in MO basis
+ END_DOC
+
+ integer :: i,j,k
+ double precision, allocatable :: buffer(:,:)
+
+ print *, 'AO->MO Transformation of Cholesky vectors  .'
+ !$OMP PARALLEL PRIVATE(i,j,k,buffer)
+ allocate(buffer(mo_num,mo_num))
+ !$OMP DO SCHEDULE(static)
+ do k=1,cholesky_ao_num
+  call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num)
+  do j=1,mo_num
+    do i=1,mo_num
+      cholesky_mo_transp(k,i,j) = buffer(i,j)
+    enddo
+  enddo
+ enddo
+ !$OMP END DO
+ deallocate(buffer)
+ !$OMP END PARALLEL
+ print *, ''
 
 END_PROVIDER
 
diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f
index 4ffb0134..d807f619 100644
--- a/src/mo_two_e_ints/integrals_3_index.irp.f
+++ b/src/mo_two_e_ints/integrals_3_index.irp.f
@@ -4,24 +4,68 @@
  BEGIN_DOC
  ! big_array_coulomb_integrals(j,i,k)  = <ij|kj> = (ik|jj)
  !
- ! big_array_exchange_integrals(i,j,k) = <ij|jk> = (ij|kj)
+ ! big_array_exchange_integrals(j,i,k) = <ij|jk> = (ij|kj)
  END_DOC
- integer :: i,j,k,l
+ integer :: i,j,k,l,a
  double precision :: get_two_e_integral
  double precision :: integral
 
- do k = 1, mo_num
-  do i = 1, mo_num
-   do j = 1, mo_num
-     l = j
-     integral = get_two_e_integral(i,j,k,l,mo_integrals_map)
-     big_array_coulomb_integrals(j,i,k) = integral
-     l = j
-     integral = get_two_e_integral(i,j,l,k,mo_integrals_map)
-     big_array_exchange_integrals(j,i,k) = integral
+ if (do_ao_cholesky) then
+
+    double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:)
+    allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num))
+    do j=1,mo_num
+      buffer_jj(:,j) = cholesky_mo_transp(:,j,j)
+    enddo
+
+    call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, &
+        cholesky_mo_transp, cholesky_ao_num, &
+        buffer_jj, cholesky_ao_num, 0.d0, &
+        buffer, mo_num*mo_num)
+
+    do k = 1, mo_num
+      do i = 1, mo_num
+        do j = 1, mo_num
+          big_array_coulomb_integrals(j,i,k) = buffer(i,k,j)
+        enddo
+      enddo
+    enddo
+    deallocate(buffer_jj)
+
+    allocate(buffer_jj(mo_num,mo_num))
+
+    do j = 1, mo_num
+
+      call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, &
+        cholesky_mo_transp(1,1,j), cholesky_ao_num, &
+        cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, &
+        buffer_jj, mo_num)
+
+      do k=1,mo_num
+        do i=1,mo_num
+          big_array_exchange_integrals(j,i,k) = buffer_jj(i,k)
+       enddo
+     enddo
+    enddo
+
+    deallocate(buffer_jj)
+
+ else
+
+   do k = 1, mo_num
+     do i = 1, mo_num
+       do j = 1, mo_num
+         l = j
+         integral = get_two_e_integral(i,j,k,l,mo_integrals_map)
+         big_array_coulomb_integrals(j,i,k) = integral
+         l = j
+         integral = get_two_e_integral(i,j,l,k,mo_integrals_map)
+         big_array_exchange_integrals(j,i,k) = integral
+       enddo
+     enddo
    enddo
-  enddo
- enddo
+
+ endif
 
 END_PROVIDER
 
diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f
index b7ef901d..a461504e 100644
--- a/src/mo_two_e_ints/mo_bi_integrals.irp.f
+++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f
@@ -1353,15 +1353,30 @@ END_PROVIDER
   integer                        :: i,j
   double precision               :: get_two_e_integral
 
-  PROVIDE mo_two_e_integrals_in_map
-  mo_two_e_integrals_jj = 0.d0
-  mo_two_e_integrals_jj_exchange = 0.d0
+
+  if (do_ao_cholesky) then
+    do j=1,mo_num
+      do i=1,mo_num
+        !TODO: use dgemm
+        mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j))
+        mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i))
+      enddo
+    enddo
+
+  else
+
+    do j=1,mo_num
+      do i=1,mo_num
+        mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map)
+        mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map)
+      enddo
+    enddo
+
+  endif
 
   do j=1,mo_num
     do i=1,mo_num
-      mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map)
-      mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map)
-      mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j)
+        mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j)
     enddo
   enddo
 
diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f
index cbd0b406..5e99600e 100644
--- a/src/non_h_ints_mu/jast_deriv.irp.f
+++ b/src/non_h_ints_mu/jast_deriv.irp.f
@@ -187,6 +187,19 @@ end function j12_mu
 
 subroutine grad1_j12_mu(r1, r2, grad)
 
+  BEGIN_DOC
+!  gradient of j(mu(r1,r2),r12) form of jastrow. 
+!
+! if mu(r1,r2) = cst ---> j1b_type < 200 and 
+!
+!  d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2)
+!
+! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and 
+!
+! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 )   d/dx1 mu(r1,r2) 
+!
+!                          + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2)
+  END_DOC
   include 'constants.include.F'
 
   implicit none
@@ -515,6 +528,9 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der)
   double precision              :: r(3)
   double precision              :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1)
   double precision              :: dm_tot, tmp1, tmp2, tmp3
+  double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot
+  double precision :: f_rho1, f_rho2, d_drho_f_rho1
+  double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume
 
   if(j1b_type .eq. 200) then
 
@@ -578,8 +594,84 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der)
     mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1))
     mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1))
 
-  else
+  elseif(j1b_type .eq. 202) then
 
+    ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO
+    !
+    ! RHO = rho(r1) + rho(r2)
+    !
+    ! f[rho] = alpha rho^beta + mu0 exp(-rho)
+    !
+    ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) 
+    !                              - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] }
+    !
+    ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) - mu0 exp(-rho(r1))] (d rho(r1) / dx1)
+    !
+    ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1)
+     
+    !!!!!!!!! rho1,rho2,rho1+rho2
+    call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1)
+    rho_tot = rho1 + rho2
+    if(rho_tot.lt.1.d-10)rho_tot = 1.d-10
+    inv_rho_tot = 1.d0/rho_tot
+    ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf * exp(-rho)
+    call get_all_f_rho(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2)
+    d_dx1_f_rho1(1:3)   = d_drho_f_rho1 * grad_rho1(1:3)
+    d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3)
+    nume   = rho1 * f_rho1 + rho2 * f_rho2
+    mu_val = nume * inv_rho_tot
+    mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume)
+  elseif(j1b_type .eq. 203) then
+
+    ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO
+    !
+    ! RHO = rho(r1) + rho(r2)
+    !
+    ! f[rho] = alpha rho^beta + mu0 
+    !
+    ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) 
+    !                              - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] }
+    !
+    ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1)
+    !
+    ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1)
+     
+    !!!!!!!!! rho1,rho2,rho1+rho2
+    call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1)
+    rho_tot = rho1 + rho2
+    if(rho_tot.lt.1.d-10)rho_tot = 1.d-10
+    inv_rho_tot = 1.d0/rho_tot
+    ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf 
+    call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2)
+    d_dx1_f_rho1(1:3)   = d_drho_f_rho1 * grad_rho1(1:3)
+    d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3)
+    nume   = rho1 * f_rho1 + rho2 * f_rho2
+    mu_val = nume * inv_rho_tot
+    mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume)
+  elseif(j1b_type .eq. 204) then
+
+    ! mu(r1,r2) = 1/2 * (f[rho(r1)] + f[rho(r2)]} 
+    !
+    ! f[rho] = alpha rho^beta + mu0 
+    !
+    ! d/dx1 mu(r1,r2) = 1/2 * d/dx1 (rho(r1) f[rho(r1)])
+    !                   
+    ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1)
+    !
+    ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1)
+     
+    !!!!!!!!! rho1,rho2,rho1+rho2
+    call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1)
+    rho_tot = rho1 + rho2
+    if(rho_tot.lt.1.d-10)rho_tot = 1.d-10
+    inv_rho_tot = 1.d0/rho_tot
+    ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf 
+    call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2)
+    d_dx1_f_rho1(1:3)   = d_drho_f_rho1 * grad_rho1(1:3)
+    d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3)
+    mu_val = 0.5d0 * ( f_rho1 + f_rho2)
+    mu_der(1:3) = d_dx_rho_f_rho(1:3) 
+  else
     print *, ' j1b_type = ', j1b_type, 'not implemented yet'
     stop
 
@@ -684,3 +776,76 @@ end function j12_mu_square
 
 ! ---
 
+subroutine f_mu_and_deriv_mu(rho,alpha,mu0,beta,f_mu,d_drho_f_mu)
+ implicit none
+ BEGIN_DOC
+! function giving mu as a function of rho
+!
+! f_mu = alpha * rho**beta + mu0 * exp(-rho)
+!
+! and its derivative with respect to rho d_drho_f_mu
+ END_DOC
+ double precision, intent(in)  :: rho,alpha,mu0,beta
+ double precision, intent(out) :: f_mu,d_drho_f_mu
+ f_mu = alpha * (rho)**beta + mu0 * dexp(-rho)
+ d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - mu0 * dexp(-rho)
+
+end
+
+
+subroutine get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1)
+ implicit none
+ BEGIN_DOC
+! returns the density in r1,r2 and grad_rho at r1
+ END_DOC
+ double precision, intent(in) :: r1(3),r2(3)
+ double precision, intent(out):: grad_rho1(3),rho1,rho2
+ double precision              :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1)
+ call density_and_grad_alpha_beta(r1, dm_a, dm_b, grad_dm_a, grad_dm_b)
+ rho1 = dm_a(1) + dm_b(1)
+ grad_rho1(1:3) = grad_dm_a(1:3,1) + grad_dm_b(1:3,1)
+ call density_and_grad_alpha_beta(r2, dm_a, dm_b, grad_dm_a, grad_dm_b)
+ rho2 = dm_a(1) + dm_b(1)
+end
+
+subroutine get_all_f_rho(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2)
+ implicit none
+ BEGIN_DOC
+! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1))
+ END_DOC
+ double precision, intent(in) :: rho1,rho2,alpha,mu0,beta
+ double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2
+ double precision :: tmp
+ call f_mu_and_deriv_mu(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1)
+ call f_mu_and_deriv_mu(rho2,alpha,mu0,beta,f_rho2,tmp)
+end
+
+
+subroutine get_all_f_rho_simple(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2)
+ implicit none
+ BEGIN_DOC
+! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1))
+ END_DOC
+ double precision, intent(in) :: rho1,rho2,alpha,mu0,beta
+ double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2
+ double precision :: tmp
+ call f_mu_and_deriv_mu_simple(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1)
+ call f_mu_and_deriv_mu_simple(rho2,alpha,mu0,beta,f_rho2,tmp)
+end
+
+subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu)
+ implicit none
+ BEGIN_DOC
+! function giving mu as a function of rho
+!
+! f_mu = alpha * rho**beta + mu0 
+!
+! and its derivative with respect to rho d_drho_f_mu
+ END_DOC
+ double precision, intent(in)  :: rho,alpha,mu0,beta
+ double precision, intent(out) :: f_mu,d_drho_f_mu
+ f_mu = alpha * (rho)**beta + mu0 
+ d_drho_f_mu = alpha * beta * rho**(beta-1.d0) 
+
+end
+
diff --git a/src/non_h_ints_mu/plot_mu_of_r.irp.f b/src/non_h_ints_mu/plot_mu_of_r.irp.f
new file mode 100644
index 00000000..1100cd7c
--- /dev/null
+++ b/src/non_h_ints_mu/plot_mu_of_r.irp.f
@@ -0,0 +1,33 @@
+program plot_mu_of_r
+ implicit none
+ read_wf = .False.
+ touch read_wf 
+ call routine_print
+
+end
+
+
+subroutine routine_print
+ implicit none
+ character*(128) :: output
+ integer :: i_unit_output,getUnitAndOpen
+ output=trim(ezfio_filename)//'.mu_of_r'
+ i_unit_output = getUnitAndOpen(output,'w')
+ integer :: ipoint,nx
+ double precision :: xmax,xmin,r(3),dx
+ double precision :: mu_val, mu_der(3),dm_a,dm_b,grad
+ xmax =  5.D0
+ xmin = -5.D0
+ nx = 10000
+ dx = (xmax - xmin)/dble(nx)
+ r = 0.d0
+ r(1) = xmin
+ do ipoint = 1, nx
+  call mu_r_val_and_grad(r, r, mu_val, mu_der)
+  call dm_dft_alpha_beta_at_r(r,dm_a,dm_b)
+  grad = mu_der(1)**2 + mu_der(2)**2 + mu_der(3)**2 
+  grad = dsqrt(grad)
+  write(i_unit_output,'(100(F16.7,X))')r(1),mu_val,dm_a+dm_b,grad
+  r(1) += dx
+ enddo
+end
diff --git a/src/tc_bi_ortho/dressing_vectors_lr.irp.f b/src/tc_bi_ortho/dressing_vectors_lr.irp.f
index 08913bab..ed663f02 100644
--- a/src/tc_bi_ortho/dressing_vectors_lr.irp.f
+++ b/src/tc_bi_ortho/dressing_vectors_lr.irp.f
@@ -27,7 +27,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta)
 
   i = 1
   j = 1
-  call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+  call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
   call hmat_bi_ortho         (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot)
 
   delta = 0.d0
@@ -39,7 +39,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta)
     do j = 1, ndet
 
       ! < I | Htilde | J >
-      call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+      call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
       ! < I | H | J >
       call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot)
 
@@ -78,7 +78,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta)
 
   i = 1
   j = 1
-  call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+  call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
 
   delta = 0.d0
  !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8)   &
@@ -88,7 +88,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta)
     do j = 1, ndet
 
       ! < I | Htilde | J >
-      call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+      call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
 
       delta(i) = delta(i) + psicoef(j) * htc_tot
     enddo
diff --git a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f
index 3a715b44..6d5c3b21 100644
--- a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f
@@ -2,7 +2,7 @@
  BEGIN_PROVIDER [ double precision, e_tilde_00]
  implicit none
  double precision :: hmono,htwoe,hthree,htot
- call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot)
+ call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot)
  e_tilde_00 = htot
  END_PROVIDER 
 
@@ -18,11 +18,11 @@
  do i = 1, N_det
   call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
   if(degree == 1 .or. degree == 2)then
-   call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
-   call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0)
+   call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+   call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0)
    delta_e = e_tilde_00 - e_i0
    coef_pt1 = htilde_ij / delta_e
-   call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
+   call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
    e_pt2_tc_bi_orth += coef_pt1 * htilde_ij
    if(degree == 1)then
     e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij
@@ -37,7 +37,7 @@
  BEGIN_PROVIDER [ double precision, e_tilde_bi_orth_00]
  implicit none
  double precision :: hmono,htwoe,hthree,htilde_ij
- call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00)
+ call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00)
  e_tilde_bi_orth_00 += nuclear_repulsion
  END_PROVIDER 
 
@@ -57,7 +57,7 @@
  e_corr_double_bi_orth = 0.d0
  do i = 1, N_det
   call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
-  call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
+  call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
   if(degree == 1)then
    e_corr_single_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)
    e_corr_single_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1))
@@ -80,7 +80,7 @@
  do i = 1, N_det
   accu += reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(i,1)
   do j = 1, N_det
-   call htilde_mu_mat_bi_ortho(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
+   call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
    e_tc_left_right += htilde_ij * reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(j,1)
   enddo
  enddo
@@ -99,8 +99,8 @@ BEGIN_PROVIDER [ double precision, coef_pt1_bi_ortho, (N_det)]
   if(degree==0)then
    coef_pt1_bi_ortho(i) = 1.d0
   else
-   call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
-   call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0)
+   call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+   call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0)
    delta_e = e_tilde_00 - e_i0
    coef_pt1 = htilde_ij / delta_e
    coef_pt1_bi_ortho(i)= coef_pt1
diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f
index b7129d36..1d1b26cc 100644
--- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f
+++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f
@@ -1,4 +1,4 @@
-subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze)
+subroutine htc_bi_ortho_calc_tdav_slow(v, u, N_st, sze)
 
   use bitmasks
 
@@ -27,7 +27,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze)
 
   i = 1
   j = 1
-  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
+  call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
 
   v = 0.d0
  !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) &
@@ -36,7 +36,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze)
   do istate = 1, N_st
     do i = 1, sze
       do j = 1, sze
-        call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
+        call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
         v(i,istate) = v(i,istate) + htot * u(j,istate)
       enddo
     enddo 
@@ -45,7 +45,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze)
 
 end 
 
-subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze)
+subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze)
 
   use bitmasks
 
@@ -71,7 +71,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze)
 
   i = 1
   j = 1
-  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
+  call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
 
   v = 0.d0
 
@@ -81,7 +81,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze)
   do istate = 1, N_st
     do i = 1, sze
       do j = 1, sze
-        call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot)
+        call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, htot)
         v(i,istate) = v(i,istate) + htot * u(j,istate)
       enddo
     enddo
diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f
index 0cf3ca87..0c4198a9 100644
--- a/src/tc_bi_ortho/print_tc_wf.irp.f
+++ b/src/tc_bi_ortho/print_tc_wf.irp.f
@@ -49,12 +49,12 @@ subroutine routine
  do i = 1, N_det
   call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
    if(degree == 1 .or. degree == 2)then
-    call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
-    call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0)
+    call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0)
     delta_e = e_tilde_00 - e_i0
     coef_pt1 = htilde_ij / delta_e
  
-    call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
+    call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
     contrib_pt = coef_pt1 * htilde_ij
     e_pt2 += contrib_pt
  
diff --git a/src/tc_bi_ortho/pt2_tc_cisd.irp.f b/src/tc_bi_ortho/pt2_tc_cisd.irp.f
index 50d9dd45..9cb9a600 100644
--- a/src/tc_bi_ortho/pt2_tc_cisd.irp.f
+++ b/src/tc_bi_ortho/pt2_tc_cisd.irp.f
@@ -36,11 +36,11 @@ subroutine routine
  e_corr_abs = 0.d0
  e_corr_pos = 0.d0
  e_corr_neg = 0.d0
- call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) 
+ call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) 
  do i = 2, N_det
-  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) 
-  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) 
-  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) 
+  call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) 
+  call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) 
+  call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) 
   call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i),degree,N_int)
   call get_excitation(psi_det(1,1,1), psi_det(1,1,i),exc,degree,phase,N_int)
   call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f
similarity index 88%
rename from src/tc_bi_ortho/slater_tc_3e.irp.f
rename to src/tc_bi_ortho/slater_tc_3e_slow.irp.f
index 7b73d5f2..6abb6b78 100644
--- a/src/tc_bi_ortho/slater_tc_3e.irp.f
+++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f
@@ -1,23 +1,5 @@
-subroutine provide_all_three_ints_bi_ortho
- implicit none
- BEGIN_DOC
-! routine that provides all necessary three-electron integrals 
- END_DOC
- if(three_body_h_tc)then
-  PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort
-  PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort
-  PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort
-  PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort
- endif
-if(.not.double_normal_ord)then
-  PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort
-  PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort
-else
- PROVIDE normal_two_body_bi_orth
-endif
-end
 
-subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
+subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree)
 
   BEGIN_DOC
   !  diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS
@@ -108,7 +90,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
 end
 
 
-subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree)
 
   BEGIN_DOC
   ! <key_j | H_tilde | key_i> for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS
@@ -203,7 +185,7 @@ end
 
 ! ---
 
-subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree)
 
   BEGIN_DOC
   ! <key_j | H_tilde | key_i> for double excitation ONLY FOR THREE-BODY TERMS  WITH BI ORTHONORMAL ORBITALS
diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f
index a19d4688..3fd2576a 100644
--- a/src/tc_bi_ortho/slater_tc_opt.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt.irp.f
@@ -1,3 +1,26 @@
+subroutine provide_all_three_ints_bi_ortho
+ implicit none
+ BEGIN_DOC
+! routine that provides all necessary three-electron integrals 
+ END_DOC
+ if(three_body_h_tc)then
+  if(three_e_3_idx_term)then
+   PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort
+   PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort
+  endif
+  if(three_e_4_idx_term)then
+   PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort
+   PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort
+  endif
+  if(.not.double_normal_ord.and.three_e_5_idx_term)then
+   PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort
+   PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort
+  elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then
+   PROVIDE normal_two_body_bi_orth
+  endif
+ endif
+end
+
 subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot)
  implicit none
   BEGIN_DOC
diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
index 5a3f9935..531f0141 100644
--- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
@@ -7,11 +7,11 @@
 ! Various component of the TC energy for the reference "HF" Slater determinant
  END_DOC 
  double precision :: hmono, htwoe, htot, hthree
- call diag_htilde_mu_mat_bi_ortho(N_int,HF_bitmask , hmono, htwoe, htot)
+ call diag_htilde_mu_mat_bi_ortho_slow(N_int,HF_bitmask , hmono, htwoe, htot)
  ref_tc_energy_1e = hmono
  ref_tc_energy_2e = htwoe 
  if(three_body_h_tc)then
-  call diag_htilde_three_body_ints_bi_ort(N_int, HF_bitmask, hthree)
+  call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree)
   ref_tc_energy_3e = hthree
  else
   ref_tc_energy_3e = 0.d0
@@ -156,7 +156,7 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
     htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
   enddo
 
-  if(three_body_h_tc.and.elec_num.gt.2)then
+  if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then
    !!!!! 3-e part 
    !! same-spin/same-spin
    do j = 1, na
@@ -243,7 +243,7 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
     htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
   enddo
 
-  if(three_body_h_tc.and.elec_num.gt.2)then
+  if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then
    !!!!! 3-e part 
    !! same-spin/same-spin
    do j = 1, na
diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
index 1b0e43bb..2d6bfb27 100644
--- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
@@ -42,13 +42,13 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe,
    ! opposite spin two-body 
     htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) 
     if(three_body_h_tc.and.elec_num.gt.2)then
-     if(.not.double_normal_ord)then
+     if(.not.double_normal_ord.and.three_e_5_idx_term)then
       if(degree_i>degree_j)then
        call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree)
       else
        call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
       endif
-     elseif(double_normal_ord.and.elec_num.gt.2)then
+     elseif(double_normal_ord)then
       htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)
      endif
     endif
@@ -59,13 +59,13 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe,
    ! exchange terms 
    htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) 
    if(three_body_h_tc.and.elec_num.gt.2)then
-    if(.not.double_normal_ord)then
+    if(.not.double_normal_ord.and.three_e_5_idx_term)then
      if(degree_i>degree_j)then
       call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree)
      else
       call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
      endif
-    elseif(double_normal_ord.and.elec_num.gt.2)then
+    elseif(double_normal_ord)then
       htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)
       htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)
     endif
diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
index 2f9d83bf..7178d6d9 100644
--- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
@@ -106,7 +106,7 @@ subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,h
   htwoe -= buffer_x(i)
  enddo
  hthree = 0.d0
- if (three_body_h_tc.and.elec_num.gt.2)then
+ if (three_body_h_tc.and.elec_num.gt.2.and.three_e_4_idx_term)then
   call three_comp_fock_elem(key_i,h,p,spin,hthree)
  endif
 
diff --git a/src/tc_bi_ortho/slater_tc.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f
similarity index 85%
rename from src/tc_bi_ortho/slater_tc.irp.f
rename to src/tc_bi_ortho/slater_tc_slow.irp.f
index 2c0ae2ca..1833d20f 100644
--- a/src/tc_bi_ortho/slater_tc.irp.f
+++ b/src/tc_bi_ortho/slater_tc_slow.irp.f
@@ -1,7 +1,7 @@
 
 ! ---
 
-subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot)
+subroutine htilde_mu_mat_bi_ortho_tot_slow(key_j, key_i, Nint, htot)
 
   BEGIN_DOC
   ! <key_j | H_tilde | key_i> where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis
@@ -24,14 +24,14 @@ subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot)
   if(degree.gt.2)then
     htot = 0.d0
   else
-    call htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+    call htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
   endif
 
-end subroutine htilde_mu_mat_bi_ortho_tot
+end subroutine htilde_mu_mat_bi_ortho_tot_slow
 
 ! --
 
-subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
 
   BEGIN_DOC
   !
@@ -61,22 +61,22 @@ subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot
   if(degree.gt.2) return
 
   if(degree == 0)then
-    call diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot)
+    call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot)
   else if (degree == 1)then
-    call single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+    call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot)
   else if(degree == 2)then
-    call double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+    call double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot)
   endif
 
   if(three_body_h_tc) then
     if(degree == 2) then
-      if(.not.double_normal_ord) then
-        call double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+      if(.not.double_normal_ord.and.elec_num.gt.2.and.three_e_5_idx_term) then
+        call double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree)
       endif
-    else if(degree == 1) then
-      call single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
-    else if(degree == 0) then
-      call diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
+    else if(degree == 1.and.elec_num.gt.2.and.three_e_4_idx_term) then
+      call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree)
+    else if(degree == 0.and.elec_num.gt.2.and.three_e_3_idx_term) then
+      call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree)
     endif
   endif
 
@@ -89,7 +89,7 @@ end
 
 ! ---
 
-subroutine diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot)
+subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot)
 
   BEGIN_DOC
   !  diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS 
@@ -188,7 +188,7 @@ end
 
 
 
-subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot)
 
   BEGIN_DOC
   ! <key_j | H_tilde | key_i> for double excitation  ONLY FOR ONE- AND TWO-BODY TERMS 
@@ -227,18 +227,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
    return
   endif
 
-!  if(core_tc_op)then
-!   print*,'core_tc_op not already taken into account for bi ortho'
-!   print*,'stopping ...'
-!   stop
-!   do i = 1, Nint
-!    key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1))
-!    key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2))
-!   enddo
-!   call bitstring_to_list_ab(key_i_core, occ, Ne, Nint)
-!  else
    call bitstring_to_list_ab(key_i, occ, Ne, Nint)
-!  endif
   call get_double_excitation(key_i, key_j, exc, phase, Nint)
   call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
 
@@ -246,7 +235,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
    ! opposite spin two-body 
 !   key_j, key_i
     htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) 
-    if(double_normal_ord.and.+Ne(1).gt.2)then
+    if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then
      htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ???
     endif
   else
@@ -255,7 +244,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
    htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1)  
    ! exchange terms 
    htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) 
-   if(double_normal_ord.and.+Ne(1).gt.2)then
+   if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then
     htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ???
     htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ???
    endif
@@ -266,7 +255,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
 end
 
 
-subroutine single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+subroutine single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot)
 
   BEGIN_DOC
   ! <key_j | H_tilde | key_i> for single excitation ONLY FOR ONE- AND TWO-BODY TERMS 
diff --git a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f
index 4ae44148..4c3c0788 100644
--- a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f
+++ b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f
@@ -11,10 +11,10 @@
  allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states))
  dressing_dets = 0.d0
  do i = 1, N_det
-  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
+  call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
   call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
   if(degree == 1 .or. degree == 2)then
-   call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i))
+   call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i))
   endif
  enddo
  reigvec_tc_bi_orth_tmp = 0.d0
@@ -29,7 +29,7 @@
   vec_tmp(istate,istate) = 1.d0
  enddo
  print*,'Diagonalizing the TC CISD '
- call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
+ call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow)
  do i = 1, N_det 
   e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1)
  enddo
@@ -41,8 +41,8 @@
  it = 0
  dressing_dets = 0.d0
   double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:)
-  external                         htc_bi_ortho_calc_tdav
-  external                         htcdag_bi_ortho_calc_tdav
+  external                         htc_bi_ortho_calc_tdav_slow
+  external                         htcdag_bi_ortho_calc_tdav_slow
   logical                       :: converged
  do while (dabs(E_before-E_current).gt.thr)
   it += 1
@@ -66,7 +66,7 @@
   do istate = N_states+1, n_states_diag
    vec_tmp(istate,istate) = 1.d0
   enddo
-  call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
+  call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow)
   print*,'outside Davidson'
   print*,'eigval_tmp(1) = ',eigval_tmp(1)
   do i = 1, N_det 
diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f
index a83d6cd0..fa946d6a 100644
--- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f
+++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f
@@ -207,8 +207,6 @@ end
   else ! n_det > N_det_max_full
 
     double precision, allocatable :: H_jj(:),vec_tmp(:,:)
-    external                         htc_bi_ortho_calc_tdav
-    external                         htcdag_bi_ortho_calc_tdav
     external                         H_tc_u_0_opt
     external                         H_tc_dagger_u_0_opt
     external                         H_tc_s2_dagger_u_0_opt
@@ -217,7 +215,7 @@ end
     allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag))
 
     do i = 1, N_det
-      call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
+      call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
     enddo
 
     print*,'---------------------------------'
@@ -259,7 +257,6 @@ end
     do istate = N_states+1, n_states_diag
       vec_tmp(istate,istate) = 1.d0
     enddo
-    !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
     !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt)
     converged = .False.
     i_it = 0
diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f
index 3353d3e7..ec072531 100644
--- a/src/tc_bi_ortho/tc_hmat.irp.f
+++ b/src/tc_bi_ortho/tc_hmat.irp.f
@@ -9,33 +9,25 @@
  
   implicit none
   integer          :: i, j
-  double precision :: hmono,htwoe,hthree,htot
+  double precision :: htot
 
   PROVIDE N_int
 
   i = 1
   j = 1
-  call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+  call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot)
 
- !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j,hmono, htwoe, hthree, htot) &
+ !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j, htot) &
  !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho)
     do i = 1, N_det
       do j = 1, N_det
         ! < J | Htilde | I >
-        call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+        call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot)
 
-        !print *, ' hmono  = ', hmono
-        !print *, ' htwoe  = ', htwoe
-        !print *, ' hthree = ', hthree
         htilde_matrix_elmt_bi_ortho(j,i) = htot
       enddo
     enddo
  !$OMP END PARALLEL DO
-! print*,'htilde_matrix_elmt_bi_ortho = '
-! do i = 1, min(100,N_det)
-!  write(*,'(100(F16.10,X))')htilde_matrix_elmt_bi_ortho(1:min(100,N_det),i)
-! enddo
-
 
 END_PROVIDER 
 
diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f
index 291c52ef..a7e4d09e 100644
--- a/src/tc_bi_ortho/tc_som.irp.f
+++ b/src/tc_bi_ortho/tc_som.irp.f
@@ -56,8 +56,8 @@ subroutine main()
   U_SOM = 0.d0 
   do i = 1, N_det
     if(i == i_HF) cycle
-    call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1)
-    call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2)
+    call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1)
+    call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2)
     U_SOM += htot_1 * htot_2
   enddo
   U_SOM = 0.5d0 * U_SOM
diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f
index f8f648e8..24bb7017 100644
--- a/src/tc_bi_ortho/tc_utils.irp.f
+++ b/src/tc_bi_ortho/tc_utils.irp.f
@@ -12,7 +12,7 @@ subroutine write_tc_energy()
     do i = 1, N_det
       do j = 1, N_det
         !htot = htilde_matrix_elmt_bi_ortho(i,j)
-        call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot)
+        call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot)
         E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot
         !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot
       enddo
@@ -45,7 +45,7 @@ subroutine write_tc_var()
 
     SIGMA_TC = 0.d0
     do j = 2, N_det
-      call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot)
+      call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot)
       SIGMA_TC = SIGMA_TC + htot * htot
     enddo
 
diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f
index 118e481a..cb0c355c 100644
--- a/src/tc_bi_ortho/test_normal_order.irp.f
+++ b/src/tc_bi_ortho/test_normal_order.irp.f
@@ -35,7 +35,7 @@ subroutine test
      det_i = ref_bitmask
      call do_single_excitation(det_i,h1,p1,s1,i_ok)
      call do_single_excitation(det_i,h2,p2,s2,i_ok)
-     call htilde_mu_mat_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+     call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
      call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
      call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
      hthree *= phase
@@ -67,7 +67,7 @@ do h1 = 1, elec_alpha_num
     if(i_ok.ne.1)cycle
     call do_single_excitation(det_i,h2,p2,s2,i_ok)
     if(i_ok.ne.1)cycle
-    call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
     call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
     call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
     integer :: hh1, pp1, hh2, pp2, ss1, ss2
@@ -103,7 +103,7 @@ do h1 = 1, elec_beta_num
     if(i_ok.ne.1)cycle
     call do_single_excitation(det_i,h2,p2,s2,i_ok)
     if(i_ok.ne.1)cycle
-    call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
     call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
     call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
     call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2)
diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f
index 4debe2e2..1f7bdfda 100644
--- a/src/tc_bi_ortho/test_s2_tc.irp.f
+++ b/src/tc_bi_ortho/test_s2_tc.irp.f
@@ -91,7 +91,7 @@ subroutine routine_test_s2_davidson
  external H_tc_s2_u_0_opt
  allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag))
  do i = 1, N_det
-   call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
+   call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
  enddo
  ! Preparing the left-eigenvector
  print*,'Computing the left-eigenvector '
diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index 6721c285..df86ea65 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -31,7 +31,7 @@ subroutine test_h_u0
   u_0(i) = psi_r_coef_bi_ortho(i,1)
  enddo
  call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right)
- call htc_bi_ortho_calc_tdav (v_0_ref,u_0,N_states,N_det)
+ call htc_bi_ortho_calc_tdav_slow (v_0_ref,u_0,N_states,N_det)
  print*,'difference right '
  accu = 0.d0
  do i = 1, N_det
@@ -42,7 +42,7 @@ subroutine test_h_u0
  do_right = .False.
  v_0_new = 0.d0
  call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right)
- call htcdag_bi_ortho_calc_tdav(v_0_ref_dagger,u_0,N_states,N_det, do_right)
+ call htcdag_bi_ortho_calc_tdav_slow(v_0_ref_dagger,u_0,N_states,N_det, do_right)
  print*,'difference left'
  accu = 0.d0
  do i = 1, N_det
@@ -63,7 +63,7 @@ subroutine test_slater_tc_opt
  i_count = 0.d0
  do i = 1, N_det
   do j = 1,N_det
-   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
    call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot)
    if(dabs(htot).gt.1.d-15)then
      i_count += 1.D0
@@ -99,7 +99,7 @@ subroutine timing_tot
   do j = 1, N_det
 !   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
    i_count += 1.d0
-   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
   enddo
  enddo
  call wall_time(wall1)
@@ -146,7 +146,7 @@ subroutine timing_diag
  do i = 1, N_det
   do j = i,i 
    i_count += 1.d0
-   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
   enddo
  enddo
  call wall_time(wall1)
@@ -183,7 +183,7 @@ subroutine timing_single
    if(degree.ne.1)cycle
    i_count += 1.d0
    call wall_time(wall0)
-   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
    call wall_time(wall1)
    accu += wall1 - wall0
   enddo
@@ -225,7 +225,7 @@ subroutine timing_double
    if(degree.ne.2)cycle
    i_count += 1.d0
    call wall_time(wall0)
-   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
    call wall_time(wall1)
    accu += wall1 - wall0
   enddo
diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f
index ebd43a7a..b7de067f 100644
--- a/src/tc_bi_ortho/test_tc_fock.irp.f
+++ b/src/tc_bi_ortho/test_tc_fock.irp.f
@@ -25,8 +25,7 @@ subroutine test_3e
  implicit none
  double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu
  double precision ::  hmono, htwoe, hthree, htot
- call htilde_mu_mat_bi_ortho(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot)
-! call diag_htilde_three_body_ints_bi_ort(N_int, ref_bitmask, hthree)
+ call htilde_mu_mat_bi_ortho_slow(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot)
  print*,'hmono = ',hmono
  print*,'htwoe = ',htwoe
  print*,'hthree= ',hthree
@@ -88,7 +87,7 @@ subroutine routine_3()
        print*, ' excited det'
        call debug_det(det_i, N_int)
  
-       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
        if(dabs(hthree).lt.1.d-10)cycle
        ref = hthree 
        if(s1 == 1)then
@@ -156,7 +155,7 @@ subroutine routine_tot()
         stop
        endif
  
-       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
        print*,htilde_ij
        if(dabs(htilde_ij).lt.1.d-10)cycle
        print*, ' excited det'
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
index de638da9..a69f5bac 100644
--- a/src/tc_keywords/EZFIO.cfg
+++ b/src/tc_keywords/EZFIO.cfg
@@ -16,6 +16,24 @@ doc: If |true|, three-body terms are included
 interface: ezfio,provider,ocaml
 default: True
 
+[three_e_3_idx_term]
+type: logical
+doc: If |true|, the diagonal 3-idx terms of the 3-e interaction are taken
+interface: ezfio,provider,ocaml
+default: True
+
+[three_e_4_idx_term]
+type: logical
+doc: If |true|, the off-diagonal 4-idx terms of the 3-e interaction are taken
+interface: ezfio,provider,ocaml
+default: True
+
+[three_e_5_idx_term]
+type: logical
+doc: If |true|, the off-diagonal 5-idx terms of the 3-e interaction are taken
+interface: ezfio,provider,ocaml
+default: True
+
 [pure_three_body_h_tc]
 type: logical
 doc: If |true|, pure triple excitation three-body terms are included
@@ -130,6 +148,12 @@ doc: a parameter used to define mu(r)
 interface: ezfio, provider, ocaml
 default: 6.203504908994001e-1
 
+[beta_rho_power]
+type: double precision
+doc: a parameter used to define mu(r)
+interface: ezfio, provider, ocaml
+default: 0.5
+
 [thr_degen_tc]
 type: Threshold
 doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue
diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg
index 8606e908..8c11478e 100644
--- a/src/trexio/EZFIO.cfg
+++ b/src/trexio/EZFIO.cfg
@@ -10,11 +10,17 @@ doc: Name of the exported TREXIO file
 interface: ezfio, ocaml, provider
 default: None
 
-[export_rdm]
+[export_basis]
 type: logical
-doc: If True, export two-body reduced density matrix
+doc: If True, export basis set and AOs
 interface: ezfio, ocaml, provider
-default: False
+default: True
+
+[export_mos]
+type: logical
+doc: If True, export basis set and AOs
+interface: ezfio, ocaml, provider
+default: True
 
 [export_ao_one_e_ints]
 type: logical
@@ -22,12 +28,6 @@ doc: If True, export one-electron integrals in AO basis
 interface: ezfio, ocaml, provider
 default: False
 
-[export_mo_one_e_ints]
-type: logical
-doc: If True, export one-electron integrals in MO basis
-interface: ezfio, ocaml, provider
-default: False
-
 [export_ao_two_e_ints]
 type: logical
 doc: If True, export two-electron integrals in AO basis
@@ -40,6 +40,12 @@ doc: If True, export Cholesky-decomposed two-electron integrals in AO basis
 interface: ezfio, ocaml, provider
 default: False
 
+[export_mo_one_e_ints]
+type: logical
+doc: If True, export one-electron integrals in MO basis
+interface: ezfio, ocaml, provider
+default: False
+
 [export_mo_two_e_ints]
 type: logical
 doc: If True, export two-electron integrals in MO basis
@@ -52,3 +58,9 @@ doc: If True, export Cholesky-decomposed two-electron integrals in MO basis
 interface: ezfio, ocaml, provider
 default: False
 
+[export_rdm]
+type: logical
+doc: If True, export two-body reduced density matrix
+interface: ezfio, ocaml, provider
+default: False
+
diff --git a/src/trexio/export_trexio.irp.f b/src/trexio/export_trexio.irp.f
index 3ae0dcb4..f9ecc17f 100644
--- a/src/trexio/export_trexio.irp.f
+++ b/src/trexio/export_trexio.irp.f
@@ -2,6 +2,6 @@ program export_trexio_prog
   implicit none
   read_wf = .True.
   SOFT_TOUCH read_wf
-  call export_trexio
+  call export_trexio(.False.)
 end
 
diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f
index c55ddc5e..f25ae370 100644
--- a/src/trexio/export_trexio_routines.irp.f
+++ b/src/trexio/export_trexio_routines.irp.f
@@ -1,15 +1,17 @@
-subroutine export_trexio
+subroutine export_trexio(update)
   use trexio
   implicit none
   BEGIN_DOC
   !     Exports the wave function in TREXIO format
   END_DOC
 
+  logical, intent(in)            :: update
   integer(trexio_t)              :: f(N_states) ! TREXIO file handle
   integer(trexio_exit_code)      :: rc
   integer                        :: k
   double precision, allocatable  :: factor(:)
   character*(256)  :: filenames(N_states)
+  character :: rw
 
   filenames(1) = trexio_filename
   do k=2,N_states
@@ -18,15 +20,26 @@ subroutine export_trexio
 
   do k=1,N_states
     print *, 'TREXIO file : ', trim(filenames(k))
-    call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak')
+    if (update) then
+      call system('test -f '//trim(filenames(k))//' && cp -r '//trim(filenames(k))//' '//trim(filenames(k))//'.bak')
+    else
+      call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak')
+    endif
   enddo
   print *, ''
 
+  if (update) then
+     rw = 'u'
+  else
+     rw = 'w'
+  endif
+
+
   do k=1,N_states
     if (backend == 0) then
-      f(k) = trexio_open(filenames(k), 'u', TREXIO_HDF5, rc)
+      f(k) = trexio_open(filenames(k), rw, TREXIO_HDF5, rc)
     else if (backend == 1) then
-      f(k) = trexio_open(filenames(k), 'u', TREXIO_TEXT, rc)
+      f(k) = trexio_open(filenames(k), rw, TREXIO_TEXT, rc)
     endif
     if (f(k) == 0_8) then
       print *, 'Unable to open TREXIO file for writing'
@@ -171,92 +184,95 @@ subroutine export_trexio
   endif
 
 
+  if (export_basis) then
+
 ! Basis
 ! -----
 
-  print *, 'Basis'
+    print *, 'Basis'
 
+    rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian'))
+    call trexio_assert(rc, TREXIO_SUCCESS)
 
-  rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian'))
-  call trexio_assert(rc, TREXIO_SUCCESS)
+    rc = trexio_write_basis_prim_num(f(1), prim_num)
+    call trexio_assert(rc, TREXIO_SUCCESS)
 
-  rc = trexio_write_basis_prim_num(f(1), prim_num)
-  call trexio_assert(rc, TREXIO_SUCCESS)
+     rc = trexio_write_basis_shell_num(f(1), shell_num)
+     call trexio_assert(rc, TREXIO_SUCCESS)
 
-   rc = trexio_write_basis_shell_num(f(1), shell_num)
-   call trexio_assert(rc, TREXIO_SUCCESS)
+     rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index)
+     call trexio_assert(rc, TREXIO_SUCCESS)
 
-   rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index)
-   call trexio_assert(rc, TREXIO_SUCCESS)
+     rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom)
+     call trexio_assert(rc, TREXIO_SUCCESS)
 
-   rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom)
-   call trexio_assert(rc, TREXIO_SUCCESS)
+     allocate(factor(shell_num))
+!     if (ao_normalized) then
+!       factor(1:shell_num) = shell_normalization_factor(1:shell_num)
+!     else
+       factor(1:shell_num) = 1.d0
+!     endif
+     rc = trexio_write_basis_shell_factor(f(1), factor)
+     call trexio_assert(rc, TREXIO_SUCCESS)
 
-   allocate(factor(shell_num))
-   if (ao_normalized) then
-     factor(1:shell_num) = shell_normalization_factor(1:shell_num)
-   else
-     factor(1:shell_num) = 1.d0
-   endif
-   rc = trexio_write_basis_shell_factor(f(1), factor)
-   call trexio_assert(rc, TREXIO_SUCCESS)
+     deallocate(factor)
 
-   deallocate(factor)
+    rc = trexio_write_basis_shell_index(f(1), shell_index)
+    call trexio_assert(rc, TREXIO_SUCCESS)
 
-  rc = trexio_write_basis_shell_index(f(1), shell_index)
-  call trexio_assert(rc, TREXIO_SUCCESS)
+    rc = trexio_write_basis_exponent(f(1), prim_expo)
+    call trexio_assert(rc, TREXIO_SUCCESS)
 
-  rc = trexio_write_basis_exponent(f(1), prim_expo)
-  call trexio_assert(rc, TREXIO_SUCCESS)
+    rc = trexio_write_basis_coefficient(f(1), prim_coef)
+    call trexio_assert(rc, TREXIO_SUCCESS)
 
-  rc = trexio_write_basis_coefficient(f(1), prim_coef)
-  call trexio_assert(rc, TREXIO_SUCCESS)
-
-  allocate(factor(prim_num))
-  if (primitives_normalized) then
-    factor(1:prim_num) = prim_normalization_factor(1:prim_num)
-  else
-    factor(1:prim_num) = 1.d0
-  endif
-  rc = trexio_write_basis_prim_factor(f(1), factor)
-  call trexio_assert(rc, TREXIO_SUCCESS)
-  deallocate(factor)
+    allocate(factor(prim_num))
+    if (primitives_normalized) then
+      factor(1:prim_num) = prim_normalization_factor(1:prim_num)
+    else
+      factor(1:prim_num) = 1.d0
+    endif
+    rc = trexio_write_basis_prim_factor(f(1), factor)
+    call trexio_assert(rc, TREXIO_SUCCESS)
+    deallocate(factor)
 
 
 ! Atomic orbitals
 ! ---------------
 
-  print *, 'AOs'
+    print *, 'AOs'
 
-  rc = trexio_write_ao_num(f(1), ao_num)
-  call trexio_assert(rc, TREXIO_SUCCESS)
+    rc = trexio_write_ao_num(f(1), ao_num)
+    call trexio_assert(rc, TREXIO_SUCCESS)
 
-  rc = trexio_write_ao_cartesian(f(1), 1)
-  call trexio_assert(rc, TREXIO_SUCCESS)
+    rc = trexio_write_ao_cartesian(f(1), 1)
+    call trexio_assert(rc, TREXIO_SUCCESS)
 
-  rc = trexio_write_ao_shell(f(1), ao_shell)
-  call trexio_assert(rc, TREXIO_SUCCESS)
+    rc = trexio_write_ao_shell(f(1), ao_shell)
+    call trexio_assert(rc, TREXIO_SUCCESS)
 
-  integer :: i, pow0(3), powA(3), j, l, nz
-  double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c
-  nz=100
+    integer :: i, pow0(3), powA(3), j, l, nz
+    double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c
+    nz=100
 
-  C_A(1) = 0.d0
-  C_A(2) = 0.d0
-  C_A(3) = 0.d0
+    C_A(1) = 0.d0
+    C_A(2) = 0.d0
+    C_A(3) = 0.d0
+
+    allocate(factor(ao_num))
+    if (ao_normalized) then
+      do i=1,ao_num
+        l = ao_first_of_shell(ao_shell(i))
+        factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0))
+      enddo
+    else
+      factor(:) = 1.d0
+    endif
+    rc = trexio_write_ao_normalization(f(1), factor)
+    call trexio_assert(rc, TREXIO_SUCCESS)
+    deallocate(factor)
 
-  allocate(factor(ao_num))
-  if (ao_normalized) then
-    do i=1,ao_num
-      l = ao_first_of_shell(ao_shell(i))
-      factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0))
-    enddo
-  else
-    factor(:) = 1.d0
   endif
-  rc = trexio_write_ao_normalization(f(1), factor)
-  call trexio_assert(rc, TREXIO_SUCCESS)
-  deallocate(factor)
 
 ! One-e AO integrals
 ! ------------------
@@ -375,28 +391,30 @@ subroutine export_trexio
 ! Molecular orbitals
 ! ------------------
 
-  print *, 'MOs'
+  if (export_mos) then
+    print *, 'MOs'
 
-  rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label)))
-  call trexio_assert(rc, TREXIO_SUCCESS)
-
-  do k=1,N_states
-    rc = trexio_write_mo_num(f(k), mo_num)
+    rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label)))
     call trexio_assert(rc, TREXIO_SUCCESS)
-  enddo
 
-  rc = trexio_write_mo_coefficient(f(1), mo_coef)
-  call trexio_assert(rc, TREXIO_SUCCESS)
+    do k=1,N_states
+      rc = trexio_write_mo_num(f(k), mo_num)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+    enddo
 
-  if ( (trim(mo_label) == 'Canonical').and. &
-       (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then
-    rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo)
+    rc = trexio_write_mo_coefficient(f(1), mo_coef)
+    call trexio_assert(rc, TREXIO_SUCCESS)
+
+    if ( (trim(mo_label) == 'Canonical').and. &
+         (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then
+      rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+    endif
+
+    rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1)))
     call trexio_assert(rc, TREXIO_SUCCESS)
   endif
 
-  rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1)))
-  call trexio_assert(rc, TREXIO_SUCCESS)
-
 ! One-e MO integrals
 ! ------------------
 
diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f
index 9f9ad9d6..8c6b79d7 100644
--- a/src/trexio/import_trexio_integrals.irp.f
+++ b/src/trexio/import_trexio_integrals.irp.f
@@ -3,6 +3,7 @@ program import_integrals_ao
   implicit none
   integer(trexio_t)              :: f ! TREXIO file handle
   integer(trexio_exit_code)      :: rc
+  PROVIDE mo_num
 
   f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc)
   if (f == 0_8) then
@@ -42,10 +43,10 @@ subroutine run(f)
 
   if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then
     rc = trexio_read_nucleus_repulsion(f, s)
-    call trexio_assert(rc, TREXIO_SUCCESS)
     if (rc /= TREXIO_SUCCESS) then
       print *, irp_here, rc
       print *, 'Error reading nuclear repulsion'
+      call trexio_assert(rc, TREXIO_SUCCESS)
       stop -1
     endif
     call ezfio_set_nuclei_nuclear_repulsion(s)
@@ -63,6 +64,7 @@ subroutine run(f)
     if (rc /= TREXIO_SUCCESS) then
       print *, irp_here
       print *, 'Error reading AO overlap'
+      call trexio_assert(rc, TREXIO_SUCCESS)
       stop -1
     endif
     call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A)
@@ -74,6 +76,7 @@ subroutine run(f)
     if (rc /= TREXIO_SUCCESS) then
       print *, irp_here
       print *, 'Error reading AO kinetic integrals'
+      call trexio_assert(rc, TREXIO_SUCCESS)
       stop -1
     endif
     call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A)
@@ -85,6 +88,7 @@ subroutine run(f)
 !    if (rc /= TREXIO_SUCCESS) then
 !      print *, irp_here
 !      print *, 'Error reading AO ECP local integrals'
+!      call trexio_assert(rc, TREXIO_SUCCESS)
 !      stop -1
 !    endif
 !    call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A)
@@ -96,6 +100,7 @@ subroutine run(f)
     if (rc /= TREXIO_SUCCESS) then
       print *, irp_here
       print *, 'Error reading AO potential N-e integrals'
+      call trexio_assert(rc, TREXIO_SUCCESS)
       stop -1
     endif
     call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A)
@@ -106,41 +111,112 @@ subroutine run(f)
 
   ! AO 2e integrals
   ! ---------------
-  PROVIDE ao_integrals_map
 
-  integer*4 :: BUFSIZE
-  BUFSIZE=ao_num**2
-  allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE))
-  allocate(Vi(4,BUFSIZE), V(BUFSIZE))
+  rc = trexio_has_ao_2e_int(f)
+  PROVIDE ao_num
+  if (rc /= TREXIO_HAS_NOT) then
+      PROVIDE ao_integrals_map
 
-  integer*8 :: offset, icount
+      integer*4 :: BUFSIZE
+      BUFSIZE=ao_num**2
+      allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE))
+      allocate(Vi(4,BUFSIZE), V(BUFSIZE))
 
-  offset = 0_8
-  icount = BUFSIZE
-  rc = TREXIO_SUCCESS
-  do while (icount == size(V))
-    rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V)
-    do m=1,icount
-      i = Vi(1,m)
-      j = Vi(2,m)
-      k = Vi(3,m)
-      l = Vi(4,m)
-      integral = V(m)
-      call two_e_integrals_index(i, j, k, l, buffer_i(m) )
-      buffer_values(m) = integral
-    enddo
-    call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values)
-    offset = offset + icount
+      integer*8 :: offset, icount
+
+      offset = 0_8
+      icount = BUFSIZE
+      rc = TREXIO_SUCCESS
+      do while (icount == size(V))
+        rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V)
+        do m=1,icount
+          i = Vi(1,m)
+          j = Vi(2,m)
+          k = Vi(3,m)
+          l = Vi(4,m)
+          integral = V(m)
+          call two_e_integrals_index(i, j, k, l, buffer_i(m) )
+          buffer_values(m) = integral
+        enddo
+        call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values)
+        offset = offset + icount
+        if (rc /= TREXIO_SUCCESS) then
+            exit
+        endif
+      end do
+      n_integrals = offset
+
+      call map_sort(ao_integrals_map)
+      call map_unique(ao_integrals_map)
+
+      call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map)
+      call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read')
+
+      deallocate(buffer_i, buffer_values, Vi, V)
+      print *, 'AO integrals read from TREXIO file'
+  else
+      print *, 'AO integrals not found in TREXIO file'
+  endif
+
+  ! MO integrals
+  ! ------------
+
+  allocate(A(mo_num, mo_num))
+  if (trexio_has_mo_1e_int_core_hamiltonian(f) == TREXIO_SUCCESS) then
+    rc = trexio_read_mo_1e_int_core_hamiltonian(f, A)
     if (rc /= TREXIO_SUCCESS) then
-        exit
+      print *, irp_here
+      print *, 'Error reading MO 1e integrals'
+      call trexio_assert(rc, TREXIO_SUCCESS)
+      stop -1
     endif
-  end do
-  n_integrals = offset
+    call ezfio_set_mo_one_e_ints_mo_one_e_integrals(A)
+    call ezfio_set_mo_one_e_ints_io_mo_one_e_integrals('Read')
+  endif
+  deallocate(A)
 
-  call map_sort(ao_integrals_map)
-  call map_unique(ao_integrals_map)
+  ! MO 2e integrals
+  ! ---------------
 
-  call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map)
-  call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read')
+  rc = trexio_has_mo_2e_int(f)
+  if (rc /= TREXIO_HAS_NOT) then
+
+      BUFSIZE=mo_num**2
+      allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE))
+      allocate(Vi(4,BUFSIZE), V(BUFSIZE))
+
+
+      offset = 0_8
+      icount = BUFSIZE
+      rc = TREXIO_SUCCESS
+      do while (icount == size(V))
+        rc = trexio_read_mo_2e_int_eri(f, offset, icount, Vi, V)
+        do m=1,icount
+          i = Vi(1,m)
+          j = Vi(2,m)
+          k = Vi(3,m)
+          l = Vi(4,m)
+          integral = V(m)
+          call two_e_integrals_index(i, j, k, l, buffer_i(m) )
+          buffer_values(m) = integral
+        enddo
+        call map_append(mo_integrals_map, buffer_i, buffer_values, int(icount,4))
+        offset = offset + icount
+        if (rc /= TREXIO_SUCCESS) then
+            exit
+        endif
+      end do
+      n_integrals = offset
+
+      call map_sort(mo_integrals_map)
+      call map_unique(mo_integrals_map)
+
+      call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map)
+      call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read')
+      deallocate(buffer_i, buffer_values, Vi, V)
+      print *, 'MO integrals read from TREXIO file'
+  else
+      print *, 'MO integrals not found in TREXIO file'
+  endif
 
 end
diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f
index ff17ee4e..b548b18a 100644
--- a/src/utils/integration.irp.f
+++ b/src/utils/integration.irp.f
@@ -56,7 +56,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
   !               * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 )
   !               * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 )
   !
-  ! WARNING ::: IF fact_k is too smal then: 
+  ! WARNING ::: IF fact_k is too smal then:
   ! returns a "s" function centered in zero
   ! with an inifinite exponent and a zero polynom coef
   END_DOC
@@ -86,7 +86,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
   !DIR$ FORCEINLINE
   call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center)
   if (fact_k < thresh) then
-    ! IF fact_k is too smal then: 
+    ! IF fact_k is too smal then:
     ! returns a "s" function centered in zero
     ! with an inifinite exponent and a zero polynom coef
     P_center = 0.d0
@@ -468,8 +468,6 @@ end subroutine
 
 
 
-
-
 subroutine multiply_poly(b,nb,c,nc,d,nd)
   implicit none
   BEGIN_DOC
@@ -484,33 +482,292 @@ subroutine multiply_poly(b,nb,c,nc,d,nd)
 
   integer                        :: ndtmp
   integer                        :: ib, ic, id, k
-  if(ior(nc,nb) >= 0) then ! True if nc>=0 and nb>=0
-    continue
-  else
-    return
-  endif
-  ndtmp = nb+nc
+  if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0
+
+  select case (nb)
+    case (0)
+      call multiply_poly_b0(b,c,nc,d,nd)
+      return
+    case (1)
+      call multiply_poly_b1(b,c,nc,d,nd)
+      return
+    case (2)
+      call multiply_poly_b2(b,c,nc,d,nd)
+      return
+  end select
+
+  select case (nc)
+    case (0)
+      call multiply_poly_c0(b,nb,c,d,nd)
+      return
+    case (1)
+      call multiply_poly_c1(b,nb,c,d,nd)
+      return
+    case (2)
+      call multiply_poly_c2(b,nb,c,d,nd)
+      return
+  end select
+
+  do ib=0,nb
+    do ic = 0,nc
+      d(ib+ic) = d(ib+ic) + c(ic) * b(ib)
+    enddo
+  enddo
+
+  do nd = nb+nc,0,-1
+    if (d(nd) /= 0.d0) exit
+  enddo
+
+end
+
+
+subroutine multiply_poly_b0(b,c,nc,d,nd)
+  implicit none
+  BEGIN_DOC
+  ! Multiply two polynomials
+  ! D(t) += B(t)*C(t)
+  END_DOC
+
+  integer, intent(in)            :: nc
+  integer, intent(out)           :: nd
+  double precision, intent(in)   :: b(0:0), c(0:nc)
+  double precision, intent(inout) :: d(0:nc)
+
+  integer                        :: ndtmp
+  integer                        :: ic, id, k
+  if(nc < 0) return !False if nc>=0
 
   do ic = 0,nc
     d(ic) = d(ic) + c(ic) * b(0)
   enddo
 
-  do ib=1,nb
-    d(ib) = d(ib) + c(0) * b(ib)
-    do ic = 1,nc
-      d(ib+ic) = d(ib+ic) + c(ic) * b(ib)
-    enddo
-  enddo
-
-  do nd = ndtmp,0,-1
-    if (d(nd) == 0.d0) then
-      cycle
-    endif
-    exit
+  do nd = nc,0,-1
+    if (d(nd) /= 0.d0) exit
   enddo
 
 end
 
+subroutine multiply_poly_b1(b,c,nc,d,nd)
+  implicit none
+  BEGIN_DOC
+  ! Multiply two polynomials
+  ! D(t) += B(t)*C(t)
+  END_DOC
+
+  integer, intent(in)            :: nc
+  integer, intent(out)           :: nd
+  double precision, intent(in)   :: b(0:1), c(0:nc)
+  double precision, intent(inout) :: d(0:1+nc)
+
+  integer                        :: ndtmp
+  integer                        :: ib, ic, id, k
+  if(nc < 0) return !False if nc>=0
+
+
+  select case (nc)
+    case (0)
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(0) * b(1)
+
+    case (1)
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
+      d(2) = d(2) + c(1) * b(1)
+
+    case default
+      d(0) = d(0) + c(0) * b(0)
+      do ic = 1,nc
+        d(ic) = d(ic) + c(ic) * b(0) + c(ic-1) * b(1)
+      enddo
+      d(nc+1) = d(nc+1) + c(nc) * b(1)
+
+  end select
+
+  do nd = 1+nc,0,-1
+    if (d(nd) /= 0.d0) exit
+  enddo
+
+end
+
+
+subroutine multiply_poly_b2(b,c,nc,d,nd)
+  implicit none
+  BEGIN_DOC
+  ! Multiply two polynomials
+  ! D(t) += B(t)*C(t)
+  END_DOC
+
+  integer, intent(in)            :: nc
+  integer, intent(out)           :: nd
+  double precision, intent(in)   :: b(0:2), c(0:nc)
+  double precision, intent(inout) :: d(0:2+nc)
+
+  integer                        :: ndtmp
+  integer                        :: ib, ic, id, k
+  if(nc < 0) return !False if nc>=0
+
+  select case (nc)
+    case (0)
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(0) * b(1)
+      d(2) = d(2) + c(0) * b(2)
+
+    case (1)
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
+      d(2) = d(2) + c(0) * b(2) + c(1) * b(1)
+      d(3) = d(3) + c(1) * b(2)
+
+    case (2)
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
+      d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0)
+      d(3) = d(3) + c(2) * b(1) + c(1) * b(2)
+      d(4) = d(4) + c(2) * b(2)
+
+    case default
+
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
+      do ic = 2,nc
+        d(ic) = d(ic) + c(ic) * b(0) + c(ic-1) * b(1) + c(ic-2) * b(2)
+      enddo
+      d(nc+1) = d(nc+1) + c(nc) * b(1) + c(nc-1) * b(2)
+      d(nc+2) = d(nc+2) + c(nc) * b(2)
+
+  end select
+
+  do nd = 2+nc,0,-1
+    if (d(nd) /= 0.d0) exit
+  enddo
+
+end
+
+
+subroutine multiply_poly_c0(b,nb,c,d,nd)
+  implicit none
+  BEGIN_DOC
+  ! Multiply two polynomials
+  ! D(t) += B(t)*C(t)
+  END_DOC
+
+  integer, intent(in)            :: nb
+  integer, intent(out)           :: nd
+  double precision, intent(in)   :: b(0:nb), c(0:0)
+  double precision, intent(inout) :: d(0:nb)
+
+  integer                        :: ndtmp
+  integer                        :: ib, ic, id, k
+  if(nb < 0) return !False if nb>=0
+
+  do ib=0,nb
+      d(ib) = d(ib) + c(0) * b(ib)
+  enddo
+
+  do nd = nb,0,-1
+    if (d(nd) /= 0.d0) exit
+  enddo
+
+end
+
+
+subroutine multiply_poly_c1(b,nb,c,d,nd)
+  implicit none
+  BEGIN_DOC
+  ! Multiply two polynomials
+  ! D(t) += B(t)*C(t)
+  END_DOC
+
+  integer, intent(in)            :: nb
+  integer, intent(out)           :: nd
+  double precision, intent(in)   :: b(0:nb), c(0:1)
+  double precision, intent(inout) :: d(0:nb+1)
+
+  integer                        :: ndtmp
+  integer                        :: ib, ic, id, k
+  if(nb < 0) return !False if nb>=0
+
+  select case (nb)
+    case (0)
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(1) * b(0)
+
+    case (1)
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
+      d(2) = d(2) + c(1) * b(1)
+
+    case default
+      d(0) = d(0) + c(0) * b(0)
+      do ib=1,nb
+        d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1)
+      enddo
+      d(nb+1) = d(nb+1) + c(1) * b(nb)
+
+  end select
+
+  do nd = nb+1,0,-1
+    if (d(nd) /= 0.d0) exit
+  enddo
+
+end
+
+
+subroutine multiply_poly_c2(b,nb,c,d,nd)
+  implicit none
+  BEGIN_DOC
+  ! Multiply two polynomials
+  ! D(t) += B(t)*C(t)
+  END_DOC
+
+  integer, intent(in)            :: nb
+  integer, intent(out)           :: nd
+  double precision, intent(in)   :: b(0:nb), c(0:2)
+  double precision, intent(inout) :: d(0:nb+2)
+
+  integer                        :: ndtmp
+  integer                        :: ib, ic, id, k
+  if(nb < 0) return !False if nb>=0
+
+  select case (nb)
+    case (0)
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(1) * b(0)
+      d(2) = d(2) + c(2) * b(0)
+
+    case (1)
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
+      d(2) = d(2) + c(1) * b(1) + c(2) * b(0)
+      d(3) = d(3) + c(2) * b(1)
+
+    case (2)
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
+      d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0)
+      d(3) = d(3) + c(1) * b(2) + c(2) * b(1)
+      d(4) = d(4) + c(2) * b(2)
+
+    case default
+      d(0) = d(0) + c(0) * b(0)
+      d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
+      do ib=2,nb
+        d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2)
+      enddo
+      d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1)
+      d(nb+2) = d(nb+2) + c(2) * b(nb)
+
+  end select
+
+  do nd = nb+2,0,-1
+    if (d(nd) /= 0.d0) exit
+  enddo
+
+end
+
+
+
+
 subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points)
   implicit none
   BEGIN_DOC
@@ -685,11 +942,11 @@ end subroutine recentered_poly2_v
 subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, n_points)
 
   BEGIN_DOC
-  ! 
+  !
   ! Recenter two polynomials. Special case for b=(0,0,0)
-  ! 
+  !
   ! (x - A)^a (x - B)^0 = (x - P + P - A)^a  (x - Q + Q - B)^0
-  !                     = (x - P + P - A)^a 
+  !                     = (x - P + P - A)^a
   !
   END_DOC
 
diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index 3b43d607..69873bc0 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1823,41 +1823,39 @@ subroutine pivoted_cholesky( A, rank, tol, ndim, U)
 ! U is allocated inside this subroutine
 ! rank is the number of Cholesky vectors depending on tol
 !
-integer :: ndim
-integer, intent(inout)                                        :: rank
-double precision, dimension(ndim, ndim), intent(inout)        :: A
-double precision, dimension(ndim, rank), intent(out)          :: U
-double precision, intent(in)                                  :: tol
+integer                          :: ndim
+integer, intent(inout)           :: rank
+double precision, intent(inout)  :: A(ndim, ndim)
+double precision, intent(out)    :: U(ndim, rank)
+double precision, intent(in)     :: tol
 
 integer, dimension(:), allocatable          :: piv
 double precision, dimension(:), allocatable :: work
 character, parameter :: uplo = "U"
-integer :: N, LDA
+integer :: LDA
 integer :: info
 integer :: k, l, rank0
-external :: dpstrf
 
 rank0 = rank
-N = size(A, dim=1)
-LDA = N
-allocate(piv(N))
-allocate(work(2*N))
-call dpstrf(uplo, N, A, LDA, piv, rank, tol, work, info)
+LDA = ndim
+allocate(piv(ndim))
+allocate(work(2*ndim))
+call dpstrf(uplo, ndim, A, LDA, piv, rank, tol, work, info)
 
 if (rank > rank0) then
   print *, 'Bug: rank > rank0 in pivoted cholesky. Increase rank before calling'
   stop
 end if
 
-do k = 1, N
-  A(k+1:, k) = 0.00D+0
+do k = 1, ndim
+  A(k+1:ndim, k) = 0.00D+0
 end do
 ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer
 ! to do the swapping in-place
 U(:,:) = 0.00D+0
-do k = 1, N
+do k = 1, ndim
   l = piv(k)
-  U(l, :) = A(1:rank, k)
+  U(l, 1:rank) = A(1:rank, k)
 end do
 
 end subroutine pivoted_cholesky
diff --git a/src/utils_cc/energy.irp.f b/src/utils_cc/energy.irp.f
index 33e0cbae..fc1451ba 100644
--- a/src/utils_cc/energy.irp.f
+++ b/src/utils_cc/energy.irp.f
@@ -5,9 +5,8 @@ subroutine det_energy(det,energy)
   integer(bit_kind), intent(in) :: det
 
   double precision, intent(out) :: energy
+  double precision, external :: diag_H_mat_elem
 
-  call i_H_j(det,det,N_int,energy)
+  energy = diag_H_mat_elem(det,N_int) + nuclear_repulsion
 
-  energy = energy + nuclear_repulsion
-  
 end
diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f
index 9e244d82..485d7002 100644
--- a/src/utils_cc/mo_integrals_cc.irp.f
+++ b/src/utils_cc/mo_integrals_cc.irp.f
@@ -13,7 +13,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f)
   integer                       :: i1,i2,idx1,idx2
 
   allocate(tmp_F(mo_num,mo_num))
-  
+
   call get_fock_matrix_spin(det,1,tmp_F)
 
   !$OMP PARALLEL &
@@ -32,7 +32,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f)
   !$OMP END PARALLEL
 
   deallocate(tmp_F)
-  
+
 end
 
 ! V
@@ -45,63 +45,66 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
   integer, intent(in)           :: list1(n1),list2(n2),list3(n3),list4(n4)
   double precision, intent(out) :: v(n1,n2,n3,n4)
 
-  integer                       :: i1,i2,i3,i4,idx1,idx2,idx3,idx4
-  double precision              :: get_two_e_integral
-  
-  PROVIDE mo_two_e_integrals_in_map
+  integer                       :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k
 
+  double precision, allocatable :: buffer(:,:,:)
   !$OMP PARALLEL &
-  !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) &
-  !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)&
+  !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) &
+  !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)&
   !$OMP DEFAULT(NONE)
-  !$OMP DO collapse(3)
+  allocate(buffer(mo_num,mo_num,mo_num))
+  !$OMP DO
   do i4 = 1, n4
-    do i3 = 1, n3
-      do i2 = 1, n2
+    idx4 = list4(i4)
+    call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, &
+       cholesky_mo_transp, cholesky_ao_num, &
+       cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num)
+    do i2 = 1, n2
+      idx2 = list2(i2)
+      do i3 = 1, n3
+        idx3 = list3(i3)
         do i1 = 1, n1
-          idx4 = list4(i4)
-          idx3 = list3(i3)
-          idx2 = list2(i2)
           idx1 = list1(i1)
-          v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map)
+          v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2)
         enddo
       enddo
     enddo
   enddo
   !$OMP END DO
+  deallocate(buffer)
   !$OMP END PARALLEL
-  
+
+
 end
 
 ! full
 
 BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)]
-
   implicit none
-
-  integer          :: i,j,k,l
-  double precision :: get_two_e_integral
-  
-  PROVIDE mo_two_e_integrals_in_map
-
+  integer                       :: i1,i2,i3,i4,k
+  double precision, allocatable :: buffer(:,:,:)
   !$OMP PARALLEL &
-  !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) &
-  !$OMP PRIVATE(i,j,k,l) &
+  !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) &
+  !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)&
   !$OMP DEFAULT(NONE)
-  
-  !$OMP DO collapse(3)
-  do l = 1, mo_num
-    do k = 1, mo_num
-      do j = 1, mo_num
-        do i = 1, mo_num
-          cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map)
+  allocate(buffer(mo_num,mo_num,mo_num))
+  !$OMP DO
+  do i4 = 1, mo_num
+    call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, &
+         cholesky_mo_transp, cholesky_ao_num, &
+         cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num)
+    do i2 = 1, mo_num
+      do i3 = 1, mo_num
+        do i1 = 1, mo_num
+          cc_space_v(i1,i2,i3,i4) = buffer(i1,i3,i2)
         enddo
       enddo
     enddo
   enddo
   !$OMP END DO
+  deallocate(buffer)
   !$OMP END PARALLEL
-       
+
 END_PROVIDER
 
 ! oooo
@@ -280,7 +283,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)]
   allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo))
 
   call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v)
-  
+
   do q = 1, cc_n_mo
     do p = 1, cc_n_mo
       cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q)
@@ -382,7 +385,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)]
   enddo
 
   FREE cc_space_v_vvvv
-  
+
 END_PROVIDER
 
 ! iaia
@@ -467,7 +470,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n
   integer :: i,j,a,b
 
   allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa))
-  
+
   call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v)
 
   !$OMP PARALLEL &
@@ -501,7 +504,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n
   integer :: i,j,a,b
 
   allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa))
-  
+
   call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v)
 
   !$OMP PARALLEL &
@@ -613,7 +616,7 @@ subroutine shift_idx_spin(s,n_S,shift)
   else
     shift = n_S(1)
   endif
-  
+
 end
 
 ! F
@@ -626,21 +629,22 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f)
   ! Compute the Fock matrix corresponding to two lists of spin orbitals.
   ! Ex: occ/occ, occ/vir,...
   END_DOC
-  
+
   integer(bit_kind), intent(in) :: det(N_int,2)
   integer, intent(in)           :: n1,n2, n1_S(2), n2_S(2)
   integer, intent(in)           :: list1(n1,2), list2(n2,2)
   integer, intent(in)           :: dim1, dim2
-  
+
   double precision, intent(out) :: f(dim1, dim2)
 
   double precision, allocatable :: tmp_F(:,:)
   integer                       :: i,j, idx_i,idx_j,i_shift,j_shift
   integer                       :: tmp_i,tmp_j
   integer                       :: si,sj,s
+  PROVIDE big_array_exchange_integrals big_array_coulomb_integrals
 
   allocate(tmp_F(mo_num,mo_num))
-  
+
   do sj = 1, 2
     call shift_idx_spin(sj,n2_S,j_shift)
     do si = 1, 2
@@ -669,9 +673,9 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f)
 
     enddo
   enddo
-  
+
   deallocate(tmp_F)
-  
+
 end
 
 ! Get F
@@ -683,12 +687,12 @@ subroutine get_fock_matrix_spin(det,s,f)
   BEGIN_DOC
   ! Fock matrix alpha or beta of an arbitrary det
   END_DOC
-  
+
   integer(bit_kind), intent(in) :: det(N_int,2)
   integer, intent(in)           :: s
-  
+
   double precision, intent(out) :: f(mo_num,mo_num)
-  
+
   integer                       :: p,q,i,s1,s2
   integer(bit_kind)             :: res(N_int,2)
   logical                       :: ok
@@ -701,9 +705,11 @@ subroutine get_fock_matrix_spin(det,s,f)
     s1 = 2
     s2 = 1
   endif
-  
+
+  PROVIDE big_array_coulomb_integrals big_array_exchange_integrals
+
   !$OMP PARALLEL &
-  !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) &
+  !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals,big_array_coulomb_integrals,big_array_exchange_integrals) &
   !$OMP PRIVATE(p,q,ok,i,res)&
   !$OMP DEFAULT(NONE)
   !$OMP DO collapse(1)
@@ -713,20 +719,21 @@ subroutine get_fock_matrix_spin(det,s,f)
       do i = 1, mo_num
         call apply_hole(det, s1, i, res, ok, N_int)
         if (ok) then
-          f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q)
+!          f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q)
+          f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) - big_array_exchange_integrals(i,p,q)
         endif
       enddo
       do i = 1, mo_num
         call apply_hole(det, s2, i, res, ok, N_int)
         if (ok) then
-          f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i)
+          f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q)
         endif
       enddo
     enddo
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-    
+
 end
 
 ! V
@@ -752,14 +759,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4,
   integer                       :: si,sj,sk,sl,s
 
   PROVIDE cc_space_v
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) &
   !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, &
   !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,&
   !$OMP tmp_i,tmp_j,tmp_k,tmp_l)&
   !$OMP DEFAULT(NONE)
-  
+
   do sl = 1, 2
     call shift_idx_spin(sl,n4_S,l_shift)
     do sk = 1, 2
@@ -768,7 +775,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4,
         call shift_idx_spin(sj,n2_S,j_shift)
         do si = 1, 2
           call shift_idx_spin(si,n1_S,i_shift)
-    
+
           s = si+sj+sk+sl
           ! <aa||aa> or <bb||bb>
           if (s == 4 .or. s == 8) then
@@ -776,7 +783,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4,
             do tmp_l = 1, n4_S(sl)
               do tmp_k = 1, n3_S(sk)
                 do tmp_j = 1, n2_S(sj)
-                  do tmp_i = 1, n1_S(si)  
+                  do tmp_i = 1, n1_S(si)
                     l = list4(tmp_l,sl)
                     idx_l = tmp_l + l_shift
                     k = list3(tmp_k,sk)
@@ -792,14 +799,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4,
               enddo
             enddo
             !$OMP END DO
-            
+
           ! <ab||ab> or <ba||ba>
           elseif (si == sk .and. sj == sl) then
             !$OMP DO collapse(3)
             do tmp_l = 1, n4_S(sl)
               do tmp_k = 1, n3_S(sk)
                 do tmp_j = 1, n2_S(sj)
-                  do tmp_i = 1, n1_S(si)  
+                  do tmp_i = 1, n1_S(si)
                     l = list4(tmp_l,sl)
                     idx_l = tmp_l + l_shift
                     k = list3(tmp_k,sk)
@@ -815,14 +822,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4,
               enddo
             enddo
             !$OMP END DO
-            
+
           ! <ab||ba> or <ba||ab>
           elseif (si == sl .and. sj == sk) then
             !$OMP DO collapse(3)
             do tmp_l = 1, n4_S(sl)
               do tmp_k = 1, n3_S(sk)
                 do tmp_j = 1, n2_S(sj)
-                  do tmp_i = 1, n1_S(si)  
+                  do tmp_i = 1, n1_S(si)
                     l = list4(tmp_l,sl)
                     idx_l = tmp_l + l_shift
                     k = list3(tmp_k,sk)
@@ -843,7 +850,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4,
             do tmp_l = 1, n4_S(sl)
               do tmp_k = 1, n3_S(sk)
                 do tmp_j = 1, n2_S(sj)
-                  do tmp_i = 1, n1_S(si)  
+                  do tmp_i = 1, n1_S(si)
                     l = list4(tmp_l,sl)
                     idx_l = tmp_l + l_shift
                     k = list3(tmp_k,sk)
@@ -859,13 +866,13 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4,
             enddo
             !$OMP END DO
           endif
-          
+
         enddo
       enddo
     enddo
   enddo
   !$OMP END PARALLEL
-  
+
 end
 
 ! V_3idx
@@ -900,28 +907,28 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,
   call shift_idx_spin(sl,n4_S,l_shift)
   tmp_l = idx_l - l_shift
   l = list4(tmp_l,sl)
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) &
   !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, &
   !$OMP i,j,k,idx_i,idx_j,idx_k,&
   !$OMP tmp_i,tmp_j,tmp_k)&
   !$OMP DEFAULT(NONE)
-  
+
   do sk = 1, 2
     call shift_idx_spin(sk,n3_S,k_shift)
     do sj = 1, 2
       call shift_idx_spin(sj,n2_S,j_shift)
       do si = 1, 2
         call shift_idx_spin(si,n1_S,i_shift)
-  
+
         s = si+sj+sk+sl
         ! <aa||aa> or <bb||bb>
         if (s == 4 .or. s == 8) then
           !$OMP DO collapse(2)
           do tmp_k = 1, n3_S(sk)
             do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 k = list3(tmp_k,sk)
                 idx_k = tmp_k + k_shift
                 j = list2(tmp_j,sj)
@@ -934,13 +941,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,
             enddo
           enddo
           !$OMP END DO
-          
+
         ! <ab||ab> or <ba||ba>
         elseif (si == sk .and. sj == sl) then
           !$OMP DO collapse(2)
           do tmp_k = 1, n3_S(sk)
             do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 k = list3(tmp_k,sk)
                 idx_k = tmp_k + k_shift
                 j = list2(tmp_j,sj)
@@ -953,13 +960,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,
             enddo
           enddo
           !$OMP END DO
-          
+
         ! <ab||ba> or <ba||ab>
         elseif (si == sl .and. sj == sk) then
           !$OMP DO collapse(2)
           do tmp_k = 1, n3_S(sk)
             do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 k = list3(tmp_k,sk)
                 idx_k = tmp_k + k_shift
                 j = list2(tmp_j,sj)
@@ -976,7 +983,7 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,
           !$OMP DO collapse(2)
           do tmp_k = 1, n3_S(sk)
             do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 k = list3(tmp_k,sk)
                 idx_k = tmp_k + k_shift
                 j = list2(tmp_j,sj)
@@ -989,12 +996,12 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,
           enddo
           !$OMP END DO
         endif
-        
+
       enddo
     enddo
   enddo
   !$OMP END PARALLEL
-  
+
 end
 
 ! V_3idx_ij_l
@@ -1029,28 +1036,28 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l
   call shift_idx_spin(sk,n3_S,k_shift)
   tmp_k = idx_k - k_shift
   k = list3(tmp_k,sk)
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) &
   !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, &
   !$OMP i,j,l,idx_i,idx_j,idx_l,&
   !$OMP tmp_i,tmp_j,tmp_l)&
   !$OMP DEFAULT(NONE)
-  
+
   do sl = 1, 2
     call shift_idx_spin(sl,n4_S,l_shift)
     do sj = 1, 2
       call shift_idx_spin(sj,n2_S,j_shift)
       do si = 1, 2
         call shift_idx_spin(si,n1_S,i_shift)
-  
+
         s = si+sj+sk+sl
         ! <aa||aa> or <bb||bb>
         if (s == 4 .or. s == 8) then
           !$OMP DO collapse(2)
           do tmp_l = 1, n4_S(sl)
             do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 l = list4(tmp_l,sl)
                 idx_l = tmp_l + l_shift
                 j = list2(tmp_j,sj)
@@ -1063,13 +1070,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l
             enddo
           enddo
           !$OMP END DO
-          
+
         ! <ab||ab> or <ba||ba>
         elseif (si == sk .and. sj == sl) then
           !$OMP DO collapse(2)
           do tmp_l = 1, n4_S(sl)
             do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 l = list4(tmp_l,sl)
                 idx_l = tmp_l + l_shift
                 j = list2(tmp_j,sj)
@@ -1082,13 +1089,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l
             enddo
           enddo
           !$OMP END DO
-          
+
         ! <ab||ba> or <ba||ab>
         elseif (si == sl .and. sj == sk) then
           !$OMP DO collapse(2)
           do tmp_l = 1, n4_S(sl)
             do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 l = list4(tmp_l,sl)
                 idx_l = tmp_l + l_shift
                 j = list2(tmp_j,sj)
@@ -1105,7 +1112,7 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l
           !$OMP DO collapse(2)
           do tmp_l = 1, n4_S(sl)
             do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 l = list4(tmp_l,sl)
                 idx_l = tmp_l + l_shift
                 j = list2(tmp_j,sj)
@@ -1118,12 +1125,12 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l
           enddo
           !$OMP END DO
         endif
-        
+
       enddo
     enddo
   enddo
   !$OMP END PARALLEL
-  
+
 end
 
 ! V_3idx_i_kl
@@ -1158,28 +1165,28 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l
   call shift_idx_spin(sj,n2_S,j_shift)
   tmp_j = idx_j - j_shift
   j = list2(tmp_j,sj)
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) &
   !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, &
   !$OMP i,k,l,idx_i,idx_k,idx_l,&
   !$OMP tmp_i,tmp_k,tmp_l)&
   !$OMP DEFAULT(NONE)
-  
+
   do sl = 1, 2
     call shift_idx_spin(sl,n4_S,l_shift)
     do sk = 1, 2
       call shift_idx_spin(sk,n3_S,k_shift)
       do si = 1, 2
         call shift_idx_spin(si,n1_S,i_shift)
-  
+
         s = si+sj+sk+sl
         ! <aa||aa> or <bb||bb>
         if (s == 4 .or. s == 8) then
           !$OMP DO collapse(2)
           do tmp_l = 1, n4_S(sl)
             do tmp_k = 1, n3_S(sk)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 l = list4(tmp_l,sl)
                 idx_l = tmp_l + l_shift
                 k = list3(tmp_k,sk)
@@ -1192,13 +1199,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l
             enddo
           enddo
           !$OMP END DO
-          
+
         ! <ab||ab> or <ba||ba>
         elseif (si == sk .and. sj == sl) then
           !$OMP DO collapse(2)
           do tmp_l = 1, n4_S(sl)
             do tmp_k = 1, n3_S(sk)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 l = list4(tmp_l,sl)
                 idx_l = tmp_l + l_shift
                 k = list3(tmp_k,sk)
@@ -1211,13 +1218,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l
             enddo
           enddo
           !$OMP END DO
-          
+
         ! <ab||ba> or <ba||ab>
         elseif (si == sl .and. sj == sk) then
           !$OMP DO collapse(2)
           do tmp_l = 1, n4_S(sl)
             do tmp_k = 1, n3_S(sk)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 l = list4(tmp_l,sl)
                 idx_l = tmp_l + l_shift
                 k = list3(tmp_k,sk)
@@ -1234,7 +1241,7 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l
           !$OMP DO collapse(2)
           do tmp_l = 1, n4_S(sl)
             do tmp_k = 1, n3_S(sk)
-              do tmp_i = 1, n1_S(si)  
+              do tmp_i = 1, n1_S(si)
                 l = list4(tmp_l,sl)
                 idx_l = tmp_l + l_shift
                 k = list3(tmp_k,sk)
@@ -1247,10 +1254,10 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l
           enddo
           !$OMP END DO
         endif
-        
+
       enddo
     enddo
   enddo
   !$OMP END PARALLEL
-  
+
 end
diff --git a/src/utils_cc/update_t.irp.f b/src/utils_cc/update_t.irp.f
index dbd4f4bd..0cf8626c 100644
--- a/src/utils_cc/update_t.irp.f
+++ b/src/utils_cc/update_t.irp.f
@@ -22,7 +22,7 @@ subroutine update_t1(nO,nV,f_o,f_v,r1,t1)
   !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) &
   !$OMP PRIVATE(i,a) &
   !$OMP DEFAULT(NONE)
-  !$OMP DO collapse(1)
+  !$OMP DO 
   do a = 1, nV
     do i = 1, nO
       t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift)
@@ -57,7 +57,7 @@ subroutine update_t2(nO,nV,f_o,f_v,r2,t2)
   !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) &
   !$OMP PRIVATE(i,j,a,b) &
   !$OMP DEFAULT(NONE)
-  !$OMP DO collapse(3)
+  !$OMP DO 
   do b = 1, nV
     do a = 1, nV
       do j = 1, nO