diff --git a/external/irpf90 b/external/irpf90
index beac6153..4ab1b175 160000
--- a/external/irpf90
+++ b/external/irpf90
@@ -1 +1 @@
-Subproject commit beac615343f421bd6c0571a408ba389a6d5a32ac
+Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6
diff --git a/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f b/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f
index 51f0cba4..77e4cb9b 100644
--- a/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -107,8 +107,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3,
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
-  print *, ' providing int2_grad1_u12_ao_transp ...'
-  call wall_time(wall0)
+  !print *, ' providing int2_grad1_u12_ao_transp ...'
+  !call wall_time(wall0)
 
   if(test_cycle_tc) then
 
@@ -142,15 +142,15 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3,
 
   endif
 
-  call wall_time(wall1)
-  print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0
-  call print_memory_usage()
+  !call wall_time(wall1)
+  !print *, ' wall time for int2_grad1_u12_ao_transp (min) = ', (wall1 - wall0) / 60.d0
+  !call print_memory_usage()
 
 END_PROVIDER 
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)]
+BEGIN_PROVIDER [double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)]
 
   implicit none
   integer          :: ipoint
@@ -159,7 +159,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num,
   PROVIDE mo_l_coef mo_r_coef
   PROVIDE int2_grad1_u12_ao_transp
 
-  !print *, ' providing int2_grad1_u12_bimo_transp'
+  !print *, ' providing int2_grad1_u12_bimo_transp ...'
   !call wall_time(wall0)
 
   !$OMP PARALLEL         &
@@ -167,33 +167,35 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num,
   !$OMP PRIVATE (ipoint) & 
   !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
   !$OMP DO SCHEDULE (dynamic)
-   do ipoint = 1, n_points_final_grid
-     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                           , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                           , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                           , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-   enddo
+  do ipoint = 1, n_points_final_grid
+    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                          , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                          , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                          , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+  enddo
   !$OMP END DO
   !$OMP END PARALLEL
 
+  !FREE int2_grad1_u12_ao_transp
+
   !call wall_time(wall1)
-  !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
+  !print *, ' wall time for int2_grad1_u12_bimo_transp (min) =', (wall1 - wall0) / 60.d0
   !call print_memory_usage()
 
 END_PROVIDER 
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)]
+BEGIN_PROVIDER [double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)]
 
   implicit none
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
   !call wall_time(wall0)
-  !print *, ' Providing int2_grad1_u12_bimo_t ...'
+  !print *, ' providing int2_grad1_u12_bimo_t ...'
 
   PROVIDE mo_l_coef mo_r_coef
   PROVIDE int2_grad1_u12_bimo_transp
@@ -211,17 +213,21 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,
   FREE int2_grad1_u12_bimo_transp
 
   !call wall_time(wall1)
-  !print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0
+  !print *, ' wall time for int2_grad1_u12_bimo_t (min) =', (wall1 - wall0) / 60.d0
   !call print_memory_usage()
 
 END_PROVIDER 
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)]
+BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)]
 
   implicit none
-  integer :: i, j, ipoint
+  integer          :: i, j, ipoint
+  double precision :: wall0, wall1
+
+  !call wall_time(wall0)
+  !print *, ' providing int2_grad1_u12_ao_t ...'
 
   PROVIDE int2_grad1_u12_ao
 
@@ -235,6 +241,10 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3,
     enddo
   enddo
 
+  !call wall_time(wall1)
+  !print *, ' wall time for int2_grad1_u12_ao_t (min) =', (wall1 - wall0) / 60.d0
+  !call print_memory_usage()
+
 END_PROVIDER 
 
 ! ---
@@ -275,8 +285,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk, (n_points_final_grid,
   double precision :: xyz
   double precision :: wall0, wall1
  
-  print*, ' providing x_W_ki_bi_ortho_erf_rk ...'
-  call wall_time(wall0)
+  !print*, ' providing x_W_ki_bi_ortho_erf_rk ...'
+  !call wall_time(wall0)
 
  !$OMP PARALLEL                   &
  !$OMP DEFAULT (NONE)             &
@@ -300,8 +310,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk, (n_points_final_grid,
  ! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu_transp 
  ! FREE mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp
 
-  call wall_time(wall1)
-  print *, ' time to provide x_W_ki_bi_ortho_erf_rk = ', wall1 - wall0
+  !call wall_time(wall1)
+  !print *, ' time to provide x_W_ki_bi_ortho_erf_rk = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -323,8 +333,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk_diag, (n_points_final_
   double precision :: xyz
   double precision :: wall0, wall1
  
-  print*,'providing x_W_ki_bi_ortho_erf_rk_diag ...'
-  call wall_time(wall0)
+  !print*,'providing x_W_ki_bi_ortho_erf_rk_diag ...'
+  !call wall_time(wall0)
 
  !$OMP PARALLEL                 &
  !$OMP DEFAULT (NONE)           &
@@ -343,8 +353,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk_diag, (n_points_final_
  !$OMP END DO
  !$OMP END PARALLEL
 
-  call wall_time(wall1)
-  print*,'time to provide x_W_ki_bi_ortho_erf_rk_diag = ',wall1 - wall0
+  !call wall_time(wall1)
+  !print*,'time to provide x_W_ki_bi_ortho_erf_rk_diag = ',wall1 - wall0
 
 END_PROVIDER 
 
diff --git a/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f b/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f
index 726e48ba..fd4a162f 100644
--- a/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f
+++ b/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f
@@ -168,7 +168,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
     integral = integral + tmp * final_weight_at_r_vector(ipoint)
   enddo
 
-end subroutine give_integrals_3_body_bi_ort
+end
 
 ! ---
 
diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f
index 5e6a24e9..1e127fac 100644
--- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f
+++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f
@@ -16,10 +16,10 @@ double precision function bi_ortho_mo_ints(l, k, j, i)
   integer             :: m, n, p, q
 
   bi_ortho_mo_ints = 0.d0
-  do m = 1, ao_num
-    do p = 1, ao_num
-      do n = 1, ao_num
-        do q = 1, ao_num
+  do p = 1, ao_num
+    do m = 1, ao_num
+      do q = 1, ao_num
+        do n = 1, ao_num
           !                                   p1h1p2h2   l1                  l2              r1               r2
           bi_ortho_mo_ints += ao_two_e_tc_tot(n,q,m,p) * mo_l_coef(m,l) * mo_l_coef(n,k) * mo_r_coef(p,j) * mo_r_coef(q,i)
         enddo
@@ -27,7 +27,7 @@ double precision function bi_ortho_mo_ints(l, k, j, i)
     enddo
   enddo
 
-end function bi_ortho_mo_ints
+end
 
 ! ---
 
@@ -40,38 +40,106 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num,
   END_DOC
 
   implicit none
-  integer                       :: i, j, k, l, m, n, p, q
+  integer                       :: i, j, k, l, m, n, p, q, s, r
+  double precision              :: t1, t2, tt1, tt2
   double precision, allocatable :: a1(:,:,:,:), a2(:,:,:,:)
+  double precision, allocatable :: a_jkp(:,:,:), a_kpq(:,:,:), ao_two_e_tc_tot_tmp(:,:,:)
+
+  print *, ' PROVIDING mo_bi_ortho_tc_two_e_chemist ...'
+  call wall_time(t1)
+  call print_memory_usage()
 
   PROVIDE mo_r_coef mo_l_coef
 
-  allocate(a2(ao_num,ao_num,ao_num,mo_num))
+  if(ao_to_mo_tc_n3) then
 
-  call dgemm( 'T', 'N', ao_num*ao_num*ao_num, mo_num, ao_num, 1.d0     &
-            , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num &
-            , 0.d0 , a2(1,1,1,1), ao_num*ao_num*ao_num)
+    print*, ' memory scale of TC ao -> mo: O(N3) '
 
-  allocate(a1(ao_num,ao_num,mo_num,mo_num))
+    if(.not.read_tc_integ) then
+       stop 'read_tc_integ needs to be set to true'
+    endif
 
-  call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 &
-            , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num          &
-            , 0.d0, a1(1,1,1,1), ao_num*ao_num*mo_num)
+    allocate(a_jkp(ao_num,ao_num,mo_num))
+    allocate(a_kpq(ao_num,mo_num,mo_num))
+    allocate(ao_two_e_tc_tot_tmp(ao_num,ao_num,ao_num))
 
-  deallocate(a2)
-  allocate(a2(ao_num,mo_num,mo_num,mo_num))
+    open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="read")
 
-  call dgemm( 'T', 'N', ao_num*mo_num*mo_num, mo_num, ao_num, 1.d0 &
-            , a1(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num          &
-            , 0.d0, a2(1,1,1,1), ao_num*mo_num*mo_num)
+    call wall_time(tt1)
 
-  deallocate(a1)
+    mo_bi_ortho_tc_two_e_chemist(:,:,:,:) = 0.d0
+    do l = 1, ao_num
+      read(11) ao_two_e_tc_tot_tmp(:,:,:)
 
-  call dgemm( 'T', 'N', mo_num*mo_num*mo_num, mo_num, ao_num, 1.d0 &
-            , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num          &
-            , 0.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,1), mo_num*mo_num*mo_num)
+      do s = 1, mo_num
 
-  deallocate(a2)
+        call dgemm( 'T', 'N', ao_num*ao_num, mo_num, ao_num, 1.d0              &
+                  , ao_two_e_tc_tot_tmp(1,1,1), ao_num, mo_l_coef(1,1), ao_num &
+                  , 0.d0, a_jkp(1,1,1), ao_num*ao_num)
 
+        call dgemm( 'T', 'N', ao_num*mo_num, mo_num, ao_num, 1.d0 &
+                  , a_jkp(1,1,1), ao_num, mo_r_coef(1,1), ao_num  &
+                  , 0.d0, a_kpq(1,1,1), ao_num*mo_num)
+
+        call dgemm( 'T', 'N', mo_num*mo_num, mo_num, ao_num, mo_r_coef(l,s) &
+                  , a_kpq(1,1,1), ao_num, mo_l_coef(1,1), ao_num            &
+                  , 1.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,s), mo_num*mo_num)
+
+      enddo ! s
+
+      if(l == 2) then
+        call wall_time(tt2)
+        print*, ' 1 / mo_num done in (min)', (tt2-tt1)/60.d0
+        print*, ' estimated time required (min)', dble(mo_num-1)*(tt2-tt1)/60.d0
+      elseif(l == 11) then
+        call wall_time(tt2)
+        print*, ' 10 / mo_num done in (min)', (tt2-tt1)/60.d0
+        print*, ' estimated time required (min)', dble(mo_num-10)*(tt2-tt1)/(60.d0*10.d0)
+      elseif(l == 101) then
+        call wall_time(tt2)
+        print*, ' 100 / mo_num done in (min)', (tt2-tt1)/60.d0
+        print*, ' estimated time required (min)', dble(mo_num-100)*(tt2-tt1)/(60.d0*100.d0)
+      endif
+    enddo ! l
+
+    close(11)
+
+    deallocate(a_jkp, a_kpq, ao_two_e_tc_tot_tmp)
+
+  else
+
+    print*, ' memory scale of TC ao -> mo: O(N4) '
+
+    allocate(a2(ao_num,ao_num,ao_num,mo_num))
+  
+    call dgemm( 'T', 'N', ao_num*ao_num*ao_num, mo_num, ao_num, 1.d0     &
+              , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num &
+              , 0.d0, a2(1,1,1,1), ao_num*ao_num*ao_num)
+  
+    FREE ao_two_e_tc_tot
+
+    allocate(a1(ao_num,ao_num,mo_num,mo_num))
+  
+    call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 &
+              , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num          &
+              , 0.d0, a1(1,1,1,1), ao_num*ao_num*mo_num)
+  
+    deallocate(a2)
+    allocate(a2(ao_num,mo_num,mo_num,mo_num))
+  
+    call dgemm( 'T', 'N', ao_num*mo_num*mo_num, mo_num, ao_num, 1.d0 &
+              , a1(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num          &
+              , 0.d0, a2(1,1,1,1), ao_num*mo_num*mo_num)
+  
+    deallocate(a1)
+  
+    call dgemm( 'T', 'N', mo_num*mo_num*mo_num, mo_num, ao_num, 1.d0 &
+              , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num          &
+              , 0.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,1), mo_num*mo_num*mo_num)
+  
+    deallocate(a2)
+  
+  endif
 
   !allocate(a1(mo_num,ao_num,ao_num,ao_num))
   !a1 = 0.d0
@@ -135,6 +203,10 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num,
   !enddo
   !deallocate(a1)
 
+  call wall_time(t2)
+  print *, ' WALL TIME for PROVIDING mo_bi_ortho_tc_two_e_chemist (min)', (t2-t1)/60.d0
+  call print_memory_usage()
+
 END_PROVIDER 
 
 ! ---
@@ -176,6 +248,28 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num,
 
 END_PROVIDER 
 
+BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_transp, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+  !
+  ! mo_bi_ortho_tc_two_e_transp(i,j,k,l) = <k l| V(r_12) |i j> = transpose of mo_bi_ortho_tc_two_e
+  !
+  ! the potential V(r_12) contains ALL TWO-E CONTRIBUTION OF THE TC-HAMILTONIAN
+  !
+ END_DOC
+
+ integer :: i,j,k,l
+ do i = 1, mo_num
+  do j = 1, mo_num
+   do k = 1, mo_num
+    do l = 1, mo_num
+     mo_bi_ortho_tc_two_e_transp(i,j,k,l) = mo_bi_ortho_tc_two_e_transp(k,l,i,j)
+    enddo
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
 ! ---
 
  BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj,          (mo_num,mo_num)]
diff --git a/plugins/local/bi_ortho_mos/overlap.irp.f b/plugins/local/bi_ortho_mos/overlap.irp.f
index ff5d5c84..7f07929f 100644
--- a/plugins/local/bi_ortho_mos/overlap.irp.f
+++ b/plugins/local/bi_ortho_mos/overlap.irp.f
@@ -56,10 +56,10 @@
     print*,'Average trace of overlap_bi_ortho is different from 1 by ', dabs(accu_d-1.d0)
     print*,'And bi orthogonality is off by an average of ',accu_nd
     print*,'****************'
-    print*,'Overlap matrix betwee mo_l_coef and mo_r_coef  '
-    do i = 1, mo_num
-      write(*,'(100(F16.10,X))')overlap_bi_ortho(i,:)
-    enddo
+    !print*,'Overlap matrix betwee mo_l_coef and mo_r_coef  '
+    !do i = 1, mo_num
+    !  write(*,'(100(F16.10,X))')overlap_bi_ortho(i,:)
+    !enddo
   endif
   print*,'Average trace of overlap_bi_ortho (should be 1.)'
   print*,'accu_d  = ',accu_d
diff --git a/plugins/local/mo_localization/README.md b/plugins/local/mo_localization/README.md
index c28a5ee1..512e36af 100644
--- a/plugins/local/mo_localization/README.md
+++ b/plugins/local/mo_localization/README.md
@@ -3,7 +3,7 @@ To localize the MOs:
 ```
 qp run localization  
 ```
-By default, the different otbital classes are automatically set by splitting  
+By default, the different orbital classes are automatically set by splitting  
 the orbitales in the following classes:  
 - Core -> Core  
 - Active, doubly occupied -> Inactive  
diff --git a/plugins/local/non_h_ints_mu/tc_integ.irp.f b/plugins/local/non_h_ints_mu/tc_integ.irp.f
index 775a9e4c..58e3db48 100644
--- a/plugins/local/non_h_ints_mu/tc_integ.irp.f
+++ b/plugins/local/non_h_ints_mu/tc_integ.irp.f
@@ -44,14 +44,92 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f
     elseif(tc_integ_type .eq. "numeric") then
 
       print *, ' Numerical integration over r1 and r2 will be performed'
-  
-      ! TODO combine 1shot & int2_grad1_u12_ao_num
 
-      PROVIDE int2_grad1_u12_ao_num
-      int2_grad1_u12_ao = int2_grad1_u12_ao_num
+      if(tc_save_mem) then
 
-      !PROVIDE int2_grad1_u12_ao_num_1shot
-      !int2_grad1_u12_ao = int2_grad1_u12_ao_num_1shot
+        integer                       :: n_blocks, n_rest, n_pass
+        integer                       :: i_blocks, i_rest, i_pass, ii
+        double precision              :: mem, n_double
+        double precision, allocatable :: tmp(:,:,:), xx(:)
+        double precision, allocatable :: tmp_grad1_u12(:,:,:)
+
+        PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra
+
+        allocate(tmp(n_points_extra_final_grid,ao_num,ao_num), xx(n_points_extra_final_grid))
+        !$OMP PARALLEL               &
+        !$OMP DEFAULT (NONE)         &
+        !$OMP PRIVATE (j, i, jpoint) &
+        !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp)
+        !$OMP DO COLLAPSE(2)
+        do j = 1, ao_num
+          do i = 1, ao_num
+            do jpoint = 1, n_points_extra_final_grid
+              tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j)
+            enddo
+          enddo
+        enddo
+        !$OMP END DO
+        !$OMP END PARALLEL
+        call total_memory(mem)
+        mem      = max(1.d0, qp_max_mem - mem)
+        n_double = mem * 1.d8
+        n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid))
+        n_rest   = int(mod(n_points_final_grid, n_blocks))
+        n_pass   = int((n_points_final_grid - n_rest) / n_blocks)
+        call write_int(6, n_pass, 'Number of passes')
+        call write_int(6, n_blocks, 'Size of the blocks')
+        call write_int(6, n_rest, 'Size of the last block')
+        allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3))
+        do i_pass = 1, n_pass
+          ii = (i_pass-1)*n_blocks + 1
+          !$OMP PARALLEL                   &
+          !$OMP DEFAULT (NONE)             &
+          !$OMP PRIVATE (i_blocks, ipoint) &
+          !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, xx, tmp_grad1_u12)
+          !$OMP DO 
+          do i_blocks = 1, n_blocks
+            ipoint = ii - 1 + i_blocks ! r1
+            call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), tmp_grad1_u12(1,i_blocks,3), xx(1))
+          enddo
+          !$OMP END DO
+          !$OMP END PARALLEL
+          do m = 1, 3
+            call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0                     &
+                      , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid &
+                      , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num)
+          enddo
+        enddo
+        deallocate(tmp_grad1_u12)
+        if(n_rest .gt. 0) then
+          allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3))
+          ii = n_pass*n_blocks + 1
+          !$OMP PARALLEL                 &
+          !$OMP DEFAULT (NONE)           &
+          !$OMP PRIVATE (i_rest, ipoint) &
+          !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, xx, tmp_grad1_u12)
+          !$OMP DO 
+          do i_rest = 1, n_rest
+            ipoint = ii - 1 + i_rest ! r1
+            call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), tmp_grad1_u12(1,i_rest,3), xx(1))
+          enddo
+          !$OMP END DO
+          !$OMP END PARALLEL
+          do m = 1, 3
+            call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0                       &
+                      , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid &
+                      , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num)
+          enddo
+          deallocate(tmp_grad1_u12)
+        endif
+        deallocate(tmp,xx)
+
+      else
+        ! TODO combine 1shot & int2_grad1_u12_ao_num
+        PROVIDE int2_grad1_u12_ao_num
+        int2_grad1_u12_ao = int2_grad1_u12_ao_num
+        !PROVIDE int2_grad1_u12_ao_num_1shot
+        !int2_grad1_u12_ao = int2_grad1_u12_ao_num_1shot
+      endif
 
     elseif(tc_integ_type .eq. "semi-analytic") then
 
@@ -177,13 +255,88 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p
 
     print *, ' Numerical integration over r1 and r2 will be performed'
   
-    ! TODO combine 1shot & int2_grad1_u12_square_ao_num
+    if(tc_save_mem) then
 
-    PROVIDE int2_grad1_u12_square_ao_num
-    int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num
+      integer                       :: n_blocks, n_rest, n_pass
+      integer                       :: i_blocks, i_rest, i_pass, ii
+      double precision              :: mem, n_double
+      double precision, allocatable :: tmp(:,:,:), xx(:,:,:)
+      double precision, allocatable :: tmp_grad1_u12_squared(:,:)
 
-    !PROVIDE int2_grad1_u12_square_ao_num_1shot
-    !int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num_1shot
+      PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra
+
+      allocate(tmp(n_points_extra_final_grid,ao_num,ao_num))
+      !$OMP PARALLEL               &
+      !$OMP DEFAULT (NONE)         &
+      !$OMP PRIVATE (j, i, jpoint) &
+      !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp)
+      !$OMP DO COLLAPSE(2)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do jpoint = 1, n_points_extra_final_grid
+            tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j)
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+      !$OMP END PARALLEL
+      call total_memory(mem)
+      mem      = max(1.d0, qp_max_mem - mem)
+      n_double = mem * 1.d8
+      n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid))
+      n_rest   = int(mod(n_points_final_grid, n_blocks))
+      n_pass   = int((n_points_final_grid - n_rest) / n_blocks)
+      call write_int(6, n_pass, 'Number of passes')
+      call write_int(6, n_blocks, 'Size of the blocks')
+      call write_int(6, n_rest, 'Size of the last block')
+      allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks), xx(n_points_extra_final_grid,n_blocks,3))
+      do i_pass = 1, n_pass
+        ii = (i_pass-1)*n_blocks + 1
+        !$OMP PARALLEL                   &
+        !$OMP DEFAULT (NONE)             &
+        !$OMP PRIVATE (i_blocks, ipoint) &
+        !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, xx, final_grid_points, tmp_grad1_u12_squared)
+        !$OMP DO 
+        do i_blocks = 1, n_blocks
+          ipoint = ii - 1 + i_blocks ! r1
+          call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, xx(1,i_blocks,1), xx(1,i_blocks,2), xx(1,i_blocks,3), tmp_grad1_u12_squared(1,i_blocks))
+        enddo
+        !$OMP END DO
+        !$OMP END PARALLEL
+        call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, -0.5d0                         &
+                  , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid &
+                  , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num)
+      enddo
+      deallocate(tmp_grad1_u12_squared, xx)
+      if(n_rest .gt. 0) then
+        ii = n_pass*n_blocks + 1
+        allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest), xx(n_points_extra_final_grid,n_rest,3))
+        !$OMP PARALLEL                 &
+        !$OMP DEFAULT (NONE)           &
+        !$OMP PRIVATE (i_rest, ipoint) &
+        !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, xx, final_grid_points, tmp_grad1_u12_squared)
+        !$OMP DO 
+        do i_rest = 1, n_rest
+          ipoint = ii - 1 + i_rest ! r1
+          call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, xx(1,i_rest,1), xx(1,i_rest,2), xx(1,i_rest,3), tmp_grad1_u12_squared(1,i_rest))
+        enddo
+        !$OMP END DO
+        !$OMP END PARALLEL
+        call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, -0.5d0                           &
+                  , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid &
+                  , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num)
+        deallocate(tmp_grad1_u12_squared, xx)
+      endif
+      deallocate(tmp)
+
+    else
+
+      ! TODO combine 1shot & int2_grad1_u12_square_ao_num
+      PROVIDE int2_grad1_u12_square_ao_num
+      int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num
+      !PROVIDE int2_grad1_u12_square_ao_num_1shot
+      !int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num_1shot
+    endif
 
   elseif(tc_integ_type .eq. "semi-analytic") then
 
diff --git a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f
index e5d75c3d..6d446037 100644
--- a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f
+++ b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f
@@ -131,7 +131,7 @@
   deallocate(tmp)
 
   call wall_time(time1)
-  print*, ' wall time for int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num =', time1-time0 
+  print*, ' wall time for int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num = (min)', (time1-time0) / 60.d0
   call print_memory_usage()
 
 END_PROVIDER
diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f
index 9d3cf565..a1bbd6e0 100644
--- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f
+++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f
@@ -33,8 +33,10 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
   double precision              :: weight1, ao_k_r, ao_i_r
   double precision              :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq
   double precision              :: time0, time1
-  double precision, allocatable :: b_mat(:,:,:,:), c_mat(:,:,:)
+  double precision, allocatable :: c_mat(:,:,:)
+  logical,          external    :: ao_two_e_integral_zero
   double precision, external    :: get_ao_two_e_integral
+  double precision, external    :: ao_two_e_integral
 
   PROVIDe tc_integ_type
   PROVIDE env_type
@@ -53,7 +55,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
     print*, ' Reading ao_two_e_tc_tot from ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot'
 
     open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="read")
-      read(11) ao_two_e_tc_tot
+    do i = 1, ao_num
+      read(11) ao_two_e_tc_tot(:,:,:,i)
+    enddo
     close(11)
 
   else
@@ -65,27 +69,59 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
 
     PROVIDE int2_grad1_u12_square_ao
 
-    allocate(c_mat(n_points_final_grid,ao_num,ao_num))
+    if(tc_save_mem_loops) then
 
-    !$OMP PARALLEL               &
-    !$OMP DEFAULT (NONE)         &
-    !$OMP PRIVATE (i, k, ipoint) &
-    !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
-    !$OMP DO SCHEDULE (static)
-    do i = 1, ao_num
-      do k = 1, ao_num
-        do ipoint = 1, n_points_final_grid
-          c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      print*, ' LOOPS are used to evaluate Hermitian part of ao_two_e_tc_tot ...'
+
+      !$OMP PARALLEL                                              &
+      !$OMP DEFAULT (NONE)                                        &
+      !$OMP PRIVATE (i, j, k, l, ipoint, ao_i_r, ao_k_r, weight1) &
+      !$OMP SHARED (ao_num, n_points_final_grid, ao_two_e_tc_tot, &
+      !$OMP         aos_in_r_array_transp, final_weight_at_r_vector, int2_grad1_u12_square_ao)
+      !$OMP DO COLLAPSE(4)
+      do i = 1, ao_num
+        do k = 1, ao_num
+          do l = 1, ao_num
+            do j = 1, ao_num
+              ao_two_e_tc_tot(j,l,k,i) = 0.d0
+              do ipoint = 1, n_points_final_grid
+                weight1 = final_weight_at_r_vector(ipoint)
+                ao_i_r = aos_in_r_array_transp(ipoint,i)
+                ao_k_r = aos_in_r_array_transp(ipoint,k)
+                ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) + int2_grad1_u12_square_ao(j,l,ipoint) * weight1 * ao_i_r * ao_k_r
+              enddo
+            enddo
+          enddo
         enddo
       enddo
-    enddo
-    !$OMP END DO
-    !$OMP END PARALLEL
+      !$OMP END DO
+      !$OMP END PARALLEL
 
-    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0                 &
-              , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
-              , 0.d0, ao_two_e_tc_tot, ao_num*ao_num)
+    else
 
+      print*, ' DGEMM are used to evaluate Hermitian part of ao_two_e_tc_tot ...'
+
+      allocate(c_mat(n_points_final_grid,ao_num,ao_num))
+      !$OMP PARALLEL               &
+      !$OMP DEFAULT (NONE)         &
+      !$OMP PRIVATE (i, k, ipoint) &
+      !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+      !$OMP DO SCHEDULE (static)
+      do i = 1, ao_num
+        do k = 1, ao_num
+          do ipoint = 1, n_points_final_grid
+            c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+      !$OMP END PARALLEL
+      call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0                 &
+                , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
+                , 0.d0, ao_two_e_tc_tot, ao_num*ao_num)
+      deallocate(c_mat)
+    endif
+  
     FREE int2_grad1_u12_square_ao
 
     if( (tc_integ_type .eq. "semi-analytic")                            .and. &
@@ -96,6 +132,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
       ! an additional term is added here directly instead of 
       ! being added in int2_grad1_u12_square_ao for performance
 
+      allocate(c_mat(n_points_final_grid,ao_num,ao_num))
       PROVIDE int2_u2_env2
 
       !$OMP PARALLEL                                                                                     &
@@ -127,10 +164,13 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
                 , int2_u2_env2(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
                 , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num)
 
+      deallocate(c_mat)
       FREE int2_u2_env2
     endif ! use_ipp
 
-    deallocate(c_mat)
+    call wall_time(time1)
+    print*, ' done with Hermitian part after (min) ', (time1 - time0) / 60.d0
+    call print_memory_usage()
 
     ! ---
 
@@ -138,39 +178,71 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
 
       PROVIDE int2_grad1_u12_ao
 
-      allocate(b_mat(n_points_final_grid,ao_num,ao_num,3))
+      if(tc_save_mem_loops) then
 
-      !$OMP PARALLEL                                                              &
-      !$OMP DEFAULT (NONE)                                                        &
-      !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
-      !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
-      !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
-      !$OMP DO SCHEDULE (static)
-      do i = 1, ao_num
-        do k = 1, ao_num
-          do ipoint = 1, n_points_final_grid
+        print*, ' LOOPS are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...'
 
-            weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
-            ao_i_r  = aos_in_r_array_transp(ipoint,i)
-            ao_k_r  = aos_in_r_array_transp(ipoint,k)
-
-            b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1))
-            b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2))
-            b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3))
+        !$OMP PARALLEL                                                 &
+        !$OMP DEFAULT (NONE)                                           &
+        !$OMP PRIVATE (i, j, k, l, ipoint, ao_i_r, ao_k_r, weight1)    &
+        !$OMP SHARED (ao_num, n_points_final_grid, ao_two_e_tc_tot,    &
+        !$OMP         aos_in_r_array_transp, final_weight_at_r_vector, &
+        !$OMP         int2_grad1_u12_ao, aos_grad_in_r_array_transp_bis)
+        !$OMP DO COLLAPSE(4)
+        do i = 1, ao_num
+          do k = 1, ao_num
+            do l = 1, ao_num
+              do j = 1, ao_num
+                do ipoint = 1, n_points_final_grid
+                  weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+                  ao_i_r  = aos_in_r_array_transp(ipoint,i)
+                  ao_k_r  = aos_in_r_array_transp(ipoint,k)
+                  ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) &
+                                           - weight1 * int2_grad1_u12_ao(j,l,ipoint,1) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) &
+                                           - weight1 * int2_grad1_u12_ao(j,l,ipoint,2) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) &
+                                           - weight1 * int2_grad1_u12_ao(j,l,ipoint,3) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3))
+                enddo
+              enddo
+            enddo
           enddo
         enddo
-      enddo
-      !$OMP END DO
-      !$OMP END PARALLEL
+        !$OMP END DO
+        !$OMP END PARALLEL
 
-      do m = 1, 3
-        call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0             &
-                  , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
-                  , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num)
-      enddo
-      deallocate(b_mat)
+      else
 
-      FREE int2_grad1_u12_ao
+        print*, ' DGEMM are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...'
+
+        allocate(c_mat(n_points_final_grid,ao_num,ao_num))
+        do m = 1, 3
+          !$OMP PARALLEL                                                              &
+          !$OMP DEFAULT (NONE)                                                        &
+          !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+          !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & 
+          !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector, m)
+          !$OMP DO SCHEDULE (static)
+          do i = 1, ao_num
+            do k = 1, ao_num
+              do ipoint = 1, n_points_final_grid
+  
+                weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+                ao_i_r  = aos_in_r_array_transp(ipoint,i)
+                ao_k_r  = aos_in_r_array_transp(ipoint,k)
+  
+                c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m))
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+          !$OMP END PARALLEL
+
+          call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0           &
+                    , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
+                    , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num)
+        enddo
+        deallocate(c_mat)
+
+      end if
 
       if(tc_integ_type .eq. "semi-analytic") then 
         FREE int2_grad1_u2e_ao
@@ -178,30 +250,67 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
 
     endif ! var_tc
 
+    call wall_time(time1)
+    print*, ' done with non-Hermitian part after (min) ', (time1 - time0) / 60.d0
+    call print_memory_usage()
+
     ! ---
 
     call sum_A_At(ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num)
 
-    PROVIDE ao_integrals_map
+    ! ---
+  
+    logical          :: integ_zero
+    double precision :: integ_val
 
-    !$OMP PARALLEL DEFAULT(NONE)                            &
-    !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) &
-    !$OMP PRIVATE(i, j, k, l)
-    !$OMP DO
-    do j = 1, ao_num
-      do l = 1, ao_num
-        do i = 1, ao_num
-          do k = 1, ao_num
-            !                                                     < 1:i, 2:j | 1:k, 2:l > 
-            ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map)
+    print*, ' adding ERI to ao_two_e_tc_tot ...'
+
+    if(tc_save_mem) then
+      print*, ' ao_integrals_map will not be used'
+      !$OMP PARALLEL DEFAULT(NONE)                     &
+      !$OMP PRIVATE(i, j, k, l, integ_zero, integ_val) & 
+      !$OMP SHARED(ao_num, ao_two_e_tc_tot)
+      !$OMP DO COLLAPSE(4)
+      do j = 1, ao_num
+        do l = 1, ao_num
+          do i = 1, ao_num
+            do k = 1, ao_num
+              integ_zero = ao_two_e_integral_zero(i,j,k,l)
+              if(.not. integ_zero) then
+                            ! i,k : r1    j,l : r2
+                integ_val = ao_two_e_integral(i,k,j,l)
+                ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + integ_val
+              endif
+            enddo
           enddo
         enddo
       enddo
-    enddo
-    !$OMP END DO
-    !$OMP END PARALLEL
+      !$OMP END DO
+      !$OMP END PARALLEL
+    else
+      print*, ' ao_integrals_map will be used'
+      PROVIDE ao_integrals_map
+      !$OMP PARALLEL DEFAULT(NONE)                            &
+      !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) &
+      !$OMP PRIVATE(i, j, k, l)
+      !$OMP DO COLLAPSE(4)
+      do j = 1, ao_num
+        do l = 1, ao_num
+          do i = 1, ao_num
+            do k = 1, ao_num
+              !                                                     < 1:i, 2:j | 1:k, 2:l > 
+              ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map)
+            enddo
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+      !$OMP END PARALLEL
+      !call clear_ao_map()
+      FREE ao_integrals_map
+    endif
 
-    if(tc_integ_type .eq. "numeric") then
+    if((tc_integ_type .eq. "numeric") .and. (.not. tc_save_mem)) then
       FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num
     endif
 
@@ -211,7 +320,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
     print*, ' Saving ao_two_e_tc_tot in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot'
     open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="write")
     call ezfio_set_work_empty(.False.)
-      write(11) ao_two_e_tc_tot
+    do i = 1, ao_num
+      write(11) ao_two_e_tc_tot(:,:,:,i)
+    enddo
     close(11)
     call ezfio_set_tc_keywords_io_tc_integ('Read')
   endif
diff --git a/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f b/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f
index cb38347e..4d4bc047 100644
--- a/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f
+++ b/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f
@@ -2144,6 +2144,7 @@ subroutine impose_biorthog_degen_eigvec(n, deg_num, e0, L0, R0)
       enddo
       !print*,' accu_nd after = ', accu_nd
       if(accu_nd .gt. 1d-12) then
+        print*, ' accu_nd =', accu_nd
         print*, ' your strategy for degenerates orbitals failed !'
         print*, m, 'deg on', i
         stop
diff --git a/plugins/local/normal_order_old/NEED b/plugins/local/normal_order_old/NEED
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/plugins/local/normal_order_old/NEED
@@ -0,0 +1 @@
+
diff --git a/plugins/local/normal_order_old/README.rst b/plugins/local/normal_order_old/README.rst
new file mode 100644
index 00000000..a284fcfd
--- /dev/null
+++ b/plugins/local/normal_order_old/README.rst
@@ -0,0 +1,4 @@
+================
+normal_order_old
+================
+
diff --git a/plugins/local/tc_bi_ortho/normal_ordered.irp.f b/plugins/local/normal_order_old/normal_ordered.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/normal_ordered.irp.f
rename to plugins/local/normal_order_old/normal_ordered.irp.f
diff --git a/plugins/local/tc_bi_ortho/normal_ordered_contractions.irp.f b/plugins/local/normal_order_old/normal_ordered_contractions.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/normal_ordered_contractions.irp.f
rename to plugins/local/normal_order_old/normal_ordered_contractions.irp.f
diff --git a/plugins/local/tc_bi_ortho/normal_ordered_old.irp.f b/plugins/local/normal_order_old/normal_ordered_old.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/normal_ordered_old.irp.f
rename to plugins/local/normal_order_old/normal_ordered_old.irp.f
diff --git a/plugins/local/tc_bi_ortho/normal_ordered_v0.irp.f b/plugins/local/normal_order_old/normal_ordered_v0.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/normal_ordered_v0.irp.f
rename to plugins/local/normal_order_old/normal_ordered_v0.irp.f
diff --git a/plugins/local/slater_tc/NEED b/plugins/local/slater_tc/NEED
new file mode 100644
index 00000000..ef0aa3f7
--- /dev/null
+++ b/plugins/local/slater_tc/NEED
@@ -0,0 +1,7 @@
+determinants
+normal_order_old
+bi_ort_ints
+bi_ortho_mos
+tc_keywords
+non_hermit_dav
+dav_general_mat
diff --git a/plugins/local/tc_bi_ortho/h_biortho.irp.f b/plugins/local/slater_tc/h_biortho.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/h_biortho.irp.f
rename to plugins/local/slater_tc/h_biortho.irp.f
diff --git a/plugins/local/tc_bi_ortho/h_mat_triple.irp.f b/plugins/local/slater_tc/h_mat_triple.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/h_mat_triple.irp.f
rename to plugins/local/slater_tc/h_mat_triple.irp.f
diff --git a/plugins/local/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/plugins/local/slater_tc/h_tc_bi_ortho_psi.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f
rename to plugins/local/slater_tc/h_tc_bi_ortho_psi.irp.f
diff --git a/plugins/local/tc_bi_ortho/h_tc_s2_u0.irp.f b/plugins/local/slater_tc/h_tc_s2_u0.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/h_tc_s2_u0.irp.f
rename to plugins/local/slater_tc/h_tc_s2_u0.irp.f
diff --git a/plugins/local/slater_tc/slater_tc.irp.f b/plugins/local/slater_tc/slater_tc.irp.f
new file mode 100644
index 00000000..27ab47c5
--- /dev/null
+++ b/plugins/local/slater_tc/slater_tc.irp.f
@@ -0,0 +1,7 @@
+program slater_tc
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+end
diff --git a/plugins/local/tc_bi_ortho/slater_tc_3e_slow.irp.f b/plugins/local/slater_tc/slater_tc_3e_slow.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/slater_tc_3e_slow.irp.f
rename to plugins/local/slater_tc/slater_tc_3e_slow.irp.f
diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt.irp.f b/plugins/local/slater_tc/slater_tc_opt.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/slater_tc_opt.irp.f
rename to plugins/local/slater_tc/slater_tc_opt.irp.f
diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt_diag.irp.f b/plugins/local/slater_tc/slater_tc_opt_diag.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/slater_tc_opt_diag.irp.f
rename to plugins/local/slater_tc/slater_tc_opt_diag.irp.f
diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt_double.irp.f b/plugins/local/slater_tc/slater_tc_opt_double.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/slater_tc_opt_double.irp.f
rename to plugins/local/slater_tc/slater_tc_opt_double.irp.f
diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt_single.irp.f b/plugins/local/slater_tc/slater_tc_opt_single.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/slater_tc_opt_single.irp.f
rename to plugins/local/slater_tc/slater_tc_opt_single.irp.f
diff --git a/plugins/local/tc_bi_ortho/slater_tc_slow.irp.f b/plugins/local/slater_tc/slater_tc_slow.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/slater_tc_slow.irp.f
rename to plugins/local/slater_tc/slater_tc_slow.irp.f
diff --git a/plugins/local/tc_bi_ortho/tc_hmat.irp.f b/plugins/local/slater_tc/tc_hmat.irp.f
similarity index 100%
rename from plugins/local/tc_bi_ortho/tc_hmat.irp.f
rename to plugins/local/slater_tc/tc_hmat.irp.f
diff --git a/plugins/local/tc_bi_ortho/NEED b/plugins/local/tc_bi_ortho/NEED
index 9a0c20ef..01841e02 100644
--- a/plugins/local/tc_bi_ortho/NEED
+++ b/plugins/local/tc_bi_ortho/NEED
@@ -1,6 +1,2 @@
-bi_ort_ints
-bi_ortho_mos
-tc_keywords
-non_hermit_dav
-dav_general_mat
 tc_scf
+slater_tc
diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg
index e0776136..f7bd06bd 100644
--- a/plugins/local/tc_keywords/EZFIO.cfg
+++ b/plugins/local/tc_keywords/EZFIO.cfg
@@ -285,3 +285,22 @@ type: logical
 doc: If |true|, you minimize the angle between the left and right vectors associated to degenerate orbitals
 interface: ezfio,provider,ocaml
 default: False
+
+[ao_to_mo_tc_n3]
+type: logical
+doc: If |true|, memory scale of TC ao -> mo: O(N3)
+interface: ezfio,provider,ocaml
+default: False
+
+[tc_save_mem_loops]
+type: logical
+doc: If |true|, use loops to save memory TC
+interface: ezfio,provider,ocaml
+default: False
+
+[tc_save_mem]
+type: logical
+doc: If |true|, more calc but less mem
+interface: ezfio,provider,ocaml
+default: False
+
diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f
index 0b883865..8fd5e5b6 100644
--- a/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f
+++ b/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f
@@ -9,7 +9,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
   double precision              :: loc_1, loc_2, loc_3
   double precision, allocatable :: Okappa(:), Jkappa(:,:)
   double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:)
-  double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:)
+  double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:), tmp_22(:,:,:)
   double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:)
 
   PROVIDE mo_l_coef mo_r_coef
@@ -63,17 +63,13 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
   allocate(tmp_1(n_points_final_grid,4))
 
   do ipoint = 1, n_points_final_grid
-
     loc_1 = 2.d0 * Okappa(ipoint) 
-
     tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1)
     tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2)
     tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3)
-
     tmp_1(ipoint,4) = Okappa(ipoint)
   enddo
 
-
   !$OMP PARALLEL                                                  &
   !$OMP DEFAULT (NONE)                                            &
   !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2)                 &
@@ -112,58 +108,81 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
 
   ! ---
 
-  allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num))
+  if(tc_save_mem) then
 
-  !$OMP PARALLEL                                                  &
-  !$OMP DEFAULT (NONE)                                            &
-  !$OMP PRIVATE (ipoint, a, b)                                    &
-  !$OMP SHARED (n_points_final_grid, mo_num,                      &
-  !$OMP         mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
-  !$OMP         int2_grad1_u12_bimo_t, final_weight_at_r_vector,  &
-  !$OMP         tmp_2)
-  !$OMP DO COLLAPSE(2)
-  do a = 1, mo_num
-    do b = 1, mo_num
-      do ipoint = 1, n_points_final_grid
-        tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a)
-        tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a)
-        tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a)
-      enddo
-    enddo
-  enddo
-  !$OMP END DO
-  !$OMP END PARALLEL
-
-  !$OMP PARALLEL                                                 &
-  !$OMP DEFAULT (NONE)                                           &
-  !$OMP PRIVATE (ipoint, a, b, i)                                &
-  !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num,      &
-  !$OMP         final_weight_at_r_vector, int2_grad1_u12_bimo_t, &
-  !$OMP         tmp_2)
-  !$OMP DO COLLAPSE(2)
-  do a = 1, mo_num
-    do b = 1, mo_num
-      tmp_2(:,4,b,a) = 0.d0
-      do i = 1, elec_beta_num
+    allocate(tmp_22(n_points_final_grid,4,mo_num))
+    do a = 1, mo_num
+      !$OMP PARALLEL                                                  &
+      !$OMP DEFAULT (NONE)                                            &
+      !$OMP PRIVATE (ipoint, b, i)                                    &
+      !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, a,    &
+      !$OMP         mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
+      !$OMP         int2_grad1_u12_bimo_t, final_weight_at_r_vector,  &
+      !$OMP         tmp_22)
+      !$OMP DO
+      do b = 1, mo_num
         do ipoint = 1, n_points_final_grid
-          tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) &
-                                                                    + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) &
-                                                                    + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) )
+          tmp_22(ipoint,1,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a)
+          tmp_22(ipoint,2,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a)
+          tmp_22(ipoint,3,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a)
+        enddo
+        tmp_22(:,4,b) = 0.d0
+        do i = 1, elec_beta_num
+          do ipoint = 1, n_points_final_grid
+            tmp_22(ipoint,4,b) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) &
+                                                                     + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) &
+                                                                     + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) )
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+      !$OMP END PARALLEL
+      call dgemv( 'T', 4*n_points_final_grid, mo_num, -2.d0        &
+                , tmp_22(1,1,1), size(tmp_22, 1) * size(tmp_22, 2) &
+                , tmp_1(1,1), 1                                    &
+                , 0.d0, fock_3e_uhf_mo_cs(1,a), 1)
+    enddo
+    deallocate(tmp_22)
+
+  else
+
+    allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num))
+    !$OMP PARALLEL                                                  &
+    !$OMP DEFAULT (NONE)                                            &
+    !$OMP PRIVATE (ipoint, a, b, i)                                 &
+    !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num,       &
+    !$OMP         mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
+    !$OMP         int2_grad1_u12_bimo_t, final_weight_at_r_vector,  &
+    !$OMP         tmp_2)
+    !$OMP DO COLLAPSE(2)
+    do a = 1, mo_num
+      do b = 1, mo_num
+        do ipoint = 1, n_points_final_grid
+          tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a)
+          tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a)
+          tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a)
+        enddo
+        tmp_2(:,4,b,a) = 0.d0
+        do i = 1, elec_beta_num
+          do ipoint = 1, n_points_final_grid
+            tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) &
+                                                                      + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) &
+                                                                      + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) )
+          enddo
         enddo
       enddo
     enddo
-  enddo
-  !$OMP END DO
-  !$OMP END PARALLEL
+    !$OMP END DO
+    !$OMP END PARALLEL
+    call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 &
+              , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2)  &
+              , tmp_1(1,1), 1                                    &
+              , 0.d0, fock_3e_uhf_mo_cs(1,1), 1)
+    deallocate(tmp_2)
 
-  ! ---
+  endif
 
-  call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 &
-            , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2)  &
-            , tmp_1(1,1), 1                                    &
-            , 0.d0, fock_3e_uhf_mo_cs(1,1), 1)
-
-  deallocate(tmp_1, tmp_2)
+  deallocate(tmp_1)
 
   ! ---
 
@@ -272,7 +291,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
   ! ---
 
   !call wall_time(tf)
-  !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti
+  !print *, ' total Wall time for fock_3e_uhf_mo_cs =', (tf - ti) / 60.d0
 
 END_PROVIDER 
 
diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f
index 63a1e162..47ee5b48 100644
--- a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f
+++ b/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f
@@ -32,7 +32,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)]
   endif
 
   !call wall_time(tf)
-  !print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti
+  !print *, ' Wall time for fock_3e_uhf_mo_a (min) =', (tf - ti)/60.d0
 
 END_PROVIDER 
 
diff --git a/plugins/local/tc_scf/fock_tc.irp.f b/plugins/local/tc_scf/fock_tc.irp.f
index 282f9873..d3ddb8ad 100644
--- a/plugins/local/tc_scf/fock_tc.irp.f
+++ b/plugins/local/tc_scf/fock_tc.irp.f
@@ -175,7 +175,7 @@ END_PROVIDER
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]
+BEGIN_PROVIDER [double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num)]
 
   BEGIN_DOC
   ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis
diff --git a/plugins/local/tc_scf/minimize_tc_angles.irp.f b/plugins/local/tc_scf/minimize_tc_angles.irp.f
index c7752930..e5f6cf87 100644
--- a/plugins/local/tc_scf/minimize_tc_angles.irp.f
+++ b/plugins/local/tc_scf/minimize_tc_angles.irp.f
@@ -20,7 +20,7 @@ program minimize_tc_angles
   ! TODO
   ! check if rotations of orbitals affect the TC energy
   ! and refuse the step
-  call minimize_tc_orb_angles
+  call minimize_tc_orb_angles()
 
 end
 
diff --git a/plugins/local/tc_scf/routines_rotates.irp.f b/plugins/local/tc_scf/routines_rotates.irp.f
index fbfc9beb..ae396028 100644
--- a/plugins/local/tc_scf/routines_rotates.irp.f
+++ b/plugins/local/tc_scf/routines_rotates.irp.f
@@ -40,9 +40,6 @@ subroutine LTxSxR(n, m, L, S, R, C)
 
 end subroutine LTxR
 
-! ---
-
-
 ! ---
 
 subroutine minimize_tc_orb_angles()
@@ -103,7 +100,10 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles)
   double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:)
   double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:)
 
-  E_thr = 1d-04
+  PROVIDE TC_HF_energy
+  PROVIDE mo_r_coef mo_l_coef
+
+  E_thr = 1d-07
   E_old = TC_HF_energy
   allocate(mo_l_coef_old(ao_num,mo_num), mo_r_coef_old(ao_num,mo_num))
   mo_r_coef_old = mo_r_coef
@@ -111,7 +111,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles)
 
   good_angles = .False.
 
-  allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num))
+  allocate(mo_l_coef_good(ao_num,mo_num), mo_r_coef_good(ao_num,mo_num))
 
   print *, ' ***************************************'
   print *, ' ***************************************'
@@ -123,7 +123,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles)
   mo_r_coef_good = mo_r_coef
   mo_l_coef_good = mo_l_coef
 
-  allocate(mo_r_coef_new(ao_num, mo_num))
+  allocate(mo_r_coef_new(ao_num,mo_num))
   mo_r_coef_new = mo_r_coef
   do i = 1, mo_num
     norm = 1.d0/dsqrt(overlap_mo_r(i,i))
@@ -141,10 +141,11 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles)
   call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat)
 ! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list)
   if(n_core_orb.ne.0)then
-   call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list)
+    call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list)
   else
-   call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list)
+    call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list)
   endif
+
   print *, ' fock_matrix_mo'
   do i = 1, mo_num
     print *, i, fock_diag(i), angle_left_right(i)
@@ -156,50 +157,52 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles)
 !  n_degen = ilast - ifirst +1
 
     n_degen = list_degen(i,0)
-    if(n_degen .ge. 1000)n_degen = 1 ! convention for core orbitals
+    if(n_degen .ge. 1000) n_degen = 1 ! convention for core orbitals
      
     if(n_degen .eq. 1) cycle
+    print*, ' working on orbital', i
+    print*, ' multiplicity =', n_degen
 
     allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen))
     allocate(mo_r_coef_tmp(ao_num,n_degen), mo_l_coef_tmp(ao_num,n_degen), mo_l_coef_new(ao_num,n_degen))
     allocate(T(n_degen,n_degen), Snew(n_degen,n_degen))
 
     print*,'Right orbitals before'
-     do j = 1, n_degen
-       write(*,'(100(F16.10,X))') mo_r_coef_new(1:ao_num,list_degen(i,j))
-     enddo
+    do j = 1, n_degen
+      write(*,'(1000(F16.10,X))') mo_r_coef_new(1:ao_num,list_degen(i,j))
+    enddo
     print*,'Left orbitals before'
-     do j = 1, n_degen
-       write(*,'(100(F16.10,X))')mo_l_coef(1:ao_num,list_degen(i,j)) 
-     enddo
+    do j = 1, n_degen
+      write(*,'(1000(F16.10,X))') mo_l_coef(1:ao_num,list_degen(i,j)) 
+    enddo
     if(angle_left_right(list_degen(i,1)).gt.80.d0.and.n_degen==2)then
-       integer :: i_list, j_list
-       i_list = list_degen(i,1)
-       j_list = list_degen(i,2)
-       print*,'Huge angle !!! == ',angle_left_right(list_degen(i,1)),angle_left_right(list_degen(i,2))
-       print*,'i_list = ',i_list
-       print*,'i_list = ',j_list
-       print*,'Swapping left/right orbitals'
-       call print_strong_overlap(i_list, j_list)
-       mo_r_coef_tmp(1:ao_num,1) = mo_r_coef_new(1:ao_num,i_list)
-       mo_r_coef_tmp(1:ao_num,2) = mo_l_coef(1:ao_num,i_list)
-       mo_l_coef_tmp(1:ao_num,1) = mo_l_coef(1:ao_num,j_list)
-       mo_l_coef_tmp(1:ao_num,2) = mo_r_coef_new(1:ao_num,j_list)
+      integer :: i_list, j_list
+      i_list = list_degen(i,1)
+      j_list = list_degen(i,2)
+      print*,'Huge angle !!! == ',angle_left_right(list_degen(i,1)),angle_left_right(list_degen(i,2))
+      print*,'i_list = ',i_list
+      print*,'i_list = ',j_list
+      print*,'Swapping left/right orbitals'
+      call print_strong_overlap(i_list, j_list)
+      mo_r_coef_tmp(1:ao_num,1) = mo_r_coef_new(1:ao_num,i_list)
+      mo_r_coef_tmp(1:ao_num,2) = mo_l_coef(1:ao_num,i_list)
+      mo_l_coef_tmp(1:ao_num,1) = mo_l_coef(1:ao_num,j_list)
+      mo_l_coef_tmp(1:ao_num,2) = mo_r_coef_new(1:ao_num,j_list)
     else
-     do j = 1, n_degen
-       print*,'i_list = ',list_degen(i,j)
-       mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j))
-       mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j))
-     enddo
+      do j = 1, n_degen
+        print*,'i_list = ',list_degen(i,j)
+        mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j))
+        mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j))
+      enddo
     endif
     print*,'Right orbitals '
-     do j = 1, n_degen
-       write(*,'(100(F16.10,X))')mo_r_coef_tmp(1:ao_num,j) 
-     enddo
+    do j = 1, n_degen
+      write(*,'(1000(F16.10,X))') mo_r_coef_tmp(1:ao_num,j) 
+    enddo
     print*,'Left orbitals '
-     do j = 1, n_degen
-       write(*,'(100(F16.10,X))')mo_l_coef_tmp(1:ao_num,j) 
-     enddo
+    do j = 1, n_degen
+      write(*,'(100(F16.10,X))') mo_l_coef_tmp(1:ao_num,j) 
+    enddo
     ! Orthogonalization of right functions
     print *, ' Orthogonalization of RIGHT functions'
     print *, ' ------------------------------------'
diff --git a/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f
new file mode 100644
index 00000000..7ce57578
--- /dev/null
+++ b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f
@@ -0,0 +1,58 @@
+! ---
+
+program write_ao_2e_tc_integ
+
+  implicit none
+
+  PROVIDE j1e_type
+  PROVIDE j2e_type
+
+  print *, ' j1e_type = ', j1e_type
+  print *, ' j2e_type = ', j2e_type
+
+  my_grid_becke  = .True.
+  PROVIDE tc_grid1_a tc_grid1_r
+  my_n_pt_r_grid = tc_grid1_r
+  my_n_pt_a_grid = tc_grid1_a
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  call write_int(6, my_n_pt_r_grid, 'radial  external grid over')
+  call write_int(6, my_n_pt_a_grid, 'angular external grid over')
+
+  if(tc_integ_type .eq. "numeric") then
+    my_extra_grid_becke  = .True.
+    PROVIDE tc_grid2_a tc_grid2_r
+    my_n_pt_r_extra_grid = tc_grid2_r
+    my_n_pt_a_extra_grid = tc_grid2_a
+    touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
+
+    call write_int(6, my_n_pt_r_extra_grid, 'radial  internal grid over')
+    call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over')
+  endif
+
+  call main()
+
+end
+
+! ---
+
+subroutine main()
+
+  implicit none
+
+  PROVIDE io_tc_integ
+
+  print*, 'io_tc_integ = ', io_tc_integ
+
+  if(io_tc_integ .ne. "Write") then
+    print*, 'io_tc_integ != Write'
+    print*, io_tc_integ
+    stop
+  endif
+
+  PROVIDE ao_two_e_tc_tot
+
+end
+
+! ---
+
diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f
index ae167282..16a52dc6 100644
--- a/src/becke_numerical_grid/extra_grid_vector.irp.f
+++ b/src/becke_numerical_grid/extra_grid_vector.irp.f
@@ -47,8 +47,12 @@ END_PROVIDER
   END_DOC
 
   implicit none
-  integer                        :: i,j,k,l,i_count
-  double precision               :: r(3)
+  integer          :: i, j, k, l, i_count
+  double precision :: r(3)
+  double precision :: wall0, wall1
+
+  call wall_time(wall0)
+  print *, ' Providing extra_final_grid_points ...'
 
   i_count = 0
   do j = 1, nucl_num
@@ -66,10 +70,25 @@ END_PROVIDER
         index_final_points_extra(2,i_count) = i
         index_final_points_extra(3,i_count) = j
         index_final_points_extra_reverse(k,i,j) = i_count
+
+        if(final_weight_at_r_vector_extra(i_count) .lt. 0.d0) then
+          print *, ' !!! WARNING !!!'
+          print *, ' negative weight !!!!'
+          print *, i_count, final_weight_at_r_vector_extra(i_count)
+          if(dabs(final_weight_at_r_vector_extra(i_count)) .lt. 1d-10) then
+            final_weight_at_r_vector_extra(i_count) = 0.d0
+          else
+            stop
+          endif
+        endif
       enddo
     enddo
   enddo
 
+  call wall_time(wall1)
+  print *, ' wall time for extra_final_grid_points,', wall1 - wall0
+  call print_memory_usage()
+
 END_PROVIDER
 
 
diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f
index 473096d0..c35918c3 100644
--- a/src/becke_numerical_grid/grid_becke_vector.irp.f
+++ b/src/becke_numerical_grid/grid_becke_vector.irp.f
@@ -72,7 +72,11 @@ END_PROVIDER
           print *, ' !!! WARNING !!!'
           print *, ' negative weight !!!!'
           print *, i_count, final_weight_at_r_vector(i_count)
-          stop
+          if(dabs(final_weight_at_r_vector(i_count)) .lt. 1d-10) then
+            final_weight_at_r_vector(i_count) = 0.d0
+          else
+            stop
+          endif
         endif 
       enddo
     enddo
diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f
index 913ae2f3..c7216a61 100644
--- a/src/mol_properties/multi_s_dipole_moment.irp.f
+++ b/src/mol_properties/multi_s_dipole_moment.irp.f
@@ -91,3 +91,42 @@ BEGIN_PROVIDER [double precision, multi_s_dipole_moment, (N_states, N_states)]
   enddo
 
 END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment_eigenvec, (N_states, N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment_eigenvec, (N_states, N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment_eigenvec, (N_states, N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment_eigenval,           (N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment_eigenval,           (N_states)]
+&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment_eigenval,           (N_states)]
+
+  implicit none
+  double precision, allocatable :: eigval(:), eigvec(:,:), A(:,:)
+
+  PROVIDE multi_s_x_dipole_moment multi_s_y_dipole_moment multi_s_z_dipole_moment
+
+  allocate(A(N_states,N_states), eigvec(N_states,N_states), eigval(N_states))
+
+  A = multi_s_x_dipole_moment
+  call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states)
+  multi_s_x_dipole_moment_eigenval = eigval
+  multi_s_x_dipole_moment_eigenvec = eigvec
+
+  A = multi_s_y_dipole_moment
+  call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states)
+  multi_s_y_dipole_moment_eigenval = eigval
+  multi_s_y_dipole_moment_eigenvec = eigvec
+
+  A = multi_s_z_dipole_moment
+  call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states)
+  multi_s_z_dipole_moment_eigenval = eigval
+  multi_s_z_dipole_moment_eigenvec = eigvec
+
+  deallocate(A, eigvec, eigval)
+
+END_PROVIDER
+
+! ---
+
+
diff --git a/src/tools/print_detweights.irp.f b/src/tools/print_detweights.irp.f
new file mode 100644
index 00000000..5e5f2bb1
--- /dev/null
+++ b/src/tools/print_detweights.irp.f
@@ -0,0 +1,97 @@
+program print_detweights
+
+  implicit none
+
+  read_wf = .True.
+  touch read_wf
+
+  call print_exc()
+  !call main()
+
+end
+
+! ---
+
+subroutine main()
+
+  implicit none
+  integer                       :: i
+  integer                       :: degree
+  integer                       :: ios
+  integer,          allocatable :: deg(:), ii(:), deg_sorted(:)
+  double precision, allocatable :: c(:)
+
+  PROVIDE N_int
+  PROVIDE N_det
+  PROVIDE psi_det
+  PROVIDe psi_coef
+
+  allocate(deg(N_det), ii(N_det), deg_sorted(N_det), c(N_det))
+
+  do i = 1, N_det
+
+    call debug_det(psi_det(1,1,i), N_int)
+    call get_excitation_degree(psi_det(1,1,i), psi_det(1,1,1), degree, N_int)
+
+    ii (i) = i
+    deg(i) = degree
+    c  (i) = dabs(psi_coef(i,1))
+  enddo
+
+  call dsort(c, ii, N_det)
+
+  do i = 1, N_det
+    deg_sorted(i) = deg(ii(i))
+    print *, deg_sorted(i), c(i)
+  enddo
+
+  print *, ' saving psi'
+
+  ! Writing output in binary format
+  open(unit=10, file="coef.bin", status="replace", action="write", iostat=ios, form="unformatted")
+
+    if(ios /= 0) then
+      print *, ' Error opening file!'
+      stop
+    endif
+
+    write(10) N_det
+    write(10) deg_sorted
+    write(10) c
+
+  close(10)
+
+  deallocate(deg, ii, deg_sorted, c)
+
+end
+
+! ---
+
+subroutine print_exc()
+
+  implicit none
+
+  integer              :: i
+  integer, allocatable :: deg(:)
+
+  PROVIDE N_int
+  PROVIDE N_det
+  PROVIDE psi_det
+
+  allocate(deg(N_det))
+
+  do i = 1, N_det
+    call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i), deg(i), N_int)
+  enddo
+
+  open(unit=10, file="exc.dat", action="write")
+    write(10,*) N_det
+    write(10,*) deg
+  close(10)
+
+  deallocate(deg)
+
+end
+
+
+
diff --git a/src/utils/block_diag_degen.irp.f b/src/utils/block_diag_degen.irp.f
index 188bfa58..1a9ca8d6 100644
--- a/src/utils/block_diag_degen.irp.f
+++ b/src/utils/block_diag_degen.irp.f
@@ -191,7 +191,7 @@ subroutine give_degen_full_list(A, n, thr, list_degen, n_degen_list)
     list_degen(n_degen_list,1) = i
     icount = 1
     do j = i+1, n
-      if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then
+      if(dabs(A(i)-A(j)).lt.thr .and. is_ok(j)) then
         is_ok(j) = .False.
         icount += 1
         list_degen(n_degen_list,icount) = j
diff --git a/src/utils/constants.include.F b/src/utils/constants.include.F
index 422eff95..7b01f888 100644
--- a/src/utils/constants.include.F
+++ b/src/utils/constants.include.F
@@ -18,3 +18,30 @@ double precision, parameter :: c_4_3 = 4.d0/3.d0
 double precision, parameter :: c_1_3 = 1.d0/3.d0
 double precision, parameter :: sq_op5 = dsqrt(0.5d0)
 double precision, parameter :: dlog_2pi =  dlog(2.d0*dacos(-1.d0))
+
+! physical constants and units conversion factors
+double precision, parameter :: k_boltzman_si    = 1.38066d-23       ! K k^-1 
+double precision, parameter :: k_boltzman_au    = 3.1667d-6         ! Hartree k^-1 
+double precision, parameter :: k_boltzman_m1_au = 315795.26d0       ! Hartree^-1 k 
+double precision, parameter :: bohr_radius_si   = 0.529177d-10      ! m
+double precision, parameter :: bohr_radius_cm   = 0.529177d-8       ! cm
+double precision, parameter :: bohr_radius_angs = 0.529177d0        ! Angstrom
+double precision, parameter :: electronmass_si  = 9.10953d-31       ! Kg
+double precision, parameter :: electronmass_uma = 5.4858d-4         ! uma
+double precision, parameter :: electronvolt_si  = 1.6021892d-19     ! J  
+double precision, parameter :: uma_si           = 1.66057d-27       ! Kg
+double precision, parameter :: debye_si         = 3.33564d-30       ! coulomb meter
+double precision, parameter :: debye_au         = 0.393427228d0     ! e * Bohr
+double precision, parameter :: angstrom_to_au   = 1.889727d0        ! au
+double precision, parameter :: au_to_ohmcmm1    = 46000.0d0         ! (ohm cm)^-1
+double precision, parameter :: au_to_kb         = 294210.0d0        ! kbar
+double precision, parameter :: au_to_eV         = 27.211652d0
+double precision, parameter :: uma_to_au        = 1822.89d0
+double precision, parameter :: au_to_terahertz  = 2.4189d-5
+double precision, parameter :: au_to_sec        = 2.4189d-17
+double precision, parameter :: au_to_fsec       = 2.4189d-2
+double precision, parameter :: Wcm2             = 3.5d16
+double precision, parameter :: amconv           = 1.66042d-24/9.1095d-28*0.5d0  !  mass conversion: a.m.u to a.u. (ry)
+double precision, parameter :: uakbar           = 147105.d0         !  pressure conversion from ry/(a.u)^3 to k
+
+