From 0c6d513bbf126e2ae92643a1e3cbfc94dbceab95 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 24 Jan 2025 19:07:16 +0100
Subject: [PATCH 01/59] Implemented single precision for cholesky mo

---
 src/ao_two_e_ints/cholesky.irp.f      |   3 +-
 src/fci/pt2.irp.f                     |   2 +-
 src/mo_two_e_ints/cholesky.irp.f      |  59 +++++++-
 src/mo_two_e_ints/map_integrals.irp.f | 188 +++++++++++++++++++-------
 src/mp2/mp2.irp.f                     |   2 +-
 5 files changed, 198 insertions(+), 56 deletions(-)

diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f
index 69b18900..fdc729d5 100644
--- a/src/ao_two_e_ints/cholesky.irp.f
+++ b/src/ao_two_e_ints/cholesky.irp.f
@@ -466,10 +466,11 @@ END_PROVIDER
      endif
 
 
+     ! Reverse order of Cholesky vectors to increase precision in dot products
      !$OMP PARALLEL DO PRIVATE(k,j)
      do k=1,rank
        do j=1,ao_num
-           cholesky_ao(1:ao_num,j,k) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,k)
+           cholesky_ao(1:ao_num,j,rank-k+1) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,rank-k+1)
        enddo
      enddo
      !$OMP END PARALLEL DO
diff --git a/src/fci/pt2.irp.f b/src/fci/pt2.irp.f
index 53bf1699..186f1ff6 100644
--- a/src/fci/pt2.irp.f
+++ b/src/fci/pt2.irp.f
@@ -15,11 +15,11 @@ program pt2
   ! sampling.
   !
   END_DOC
+  PROVIDE all_mo_integrals
   if (.not. is_zmq_slave) then
      read_wf = .True.
      threshold_generators = 1.d0
      SOFT_TOUCH read_wf threshold_generators
-     PROVIDE all_mo_integrals
      PROVIDE psi_energy
      call run
   else
diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f
index 1fed949d..835110de 100644
--- a/src/mo_two_e_ints/cholesky.irp.f
+++ b/src/mo_two_e_ints/cholesky.irp.f
@@ -7,7 +7,8 @@ BEGIN_PROVIDER [ logical, do_mo_cholesky ]
 ! do_mo_cholesky = .False.
 END_PROVIDER
 
-BEGIN_PROVIDER [ integer, cholesky_mo_num ]
+ BEGIN_PROVIDER [ integer, cholesky_mo_num ]
+&BEGIN_PROVIDER [ integer, cholesky_mo_num_split, (1:5)]
  implicit none
  BEGIN_DOC
  ! Number of Cholesky vectors in MO basis
@@ -21,6 +22,12 @@ BEGIN_PROVIDER [ integer, cholesky_mo_num ]
  else
    cholesky_mo_num = cholesky_ao_num
  endif
+ cholesky_mo_num_split(1) = 0
+ cholesky_mo_num_split(2) = cholesky_mo_num/4
+ cholesky_mo_num_split(3) = 2*cholesky_mo_num_split(2)
+ cholesky_mo_num_split(4) = 3*cholesky_mo_num_split(2)
+ cholesky_mo_num_split(5) = cholesky_mo_num
+ cholesky_mo_num_split += 1
 END_PROVIDER
 
 BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ]
@@ -49,7 +56,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num,
  BEGIN_DOC
  ! Cholesky vectors in MO basis. Warning: it is transposed wrt cholesky_ao:
  !
- ! -  cholesky_ao        is (ao_num^2 x cholesky_ao_num)
+ ! - cholesky_ao        is (ao_num^2 x cholesky_ao_num)
  !
  ! - cholesky_mo_transp is (cholesky_mo_num x mo_num^2)
  END_DOC
@@ -132,3 +139,51 @@ BEGIN_PROVIDER [ double precision, cholesky_semi_mo_transp_simple, (cholesky_mo_
 
 END_PROVIDER
 
+
+
+
+BEGIN_PROVIDER [ real, cholesky_mo_sp, (mo_num, mo_num, cholesky_mo_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Cholesky vectors in MO basis in stored in single precision
+ END_DOC
+
+ integer :: k, i, j
+
+ call set_multiple_levels_omp(.False.)
+ !$OMP PARALLEL DO PRIVATE(k)
+ do k=1,cholesky_mo_num
+  do j=1,mo_num
+    do i=1,mo_num
+      cholesky_mo_sp(i,j,k) = cholesky_mo_transp_sp(k,i,j)
+    enddo
+  enddo
+ enddo
+ !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ real, cholesky_mo_transp_sp, (cholesky_mo_num, mo_num, mo_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Cholesky vectors in MO basis in s. Warning: it is transposed wrt cholesky_ao:
+ !
+ ! - cholesky_ao        is (ao_num^2 x cholesky_ao_num)
+ !
+ ! - cholesky_mo_transp is (cholesky_mo_num x mo_num^2)
+ END_DOC
+
+ integer :: i,j,k
+ !$OMP PARALLEL DO PRIVATE(k)
+ do k=1,cholesky_mo_num
+  do j=1,mo_num
+    do i=1,mo_num
+      cholesky_mo_transp_sp(k,i,j) = cholesky_mo_transp(k,i,j)
+    enddo
+  enddo
+ enddo
+ !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+
diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f
index 76f169b4..6040842e 100644
--- a/src/mo_two_e_ints/map_integrals.irp.f
+++ b/src/mo_two_e_ints/map_integrals.irp.f
@@ -9,6 +9,14 @@ BEGIN_PROVIDER [ logical, all_mo_integrals ]
   PROVIDE mo_two_e_integrals_in_map mo_integrals_cache mo_two_e_integrals_jj_exchange mo_two_e_integrals_jj_anti mo_two_e_integrals_jj big_array_exchange_integrals big_array_coulomb_integrals mo_one_e_integrals
 END_PROVIDER
 
+BEGIN_PROVIDER [ logical, mo_cholesky_double ]
+  implicit none
+  BEGIN_DOC
+! If true, use double precision to compute integrals from cholesky vectors
+  END_DOC
+  mo_cholesky_double = .True.
+END_PROVIDER
+
 
 !! MO Map
 !! ======
@@ -147,7 +155,7 @@ double precision function get_two_e_integral(i,j,k,l,map)
   type(map_type), intent(inout)  :: map
   real(integral_kind)            :: tmp
 
-  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky mo_cholesky_double cholesky_mo_transp_sp cholesky_mo_transp
 
   if (use_banned_excitation) then
     if (banned_excitation(i,k)) then
@@ -178,12 +186,19 @@ double precision function get_two_e_integral(i,j,k,l,map)
     if  (do_mo_cholesky) then
 
       double precision, external :: ddot
-      get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1)
-
-!      get_two_e_integral = 0.d0
-!      do kk=1,cholesky_mo_num
-!        get_two_e_integral = get_two_e_integral + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l)
-!      enddo
+      real, external :: sdot
+      integer :: isplit
+      if (mo_cholesky_double) then
+        get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1)
+      else
+        get_two_e_integral = 0.d0
+        do isplit=1,4
+          get_two_e_integral = get_two_e_integral + &
+                               sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+                                    cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
+                                    cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1)
+        enddo
+      endif
 
     else
 
@@ -214,7 +229,8 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map)
   real(integral_kind)            :: tmp
   integer(key_kind)              :: i1, idx
   integer(key_kind)              :: p,q,r,s,i2
-  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache
+  real, allocatable :: out_val_sp(:)
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache cholesky_mo_transp cholesky_mo_transp_sp
 
   if (banned_excitation(j,l)) then
       out_val(1:sze) = 0.d0
@@ -225,6 +241,10 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map)
   ii = ior(ii, k-mo_integrals_cache_min)
   ii = ior(ii, j-mo_integrals_cache_min)
 
+  if (do_mo_cholesky.and. .not.mo_cholesky_double) then
+    allocate(out_val_sp(sze))
+  endif
+
   if (iand(ii, -mo_integrals_cache_size) == 0) then
     ! Some integrals are in the cache
 
@@ -232,11 +252,24 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map)
 
       if (do_mo_cholesky) then
 
-        !TODO: here
-        call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, &
-           cholesky_mo_transp(1,1,k), cholesky_mo_num, &
-           cholesky_mo_transp(1,j,l), 1, 0.d0, &
-           out_val, 1)
+        !TODO: bottleneck here
+        if (mo_cholesky_double) then
+          call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, &
+            cholesky_mo_transp(1,1,k), cholesky_mo_num, &
+            cholesky_mo_transp(1,j,l), 1, 0.d0, &
+            out_val, 1)
+        else
+          integer :: isplit
+          out_val = 0.d0
+          do isplit=1,4
+            call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+              mo_integrals_cache_min-1, 1., &
+              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
+              out_val_sp, 1)
+            out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1)
+          enddo
+        endif
 
       else
 
@@ -270,11 +303,23 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map)
 
       if (do_mo_cholesky) then
 
-        !TODO: here
-        call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, &
-           cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, &
-           cholesky_mo_transp(1,j,l), 1, 0.d0, &
-           out_val(mo_integrals_cache_max+1), 1)
+        !TODO: bottleneck here
+        if (mo_cholesky_double) then
+          call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, &
+             cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, &
+             cholesky_mo_transp(1,j,l), 1, 0.d0, &
+             out_val(mo_integrals_cache_max+1), 1)
+        else
+          out_val = 0.d0
+          do isplit=1,4
+            call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+              mo_num-mo_integrals_cache_max, 1., &
+              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),mo_integrals_cache_max+1,k), cholesky_mo_num, &
+              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
+              out_val_sp(mo_integrals_cache_max+1), 1)
+            out_val(mo_integrals_cache_max+1:sze) += out_val_sp(mo_integrals_cache_max+1:sze)
+          enddo
+        endif
 
       else
 
@@ -306,11 +351,23 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map)
 
     if (do_mo_cholesky) then
 
-      !TODO: here
-      call dgemv('T', cholesky_mo_num, mo_num, 1.d0, &
-           cholesky_mo_transp(1,1,k), cholesky_mo_num, &
-           cholesky_mo_transp(1,j,l), 1, 0.d0, &
-           out_val, 1)
+      !TODO: bottleneck here
+      if (mo_cholesky_double) then
+          call dgemv('T', cholesky_mo_num, sze, 1.d0, &
+               cholesky_mo_transp(1,1,k), cholesky_mo_num, &
+               cholesky_mo_transp(1,j,l), 1, 0.d0, &
+               out_val, 1)
+      else
+          out_val = 0.d0
+          do isplit=1,4
+            call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+              sze, 1., &
+              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
+              out_val_sp, 1)
+            out_val(1:sze) += out_val_sp(1:sze)
+          enddo
+      endif
 
     else
 
@@ -513,7 +570,7 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map)
   type(map_type), intent(inout)  :: map
   integer                        :: i
   double precision, external     :: get_two_e_integral
-  PROVIDE mo_two_e_integrals_in_map
+  PROVIDE mo_two_e_integrals_in_map mo_cholesky_double cholesky_mo_transp cholesky_mo_transp_sp
 
   if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max<mo_num) ) then
 
@@ -523,40 +580,69 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map)
            (l>=mo_integrals_cache_min).and.(l<=mo_integrals_cache_max) ) then
 
         double precision, external :: ddot
+        real, external :: sdot
         integer :: kk
 
-        do i=1,mo_integrals_cache_min-1
-          out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, &
-                                             cholesky_mo_transp(1,i,l), 1)
-!          out_val(i) = 0.d0
-!          do kk=1,cholesky_mo_num
-!            out_val(i) = out_val(i) + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l)
-!          enddo
-        enddo
+        if (mo_cholesky_double) then
 
-        do i=mo_integrals_cache_min,mo_integrals_cache_max
-          out_val(i) = get_two_e_integral_cache(i,i,k,l)
-        enddo
+          do i=1,mo_integrals_cache_min-1
+            out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, &
+                                               cholesky_mo_transp(1,i,l), 1)
+          enddo
 
-        do i=mo_integrals_cache_max, sze
-          out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, &
-                                             cholesky_mo_transp(1,i,l), 1)
-!          out_val(i) = 0.d0
-!          do kk=1,cholesky_mo_num
-!            out_val(i) = out_val(i) + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l)
-!          enddo
-        enddo
+          do i=mo_integrals_cache_min,mo_integrals_cache_max
+            out_val(i) = get_two_e_integral_cache(i,i,k,l)
+          enddo
+
+          do i=mo_integrals_cache_max, sze
+            out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, &
+                                               cholesky_mo_transp(1,i,l), 1)
+          enddo
+
+        else
+
+          integer :: isplit
+          do i=1,mo_integrals_cache_min-1
+             out_val(i) = 0.d0
+             do isplit=1,4
+               out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
+                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1)
+             enddo
+          enddo
+
+          do i=mo_integrals_cache_min,mo_integrals_cache_max
+            out_val(i) = get_two_e_integral_cache(i,i,k,l)
+          enddo
+
+          do i=mo_integrals_cache_max, sze
+             out_val(i) = 0.d0
+             do isplit=1,4
+               out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
+                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1)
+             enddo
+          enddo
+
+        endif
 
       else
 
-        do i=1,sze
-          out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, &
-                                             cholesky_mo_transp(1,i,l), 1)
-!          out_val(i) = 0.d0
-!          do kk=1,cholesky_mo_num
-!            out_val(i) = out_val(i) + cholesky_mo_transp(kk,i,k)*cholesky_mo_transp(kk,i,l)
-!          enddo
-        enddo
+        if (mo_cholesky_double) then
+          do i=1,sze
+            out_val(i) = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, &
+                                               cholesky_mo_transp(1,i,l), 1)
+          enddo
+        else
+          do i=1,sze
+             out_val(i) = 0.d0
+             do isplit=1,4
+               out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
+                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1)
+             enddo
+          enddo
+        endif
 
       endif
 
diff --git a/src/mp2/mp2.irp.f b/src/mp2/mp2.irp.f
index b8e0cc4a..ecf2da1b 100644
--- a/src/mp2/mp2.irp.f
+++ b/src/mp2/mp2.irp.f
@@ -7,7 +7,7 @@ subroutine run
   double precision, allocatable  :: pt2(:), norm_pert(:)
   double precision               :: H_pert_diag, E_old
   integer                        :: N_st, iter
-  PROVIDE Fock_matrix_diag_mo H_apply_buffer_allocated
+  PROVIDE all_mo_integrals  Fock_matrix_diag_mo H_apply_buffer_allocated
   N_st = N_states
   allocate (pt2(N_st), norm_pert(N_st))
   E_old = HF_energy

From 11b65259969aa612464e2b24fbd7b3b21f980627 Mon Sep 17 00:00:00 2001
From: Yann Damour <77277447+Ydrnan@users.noreply.github.com>
Date: Fri, 24 Jan 2025 19:08:38 +0100
Subject: [PATCH 02/59] Fix deallocation pt2_serialized

---
 src/cipsi_utils/run_pt2_slave.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cipsi_utils/run_pt2_slave.irp.f b/src/cipsi_utils/run_pt2_slave.irp.f
index cb1dd1f5..90c0e086 100644
--- a/src/cipsi_utils/run_pt2_slave.irp.f
+++ b/src/cipsi_utils/run_pt2_slave.irp.f
@@ -350,7 +350,6 @@ subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task
   enddo
 
   rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE)
-  deallocate(pt2_serialized)
   if (rc == -1) then
     print *,  irp_here, ': error sending result'
     stop 3
@@ -358,6 +357,7 @@ subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task
   else if(rc /= size(pt2_serialized)*8) then
     stop 'push'
   endif
+  deallocate(pt2_serialized)
 
 
   rc = f77_zmq_send( zmq_socket_push, task_id, n_tasks*4, ZMQ_SNDMORE)

From 5a591f52405d5dd32f4597ee9cfd5bed8477fa5b Mon Sep 17 00:00:00 2001
From: Yann Damour <77277447+Ydrnan@users.noreply.github.com>
Date: Fri, 24 Jan 2025 20:13:32 +0100
Subject: [PATCH 03/59] Fix segfault in scf

---
 src/scf_utils/roothaan_hall_scf.irp.f | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f
index 274f08d6..9e2ca4bc 100644
--- a/src/scf_utils/roothaan_hall_scf.irp.f
+++ b/src/scf_utils/roothaan_hall_scf.irp.f
@@ -228,9 +228,10 @@ END_DOC
   do while (i<mo_num)
     j=i+1
     m=1
-    do while ( (j<=mo_num).and.(fock_matrix_diag_mo(j)-fock_matrix_diag_mo(i) < 1.d-5) )
+    do while ( (fock_matrix_diag_mo(j)-fock_matrix_diag_mo(i) < 1.d-5) )
       j += 1
       m += 1
+      if (j > mo_num) exit
     enddo
     if (m>1) then
       call dgemm('N','T',ao_num,ao_num,m,1.d0,mo_coef(1,i),size(mo_coef,1),mo_coef(1,i),size(mo_coef,1),0.d0,S,size(S,1))

From d102c0f54f222b4ccbcc242951686f42da9e42c2 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 27 Jan 2025 13:29:42 +0100
Subject: [PATCH 04/59] Fix Cholesky H2O 6-31G

---
 src/ao_two_e_ints/cholesky.irp.f | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f
index 69b18900..3178a5f1 100644
--- a/src/ao_two_e_ints/cholesky.irp.f
+++ b/src/ao_two_e_ints/cholesky.irp.f
@@ -194,14 +194,11 @@ END_PROVIDER
          + (np+1)*memory_of_double(block_size)   ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size)
 
 !     call check_mem(mem)
-
      ! 5.
      do while ( (Dmax > tau).and.(np > 0) )
        ! a.
        i = i+1
 
-
-
        block_size = max(N,24)
 
        ! Determine nq so that Delta fits in memory
@@ -308,6 +305,8 @@ END_PROVIDER
          Qmax = max(Qmax, D(Dset(q)))
        enddo
 
+       if (Qmax <= Dmin) exit
+
        ! g.
 
        iblock = 0

From d01e24c30287f71886da6f8b04370fe7075fbfce Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 27 Jan 2025 16:22:31 +0100
Subject: [PATCH 05/59] Fixed Pointer argument 'map' is not associated

---
 src/utils/map_module.f90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/map_module.f90 b/src/utils/map_module.f90
index ceaec874..9ebcbc6d 100644
--- a/src/utils/map_module.f90
+++ b/src/utils/map_module.f90
@@ -157,7 +157,7 @@ subroutine cache_map_reallocate(map,sze)
     stop 2
   endif
   if (associated(map%value)) then
-    do i=1_8,min(size(map%key),map%n_elements)
+    do i=1_8,min(size(map%value),map%n_elements)
       value_new(i) = map%value(i)
     enddo
     deallocate(map%value)

From 7d46b5ce3451d33d65ca457963dd3d436ca9668d Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 27 Jan 2025 16:36:35 +0100
Subject: [PATCH 06/59] Removed ZeroMQ from AO integrals calculation

---
 src/ao_two_e_ints/NEED                        |   1 -
 .../integrals_erf_in_map_slave.irp.f          | 194 --------------
 .../integrals_in_map_slave.irp.f              | 244 ------------------
 src/ao_two_e_ints/providers_ao_erf.irp.f      |  53 ++--
 src/ao_two_e_ints/two_e_integrals.irp.f       | 104 +++-----
 5 files changed, 63 insertions(+), 533 deletions(-)
 delete mode 100644 src/ao_two_e_ints/integrals_erf_in_map_slave.irp.f
 delete mode 100644 src/ao_two_e_ints/integrals_in_map_slave.irp.f

diff --git a/src/ao_two_e_ints/NEED b/src/ao_two_e_ints/NEED
index 542962ec..34b4a641 100644
--- a/src/ao_two_e_ints/NEED
+++ b/src/ao_two_e_ints/NEED
@@ -2,5 +2,4 @@ hamiltonian
 ao_one_e_ints
 pseudo
 bitmask
-zmq
 ao_basis
diff --git a/src/ao_two_e_ints/integrals_erf_in_map_slave.irp.f b/src/ao_two_e_ints/integrals_erf_in_map_slave.irp.f
deleted file mode 100644
index ded49233..00000000
--- a/src/ao_two_e_ints/integrals_erf_in_map_slave.irp.f
+++ /dev/null
@@ -1,194 +0,0 @@
-subroutine ao_two_e_integrals_erf_in_map_slave_tcp(i)
-  implicit none
-  integer, intent(in)            :: i
-  BEGIN_DOC
-! Computes a buffer of integrals. i is the ID of the current thread.
-  END_DOC
-  call ao_two_e_integrals_erf_in_map_slave(0,i)
-end
-
-
-subroutine ao_two_e_integrals_erf_in_map_slave_inproc(i)
-  implicit none
-  integer, intent(in)            :: i
-  BEGIN_DOC
-! Computes a buffer of integrals. i is the ID of the current thread.
-  END_DOC
-  call ao_two_e_integrals_erf_in_map_slave(1,i)
-end
-
-
-
-
-
-subroutine ao_two_e_integrals_erf_in_map_slave(thread,iproc)
-  use map_module
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Computes a buffer of integrals
-  END_DOC
-
-  integer, intent(in)            :: thread, iproc
-
-  integer                        :: j,l,n_integrals
-  integer                        :: rc
-  real(integral_kind), allocatable :: buffer_value(:)
-  integer(key_kind), allocatable :: buffer_i(:)
-
-  integer                        :: worker_id, task_id
-  character*(512)                :: task
-
-  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
-  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
-
-  integer(ZMQ_PTR), external     :: new_zmq_push_socket
-  integer(ZMQ_PTR)               :: zmq_socket_push
-
-  character*(64)                 :: state
-
-  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
-
-  integer, external :: connect_to_taskserver
-  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
-    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
-    return
-  endif
-
-  zmq_socket_push      = new_zmq_push_socket(thread)
-
-  allocate ( buffer_i(ao_num*ao_num), buffer_value(ao_num*ao_num) )
-
-
-  do
-    integer, external :: get_task_from_taskserver
-    if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) == -1) then
-      exit
-    endif
-    if (task_id == 0) exit
-    read(task,*) j, l
-    integer, external :: task_done_to_taskserver
-    call compute_ao_integrals_erf_jl(j,l,n_integrals,buffer_i,buffer_value)
-    if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
-        stop 'Unable to send task_done'
-    endif
-    call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id)
-  enddo
-
-  integer, external :: disconnect_from_taskserver
-  if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
-    continue
-  endif
-  deallocate( buffer_i, buffer_value )
-  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
-  call end_zmq_push_socket(zmq_socket_push,thread)
-
-end
-
-
-subroutine ao_two_e_integrals_erf_in_map_collector(zmq_socket_pull)
-  use map_module
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Collects results from the AO integral calculation
-  END_DOC
-
-  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
-  integer                        :: j,l,n_integrals
-  integer                        :: rc
-
-  real(integral_kind), allocatable :: buffer_value(:)
-  integer(key_kind), allocatable :: buffer_i(:)
-
-  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
-  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
-
-  integer(ZMQ_PTR), external     :: new_zmq_pull_socket
-
-  integer*8                      :: control, accu, sze
-  integer                        :: task_id, more
-
-  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
-
-  sze = ao_num*ao_num
-  allocate ( buffer_i(sze), buffer_value(sze) )
-
-  accu = 0_8
-  more = 1
-  do while (more == 1)
-
-    rc = f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0)
-    if (rc == -1) then
-      n_integrals = 0
-      return
-    endif
-    if (rc /= 4) then
-      print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0)'
-      stop 'error'
-    endif
-
-    if (n_integrals >= 0) then
-
-      if (n_integrals > sze) then
-        deallocate (buffer_value, buffer_i)
-        sze = n_integrals
-        allocate (buffer_value(sze), buffer_i(sze))
-      endif
-
-      rc = f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0)
-      if (rc /= key_kind*n_integrals) then
-        print *,  rc, key_kind, n_integrals
-        print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0)'
-        stop 'error'
-      endif
-
-      rc = f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0)
-      if (rc /= integral_kind*n_integrals) then
-        print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0)'
-        stop 'error'
-      endif
-
-      rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
-
-IRP_IF ZMQ_PUSH
-IRP_ELSE
-      rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
-      if (rc /= 4) then
-        print *,  irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
-        stop 'error'
-      endif
-IRP_ENDIF
-
-
-      call insert_into_ao_integrals_erf_map(n_integrals,buffer_i,buffer_value)
-      accu += n_integrals
-      if (task_id /= 0) then
-        integer, external :: zmq_delete_task
-        if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) == -1) then
-          stop 'Unable to delete task'
-        endif
-      endif
-    endif
-
-  enddo
-
-  deallocate( buffer_i, buffer_value )
-
-  integer (map_size_kind) :: get_ao_erf_map_size
-  control = get_ao_erf_map_size(ao_integrals_erf_map)
-
-  if (control /= accu) then
-      print *, ''
-      print *, irp_here
-      print *, 'Control : ', control
-      print *, 'Accu    : ', accu
-      print *, 'Some integrals were lost during the parallel computation.'
-      print *, 'Try to reduce the number of threads.'
-      stop
-  endif
-
-  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
-
-end
-
diff --git a/src/ao_two_e_ints/integrals_in_map_slave.irp.f b/src/ao_two_e_ints/integrals_in_map_slave.irp.f
deleted file mode 100644
index 122fa2ac..00000000
--- a/src/ao_two_e_ints/integrals_in_map_slave.irp.f
+++ /dev/null
@@ -1,244 +0,0 @@
-subroutine ao_two_e_integrals_in_map_slave_tcp(i)
-  implicit none
-  integer, intent(in)            :: i
-  BEGIN_DOC
-! Computes a buffer of integrals. i is the ID of the current thread.
-  END_DOC
-  call ao_two_e_integrals_in_map_slave(0,i)
-end
-
-
-subroutine ao_two_e_integrals_in_map_slave_inproc(i)
-  implicit none
-  integer, intent(in)            :: i
-  BEGIN_DOC
-! Computes a buffer of integrals. i is the ID of the current thread.
-  END_DOC
-  call ao_two_e_integrals_in_map_slave(1,i)
-end
-
-
-subroutine push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id)
-  use f77_zmq
-  use map_module
-  implicit none
-  BEGIN_DOC
-! Push integrals in the push socket
-  END_DOC
-  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
-  integer, intent(in)            :: n_integrals
-  integer(key_kind), intent(in)  :: buffer_i(*)
-  real(integral_kind), intent(in) :: buffer_value(*)
-  integer, intent(in)            :: task_id
-  integer                        :: rc
-
-  rc = f77_zmq_send( zmq_socket_push, n_integrals, 4, ZMQ_SNDMORE)
-  if (rc /= 4) then
-    print *, irp_here,  ': f77_zmq_send( zmq_socket_push, n_integrals, 4, ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_send( zmq_socket_push, buffer_i, key_kind*n_integrals, ZMQ_SNDMORE)
-  if (rc /= key_kind*n_integrals) then
-    print *, irp_here,  ': f77_zmq_send( zmq_socket_push, buffer_i, key_kind*n_integrals, ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_send( zmq_socket_push, buffer_value, integral_kind*n_integrals, ZMQ_SNDMORE)
-  if (rc /= integral_kind*n_integrals) then
-    print *, irp_here,  ': f77_zmq_send( zmq_socket_push, buffer_value, integral_kind*n_integrals, 0)'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
-  if (rc /= 4) then
-    print *, irp_here,  ': f77_zmq_send( zmq_socket_push, task_id, 4, 0)'
-    stop 'error'
-  endif
-
-IRP_IF ZMQ_PUSH
-IRP_ELSE
-  integer :: idummy
-  rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0)
-  if (rc /= 4) then
-    print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)'
-    stop 'error'
-  endif
-IRP_ENDIF
-end
-
-
-
-
-
-subroutine ao_two_e_integrals_in_map_slave(thread,iproc)
-  use map_module
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Computes a buffer of integrals
-  END_DOC
-
-  integer, intent(in)            :: thread, iproc
-
-  integer                        :: j,l,n_integrals
-  integer                        :: rc
-  real(integral_kind), allocatable :: buffer_value(:)
-  integer(key_kind), allocatable :: buffer_i(:)
-
-  integer                        :: worker_id, task_id
-  character*(512)                :: task
-
-  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
-  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
-
-  integer(ZMQ_PTR), external     :: new_zmq_push_socket
-  integer(ZMQ_PTR)               :: zmq_socket_push
-
-  character*(64)                 :: state
-
-  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
-
-  integer, external :: connect_to_taskserver
-  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
-    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
-    return
-  endif
-
-  zmq_socket_push      = new_zmq_push_socket(thread)
-
-  allocate ( buffer_i(ao_num*ao_num), buffer_value(ao_num*ao_num) )
-
-
-  do
-    integer, external :: get_task_from_taskserver
-    if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) == -1) then
-      exit
-    endif
-    if (task_id == 0) exit
-    call sscanf_dd(task, j, l)
-    integer, external :: task_done_to_taskserver
-    call compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value)
-    if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
-        stop 'Unable to send task_done'
-    endif
-    call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id)
-  enddo
-
-  integer, external :: disconnect_from_taskserver
-  if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
-    continue
-  endif
-  deallocate( buffer_i, buffer_value )
-  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
-  call end_zmq_push_socket(zmq_socket_push,thread)
-
-end
-
-
-subroutine ao_two_e_integrals_in_map_collector(zmq_socket_pull)
-  use map_module
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Collects results from the AO integral calculation
-  END_DOC
-
-  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
-  integer                        :: j,l,n_integrals
-  integer                        :: rc
-
-  real(integral_kind), allocatable :: buffer_value(:)
-  integer(key_kind), allocatable :: buffer_i(:)
-
-  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
-  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
-
-  integer(ZMQ_PTR), external     :: new_zmq_pull_socket
-
-  integer*8                      :: control, accu, sze
-  integer                        :: task_id, more
-
-  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
-
-  sze = ao_num*ao_num
-  allocate ( buffer_i(sze), buffer_value(sze) )
-
-  accu = 0_8
-  more = 1
-  do while (more == 1)
-
-    rc = f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0)
-    if (rc == -1) then
-      n_integrals = 0
-      return
-    endif
-    if (rc /= 4) then
-      print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0)'
-      stop 'error'
-    endif
-
-    if (n_integrals >= 0) then
-
-      if (n_integrals > sze) then
-        deallocate (buffer_value, buffer_i)
-        sze = n_integrals
-        allocate (buffer_value(sze), buffer_i(sze))
-      endif
-
-      rc = f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0)
-      if (rc /= key_kind*n_integrals) then
-        print *,  rc, key_kind, n_integrals
-        print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0)'
-        stop 'error'
-      endif
-
-      rc = f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0)
-      if (rc /= integral_kind*n_integrals) then
-        print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0)'
-        stop 'error'
-      endif
-
-      rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
-
-IRP_IF ZMQ_PUSH
-IRP_ELSE
-      rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
-      if (rc /= 4) then
-        print *,  irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
-        stop 'error'
-      endif
-IRP_ENDIF
-
-
-      call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_value)
-      accu += n_integrals
-      if (task_id /= 0) then
-        integer, external :: zmq_delete_task
-        if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) == -1) then
-          stop 'Unable to delete task'
-        endif
-      endif
-    endif
-
-  enddo
-
-  deallocate( buffer_i, buffer_value )
-
-  integer (map_size_kind) :: get_ao_map_size
-  control = get_ao_map_size(ao_integrals_map)
-
-  if (control /= accu) then
-      print *, ''
-      print *, irp_here
-      print *, 'Control : ', control
-      print *, 'Accu    : ', accu
-      print *, 'Some integrals were lost during the parallel computation.'
-      print *, 'Try to reduce the number of threads.'
-      stop
-  endif
-
-  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
-
-end
-
diff --git a/src/ao_two_e_ints/providers_ao_erf.irp.f b/src/ao_two_e_ints/providers_ao_erf.irp.f
index ff8c31a2..8468fb0a 100644
--- a/src/ao_two_e_ints/providers_ao_erf.irp.f
+++ b/src/ao_two_e_ints/providers_ao_erf.irp.f
@@ -1,7 +1,5 @@
-
 BEGIN_PROVIDER [ logical, ao_two_e_integrals_erf_in_map ]
   implicit none
-  use f77_zmq
   use map_module
   BEGIN_DOC
   !  Map of Atomic integrals
@@ -15,17 +13,16 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_erf_in_map ]
 
   ! For integrals file
   integer(key_kind),allocatable  :: buffer_i(:)
-  integer,parameter              :: size_buffer = 1024*64
+  integer                        :: size_buffer 
   real(integral_kind),allocatable :: buffer_value(:)
 
   integer                        :: n_integrals, rc
   integer                        :: kk, m, j1, i1, lmax
   character*(64)                 :: fmt
 
-  integral = ao_two_e_integral_erf(1,1,1,1)
-
   double precision               :: map_mb
-  PROVIDE read_ao_two_e_integrals_erf io_ao_two_e_integrals_erf
+  PROVIDE read_ao_two_e_integrals_erf io_ao_two_e_integrals_erf ao_integrals_erf_map
+
   if (read_ao_two_e_integrals_erf) then
     print*,'Reading the AO ERF integrals'
       call map_load_from_disk(trim(ezfio_filename)//'/work/ao_ints_erf',ao_integrals_erf_map)
@@ -39,37 +36,27 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_erf_in_map ]
   call wall_time(wall_1)
   call cpu_time(cpu_1)
 
-  integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
-  call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'ao_integrals_erf')
-
-  character(len=:), allocatable :: task
-  allocate(character(len=ao_num*12) :: task)
-  write(fmt,*) '(', ao_num, '(I5,X,I5,''|''))'
-  do l=1,ao_num
-    write(task,fmt) (i,l, i=1,l)
-    integer, external :: add_task_to_taskserver
-    if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) == -1) then
-      stop 'Unable to add task to server'
-    endif
-  enddo
-  deallocate(task)
-
-  integer, external :: zmq_set_running
-  if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
-    print *,  irp_here, ': Failed in zmq_set_running'
+  if (.True.) then
+    ! Avoid openMP
+    integral = ao_two_e_integral_erf(1,1,1,1)
   endif
 
-  PROVIDE nproc
-  !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+1)
-      i = omp_get_thread_num()
-      if (i==0) then
-        call ao_two_e_integrals_erf_in_map_collector(zmq_socket_pull)
-      else
-        call ao_two_e_integrals_erf_in_map_slave_inproc(i)
-      endif
+  size_buffer = ao_num*ao_num
+  !$OMP PARALLEL DEFAULT(shared) private(j,l) &
+  !$OMP PRIVATE(buffer_i, buffer_value, n_integrals)
+  allocate(buffer_i(size_buffer), buffer_value(size_buffer))
+  n_integrals = 0
+  !$OMP DO COLLAPSE(1) SCHEDULE(dynamic)
+  do l=1,ao_num
+    do j=1,l
+      call compute_ao_integrals_erf_jl(j,l,n_integrals,buffer_i,buffer_value)
+      call insert_into_ao_integrals_erf_map(n_integrals,buffer_i,buffer_value)
+    enddo
+  enddo
+  !$OMP END DO
+  deallocate(buffer_i, buffer_value)
   !$OMP END PARALLEL
 
-  call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'ao_integrals_erf')
 
 
   print*, 'Sorting the map'
diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f
index e4bd9d1d..fb376ce1 100644
--- a/src/ao_two_e_ints/two_e_integrals.irp.f
+++ b/src/ao_two_e_ints/two_e_integrals.irp.f
@@ -397,7 +397,6 @@ end
 
 BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ]
   implicit none
-  use f77_zmq
   use map_module
   BEGIN_DOC
   !  Map of Atomic integrals
@@ -411,7 +410,7 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ]
 
   ! For integrals file
   integer(key_kind),allocatable  :: buffer_i(:)
-  integer,parameter              :: size_buffer = 1024*64
+  integer                        :: size_buffer 
   real(integral_kind),allocatable :: buffer_value(:)
 
   integer                        :: n_integrals, rc
@@ -419,78 +418,61 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ]
   character*(64)                 :: fmt
 
   double precision               :: map_mb
-  PROVIDE read_ao_two_e_integrals io_ao_two_e_integrals
+  PROVIDE read_ao_two_e_integrals io_ao_two_e_integrals ao_integrals_map
+
   if (read_ao_two_e_integrals) then
     print*,'Reading the AO integrals'
     call map_load_from_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map)
     print*, 'AO integrals provided'
     ao_two_e_integrals_in_map = .True.
-  else
+    return
+  endif
 
-    print*, 'Providing the AO integrals'
-    call wall_time(wall_0)
-    call wall_time(wall_1)
-    call cpu_time(cpu_1)
+  print*, 'Providing the AO integrals'
+  call wall_time(wall_0)
+  call wall_time(wall_1)
+  call cpu_time(cpu_1)
 
-    if (.True.) then
-      ! Avoid openMP
-      integral = ao_two_e_integral(1,1,1,1)
-    endif
+  if (.True.) then
+    ! Avoid openMP
+    integral = ao_two_e_integral(1,1,1,1)
+  endif
 
-    integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
-    call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'ao_integrals')
-
-    character(len=:), allocatable :: task
-    allocate(character(len=ao_num*12) :: task)
-    write(fmt,*) '(', ao_num, '(I5,X,I5,''|''))'
-    do l=1,ao_num
-      write(task,fmt) (i,l, i=1,l)
-      integer, external :: add_task_to_taskserver
-      if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) == -1) then
-        stop 'Unable to add task to server'
-      endif
+  size_buffer = ao_num*ao_num
+  !$OMP PARALLEL DEFAULT(shared) private(j,l) &
+  !$OMP PRIVATE(buffer_i, buffer_value, n_integrals)
+  allocate(buffer_i(size_buffer), buffer_value(size_buffer))
+  n_integrals = 0
+  !$OMP DO COLLAPSE(1) SCHEDULE(dynamic)
+  do l=1,ao_num
+    do j=1,l
+      call compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value)
+      call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_value)
     enddo
-    deallocate(task)
+  enddo
+  !$OMP END DO
+  deallocate(buffer_i, buffer_value)
+  !$OMP END PARALLEL
 
-    integer, external :: zmq_set_running
-    if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
-      print *,  irp_here, ': Failed in zmq_set_running'
-    endif
+  print*, 'Sorting the map'
+  call map_sort(ao_integrals_map)
+  call cpu_time(cpu_2)
+  call wall_time(wall_2)
+  integer(map_size_kind)         :: get_ao_map_size, ao_map_size
+  ao_map_size = get_ao_map_size()
 
-    PROVIDE nproc
-    !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+1)
-        i = omp_get_thread_num()
-        if (i==0) then
-          call ao_two_e_integrals_in_map_collector(zmq_socket_pull)
-        else
-          call ao_two_e_integrals_in_map_slave_inproc(i)
-        endif
-    !$OMP END PARALLEL
+  print*, 'AO integrals provided:'
+  print*, ' Size of AO map :         ', map_mb(ao_integrals_map) ,'MB'
+  print*, ' Number of AO integrals :', ao_map_size
+  print*, ' cpu  time :',cpu_2 - cpu_1, 's'
+  print*, ' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )'
 
-    call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'ao_integrals')
-
-
-    print*, 'Sorting the map'
-    call map_sort(ao_integrals_map)
-    call cpu_time(cpu_2)
-    call wall_time(wall_2)
-    integer(map_size_kind)         :: get_ao_map_size, ao_map_size
-    ao_map_size = get_ao_map_size()
-
-    print*, 'AO integrals provided:'
-    print*, ' Size of AO map :         ', map_mb(ao_integrals_map) ,'MB'
-    print*, ' Number of AO integrals :', ao_map_size
-    print*, ' cpu  time :',cpu_2 - cpu_1, 's'
-    print*, ' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )'
-
-    ao_two_e_integrals_in_map = .True.
-
-    if (write_ao_two_e_integrals.and.mpi_master) then
-      call ezfio_set_work_empty(.False.)
-      call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map)
-      call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read')
-    endif
+  ao_two_e_integrals_in_map = .True.
 
+  if (write_ao_two_e_integrals.and.mpi_master) then
+    call ezfio_set_work_empty(.False.)
+    call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map)
+    call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read')
   endif
 
 END_PROVIDER

From 3478e741a4c332b7373ad00aa3c443d9f4dbd2d2 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 30 Jan 2025 13:13:34 +0100
Subject: [PATCH 07/59] Updated EZFIO

---
 external/ezfio | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/ezfio b/external/ezfio
index dba01c4f..d02132ea 160000
--- a/external/ezfio
+++ b/external/ezfio
@@ -1 +1 @@
-Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3
+Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091

From e1523c492941cd8a9d290743956b6b2a20f589bf Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 31 Jan 2025 11:40:49 +0100
Subject: [PATCH 08/59] Removed irrelevent dependency on cholesky

---
 src/mo_two_e_ints/map_integrals.irp.f | 4 ++--
 src/tools/print_energy.irp.f          | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f
index 6040842e..5b233899 100644
--- a/src/mo_two_e_ints/map_integrals.irp.f
+++ b/src/mo_two_e_ints/map_integrals.irp.f
@@ -155,7 +155,7 @@ double precision function get_two_e_integral(i,j,k,l,map)
   type(map_type), intent(inout)  :: map
   real(integral_kind)            :: tmp
 
-  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky mo_cholesky_double cholesky_mo_transp_sp cholesky_mo_transp
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky
 
   if (use_banned_excitation) then
     if (banned_excitation(i,k)) then
@@ -570,7 +570,7 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map)
   type(map_type), intent(inout)  :: map
   integer                        :: i
   double precision, external     :: get_two_e_integral
-  PROVIDE mo_two_e_integrals_in_map mo_cholesky_double cholesky_mo_transp cholesky_mo_transp_sp
+  PROVIDE mo_two_e_integrals_in_map mo_cholesky_double
 
   if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max<mo_num) ) then
 
diff --git a/src/tools/print_energy.irp.f b/src/tools/print_energy.irp.f
index d94a7156..d5e5f31f 100644
--- a/src/tools/print_energy.irp.f
+++ b/src/tools/print_energy.irp.f
@@ -14,9 +14,8 @@ end
 
 subroutine run
  implicit none
- call print_mol_properties
- print *, psi_energy + nuclear_repulsion
- call print_energy_components
+  call print_mol_properties
+  call print_energy_components
 ! print *, 'E(HF) = ', HF_energy
  print *, 'E(CI) = ', psi_energy + nuclear_repulsion
 ! print *, ''

From c285aaca49d26ed4f3019aa2b9adc900dbc9e443 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 31 Jan 2025 17:14:49 +0100
Subject: [PATCH 09/59] Stabilize PT2 when expected accuracy is low

---
 external/ezfio                           | 2 +-
 src/cipsi_utils/pt2_stoch_routines.irp.f | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/external/ezfio b/external/ezfio
index d02132ea..dba01c4f 160000
--- a/external/ezfio
+++ b/external/ezfio
@@ -1 +1 @@
-Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091
+Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3
diff --git a/src/cipsi_utils/pt2_stoch_routines.irp.f b/src/cipsi_utils/pt2_stoch_routines.irp.f
index 144d052d..162ab02c 100644
--- a/src/cipsi_utils/pt2_stoch_routines.irp.f
+++ b/src/cipsi_utils/pt2_stoch_routines.irp.f
@@ -530,7 +530,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
         avg  = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c)
         avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c)
         avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c)
-        if ((avg /= 0.d0) .or. (n == N_det_generators) ) then
+        if (((c>=10).and.(avg /= 0.d0)) .or. (n == N_det_generators) ) then
           do_exit = .true.
         endif
         if (qp_stop()) then

From 4b9939e738fbecf831847c9047a8bd11b99abd9b Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 3 Feb 2025 13:41:46 +0100
Subject: [PATCH 10/59] Fix qp_exc_energy.py:195: DeprecationWarning:
 Conversion of an array with ndim > 0 to a scalar is deprecated

---
 scripts/qp_exc_energy.py     | 12 ++++++++++--
 src/davidson/u0_hs2_u0.irp.f |  4 ++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/scripts/qp_exc_energy.py b/scripts/qp_exc_energy.py
index 44136311..e08866e3 100755
--- a/scripts/qp_exc_energy.py
+++ b/scripts/qp_exc_energy.py
@@ -157,11 +157,15 @@ A = np.array( [ [ data[-1][1], 1. ],
 B = np.array( [ [ data[-1][0] ],
                 [ data[-2][0] ] ] )
 E0 = np.linalg.solve(A,B)[1]
+E0 = E0[0]
+
 A = np.array( [ [ data[-1][4], 1. ],
                 [ data[-2][4], 1. ] ] )
 B = np.array( [ [ data[-1][3] ],
                 [ data[-2][3] ] ] )
 E1 = np.linalg.solve(A,B)[1]
+E1 = E1[0]
+
 average_2 = (E1-E0)*to_eV
 
 A = np.array( [ [ data[-1][1], 1. ],
@@ -170,14 +174,18 @@ A = np.array( [ [ data[-1][1], 1. ],
 B = np.array( [ [ data[-1][0] ],
                 [ data[-2][0] ],
                 [ data[-3][0] ] ] )
-E0 = np.linalg.lstsq(A,B,rcond=None)[0][1]
+E0 = np.linalg.lstsq(A,B,rcond=None)[0]
+E0 = E0[0][0]
+
 A = np.array( [ [ data[-1][4], 1. ],
                 [ data[-2][4], 1. ],
                 [ data[-3][4], 1. ] ] )
 B = np.array( [ [ data[-1][3] ],
                 [ data[-2][3] ],
                 [ data[-3][3] ] ] )
-E1 = np.linalg.lstsq(A,B,rcond=None)[0][1]
+E1 = np.linalg.lstsq(A,B,rcond=None)[0]
+E1 = E1[0][0]
+
 average_3 = (E1-E0)*to_eV
 
 exc = ((data[-1][3] + data[-1][4]) - (data[-1][0] + data[-1][1])) * to_eV
diff --git a/src/davidson/u0_hs2_u0.irp.f b/src/davidson/u0_hs2_u0.irp.f
index 3afe4ec6..f2ce7aa9 100644
--- a/src/davidson/u0_hs2_u0.irp.f
+++ b/src/davidson/u0_hs2_u0.irp.f
@@ -291,7 +291,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
   ASSERT (istart > 0)
   ASSERT (istep  > 0)
 
-  !$OMP DO SCHEDULE(guided,64)
+  !$OMP DO SCHEDULE(dynamic,64)
   do k_a=istart+ishift,iend,istep
 
     krow = psi_bilinear_matrix_rows(k_a)
@@ -469,7 +469,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
   enddo
   !$OMP END DO
 
-  !$OMP DO SCHEDULE(guided,64)
+  !$OMP DO SCHEDULE(dynamic,64)
   do k_a=istart+ishift,iend,istep
 
 

From dc75f495001a13dd38209a220d7230fb5ad3e450 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 3 Feb 2025 14:05:28 +0100
Subject: [PATCH 11/59] Added selected MRCI module

---
 src/cipsi_utils/pt2_stoch_routines.irp.f |  1 +
 src/mo_basis/utils.irp.f                 |  2 +-
 src/mrci/EZFIO.cfg                       | 24 +++++++++
 src/mrci/NEED                            |  4 ++
 src/mrci/README.rst                      | 17 +++++++
 src/mrci/class.irp.f                     |  8 +++
 src/mrci/mrci.irp.f                      | 64 ++++++++++++++++++++++++
 src/mrci/save_energy.irp.f               | 10 ++++
 8 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 src/mrci/EZFIO.cfg
 create mode 100644 src/mrci/NEED
 create mode 100644 src/mrci/README.rst
 create mode 100644 src/mrci/class.irp.f
 create mode 100644 src/mrci/mrci.irp.f
 create mode 100644 src/mrci/save_energy.irp.f

diff --git a/src/cipsi_utils/pt2_stoch_routines.irp.f b/src/cipsi_utils/pt2_stoch_routines.irp.f
index 162ab02c..68a3c9fc 100644
--- a/src/cipsi_utils/pt2_stoch_routines.irp.f
+++ b/src/cipsi_utils/pt2_stoch_routines.irp.f
@@ -212,6 +212,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
           ipos += 1
         endif
       enddo
+      call write_int(6,pt2_stoch_istate,'State')
       call write_int(6,sum(pt2_F),'Number of tasks')
       call write_int(6,ipos,'Number of fragmented tasks')
 
diff --git a/src/mo_basis/utils.irp.f b/src/mo_basis/utils.irp.f
index 987c394a..3f83b518 100644
--- a/src/mo_basis/utils.irp.f
+++ b/src/mo_basis/utils.irp.f
@@ -228,7 +228,7 @@ subroutine mo_as_svd_vectors_of_mo_matrix_eig(matrix,lda,m,n,eig,label)
   call dgemm('N','N',ao_num,m,m,1.d0,mo_coef_new,size(mo_coef_new,1),U,size(U,1),0.d0,mo_coef,size(mo_coef,1))
 
   do i=1,m
-    if (eig(i) > 1.d-20) then
+    if (D(i) > 1.d-20) then
       eig(i) = D(i)
     else
       eig(i) = 0.d0
diff --git a/src/mrci/EZFIO.cfg b/src/mrci/EZFIO.cfg
new file mode 100644
index 00000000..c8efd4b9
--- /dev/null
+++ b/src/mrci/EZFIO.cfg
@@ -0,0 +1,24 @@
+[energy]
+type: double precision
+doc: Calculated Selected CASSD energy
+interface: ezfio
+size: (determinants.n_states)
+
+[energy_pt2]
+type: double precision
+doc: Calculated CASSD energy + PT2
+interface: ezfio
+size: (determinants.n_states)
+
+
+[do_ddci]
+type: logical
+doc: If true, remove purely inactive double excitations
+interface: ezfio,provider,ocaml
+default: False
+
+[do_only_1h1p]
+type: logical
+doc: If true, do only one hole/one particle excitations
+interface: ezfio,provider,ocaml
+default: False
diff --git a/src/mrci/NEED b/src/mrci/NEED
new file mode 100644
index 00000000..ad99293f
--- /dev/null
+++ b/src/mrci/NEED
@@ -0,0 +1,4 @@
+cipsi
+generators_cas
+selectors_full
+davidson_undressed
diff --git a/src/mrci/README.rst b/src/mrci/README.rst
new file mode 100644
index 00000000..ac3c0139
--- /dev/null
+++ b/src/mrci/README.rst
@@ -0,0 +1,17 @@
+====
+mrci
+====
+
+
+|CIPSI| algorithm in the multi-reference CI space (CAS + Singles and Doubles).
+
+
+This module is the same as the :ref:`fci` module, except for the choice of the
+generator and selector determinants.
+
+The inactive, active and virtual |MOs| will need to be set with the
+:ref:`qp_set_mo_class` program.
+
+.. seealso::
+
+    The documentation of the :ref:`fci` module.
diff --git a/src/mrci/class.irp.f b/src/mrci/class.irp.f
new file mode 100644
index 00000000..271eb930
--- /dev/null
+++ b/src/mrci/class.irp.f
@@ -0,0 +1,8 @@
+BEGIN_PROVIDER [ logical, do_only_cas  ]
+ implicit none
+ BEGIN_DOC
+ ! In the CAS+SD case, always false
+ END_DOC
+ do_only_cas  = .False.
+END_PROVIDER
+
diff --git a/src/mrci/mrci.irp.f b/src/mrci/mrci.irp.f
new file mode 100644
index 00000000..ef4a2454
--- /dev/null
+++ b/src/mrci/mrci.irp.f
@@ -0,0 +1,64 @@
+program mrci
+  implicit none
+  BEGIN_DOC
+! Selected CAS+Singles and Doubles with stochastic selection
+! and PT2.
+!
+! This program performs a |CIPSI|-like selected |CI| using a
+! stochastic scheme for both the selection of the important Slater
+! determinants and the computation of the |PT2| correction. This
+! |CIPSI|-like algorithm will be performed for the lowest states of
+! the variational space (see :option:`determinants n_states`). The
+! program will stop when reaching at least one the two following
+! conditions:
+!
+! * number of Slater determinants > :option:`determinants n_det_max`
+! * |PT2| < :option:`perturbation pt2_max`
+!
+! The following other options can be of interest:
+!
+! :option:`determinants read_wf`
+!   When set to |false|, the program starts with a ROHF-like Slater
+!   determinant as a guess wave function. When set to |true|, the
+!   program starts with the wave function(s) stored in the |EZFIO|
+!   directory as guess wave function(s).
+!
+! :option:`determinants s2_eig`
+!   When set to |true|, the selection will systematically add all the
+!   necessary Slater determinants in order to have a pure spin wave
+!   function with an |S^2| value corresponding to
+!   :option:`determinants expected_s2`.
+!
+! For excited states calculations, it is recommended to start with
+! :ref:`.cis.` or :ref:`.cisd.` guess wave functions, eventually in
+! a restricted set of |MOs|, and to set :option:`determinants s2_eig`
+! to |true|.
+!
+  END_DOC
+
+  PROVIDE all_mo_integrals
+  if (.not.is_zmq_slave) then
+    PROVIDE psi_det psi_coef
+
+    write(json_unit,json_array_open_fmt) 'fci'
+
+    double precision, allocatable :: Ev(:),PT2(:)
+    allocate(Ev(N_states), PT2(N_states))
+    if (do_pt2) then
+      call run_stochastic_cipsi(Ev,PT2)
+    else
+      call run_cipsi
+    endif
+
+    write(json_unit,json_dict_uopen_fmt)
+    write(json_unit,json_dict_close_fmtx)
+    write(json_unit,json_array_close_fmtx)
+    call json_close
+
+  else
+    PROVIDE pt2_min_parallel_tasks
+
+    call run_slave_cipsi
+
+  endif
+end
diff --git a/src/mrci/save_energy.irp.f b/src/mrci/save_energy.irp.f
new file mode 100644
index 00000000..28840b6c
--- /dev/null
+++ b/src/mrci/save_energy.irp.f
@@ -0,0 +1,10 @@
+subroutine save_energy(E,pt2)
+  implicit none
+  BEGIN_DOC
+! Saves the energy in |EZFIO|.
+  END_DOC
+  double precision, intent(in) :: E(N_states), pt2(N_states)
+  call ezfio_set_mrci_energy(E(1:N_states))
+  call ezfio_set_mrci_energy_pt2(E(1:N_states)+pt2(1:N_states))
+end
+

From 6611722d32aad4ac8852de6eba7adef3834bfb49 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 4 Feb 2025 11:31:11 +0100
Subject: [PATCH 12/59] put more explicit print in print_su_pbe_ot.irp.f

---
 plugins/local/basis_correction/print_su_pbe_ot.irp.f | 1 +
 1 file changed, 1 insertion(+)

diff --git a/plugins/local/basis_correction/print_su_pbe_ot.irp.f b/plugins/local/basis_correction/print_su_pbe_ot.irp.f
index 49f90ade..0f196e60 100644
--- a/plugins/local/basis_correction/print_su_pbe_ot.irp.f
+++ b/plugins/local/basis_correction/print_su_pbe_ot.irp.f
@@ -18,6 +18,7 @@ end
 subroutine print_su_pbe_ot
  implicit none
  integer :: istate 
+ print*,'Two flavours of PBE functionals :'
  do istate = 1, N_states
   write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD PBE-UEG       , state ',istate,' = ',ecmd_pbe_ueg_mu_of_r(istate)
  enddo

From 0647a9db5f7173189cf38451a7032436b21f0d57 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 4 Feb 2025 14:49:39 +0100
Subject: [PATCH 13/59] Starting optimization of open-shell ccsd

---
 src/ccsd/ccsd_space_orb.irp.f      |   5 +-
 src/ccsd/ccsd_spin_orb.irp.f       |   2 -
 src/ccsd/ccsd_spin_orb_sub.irp.f   | 444 ++++++++++++++---------------
 src/utils_cc/mo_integrals_cc.irp.f |  16 ++
 4 files changed, 229 insertions(+), 238 deletions(-)

diff --git a/src/ccsd/ccsd_space_orb.irp.f b/src/ccsd/ccsd_space_orb.irp.f
index 53028ec0..91f703a0 100644
--- a/src/ccsd/ccsd_space_orb.irp.f
+++ b/src/ccsd/ccsd_space_orb.irp.f
@@ -1,9 +1,10 @@
-! Code
-
 program ccsd
 
   implicit none
 
+  BEGIN_DOC
+  ! Closed-shell CCSD
+  END_DOC
   read_wf = .True.
   touch read_wf
 
diff --git a/src/ccsd/ccsd_spin_orb.irp.f b/src/ccsd/ccsd_spin_orb.irp.f
index 6f2de11c..04344fbb 100644
--- a/src/ccsd/ccsd_spin_orb.irp.f
+++ b/src/ccsd/ccsd_spin_orb.irp.f
@@ -1,5 +1,3 @@
-! Prog
-
 program ccsd
 
   implicit none
diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f
index 09d6a0fe..fe202ebf 100644
--- a/src/ccsd/ccsd_spin_orb_sub.irp.f
+++ b/src/ccsd/ccsd_spin_orb_sub.irp.f
@@ -11,9 +11,9 @@ subroutine run_ccsd_spin_orb
   double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:)
   double precision, allocatable :: r1(:,:), r2(:,:,:,:)
   double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:)
-  double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:)
-  
-  double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:)
+  double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:) !, cW_vvvv(:,:,:,:)
+
+  double precision, allocatable :: f_o(:), f_v(:)
   double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:)
   double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:)
   double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:)
@@ -24,8 +24,7 @@ subroutine run_ccsd_spin_orb
   double precision, allocatable :: all_err(:,:), all_t(:,:)
 
   logical                       :: not_converged
-  integer, allocatable          :: list_occ(:,:), list_vir(:,:)
-  integer                       :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4)
+  integer                       :: nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4)
   integer                       :: nb_iter, i,j,a,b
   double precision              :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi
   integer(bit_kind)             :: det(N_int,2)
@@ -33,7 +32,7 @@ subroutine run_ccsd_spin_orb
   det = psi_det(:,:,cc_ref)
   print*,'Reference determinant:'
   call print_det(det,N_int)
- 
+
   ! Extract number of occ/vir alpha/beta spin orbitals
   !call extract_n_spin(det,n_spin)
   nOa = cc_nOa !n_spin(1)
@@ -41,107 +40,83 @@ subroutine run_ccsd_spin_orb
   nVa = cc_nVa !n_spin(3)
   nVb = cc_nVb !n_spin(4)
 
-  ! Total number of occ/vir spin orb
-  nO = cc_nOab !nOa + nOb
-  nV = cc_nVab !nVa + nVb
-  ! Debug
-  !print*,nO,nV
-
   ! Number of occ/vir spin orb per spin
   nO_S = cc_nO_S !(/nOa,nOb/)
   nV_S = cc_nV_S !(/nVa,nVb/)
   ! Debug
   !print*,nO_S,nV_S
 
-  ! Maximal number of occ/vir 
+  ! Maximal number of occ/vir
   nO_m = cc_nO_m !max(nOa, nOb)
   nV_m = cc_nV_m !max(nVa, nVb)
   ! Debug
   !print*,nO_m,nV_m
-  
-  allocate(list_occ(nO_m,2), list_vir(nV_m,2))
-  list_occ = cc_list_occ_spin
-  list_vir = cc_list_vir_spin
-  ! Debug
-  !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir)
-  !print*,list_occ(:,1)
-  !print*,list_occ(:,2)
-  !print*,list_vir(:,1)
-  !print*,list_vir(:,2)
 
   ! Allocation
-  allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV))
-  allocate(r1(nO,nV), r2(nO,nO,nV,nV))
-  allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV))
-  allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV))
-  allocate(v_oooo(nO,nO,nO,nO))
-  !allocate(v_vooo(nV,nO,nO,nO))
-  allocate(v_ovoo(nO,nV,nO,nO))
-  allocate(v_oovo(nO,nO,nV,nO))
-  allocate(v_ooov(nO,nO,nO,nV))
-  allocate(v_vvoo(nV,nV,nO,nO))
-  !allocate(v_vovo(nV,nO,nV,nO))
-  !allocate(v_voov(nV,nO,nO,nV))
-  allocate(v_ovvo(nO,nV,nV,nO))
-  allocate(v_ovov(nO,nV,nO,nV))
-  allocate(v_oovv(nO,nO,nV,nV))
-  !allocate(v_vvvo(nV,nV,nV,nO))
-  !allocate(v_vvov(nV,nV,nO,nV))
-  !allocate(v_vovv(nV,nO,nV,nV))
-  !allocate(v_ovvv(nO,nV,nV,nV))
-  !allocate(v_vvvv(nV,nV,nV,nV))
-  allocate(f_o(nO), f_v(nV))
-  allocate(f_oo(nO, nO))
-  allocate(f_ov(nO, nV))
-  allocate(f_vv(nV, nV))
-  
+  allocate(t1(cc_nOab,cc_nVab), t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau_t(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
+  allocate(r1(cc_nOab,cc_nVab), r2(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
+  allocate(cF_oo(cc_nOab,cc_nOab), cF_ov(cc_nOab,cc_nVab), cF_vv(cc_nVab,cc_nVab))
+  allocate(cW_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab), cW_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))!, cW_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab))
+  allocate(v_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab))
+  !allocate(v_vooo(cc_nVab,cc_nOab,cc_nOab,cc_nOab))
+  allocate(v_ovoo(cc_nOab,cc_nVab,cc_nOab,cc_nOab))
+  allocate(v_oovo(cc_nOab,cc_nOab,cc_nVab,cc_nOab))
+  allocate(v_ooov(cc_nOab,cc_nOab,cc_nOab,cc_nVab))
+  allocate(v_vvoo(cc_nVab,cc_nVab,cc_nOab,cc_nOab))
+  !allocate(v_vovo(cc_nVab,cc_nOab,cc_nVab,cc_nOab))
+  !allocate(v_voov(cc_nVab,cc_nOab,cc_nOab,cc_nVab))
+  allocate(v_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))
+  allocate(v_ovov(cc_nOab,cc_nVab,cc_nOab,cc_nVab))
+  allocate(v_oovv(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
+  !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab))
+  !allocate(v_vvov(cc_nVab,cc_nVab,cc_nOab,cc_nVab))
+  !allocate(v_vovv(cc_nVab,cc_nOab,cc_nVab,cc_nVab))
+  !allocate(v_ovvv(cc_nOab,cc_nVab,cc_nVab,cc_nVab))
+  !allocate(v_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab))
+  allocate(f_o(cc_nOab), f_v(cc_nVab))
+
   ! Allocation for the diis
   if (cc_update_method == 'diis') then
-    allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth))
+    allocate(all_err(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth), all_t(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth))
     all_err = 0d0
     all_t   = 0d0
   endif
 
-  ! Fock elements
-  call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo)
-  call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov)
-  call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv)
-
   ! Diag elements
-  do i = 1, nO
-    f_o(i) = f_oo(i,i)
+  do i = 1, cc_nOab
+    f_o(i) = cc_spin_f_oo(i,i)
   enddo
-  do i = 1, nV
-    f_v(i) = f_vv(i,i)
+  do i = 1, cc_nVab
+    f_v(i) = cc_spin_f_vv(i,i)
   enddo
 
   ! Bi electronic integrals from list
   call wall_time(ti)
   ! OOOO
-  call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo)
+  call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nOab,cc_nOab, v_oooo)
 
   ! OOO V
-  !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo)
-  call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo)
-  call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo)
-  call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov)
+  !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nOab,cc_nOab, v_vooo)
+  call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nOab,cc_nOab, v_ovoo)
+  call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nVab,cc_nOab, v_oovo)
+  call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nOab,cc_nVab, v_ooov)
 
   ! OO VV
-  call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo)
-  !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo)
-  !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov)
-  call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo)
-  call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov)
-  call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv)
+  call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nOab,cc_nOab, v_vvoo)
+  !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nVab,cc_nOab, v_vovo)
+  !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nOab,cc_nVab, v_voov)
+  call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nVab,cc_nOab, v_ovvo)
+  call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nOab,cc_nVab, v_ovov)
+  call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nVab,cc_nVab, v_oovv)
 
   ! O VVV
-  !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo)
-  !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov)
-  !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv)
-  !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv)
+  !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo)
+  !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nOab,cc_nVab, v_vvov)
+  !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nVab,cc_nVab, v_vovv)
+  !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nVab,cc_nVab, v_ovvv)
 
   ! VVVV
-  !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv)
+  !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nVab,cc_nVab, v_vvvv)
   call wall_time(tf)
   if (cc_dev) then
     print*,'Load bi elec int:',tf-ti,'s'
@@ -149,11 +124,11 @@ subroutine run_ccsd_spin_orb
 
   ! Init of T
   t1 = 0d0
-  call guess_t1(nO,nV,f_o,f_v,f_ov,t1)
-  call guess_t2(nO,nV,f_o,f_v,v_oovv,t2)
-  call compute_tau_spin(nO,nV,t1,t2,tau)
-  call compute_tau_t_spin(nO,nV,t1,t2,tau_t)
-  
+  call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,t1)
+  call guess_t2(cc_nOab,cc_nVab,f_o,f_v,v_oovv,t2)
+  call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau)
+  call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t)
+
   ! Loop init
   nb_iter = 0
   not_converged = .True.
@@ -164,9 +139,9 @@ subroutine run_ccsd_spin_orb
 
   call det_energy(det,uncorr_energy)
   print*,'Det energy', uncorr_energy
-  call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy)
+  call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy)
   print*,'guess energy', uncorr_energy+energy, energy
-  
+
   write(*,'(A77)') ' -----------------------------------------------------------------------------'
   write(*,'(A77)') ' |   It.  |       E(CCSD) (Ha) | Correlation (Ha) |  Conv. T1  |  Conv. T2  |'
   write(*,'(A77)') ' -----------------------------------------------------------------------------'
@@ -179,18 +154,18 @@ subroutine run_ccsd_spin_orb
     ! Intermediates
     call wall_time(tbi)
     call wall_time(ti)
-    call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo)
-    call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov)
-    call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv)
+    call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,v_ooov,v_oovv,cF_oo)
+    call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,v_oovv,cF_ov)
+    call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,v_oovv,cF_vv)
     call wall_time(tf)
     if (cc_dev) then
       print*,'Compute cFs:',tf-ti,'s'
     endif
-    
+
     call wall_time(ti)
-    call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
-    call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
-    !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv)
+    call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
+    call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
+    !call compute_cW_vvvv(cc_nOab,cc_nVab,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv)
     call wall_time(tf)
     if (cc_dev) then
       print*,'Compute cFs:',tf-ti,'s'
@@ -198,13 +173,13 @@ subroutine run_ccsd_spin_orb
 
     ! Residuals
     call wall_time(ti)
-    call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1)
+    call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1)
     call wall_time(tf)
     if (cc_dev) then
       print*,'Compute r1:',tf-ti,'s'
     endif
     call wall_time(ti)
-    call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2)
+    call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2)
     call wall_time(tf)
     if (cc_dev) then
       print*,'Compute r2:',tf-ti,'s'
@@ -218,29 +193,29 @@ subroutine run_ccsd_spin_orb
     call wall_time(ti)
     ! Update
     if (cc_update_method == 'diis') then
-      !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
-      !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
-      call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t)
+      !call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+      !call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
+      call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t)
 
     ! Standard update as T = T - Delta
     elseif (cc_update_method == 'none') then
-      call update_t1(nO,nV,f_o,f_v,r1,t1)
-      call update_t2(nO,nV,f_o,f_v,r2,t2)
+      call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1,t1)
+      call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2,t2)
     else
       print*,'Unkonw cc_method_method: '//cc_update_method
     endif
 
-    call compute_tau_spin(nO,nV,t1,t2,tau)
-    call compute_tau_t_spin(nO,nV,t1,t2,tau_t)
+    call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau)
+    call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t)
     call wall_time(tf)
     if (cc_dev) then
       print*,'Update:',tf-ti,'s'
     endif
 
     ! Print
-    call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy)
+    call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy)
     call wall_time(tfi)
-    
+
     write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', &
          uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
     if (cc_dev) then
@@ -270,8 +245,8 @@ subroutine run_ccsd_spin_orb
   print*,''
 
   if (write_amplitudes) then
-    call write_t1(nO,nV,t1)
-    call write_t2(nO,nV,t2)
+    call write_t1(cc_nOab,cc_nVab,t1)
+    call write_t2(cc_nOab,cc_nVab,t2)
     call ezfio_set_utils_cc_io_amplitudes('Read')
   endif
 
@@ -286,20 +261,20 @@ subroutine run_ccsd_spin_orb
   deallocate(v_oooo)
   deallocate(v_ovoo,v_oovo)
   deallocate(v_ovvo,v_ovov,v_oovv)
-  
+
   double precision :: t_corr
   t_corr = 0.d0
   if (cc_par_t .and. elec_alpha_num  +elec_beta_num > 2) then
     print*,'CCSD(T) calculation...'
     call wall_time(ta)
-    !allocate(v_vvvo(nV,nV,nV,nO))
+    !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab))
     !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, &
     !   cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, &
     !   cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
-    !   nV,nV,nV,nO, v_vvvo)
+    !   cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo)
 
-    !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr)
-    call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr)
+    !call ccsd_par_t_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,v_vvvo,t_corr)
+    call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,t_corr)
     !print*,'Working on it...'
     !call abort
     call wall_time(tb)
@@ -313,12 +288,12 @@ subroutine run_ccsd_spin_orb
   endif
 
   call save_energy(uncorr_energy + energy, t_corr)
-  
-  deallocate(f_oo,f_ov,f_vv,f_o,f_v)
+
+  deallocate(f_o,f_v)
   deallocate(v_ooov,v_vvoo,t1,t2)
   !deallocate(v_ovvv,v_vvvo,v_vovv)
   !deallocate(v_vvvv)
-  
+
 end
 
 ! Energy
@@ -354,7 +329,7 @@ subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy)
     do j=1,nO
       do a=1,nV
         do b=1,nV
-          energy = energy                                & 
+          energy = energy                                &
                  + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) &
                  + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b)
         end do
@@ -375,7 +350,7 @@ subroutine compute_tau_spin(nO,nV,t1,t2,tau)
   double precision,intent(in)   :: t2(nO,nO,nV,nV)
 
   double precision,intent(out)  :: tau(nO,nO,nV,nV)
-  
+
   integer                       :: i,j,k,l
   integer                       :: a,b,c,d
 
@@ -463,7 +438,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov
   !$OMP v_ovov,nO,nV) &
   !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(1)
   do a=1,nV
     do i=1,nO
@@ -494,7 +469,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov
              1d0, t1   , size(t1,1), &
                   cF_vv, size(cF_vv,1), &
              1d0, r1   , size(r1,1))
-  
+
   !do a=1,nV
   !  do i=1,nO
   !    do m=1,nO
@@ -531,7 +506,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov
     !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) &
     !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
     !$OMP DEFAULT(NONE)
-    
+
 
     !$OMP DO collapse(3)
     !do f = 1, nV
@@ -546,28 +521,28 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov
     !enddo
     !$OMP END DO
     !$OMP END PARALLEL
-      
+
     call dgemm('N','T', nO, nV, nO*nV, &
              -0.5d0, t2(1,1,1,f), size(t2,1), &
                      X_vovf, size(X_vovf,1), &
               1d0  , r1    , size(r1,1))
   enddo
-  
+
   !call dgemm('N','T', nO, nV, nO*nV*nV, &
   !           -0.5d0, t2    , size(t2,1), &
   !                   X_vovv, size(X_vovv,1), &
   !            1d0  , r1    , size(r1,1))
-  
+
   deallocate(X_vovf)
   !deallocate(X_vovv)
   allocate(X_oovv(nO,nO,nV,nV))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(r1,t1,t2,X_oovv, &
   !$OMP f_o,f_v,v_oovo,nO,nV) &
   !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
   !$OMP DEFAULT(NONE)
-  
+
   !do a=1,nV
   !  do i=1,nO
   !    do e=1,nV
@@ -579,7 +554,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov
   !    end do
   !  end do
   !end do
-  
+
   !$OMP DO collapse(3)
   do a = 1, nV
     do e = 1, nV
@@ -592,12 +567,12 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   call dgemm('T','N', nO, nV, nO*nO*nV, &
              -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), &
                      X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), &
              1d0   , r1    , size(r1,1))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) &
   !$OMP PRIVATE(i,j,a,b,e,f,m,n) &
@@ -610,7 +585,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   deallocate(X_oovv)
 
 end
@@ -684,7 +659,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) &
   !$OMP PRIVATE(i,j,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
@@ -697,7 +672,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   !deallocate(X_oovv)
 
   !do b=1,nV
@@ -726,25 +701,25 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
              0.5d0, t2    , size(t2,1) * size(t2,2) * size(t2,3), &
                     A_vv  , size(A_vv,1), &
              0d0  , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) &
   !$OMP PRIVATE(i,j,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
       do j=1,nO
         do i=1,nO
-          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) 
+          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a)
         end do
       end do
     end do
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-             
+
   deallocate(A_vv)!,X_oovv)
 
   !do b=1,nV
@@ -766,7 +741,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) &
   !$OMP PRIVATE(i,m,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
@@ -789,13 +764,13 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) &
   !$OMP PRIVATE(i,j,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
       do j=1,nO
         do i=1,nO
-          r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) 
+          r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b)
         end do
       end do
     end do
@@ -821,17 +796,17 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   !  end do
   !end do
   allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV))
-  
+
   call dgemm('N','T', nO, nO, nV, &
         1d0, t1   , size(t1,1), &
              cF_ov, size(cF_ov,1), &
         0d0, A_oo , size(A_oo,1))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(t2,B_oovv,nO,nV) &
   !$OMP PRIVATE(i,m,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b = 1, nV
     do a = 1, nV
@@ -844,17 +819,17 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   call dgemm('N','N', nO, nO*nV*nV, nO, &
              0.5d0, A_oo, size(A_oo,1), &
                     B_oovv, size(B_oovv,1), &
              0d0  , X_oovv, size(X_oovv,1))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(r2,X_oovv,nO,nV) &
   !$OMP PRIVATE(i,j,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
@@ -888,7 +863,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
              0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), &
                     tau    , size(tau,1) * size(tau,2), &
              1d0  , r2     , size(r2,1) * size(r2,2))
-  
+
   !do b=1,nV
   !  do a=1,nV
   !    do j=1,nO
@@ -908,6 +883,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   !           0.5d0, tau    , size(tau,1) * size(tau,2), &
   !                  cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), &
   !           1d0  , r2     , size(r2,1) * size(r2,2))
+
   double precision :: ti,tf
   call wall_time(ti)
   call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2)
@@ -915,7 +891,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   if (cc_dev) then
     print*,'cW_vvvv:',tf-ti,'s'
   endif
-  
+
   !do b=1,nV
   !  do a=1,nV
   !    do j=1,nO
@@ -923,7 +899,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
 
   !        do e=1,nV
   !          do m=1,nO
-  !            r2(i,j,a,b) = r2(i,j,a,b)                                                 & 
+  !            r2(i,j,a,b) = r2(i,j,a,b)                                                 &
   !                        + t2(i,m,a,e)*cW_ovvo(m,b,e,j) &
   !                        - t2(j,m,a,e)*cW_ovvo(m,b,e,i) &
   !                        - t2(i,m,b,e)*cW_ovvo(m,a,e,j) &
@@ -944,7 +920,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) &
   !$OMP PRIVATE(i,j,a,b,e,m) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do a = 1, nV
     do i = 1, nO
@@ -961,24 +937,24 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
     do b = 1, nV
       do e = 1, nV
         do m = 1, nO
-          B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) 
+          B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j)
         enddo
       enddo
     enddo
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   call dgemm('T','N', nO*nV, nV*nO, nO*nV, &
              1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), &
                   B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), &
              0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(r2,X_ovvo,nO,nV) &
   !$OMP PRIVATE(i,j,a,b,e,m) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b = 1, nV
     do a = 1, nV
@@ -992,15 +968,15 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   deallocate(A_ovov,B_ovvo,X_ovvo)
   allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) &
   !$OMP PRIVATE(i,j,a,b,e,m) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do m = 1, nO
     do j = 1, nO
@@ -1013,22 +989,22 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   call dgemm('N','N', nO, nV*nO*nO, nV, &
              1d0, t1    , size(t1,1), &
                   A_vvoo, size(A_vvoo,1), &
              0d0, B_ovoo, size(B_ovoo,1))
-  
+
   call dgemm('N','N', nO*nV*nO, nV, nO, &
              1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), &
                   t1    , size(t1,1), &
              0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2) * size(C_ovov,3))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(r2,C_ovov,nO,nV) &
   !$OMP PRIVATE(i,j,a,b,e,m) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
@@ -1042,9 +1018,9 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   deallocate(A_vvoo, B_ovoo, C_ovov)
-                  
+
   !do b=1,nV
   !  do a=1,nV
   !    do j=1,nO
@@ -1065,12 +1041,12 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
     call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, &
          cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
          nV,nV,nO, v_vbvo)
-    
+
     !$OMP PARALLEL &
     !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) &
     !$OMP PRIVATE(i,j,a,e,m) &
     !$OMP DEFAULT(NONE)
-    
+
     !$OMP DO collapse(2)
     do e = 1, nV
       do j = 1, nO
@@ -1093,12 +1069,12 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
     !           1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), &
     !                t1    , size(t1,1), &
     !           0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3))
-    
+
     !$OMP PARALLEL &
     !$OMP SHARED(b,r2,X_vboo,nO,nV) &
     !$OMP PRIVATE(i,j,a,e,m) &
     !$OMP DEFAULT(NONE)
-    
+
     !$OMP DO collapse(2)
     !do b = 1, nV
       do a = 1, nV
@@ -1113,7 +1089,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
     !$OMP END DO
     !$OMP END PARALLEL
   enddo
-  
+
   !deallocate(A_vvov)!,X_vvoo)
   deallocate(A_vbov, X_vboo, v_vbvo)
   allocate(X_vvoo(nV,nV,nO,nO))
@@ -1132,7 +1108,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   !  end do
   !end do
   !allocate(X_vvoo(nV,nV,nO,nO))
-  
+
   call dgemm('T','N', nV, nV*nO*nO, nO, &
              1d0, t1    , size(t1,1), &
                   v_ovoo, size(v_ovoo,1), &
@@ -1142,7 +1118,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) &
   !$OMP PRIVATE(i,j,a,b,e,m) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
@@ -1154,7 +1130,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
     end do
   end do
   !$OMP END DO
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
@@ -1167,7 +1143,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   deallocate(X_vvoo)
 
 end
@@ -1182,16 +1158,16 @@ subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2)
   double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)
   double precision, intent(in)    :: F_oo(nO,nV), F_ov(nO,nV)
   double precision, intent(in)    :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV)
-  
+
   double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)
-  
+
   double precision, allocatable   :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:)
   integer                         :: i,j,m,a,b
 
   allocate(cF_oo(nO,nO))
-  
+
   call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo)
-  
+
   !do a=1,nV
   !  do i=1,nO
   !    do m=1,nO
@@ -1218,13 +1194,13 @@ subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2)
   !    end do
   !  end do
   !end do
-  
+
   allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV))
   !$OMP PARALLEL &
   !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) &
   !$OMP PRIVATE(i,m,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
@@ -1247,20 +1223,20 @@ subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2)
   !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) &
   !$OMP PRIVATE(i,j,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
       do j=1,nO
         do i=1,nO
-          r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) 
+          r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b)
         end do
       end do
     end do
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   deallocate(cF_oo,X_oovv,Y_oovv)
 
 end
@@ -1274,7 +1250,7 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2)
   integer, intent(in)             :: nO,nV
   double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV)
   double precision, intent(in)    :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV)
-  
+
   double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)
 
   double precision, allocatable   :: cF_ov(:,:), A_oo(:,:), A_vv(:,:)
@@ -1282,14 +1258,14 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2)
   integer                         :: i,j,a,b,e,m
 
   allocate(cF_ov(nO,nV))
-  
+
   call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov)
 
   !$OMP PARALLEL &
   !$OMP SHARED(r1,t2,cF_ov,nO,nV) &
   !$OMP PRIVATE(i,a,e,m) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(1)
   do a=1,nV
     do i=1,nO
@@ -1334,22 +1310,22 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2)
   !$OMP SHARED(nO,nV,r2,X_oovv) &
   !$OMP PRIVATE(i,j,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
       do j=1,nO
         do i=1,nO
-          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) 
+          r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a)
         end do
       end do
     end do
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-             
+
   deallocate(A_vv)
-  
+
   !do b=1,nV
   !  do a=1,nV
   !    do j=1,nO
@@ -1367,17 +1343,17 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2)
   !  end do
   !end do
   allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV))
-  
+
   call dgemm('N','T', nO, nO, nV, &
         1d0, t1   , size(t1,1), &
              cF_ov, size(cF_ov,1), &
         0d0, A_oo , size(A_oo,1))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(t2,B_oovv,nO,nV) &
   !$OMP PRIVATE(i,m,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b = 1, nV
     do a = 1, nV
@@ -1390,7 +1366,7 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2)
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   call dgemm('N','N', nO, nO*nV*nV, nO, &
              0.5d0, A_oo, size(A_oo,1), &
                     B_oovv, size(B_oovv,1), &
@@ -1400,7 +1376,7 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2)
   !$OMP SHARED(r2,X_oovv,nO,nV) &
   !$OMP PRIVATE(i,j,a,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b=1,nV
     do a=1,nV
@@ -1413,9 +1389,9 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2)
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   deallocate(cF_ov,A_oo,B_oovv,X_oovv)
-  
+
 end
 
 ! Use cF_vv
@@ -1426,18 +1402,18 @@ subroutine use_cF_vv(nO,nV,t1,t2,r1,r2)
 
   integer, intent(in)             :: nO,nV
   double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV)
-  
+
   double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV)
 
   double precision, allocatable   :: cF_vv(:,:)
   integer                         :: i,j,a,b,e,m
 
   allocate(cF_vv(nV,nV))
-  
+
   !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv)
 
   deallocate(cF_vv)
-  
+
 end
 
 ! Use cW_vvvd
@@ -1450,7 +1426,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2)
   double precision, intent(in)    :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
   double precision, intent(in)    :: v_oovv(nO,nO,nV,nV)
   !double precision, intent(in)    :: v_vovv(nV,nO,nV,nV)
-  
+
   double precision, intent(inout) :: r2(nO,nO,nV,nV)
 
   double precision, allocatable   :: cW_vvvf(:,:,:), v_vvvf(:,:,:), tau_f(:,:,:), v_vovf(:,:,:)
@@ -1460,7 +1436,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2)
   allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV))
 
   !PROVIDE cc_nVab
-  
+
   !do b=1,nV
   !  do a=1,nV
   !    do j=1,nO
@@ -1476,14 +1452,14 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2)
   !    end do
   !  end do
   !end do
-  
+
   do f = 1, nV
     call wall_time(ti)
     !$OMP PARALLEL &
     !$OMP SHARED(tau,tau_f,f,nO,nV) &
     !$OMP PRIVATE(i,j,e) &
     !$OMP DEFAULT(NONE)
-    
+
     !$OMP DO collapse(2)
     do e = 1, nV
       do j = 1, nO
@@ -1515,7 +1491,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2)
     if (cc_dev .and. f == 1) then
       print*,'vovf', tf-ti
     endif
-    
+
     call wall_time(ti)
     call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf)
     call wall_time(tf)
@@ -1535,7 +1511,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2)
   enddo
 
   deallocate(cW_vvvf,v_vvvf,v_vovf)
-  
+
 end
 
 ! cF_oo
@@ -1562,7 +1538,7 @@ subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo)
   !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) &
   !$OMP PRIVATE(i,m,n,e) &
   !$OMP DEFAULT(NONE)
-  
+
   !do i=1,nO
   !  do m=1,nO
   !    cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i)
@@ -1580,7 +1556,7 @@ subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo)
     cF_oo(i,i) = 0d0
   end do
   !$OMP END DO
-  
+
   do e=1,nV
     do n=1,nO
       !$OMP DO collapse(1)
@@ -1620,8 +1596,8 @@ subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo)
   call dgemm('N','T', nO, nO, nO*nV*nV, &
              0.5d0, v_oovv, size(v_oovv,1), &
                     tau_t , size(tau_t,1), &
-             1d0  , cF_oo , size(cF_oo,1)) 
-  
+             1d0  , cF_oo , size(cF_oo,1))
+
 end
 
 ! cF_ov
@@ -1643,7 +1619,7 @@ subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov)
   !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) &
   !$OMP PRIVATE(i,a,m,n,e,f) &
   !$OMP DEFAULT(NONE)
-  
+
   !cF_ov = Fov
 
   !$OMP DO collapse(1)
@@ -1659,7 +1635,7 @@ subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov)
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
 end
 
 ! cF_vv
@@ -1677,7 +1653,7 @@ subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv)
   !double precision,intent(in)   :: v_ovvv(nO,nV,nV,nV)
 
   double precision,intent(out)  :: cF_vv(nV,nV)
-  
+
   double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:)
   integer                       :: i,j,m,n
   integer                       :: a,b,e,f
@@ -1699,7 +1675,7 @@ subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv)
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
- 
+
   !do e=1,nV
   !  do a=1,nV
   !    do m=1,nO
@@ -1711,7 +1687,7 @@ subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv)
              -0.5d0, t1   , size(t1,1), &
                      Fov  , size(Fov,1), &
               1d0  , cF_vv, size(cF_vv,1))
-  
+
   !do e=1,nV
   !  do a=1,nV
   !    do m=1,nO
@@ -1791,7 +1767,7 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
   integer                       :: a,b,e,f
   double precision, allocatable :: X_oooo(:,:,:,:)
 
-  ! oooo block  
+  ! oooo block
 
   !cW_oooo = v_oooo
 
@@ -1809,7 +1785,7 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
   !  end do
   !end do
   allocate(X_oooo(nO,nO,nO,nO))
-  
+
   call dgemm('N','T', nO*nO*nO, nO, nV, &
              1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), &
                   t1    , size(t1,1), &
@@ -1830,14 +1806,14 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   deallocate(X_oooo)
-  
+
   !do m=1,nO
   !  do n=1,nO
   !    do i=1,nO
   !      do j=1,nO
-  !         
+  !
   !        do e=1,nV
   !          do f=1,nV
   !            cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f)
@@ -1853,7 +1829,7 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
              0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), &
                      tau    , size(tau,1) * size(tau,2), &
              1.d0  , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2))
-  
+
 end
 
 ! cW_ovvo
@@ -1913,7 +1889,7 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
     call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, &
                               cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, &
                               nO,nV,nV, v_ovev)
-     
+
     call dgemm('N','T', nO*nV, nO, nV, &
                1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), &
                      t1     , size(t1,1), &
@@ -1950,14 +1926,14 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
   !    end do
   !  end do
   !end do
-  
+
   allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(A_oovo,v_oovo,nO,nV) &
   !$OMP PRIVATE(j,e,m,n) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do j=1,nO
     do e=1,nV
@@ -1970,17 +1946,17 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   call dgemm('T','N', nV, nO*nV*nO, nO, &
              1d0, t1    , size(t1,1), &
                   A_oovo, size(A_oovo,1), &
              0d0, B_vovo, size(B_vovo,1))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) &
   !$OMP PRIVATE(j,e,m,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do j=1,nO
     do e=1,nV
@@ -2015,7 +1991,7 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
   !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) &
   !$OMP PRIVATE(f,n,m,e,j,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do b = 1, nV
     do j = 1, nO
@@ -2039,19 +2015,19 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   call dgemm('T','N', nO*nV, nV*nO, nV*nO, &
              1d0, A_voov, size(A_voov,1) * size(A_voov,2), &
                   B_voov, size(B_voov,1) * size(B_voov,2), &
              0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2))
-  
+
   deallocate(A_voov,B_voov)
 
   !$OMP PARALLEL &
   !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) &
   !$OMP PRIVATE(j,e,m,b) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(3)
   do j = 1, nO
     do e = 1, nV
@@ -2064,7 +2040,7 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   deallocate(C_ovov)
 
 end
@@ -2072,7 +2048,7 @@ end
 ! cW_vvvv
 
 subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv)
- 
+
   implicit none
 
   integer,intent(in)            :: nO,nV
@@ -2154,14 +2130,14 @@ subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv)
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
   deallocate(A_ovvv,B_vvvv)
 
   !do a=1,nV
   !  do b=1,nV
   !    do e=1,nV
   !      do f=1,nV
-  !         
+  !
   !        do m=1,nO
   !          do n=1,nO
   !            cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f)
@@ -2182,7 +2158,7 @@ end
 ! cW_vvvf
 
 subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf)
- 
+
   implicit none
 
   integer,intent(in)            :: nO,nV,f
@@ -2207,7 +2183,7 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf)
   !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) &
   !$OMP PRIVATE(a,b,c,d,e,m) &
   !$OMP DEFAULT(NONE)
-  
+
   !$OMP DO collapse(2)
   do c = 1, nV
     do b = 1, nV
@@ -2248,7 +2224,7 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf)
              1d0, t1    , size(t1,1), &
                   A_ovvf, size(A_ovvf,1), &
              0d0, B_vvvf, size(B_vvvf,1))
-  
+
   !$OMP PARALLEL &
   !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) &
   !$OMP PRIVATE(a,b,c,d,e,m,n) &
@@ -2264,14 +2240,14 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf)
     end do
   end do
   !$OMP END DO NOWAIT
-  
+
   !deallocate(A_ovvf,B_vvvf)
 
   !do a=1,nV
   !  do b=1,nV
   !    do e=1,nV
   !      do f=1,nV
-  !         
+  !
   !        do m=1,nO
   !          do n=1,nO
   !            cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f)
@@ -2292,13 +2268,13 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf)
     enddo
   enddo
   !$OMP END DO
-  !$OMP END PARALLEL  
-  
+  !$OMP END PARALLEL
+
   call dgemm('T','N', nV*nV, nV, nO*nO, &
              0.25d0, tau    , size(tau,1) * size(tau,2), &
                      v_oovf , size(v_oovf,1) * size(v_oovf,2), &
              1.d0  , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2))
-  
+
   deallocate(v_oovf)
   deallocate(A_ovvf,B_vvvf)
 
diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f
index 6f21c316..eebc84ca 100644
--- a/src/utils_cc/mo_integrals_cc.irp.f
+++ b/src/utils_cc/mo_integrals_cc.irp.f
@@ -1006,6 +1006,22 @@ BEGIN_PROVIDER [double precision, cc_space_f_v, (cc_nVa)]
 
 END_PROVIDER
 
+
+BEGIN_PROVIDER [ double precision, cc_spin_f_oo, (cc_nOab, cc_nOab)]
+ implicit none
+ call gen_f_spin(psi_det(1,1,cc_ref), cc_nO_m, cc_nO_m, cc_nO_S, cc_nO_S, cc_list_occ_spin, cc_list_occ_spin, cc_nOab, cc_nOab, cc_spin_f_oo)
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, cc_spin_f_ov, (cc_nOab, cc_nVab)]
+ implicit none
+ call gen_f_spin(psi_det(1,1,cc_ref), cc_nO_m, cc_nV_m, cc_nO_S, cc_nV_S, cc_list_occ_spin, cc_list_vir_spin, cc_nOab, cc_nVab, cc_spin_f_ov)
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, cc_spin_f_vv, (cc_nVab, cc_nVab)]
+ implicit none
+ call gen_f_spin(psi_det(1,1,cc_ref), cc_nV_m, cc_nV_m, cc_nV_S, cc_nV_S, cc_list_vir_spin, cc_list_vir_spin, cc_nVab, cc_nVab, cc_spin_f_vv)
+END_PROVIDER
+
 ! Shift
 
 subroutine shift_idx_spin(s,n_S,shift)

From e3e874879f47013b48963d729f8f312bf6ddf33b Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 4 Feb 2025 18:11:47 +0100
Subject: [PATCH 14/59] Introduced Cholesky in gen_spin_space

---
 src/utils_cc/mo_integrals_cc.irp.f | 1384 ++++++++++++++++++++--------
 1 file changed, 1022 insertions(+), 362 deletions(-)

diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f
index eebc84ca..813c186a 100644
--- a/src/utils_cc/mo_integrals_cc.irp.f
+++ b/src/utils_cc/mo_integrals_cc.irp.f
@@ -47,7 +47,7 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
 
   integer                       :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
     double precision, allocatable :: buffer(:,:,:,:)
     double precision, allocatable :: v1(:,:,:), v2(:,:,:)
     allocate(v1(cholesky_mo_num,n1,n3), v2(cholesky_mo_num,n2,n4))
@@ -132,7 +132,7 @@ end
 
 BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)]
   implicit none
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
     integer                       :: i1,i2,i3,i4
     double precision, allocatable :: buffer(:,:,:)
     call set_multiple_levels_omp(.False.)
@@ -190,7 +190,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n
 
   implicit none
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -233,7 +233,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_n
 
   implicit none
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -277,7 +277,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_n
   implicit none
 
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -311,7 +311,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_n
 
   implicit none
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -345,7 +345,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_n
 
   implicit none
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -379,7 +379,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n
 
   implicit none
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -422,7 +422,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_n
 
   implicit none
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -465,7 +465,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_n
 
   implicit none
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -499,7 +499,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_n
 
   implicit none
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -533,7 +533,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_n
 
   implicit none
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -567,7 +567,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n
 
   implicit none
 
-  if (do_ao_cholesky) then
+  if (do_mo_cholesky) then
 
     integer :: i1, i2, i3, i4
     integer :: n1, n2, n3, n4
@@ -1169,7 +1169,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4,
   implicit none
 
    BEGIN_DOC
-  ! Compute the bi electronic integrals corresponding to four lists of spin orbitals.
+  ! Compute the 2e-integrals corresponding to four lists of spin orbitals.
   ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
   END_DOC
 
@@ -1178,129 +1178,306 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4,
   integer, intent(in)           :: dim1, dim2, dim3, dim4
   double precision, intent(out) :: v(dim1,dim2,dim3,dim4)
 
-  double precision              :: mo_two_e_integral
+  double precision, external    :: mo_two_e_integral
   integer                       :: i,j,k,l,idx_i,idx_j,idx_k,idx_l
   integer                       :: i_shift,j_shift,k_shift,l_shift
   integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
   integer                       :: si,sj,sk,sl,s
 
-  PROVIDE cc_space_v
+  double precision, allocatable :: buffer(:,:,:,:)
+  double precision, allocatable :: v1(:,:,:), v2(:,:,:)
 
-  !$OMP PARALLEL &
-  !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) &
-  !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, &
-  !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,&
-  !$OMP tmp_i,tmp_j,tmp_k,tmp_l)&
-  !$OMP DEFAULT(NONE)
+  if (do_mo_cholesky) then
 
-  do sl = 1, 2
-    call shift_idx_spin(sl,n4_S,l_shift)
-    do sk = 1, 2
-      call shift_idx_spin(sk,n3_S,k_shift)
-      do sj = 1, 2
-        call shift_idx_spin(sj,n2_S,j_shift)
-        do si = 1, 2
-          call shift_idx_spin(si,n1_S,i_shift)
+    do sl = 1, 2
+      call shift_idx_spin(sl,n4_S,l_shift)
+      do sk = 1, 2
+        call shift_idx_spin(sk,n3_S,k_shift)
+        do sj = 1, 2
+          call shift_idx_spin(sj,n2_S,j_shift)
+          do si = 1, 2
+            call shift_idx_spin(si,n1_S,i_shift)
 
-          s = si+sj+sk+sl
-          ! <aa||aa> or <bb||bb>
-          if (s == 4 .or. s == 8) then
-            !$OMP DO collapse(3)
-            do tmp_l = 1, n4_S(sl)
-              do tmp_k = 1, n3_S(sk)
-                do tmp_j = 1, n2_S(sj)
-                  do tmp_i = 1, n1_S(si)
-                    l = list4(tmp_l,sl)
-                    idx_l = tmp_l + l_shift
-                    k = list3(tmp_k,sk)
-                    idx_k = tmp_k + k_shift
-                    j = list2(tmp_j,sj)
-                    idx_j = tmp_j + j_shift
-                    i = list1(tmp_i,si)
-                    idx_i = tmp_i + i_shift
-                       !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
-                       v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+            s = si+sj+sk+sl
+            ! <aa||aa> or <bb||bb>
+            if (s == 4 .or. s == 8) then
+
+              allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),n4_S(sl)))
+              allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj),n4_S(sl)))
+
+              call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l)
+              !$OMP DO collapse(3)
+              do l = 1, n4_S(sl)
+                do k = 1, n3_S(sk)
+                  do j = 1, n2_S(sj)
+                    idx_l = l + l_shift
+                    idx_k = k + k_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v(idx_i,idx_j,idx_k,idx_l) = buffer(i,k,j,l)
+                    enddo
                   enddo
                 enddo
               enddo
-            enddo
-            !$OMP END DO
+              !$OMP END DO
+              !$OMP END PARALLEL
 
-          ! <ab||ab> or <ba||ba>
-          elseif (si == sk .and. sj == sl) then
-            !$OMP DO collapse(3)
-            do tmp_l = 1, n4_S(sl)
-              do tmp_k = 1, n3_S(sk)
-                do tmp_j = 1, n2_S(sj)
-                  do tmp_i = 1, n1_S(si)
-                    l = list4(tmp_l,sl)
-                    idx_l = tmp_l + l_shift
-                    k = list3(tmp_k,sk)
-                    idx_k = tmp_k + k_shift
-                    j = list2(tmp_j,sj)
-                    idx_j = tmp_j + j_shift
-                    i = list1(tmp_i,si)
-                    idx_i = tmp_i + i_shift
-                       !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
-                       v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l)
+              deallocate(v1, v2, buffer)
+
+              allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl)))
+              allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si),n4_S(sl)))
+
+              call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l)
+              !$OMP DO collapse(3)
+              do l = 1, n4_S(sl)
+                do k = 1, n3_S(sk)
+                  do j = 1, n2_S(sj)
+                    idx_l = l + l_shift
+                    idx_k = k + k_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v(idx_i,idx_j,idx_k,idx_l) = v(idx_i,idx_j,idx_k,idx_l) - buffer(j,k,i,l)
+                    enddo
                   enddo
                 enddo
               enddo
-            enddo
-            !$OMP END DO
+              !$OMP END DO
+              !$OMP END PARALLEL
 
-          ! <ab||ba> or <ba||ab>
-          elseif (si == sl .and. sj == sk) then
-            !$OMP DO collapse(3)
-            do tmp_l = 1, n4_S(sl)
-              do tmp_k = 1, n3_S(sk)
-                do tmp_j = 1, n2_S(sj)
-                  do tmp_i = 1, n1_S(si)
-                    l = list4(tmp_l,sl)
-                    idx_l = tmp_l + l_shift
-                    k = list3(tmp_k,sk)
-                    idx_k = tmp_k + k_shift
-                    j = list2(tmp_j,sj)
-                    idx_j = tmp_j + j_shift
-                    i = list1(tmp_i,si)
-                    idx_i = tmp_i + i_shift
-                       !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
-                       v(idx_i,idx_j,idx_k,idx_l) = - cc_space_v(j,i,k,l)
+              deallocate(v1, v2, buffer)
+
+            ! <ab||ab> or <ba||ba>
+            elseif (si == sk .and. sj == sl) then
+
+              allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),n4_S(sl)))
+              allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj),n4_S(sl)))
+
+              call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l)
+              !$OMP DO collapse(3)
+              do l = 1, n4_S(sl)
+                do k = 1, n3_S(sk)
+                  do j = 1, n2_S(sj)
+                    idx_l = l + l_shift
+                    idx_k = k + k_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v(idx_i,idx_j,idx_k,idx_l) = buffer(i,k,j,l)
+                    enddo
                   enddo
                 enddo
               enddo
-            enddo
-            !$OMP END DO
-          else
-             !$OMP DO collapse(3)
-            do tmp_l = 1, n4_S(sl)
-              do tmp_k = 1, n3_S(sk)
-                do tmp_j = 1, n2_S(sj)
-                  do tmp_i = 1, n1_S(si)
-                    l = list4(tmp_l,sl)
-                    idx_l = tmp_l + l_shift
-                    k = list3(tmp_k,sk)
-                    idx_k = tmp_k + k_shift
-                    j = list2(tmp_j,sj)
-                    idx_j = tmp_j + j_shift
-                    i = list1(tmp_i,si)
-                    idx_i = tmp_i + i_shift
-                       v(idx_i,idx_j,idx_k,idx_l) = 0d0
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+            ! <ab||ba> or <ba||ab>
+            elseif (si == sl .and. sj == sk) then
+
+              allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl)))
+              allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si),n4_S(sl)))
+
+              call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, -1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l)
+              !$OMP DO collapse(3)
+              do l = 1, n4_S(sl)
+                do k = 1, n3_S(sk)
+                  do j = 1, n2_S(sj)
+                    idx_l = l + l_shift
+                    idx_k = k + k_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v(idx_i,idx_j,idx_k,idx_l) = buffer(j,k,i,l)
+                    enddo
                   enddo
                 enddo
               enddo
-            enddo
-            !$OMP END DO
-          endif
+              !$OMP END DO
+              !$OMP END PARALLEL
 
+              deallocate(v1, v2, buffer)
+
+            else
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l)
+              !$OMP DO collapse(3)
+              do l = 1, n4_S(sl)
+                do k = 1, n3_S(sk)
+                  do j = 1, n2_S(sj)
+                    idx_l = l + l_shift
+                    idx_k = k + k_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v(idx_i,idx_j,idx_k,idx_l) = 0d0
+                    enddo
+                  enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+            endif
+
+          enddo
         enddo
       enddo
     enddo
-  enddo
-  !$OMP END PARALLEL
+
+  else
+    !$OMP PARALLEL &
+    !$OMP SHARED(n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) &
+    !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, &
+    !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,&
+    !$OMP tmp_i,tmp_j,tmp_k,tmp_l)&
+    !$OMP DEFAULT(NONE)
+
+    do sl = 1, 2
+      call shift_idx_spin(sl,n4_S,l_shift)
+      do sk = 1, 2
+        call shift_idx_spin(sk,n3_S,k_shift)
+        do sj = 1, 2
+          call shift_idx_spin(sj,n2_S,j_shift)
+          do si = 1, 2
+            call shift_idx_spin(si,n1_S,i_shift)
+
+            s = si+sj+sk+sl
+            ! <aa||aa> or <bb||bb>
+            if (s == 4 .or. s == 8) then
+              !$OMP DO collapse(3)
+              do tmp_l = 1, n4_S(sl)
+                do tmp_k = 1, n3_S(sk)
+                  do tmp_j = 1, n2_S(sj)
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    do tmp_i = 1, n1_S(si)
+                      i = list1(tmp_i,si)
+                      idx_i = tmp_i + i_shift
+                      v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                    enddo
+                  enddo
+                enddo
+              enddo
+              !$OMP END DO
+
+            ! <ab||ab> or <ba||ba>
+            elseif (si == sk .and. sj == sl) then
+              !$OMP DO collapse(3)
+              do tmp_l = 1, n4_S(sl)
+                do tmp_k = 1, n3_S(sk)
+                  do tmp_j = 1, n2_S(sj)
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    do tmp_i = 1, n1_S(si)
+                      i = list1(tmp_i,si)
+                      idx_i = tmp_i + i_shift
+                      v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                    enddo
+                  enddo
+                enddo
+              enddo
+              !$OMP END DO
+
+            ! <ab||ba> or <ba||ab>
+            elseif (si == sl .and. sj == sk) then
+              !$OMP DO collapse(3)
+              do tmp_l = 1, n4_S(sl)
+                do tmp_k = 1, n3_S(sk)
+                  do tmp_j = 1, n2_S(sj)
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    do tmp_i = 1, n1_S(si)
+                      i = list1(tmp_i,si)
+                      idx_i = tmp_i + i_shift
+                      v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                    enddo
+                  enddo
+                enddo
+              enddo
+              !$OMP END DO
+            else
+               !$OMP DO collapse(3)
+              do tmp_l = 1, n4_S(sl)
+                do tmp_k = 1, n3_S(sk)
+                  do tmp_j = 1, n2_S(sj)
+                    idx_l = tmp_l + l_shift
+                    idx_k = tmp_k + k_shift
+                    idx_j = tmp_j + j_shift
+                    do tmp_i = 1, n1_S(si)
+                      i = list1(tmp_i,si)
+                      idx_i = tmp_i + i_shift
+                      v(idx_i,idx_j,idx_k,idx_l) = 0d0
+                    enddo
+                  enddo
+                enddo
+              enddo
+              !$OMP END DO
+            endif
+
+          enddo
+        enddo
+      enddo
+    enddo
+    !$OMP END PARALLEL
+
+  endif
 
 end
 
+
 ! V_3idx
 
 subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_l)
@@ -1323,7 +1500,8 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,
   integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
   integer                       :: si,sj,sk,sl,s
 
-  PROVIDE cc_space_v
+  double precision, allocatable :: buffer(:,:,:)
+  double precision, allocatable :: v1(:,:,:), v2(:,:,:)
 
   if (idx_l <= n4_S(1)) then
     sl = 1
@@ -1334,99 +1512,255 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,
   tmp_l = idx_l - l_shift
   l = list4(tmp_l,sl)
 
-  !$OMP PARALLEL &
-  !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) &
-  !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, &
-  !$OMP i,j,k,idx_i,idx_j,idx_k,&
-  !$OMP tmp_i,tmp_j,tmp_k)&
-  !$OMP DEFAULT(NONE)
+  if (do_mo_cholesky) then
 
-  do sk = 1, 2
-    call shift_idx_spin(sk,n3_S,k_shift)
-    do sj = 1, 2
-      call shift_idx_spin(sj,n2_S,j_shift)
-      do si = 1, 2
-        call shift_idx_spin(si,n1_S,i_shift)
+    do sk = 1, 2
+      call shift_idx_spin(sk,n3_S,k_shift)
+      do sj = 1, 2
+        call shift_idx_spin(sj,n2_S,j_shift)
+        do si = 1, 2
+          call shift_idx_spin(si,n1_S,i_shift)
 
-        s = si+sj+sk+sl
-        ! <aa||aa> or <bb||bb>
-        if (s == 4 .or. s == 8) then
-          !$OMP DO collapse(2)
-          do tmp_k = 1, n3_S(sk)
-            do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)
-                k = list3(tmp_k,sk)
+          s = si+sj+sk+sl
+          ! <aa||aa> or <bb||bb>
+          if (s == 4 .or. s == 8) then
+
+              allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),1))
+              allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj)))
+
+              call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list4(tmp_l,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k)
+              !$OMP DO collapse(2)
+              do k = 1, n3_S(sk)
+                do j = 1, n2_S(sj)
+                    idx_k = k + k_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_l(idx_i,idx_j,idx_k) = buffer(i,k,j)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+              allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),1))
+              allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si)))
+
+              call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n1_S(si),1,list1(1,si),list4(tmp_l,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k)
+              !$OMP DO collapse(2)
+              do k = 1, n3_S(sk)
+                do j = 1, n2_S(sj)
+                    idx_k = k + k_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_l(idx_i,idx_j,idx_k) = v_l(idx_i,idx_j,idx_k) - buffer(j,k,i)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+          ! <ab||ab> or <ba||ba>
+          elseif (si == sk .and. sj == sl) then
+
+              allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),1))
+              allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj)))
+
+              call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list4(tmp_l,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k)
+              !$OMP DO collapse(2)
+              do k = 1, n3_S(sk)
+                do j = 1, n2_S(sj)
+                    idx_k = k + k_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_l(idx_i,idx_j,idx_k) = buffer(i,k,j)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+          ! <ab||ba> or <ba||ab>
+          elseif (si == sl .and. sj == sk) then
+
+              allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),1))
+              allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si)))
+
+              call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n1_S(si),1,list1(1,si),list4(tmp_l,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si), cholesky_mo_num, -1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k,idx_l)
+              !$OMP DO collapse(2)
+              do k = 1, n3_S(sk)
+                do j = 1, n2_S(sj)
+                    idx_k = k + k_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_l(idx_i,idx_j,idx_k) = buffer(j,k,i)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+          else
+            !$OMP DO collapse(2)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_j = 1, n2_S(sj)
                 idx_k = tmp_k + k_shift
-                j = list2(tmp_j,sj)
                 idx_j = tmp_j + j_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
-                   v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+                do tmp_i = 1, n1_S(si)
+                  idx_i = tmp_i + i_shift
+                  v_l(idx_i,idx_j,idx_k) = 0d0
+                enddo
               enddo
             enddo
-          enddo
-          !$OMP END DO
+            !$OMP END DO
 
-        ! <ab||ab> or <ba||ba>
-        elseif (si == sk .and. sj == sl) then
-          !$OMP DO collapse(2)
-          do tmp_k = 1, n3_S(sk)
-            do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)
-                k = list3(tmp_k,sk)
-                idx_k = tmp_k + k_shift
-                j = list2(tmp_j,sj)
-                idx_j = tmp_j + j_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
-                   v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l)
-              enddo
-            enddo
-          enddo
-          !$OMP END DO
-
-        ! <ab||ba> or <ba||ab>
-        elseif (si == sl .and. sj == sk) then
-          !$OMP DO collapse(2)
-          do tmp_k = 1, n3_S(sk)
-            do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)
-                k = list3(tmp_k,sk)
-                idx_k = tmp_k + k_shift
-                j = list2(tmp_j,sj)
-                idx_j = tmp_j + j_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
-                   v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l)
-              enddo
-            enddo
-          enddo
-          !$OMP END DO
-        else
-          !$OMP DO collapse(2)
-          do tmp_k = 1, n3_S(sk)
-            do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)
-                k = list3(tmp_k,sk)
-                idx_k = tmp_k + k_shift
-                j = list2(tmp_j,sj)
-                idx_j = tmp_j + j_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   v_l(idx_i,idx_j,idx_k) = 0d0
-              enddo
-            enddo
-          enddo
-          !$OMP END DO
-        endif
+          endif
 
+        enddo
       enddo
     enddo
-  enddo
-  !$OMP END PARALLEL
+
+
+  else
+
+    PROVIDE cc_space_v
+
+    !$OMP PARALLEL &
+    !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) &
+    !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, &
+    !$OMP i,j,k,idx_i,idx_j,idx_k,&
+    !$OMP tmp_i,tmp_j,tmp_k)&
+    !$OMP DEFAULT(NONE)
+
+    do sk = 1, 2
+      call shift_idx_spin(sk,n3_S,k_shift)
+      do sj = 1, 2
+        call shift_idx_spin(sj,n2_S,j_shift)
+        do si = 1, 2
+          call shift_idx_spin(si,n1_S,i_shift)
+
+          s = si+sj+sk+sl
+          ! <aa||aa> or <bb||bb>
+          if (s == 4 .or. s == 8) then
+            !$OMP DO collapse(2)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_j = 1, n2_S(sj)
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+
+          ! <ab||ab> or <ba||ba>
+          elseif (si == sk .and. sj == sl) then
+            !$OMP DO collapse(2)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_j = 1, n2_S(sj)
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l)
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+
+          ! <ab||ba> or <ba||ab>
+          elseif (si == sl .and. sj == sk) then
+            !$OMP DO collapse(2)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_j = 1, n2_S(sj)
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l)
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          else
+            !$OMP DO collapse(2)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_j = 1, n2_S(sj)
+                idx_k = tmp_k + k_shift
+                idx_j = tmp_j + j_shift
+                do tmp_i = 1, n1_S(si)
+                  idx_i = tmp_i + i_shift
+                  v_l(idx_i,idx_j,idx_k) = 0d0
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          endif
+
+        enddo
+      enddo
+    enddo
+    !$OMP END PARALLEL
+
+  endif
 
 end
 
@@ -1452,7 +1786,8 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l
   integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
   integer                       :: si,sj,sk,sl,s
 
-  PROVIDE cc_space_v
+  double precision, allocatable :: buffer(:,:,:)
+  double precision, allocatable :: v1(:,:,:), v2(:,:,:)
 
   if (idx_k <= n3_S(1)) then
     sk = 1
@@ -1463,100 +1798,257 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l
   tmp_k = idx_k - k_shift
   k = list3(tmp_k,sk)
 
-  !$OMP PARALLEL &
-  !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) &
-  !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, &
-  !$OMP i,j,l,idx_i,idx_j,idx_l,&
-  !$OMP tmp_i,tmp_j,tmp_l)&
-  !$OMP DEFAULT(NONE)
+  if (do_mo_cholesky) then
 
-  do sl = 1, 2
-    call shift_idx_spin(sl,n4_S,l_shift)
-    do sj = 1, 2
-      call shift_idx_spin(sj,n2_S,j_shift)
-      do si = 1, 2
-        call shift_idx_spin(si,n1_S,i_shift)
+    do sl = 1, 2
+      call shift_idx_spin(sl,n4_S,l_shift)
+      do sj = 1, 2
+        call shift_idx_spin(sj,n2_S,j_shift)
+        do si = 1, 2
+          call shift_idx_spin(si,n1_S,i_shift)
 
-        s = si+sj+sk+sl
-        ! <aa||aa> or <bb||bb>
-        if (s == 4 .or. s == 8) then
-          !$OMP DO collapse(2)
-          do tmp_l = 1, n4_S(sl)
-            do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)
-                l = list4(tmp_l,sl)
+          s = si+sj+sk+sl
+          ! <aa||aa> or <bb||bb>
+          if (s == 4 .or. s == 8) then
+
+              allocate(v1(cholesky_mo_num,n1_S(si),1), v2(cholesky_mo_num,n2_S(sj),n4_S(sl)))
+              allocate(buffer(n1_S(si),n2_S(sj),n4_S(sl)))
+
+              call gen_v_space_chol(n1_S(si),1,list1(1,si),list3(tmp_k,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n1_S(si), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n1_S(si))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l)
+              !$OMP DO collapse(2)
+              do l = 1, n4_S(sl)
+                do j = 1, n2_S(sj)
+                    idx_l = l + l_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_k(idx_i,idx_j,idx_l) = buffer(i,j,l)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+              allocate(v1(cholesky_mo_num,n2_S(sj),1), v2(cholesky_mo_num,n1_S(si),n4_S(sl)))
+              allocate(buffer(n2_S(sj),n1_S(si),n4_S(sl)))
+
+              call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list3(tmp_k,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n2_S(sj), n1_S(si)*n4_S(sl), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l)
+              !$OMP DO collapse(2)
+              do l = 1, n4_S(sl)
+                do j = 1, n2_S(sj)
+                    idx_l = l + l_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_k(idx_i,idx_j,idx_l) = v_k(idx_i,idx_j,idx_l) - buffer(j,i,l)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+          ! <ab||ab> or <ba||ba>
+          elseif (si == sk .and. sj == sl) then
+
+              allocate(v1(cholesky_mo_num,n1_S(si),1), v2(cholesky_mo_num,n2_S(sj),n4_S(sl)))
+              allocate(buffer(n1_S(si),n2_S(sj),n4_S(sl)))
+
+              call gen_v_space_chol(n1_S(si),1,list1(1,si),list3(tmp_k,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n1_S(si), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n1_S(si))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l)
+              !$OMP DO collapse(2)
+              do l = 1, n4_S(sl)
+                do j = 1, n2_S(sj)
+                    idx_l = l + l_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_k(idx_i,idx_j,idx_l) = buffer(i,j,l)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+          ! <ab||ba> or <ba||ab>
+          elseif (si == sl .and. sj == sk) then
+
+              allocate(v1(cholesky_mo_num,n2_S(sj),1), v2(cholesky_mo_num,n1_S(si),n4_S(sl)))
+              allocate(buffer(n2_S(sj),n1_S(si),n4_S(sl)))
+
+              call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list3(tmp_k,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n2_S(sj), n1_S(si)*n4_S(sl), cholesky_mo_num, -1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l)
+              !$OMP DO collapse(2)
+              do l = 1, n4_S(sl)
+                do j = 1, n2_S(sj)
+                    idx_l = l + l_shift
+                    idx_j = j + j_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_k(idx_i,idx_j,idx_l) = buffer(j,i,l)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+          else
+            !$OMP DO collapse(2)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_j = 1, n2_S(sj)
                 idx_l = tmp_l + l_shift
-                j = list2(tmp_j,sj)
                 idx_j = tmp_j + j_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
-                   v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+                do tmp_i = 1, n1_S(si)
+                  idx_i = tmp_i + i_shift
+                  v_k(idx_i,idx_j,idx_l) = 0d0
+                enddo
               enddo
             enddo
-          enddo
-          !$OMP END DO
+            !$OMP END DO
 
-        ! <ab||ab> or <ba||ba>
-        elseif (si == sk .and. sj == sl) then
-          !$OMP DO collapse(2)
-          do tmp_l = 1, n4_S(sl)
-            do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)
-                l = list4(tmp_l,sl)
-                idx_l = tmp_l + l_shift
-                j = list2(tmp_j,sj)
-                idx_j = tmp_j + j_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
-                   v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l)
-              enddo
-            enddo
-          enddo
-          !$OMP END DO
-
-        ! <ab||ba> or <ba||ab>
-        elseif (si == sl .and. sj == sk) then
-          !$OMP DO collapse(2)
-          do tmp_l = 1, n4_S(sl)
-            do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)
-                l = list4(tmp_l,sl)
-                idx_l = tmp_l + l_shift
-                j = list2(tmp_j,sj)
-                idx_j = tmp_j + j_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
-                   v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l)
-              enddo
-            enddo
-          enddo
-          !$OMP END DO
-        else
-          !$OMP DO collapse(2)
-          do tmp_l = 1, n4_S(sl)
-            do tmp_j = 1, n2_S(sj)
-              do tmp_i = 1, n1_S(si)
-                l = list4(tmp_l,sl)
-                idx_l = tmp_l + l_shift
-                j = list2(tmp_j,sj)
-                idx_j = tmp_j + j_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   v_k(idx_i,idx_j,idx_l) = 0d0
-              enddo
-            enddo
-          enddo
-          !$OMP END DO
-        endif
+          endif
 
+        enddo
       enddo
     enddo
-  enddo
-  !$OMP END PARALLEL
 
+  else
+
+    PROVIDE cc_space_v
+
+    !$OMP PARALLEL &
+    !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) &
+    !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, &
+    !$OMP i,j,l,idx_i,idx_j,idx_l,&
+    !$OMP tmp_i,tmp_j,tmp_l)&
+    !$OMP DEFAULT(NONE)
+
+    do sl = 1, 2
+      call shift_idx_spin(sl,n4_S,l_shift)
+      do sj = 1, 2
+        call shift_idx_spin(sj,n2_S,j_shift)
+        do si = 1, 2
+          call shift_idx_spin(si,n1_S,i_shift)
+
+          s = si+sj+sk+sl
+          ! <aa||aa> or <bb||bb>
+          if (s == 4 .or. s == 8) then
+            !$OMP DO collapse(2)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_j = 1, n2_S(sj)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+
+          ! <ab||ab> or <ba||ba>
+          elseif (si == sk .and. sj == sl) then
+            !$OMP DO collapse(2)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_j = 1, n2_S(sj)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l)
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+
+          ! <ab||ba> or <ba||ab>
+          elseif (si == sl .and. sj == sk) then
+            !$OMP DO collapse(2)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_j = 1, n2_S(sj)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l)
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          else
+            !$OMP DO collapse(2)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_j = 1, n2_S(sj)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  v_k(idx_i,idx_j,idx_l) = 0d0
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          endif
+
+        enddo
+      enddo
+    enddo
+    !$OMP END PARALLEL
+
+  endif
 end
 
 ! V_3idx_i_kl
@@ -1581,7 +2073,8 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l
   integer                       :: tmp_i,tmp_j,tmp_k,tmp_l
   integer                       :: si,sj,sk,sl,s
 
-  PROVIDE cc_space_v
+  double precision, allocatable :: buffer(:,:,:)
+  double precision, allocatable :: v1(:,:,:), v2(:,:,:)
 
   if (idx_j <= n2_S(1)) then
     sj = 1
@@ -1592,98 +2085,265 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l
   tmp_j = idx_j - j_shift
   j = list2(tmp_j,sj)
 
-  !$OMP PARALLEL &
-  !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) &
-  !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, &
-  !$OMP i,k,l,idx_i,idx_k,idx_l,&
-  !$OMP tmp_i,tmp_k,tmp_l)&
-  !$OMP DEFAULT(NONE)
 
-  do sl = 1, 2
-    call shift_idx_spin(sl,n4_S,l_shift)
-    do sk = 1, 2
-      call shift_idx_spin(sk,n3_S,k_shift)
-      do si = 1, 2
-        call shift_idx_spin(si,n1_S,i_shift)
+  if (do_mo_cholesky) then
+    do sl = 1, 2
+      call shift_idx_spin(sl,n4_S,l_shift)
+      do sk = 1, 2
+        call shift_idx_spin(sk,n3_S,k_shift)
+        do si = 1, 2
+          call shift_idx_spin(si,n1_S,i_shift)
 
-        s = si+sj+sk+sl
-        ! <aa||aa> or <bb||bb>
-        if (s == 4 .or. s == 8) then
-          !$OMP DO collapse(2)
-          do tmp_l = 1, n4_S(sl)
-            do tmp_k = 1, n3_S(sk)
-              do tmp_i = 1, n1_S(si)
-                l = list4(tmp_l,sl)
-                idx_l = tmp_l + l_shift
-                k = list3(tmp_k,sk)
-                idx_k = tmp_k + k_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
-                   v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+          s = si+sj+sk+sl
+          ! <aa||aa> or <bb||bb>
+          if (s == 4 .or. s == 8) then
+
+              allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,1,n4_S(sl)))
+              allocate(buffer(n1_S(si),n3_S(sk),n4_S(sl)))
+
+              call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(1,n4_S(sl),list2(tmp_j,sj),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n1_S(si)*n3_S(sk), n4_S(sl), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l)
+              !$OMP DO collapse(2)
+              do l = 1, n4_S(sl)
+                do k = 1, n3_S(sk)
+                    idx_l = l + l_shift
+                    idx_k = k + k_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_j(idx_i,idx_k,idx_l) = buffer(i,k,l)
+                    enddo
+                enddo
               enddo
-            enddo
-          enddo
-          !$OMP END DO
+              !$OMP END DO
+              !$OMP END PARALLEL
 
-        ! <ab||ab> or <ba||ba>
-        elseif (si == sk .and. sj == sl) then
-          !$OMP DO collapse(2)
-          do tmp_l = 1, n4_S(sl)
-            do tmp_k = 1, n3_S(sk)
-              do tmp_i = 1, n1_S(si)
-                l = list4(tmp_l,sl)
-                idx_l = tmp_l + l_shift
-                k = list3(tmp_k,sk)
-                idx_k = tmp_k + k_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
-                   v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l)
-              enddo
-            enddo
-          enddo
-          !$OMP END DO
+              deallocate(v1, v2, buffer)
 
-        ! <ab||ba> or <ba||ab>
-        elseif (si == sl .and. sj == sk) then
-          !$OMP DO collapse(2)
-          do tmp_l = 1, n4_S(sl)
-            do tmp_k = 1, n3_S(sk)
-              do tmp_i = 1, n1_S(si)
-                l = list4(tmp_l,sl)
-                idx_l = tmp_l + l_shift
-                k = list3(tmp_k,sk)
-                idx_k = tmp_k + k_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
-                   v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l)
-              enddo
-            enddo
-          enddo
-          !$OMP END DO
-        else
-          !$OMP DO collapse(2)
-          do tmp_l = 1, n4_S(sl)
-            do tmp_k = 1, n3_S(sk)
-              do tmp_i = 1, n1_S(si)
-                l = list4(tmp_l,sl)
-                idx_l = tmp_l + l_shift
-                k = list3(tmp_k,sk)
-                idx_k = tmp_k + k_shift
-                i = list1(tmp_i,si)
-                idx_i = tmp_i + i_shift
-                   v_j(idx_i,idx_k,idx_l) = 0d0
-              enddo
-            enddo
-          enddo
-          !$OMP END DO
-        endif
+              allocate(v1(cholesky_mo_num,1,n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl)))
+              allocate(buffer(n3_S(sk),n1_S(si),n4_S(sl)))
 
+              call gen_v_space_chol(1,n3_S(sk),list2(tmp_j,sj),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l)
+              !$OMP DO collapse(2)
+              do l = 1, n4_S(sl)
+                do k = 1, n3_S(sk)
+                    idx_l = l + l_shift
+                    idx_k = k + k_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_j(idx_i,idx_k,idx_l) = v_j(idx_i,idx_k,idx_l) - buffer(k,i,l)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+          ! <ab||ab> or <ba||ba>
+          elseif (si == sk .and. sj == sl) then
+
+              allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,1,n4_S(sl)))
+              allocate(buffer(n1_S(si),n3_S(sk),n4_S(sl)))
+
+              call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(1,n4_S(sl),list2(tmp_j,sj),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n1_S(si)*n3_S(sk), n4_S(sl), cholesky_mo_num, 1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l)
+              !$OMP DO collapse(2)
+              do l = 1, n4_S(sl)
+                do k = 1, n3_S(sk)
+                    idx_l = l + l_shift
+                    idx_k = k + k_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_j(idx_i,idx_k,idx_l) = buffer(i,k,l)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+          ! <ab||ba> or <ba||ab>
+          elseif (si == sl .and. sj == sk) then
+
+              allocate(v1(cholesky_mo_num,1,n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl)))
+              allocate(buffer(n3_S(sk),n1_S(si),n4_S(sl)))
+
+              call gen_v_space_chol(1,n3_S(sk),list2(tmp_j,sj),list3(1,sk),v1,cholesky_mo_num)
+              call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num)
+
+              call dgemm('T','N', n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, -1.d0, &
+                    v1, cholesky_mo_num, &
+                    v2, cholesky_mo_num, 0.d0, buffer, n3_S(sk))
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l)
+              !$OMP DO collapse(2)
+              do l = 1, n4_S(sl)
+                do k = 1, n3_S(sk)
+                    idx_l = l + l_shift
+                    idx_k = k + k_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_j(idx_i,idx_k,idx_l) = buffer(k,i,l)
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+              deallocate(v1, v2, buffer)
+
+          else
+
+              !$OMP PARALLEL &
+              !$OMP DEFAULT(SHARED) &
+              !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l)
+              !$OMP DO collapse(2)
+              do l = 1, n4_S(sl)
+                do k = 1, n3_S(sk)
+                    idx_l = l + l_shift
+                    idx_k = k + k_shift
+                    do i = 1, n1_S(si)
+                      idx_i = i + i_shift
+                      v_j(idx_i,idx_k,idx_l) = 0d0
+                    enddo
+                enddo
+              enddo
+              !$OMP END DO
+              !$OMP END PARALLEL
+
+          endif
+
+        enddo
       enddo
     enddo
-  enddo
-  !$OMP END PARALLEL
 
+
+
+  else
+
+    PROVIDE cc_space_v
+
+    !$OMP PARALLEL &
+    !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) &
+    !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, &
+    !$OMP i,k,l,idx_i,idx_k,idx_l,&
+    !$OMP tmp_i,tmp_k,tmp_l)&
+    !$OMP DEFAULT(NONE)
+
+    do sl = 1, 2
+      call shift_idx_spin(sl,n4_S,l_shift)
+      do sk = 1, 2
+        call shift_idx_spin(sk,n3_S,k_shift)
+        do si = 1, 2
+          call shift_idx_spin(si,n1_S,i_shift)
+
+          s = si+sj+sk+sl
+          ! <aa||aa> or <bb||bb>
+          if (s == 4 .or. s == 8) then
+            !$OMP DO collapse(2)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                  v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+
+          ! <ab||ab> or <ba||ba>
+          elseif (si == sk .and. sj == sl) then
+            !$OMP DO collapse(2)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                  v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l)
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+
+          ! <ab||ba> or <ba||ab>
+          elseif (si == sl .and. sj == sk) then
+            !$OMP DO collapse(2)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                  v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l)
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          else
+            !$OMP DO collapse(2)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                do tmp_i = 1, n1_S(si)
+                  i = list1(tmp_i,si)
+                  idx_i = tmp_i + i_shift
+                  v_j(idx_i,idx_k,idx_l) = 0d0
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          endif
+
+        enddo
+      enddo
+    enddo
+    !$OMP END PARALLEL
+
+  endif
 end

From 3d46cde2e46c8f6dd0eb0d56d5ff446d649907bb Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Wed, 5 Feb 2025 13:35:38 +0100
Subject: [PATCH 15/59] Cleaning ccsd_spin

---
 src/ccsd/ccsd_space_orb_sub.irp.f  |  10 +-
 src/ccsd/ccsd_spin_orb_sub.irp.f   | 156 ++++++-----------------------
 src/utils_cc/mo_integrals_cc.irp.f | 101 +++++++++++++++++++
 3 files changed, 131 insertions(+), 136 deletions(-)

diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f
index 30f134fc..7cd4c50c 100644
--- a/src/ccsd/ccsd_space_orb_sub.irp.f
+++ b/src/ccsd/ccsd_space_orb_sub.irp.f
@@ -26,7 +26,6 @@ subroutine run_ccsd_space_orb
 
   double precision, allocatable :: all_err(:,:), all_t(:,:)
   integer, allocatable          :: list_occ(:), list_vir(:)
-  integer(bit_kind)             :: det(N_int,2)
   integer                       :: nO, nV, nOa, nVa
 
   call set_multiple_levels_omp(.False.)
@@ -38,9 +37,8 @@ subroutine run_ccsd_space_orb
     PROVIDE all_mo_integrals
   endif
 
-  det = psi_det(:,:,cc_ref)
   print*,'Reference determinant:'
-  call print_det(det,N_int)
+  call print_det(psi_det(1,1,cc_ref),N_int)
 
   nOa = cc_nOa
   nVa = cc_nVa
@@ -57,10 +55,6 @@ subroutine run_ccsd_space_orb
   allocate(list_occ(nO),list_vir(nV))
   list_occ = cc_list_occ
   list_vir = cc_list_vir
-  ! Debug
-  !call extract_list_orb_space(det,nO,nV,list_occ,list_vir)
-  !print*,'occ',list_occ
-  !print*,'vir',list_vir
 
   ! GPU arrays
   call gpu_allocate(d_cc_space_f_oo, nO, nO)
@@ -186,7 +180,7 @@ subroutine run_ccsd_space_orb
 
   call update_tau_space(nO,nV,h_t1,t1,t2,tau)
   call update_tau_x_space(nO,nV,tau,tau_x)
-  call det_energy(det,uncorr_energy)
+  call det_energy(psi_det(1,1,cc_ref),uncorr_energy)
   print*,'Det energy', uncorr_energy
 
   call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy)
diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f
index fe202ebf..fa0983cd 100644
--- a/src/ccsd/ccsd_spin_orb_sub.irp.f
+++ b/src/ccsd/ccsd_spin_orb_sub.irp.f
@@ -14,65 +14,31 @@ subroutine run_ccsd_spin_orb
   double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:) !, cW_vvvv(:,:,:,:)
 
   double precision, allocatable :: f_o(:), f_v(:)
-  double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:)
-  double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:)
-  double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:)
-  double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:)
-  double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:)
-  double precision, allocatable :: v_vvvv(:,:,:,:)
+!  double precision, allocatable :: v_ovvv(:,:,:,:)
 
   double precision, allocatable :: all_err(:,:), all_t(:,:)
 
   logical                       :: not_converged
-  integer                       :: nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4)
+  integer                       :: n_spin(4)
   integer                       :: nb_iter, i,j,a,b
   double precision              :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi
-  integer(bit_kind)             :: det(N_int,2)
 
-  det = psi_det(:,:,cc_ref)
+  if (do_mo_cholesky) then
+    PROVIDE cholesky_mo_transp
+    FREE cholesky_ao
+  else
+    PROVIDE all_mo_integrals
+  endif
+
   print*,'Reference determinant:'
-  call print_det(det,N_int)
-
-  ! Extract number of occ/vir alpha/beta spin orbitals
-  !call extract_n_spin(det,n_spin)
-  nOa = cc_nOa !n_spin(1)
-  nOb = cc_nOb !n_spin(2)
-  nVa = cc_nVa !n_spin(3)
-  nVb = cc_nVb !n_spin(4)
-
-  ! Number of occ/vir spin orb per spin
-  nO_S = cc_nO_S !(/nOa,nOb/)
-  nV_S = cc_nV_S !(/nVa,nVb/)
-  ! Debug
-  !print*,nO_S,nV_S
-
-  ! Maximal number of occ/vir
-  nO_m = cc_nO_m !max(nOa, nOb)
-  nV_m = cc_nV_m !max(nVa, nVb)
-  ! Debug
-  !print*,nO_m,nV_m
+  call print_det(psi_det(1,1,cc_ref),N_int)
 
   ! Allocation
   allocate(t1(cc_nOab,cc_nVab), t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau_t(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
   allocate(r1(cc_nOab,cc_nVab), r2(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
   allocate(cF_oo(cc_nOab,cc_nOab), cF_ov(cc_nOab,cc_nVab), cF_vv(cc_nVab,cc_nVab))
   allocate(cW_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab), cW_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))!, cW_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab))
-  allocate(v_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab))
-  !allocate(v_vooo(cc_nVab,cc_nOab,cc_nOab,cc_nOab))
-  allocate(v_ovoo(cc_nOab,cc_nVab,cc_nOab,cc_nOab))
-  allocate(v_oovo(cc_nOab,cc_nOab,cc_nVab,cc_nOab))
-  allocate(v_ooov(cc_nOab,cc_nOab,cc_nOab,cc_nVab))
-  allocate(v_vvoo(cc_nVab,cc_nVab,cc_nOab,cc_nOab))
-  !allocate(v_vovo(cc_nVab,cc_nOab,cc_nVab,cc_nOab))
-  !allocate(v_voov(cc_nVab,cc_nOab,cc_nOab,cc_nVab))
-  allocate(v_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))
-  allocate(v_ovov(cc_nOab,cc_nVab,cc_nOab,cc_nVab))
-  allocate(v_oovv(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
-  !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab))
-  !allocate(v_vvov(cc_nVab,cc_nVab,cc_nOab,cc_nVab))
-  !allocate(v_vovv(cc_nVab,cc_nOab,cc_nVab,cc_nVab))
-  !allocate(v_ovvv(cc_nOab,cc_nVab,cc_nVab,cc_nVab))
-  !allocate(v_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab))
+
   allocate(f_o(cc_nOab), f_v(cc_nVab))
 
   ! Allocation for the diis
@@ -90,45 +56,20 @@ subroutine run_ccsd_spin_orb
     f_v(i) = cc_spin_f_vv(i,i)
   enddo
 
-  ! Bi electronic integrals from list
-  call wall_time(ti)
-  ! OOOO
-  call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nOab,cc_nOab, v_oooo)
-
-  ! OOO V
-  !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nOab,cc_nOab, v_vooo)
-  call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nOab,cc_nOab, v_ovoo)
-  call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nVab,cc_nOab, v_oovo)
-  call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nOab,cc_nVab, v_ooov)
-
-  ! OO VV
-  call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nOab,cc_nOab, v_vvoo)
-  !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nVab,cc_nOab, v_vovo)
-  !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nOab,cc_nVab, v_voov)
-  call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nVab,cc_nOab, v_ovvo)
-  call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nOab,cc_nVab, v_ovov)
-  call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nVab,cc_nVab, v_oovv)
-
-  ! O VVV
-  !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo)
-  !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nOab,cc_nVab, v_vvov)
-  !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nVab,cc_nVab, v_vovv)
-  !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nVab,cc_nVab, v_ovvv)
-
-  ! VVVV
-  !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nVab,cc_nVab, v_vvvv)
-  call wall_time(tf)
-  if (cc_dev) then
-    print*,'Load bi elec int:',tf-ti,'s'
-  endif
 
   ! Init of T
   t1 = 0d0
   call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,t1)
-  call guess_t2(cc_nOab,cc_nVab,f_o,f_v,v_oovv,t2)
+  call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,t2)
   call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau)
   call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t)
 
+  call det_energy(psi_det(1,1,cc_ref),uncorr_energy)
+  print*,'Det energy', uncorr_energy
+
+  call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy)
+  print*,'guess energy', uncorr_energy+energy, energy
+
   ! Loop init
   nb_iter = 0
   not_converged = .True.
@@ -137,11 +78,6 @@ subroutine run_ccsd_spin_orb
   max_r1 = 0d0
   max_r2 = 0d0
 
-  call det_energy(det,uncorr_energy)
-  print*,'Det energy', uncorr_energy
-  call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy)
-  print*,'guess energy', uncorr_energy+energy, energy
-
   write(*,'(A77)') ' -----------------------------------------------------------------------------'
   write(*,'(A77)') ' |   It.  |       E(CCSD) (Ha) | Correlation (Ha) |  Conv. T1  |  Conv. T2  |'
   write(*,'(A77)') ' -----------------------------------------------------------------------------'
@@ -152,38 +88,16 @@ subroutine run_ccsd_spin_orb
   do while (not_converged)
 
     ! Intermediates
-    call wall_time(tbi)
-    call wall_time(ti)
-    call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,v_ooov,v_oovv,cF_oo)
-    call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,v_oovv,cF_ov)
-    call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,v_oovv,cF_vv)
-    call wall_time(tf)
-    if (cc_dev) then
-      print*,'Compute cFs:',tf-ti,'s'
-    endif
+    call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo)
+    call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,cc_spin_v_oovv,cF_ov)
+    call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv)
 
-    call wall_time(ti)
-    call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo)
-    call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo)
-    !call compute_cW_vvvv(cc_nOab,cc_nVab,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv)
-    call wall_time(tf)
-    if (cc_dev) then
-      print*,'Compute cFs:',tf-ti,'s'
-    endif
+    call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo)
+    call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo)
 
     ! Residuals
-    call wall_time(ti)
-    call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1)
-    call wall_time(tf)
-    if (cc_dev) then
-      print*,'Compute r1:',tf-ti,'s'
-    endif
-    call wall_time(ti)
-    call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2)
-    call wall_time(tf)
-    if (cc_dev) then
-      print*,'Compute r2:',tf-ti,'s'
-    endif
+    call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1)
+    call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2)
 
     ! Max elements in the residuals
     max_r1 = maxval(abs(r1(:,:)))
@@ -213,7 +127,7 @@ subroutine run_ccsd_spin_orb
     endif
 
     ! Print
-    call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy)
+    call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy)
     call wall_time(tfi)
 
     write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', &
@@ -258,25 +172,13 @@ subroutine run_ccsd_spin_orb
   deallocate(r1,r2)
   deallocate(cF_oo,cF_ov,cF_vv)
   deallocate(cW_oooo,cW_ovvo)!,cW_vvvv)
-  deallocate(v_oooo)
-  deallocate(v_ovoo,v_oovo)
-  deallocate(v_ovvo,v_ovov,v_oovv)
 
   double precision :: t_corr
   t_corr = 0.d0
   if (cc_par_t .and. elec_alpha_num  +elec_beta_num > 2) then
     print*,'CCSD(T) calculation...'
     call wall_time(ta)
-    !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab))
-    !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, &
-    !   cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, &
-    !   cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
-    !   cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo)
-
-    !call ccsd_par_t_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,v_vvvo,t_corr)
-    call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,t_corr)
-    !print*,'Working on it...'
-    !call abort
+    call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr)
     call wall_time(tb)
     print*,'Done'
     print*,'Time: ',tb-ta, ' s'
@@ -290,9 +192,7 @@ subroutine run_ccsd_spin_orb
   call save_energy(uncorr_energy + energy, t_corr)
 
   deallocate(f_o,f_v)
-  deallocate(v_ooov,v_vvoo,t1,t2)
-  !deallocate(v_ovvv,v_vvvo,v_vovv)
-  !deallocate(v_vvvv)
+  deallocate(t1,t2)
 
 end
 
diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f
index 813c186a..4053c49b 100644
--- a/src/utils_cc/mo_integrals_cc.irp.f
+++ b/src/utils_cc/mo_integrals_cc.irp.f
@@ -227,6 +227,19 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n
 
 END_PROVIDER
 
+BEGIN_PROVIDER [double precision, cc_spin_v_oooo, (cc_nOab, cc_nOab, cc_nOab, cc_nOab)]
+
+  implicit none
+
+  !TODO
+  call gen_v_spin(cc_nO_m,cc_nO_m,cc_nO_m,cc_nO_m, &
+                    cc_nO_S,cc_nO_S,cc_nO_S,cc_nO_S, &
+                    cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, &
+                    cc_nOab,cc_nOab,cc_nOab,cc_nOab, &
+                    cc_spin_v_oooo)
+
+END_PROVIDER
+
 ! vooo
 
 BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_nOa)]
@@ -305,6 +318,17 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_n
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, cc_spin_v_ovoo, (cc_nOab,cc_nVab,cc_nOab,cc_nOab)]
+ implicit none
+
+ call gen_v_spin(cc_nO_m,cc_nV_m,cc_nO_m,cc_nO_m, &
+                 cc_nO_S,cc_nV_S,cc_nO_S,cc_nO_S, &
+                 cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, &
+                 cc_nOab,cc_nVab,cc_nOab,cc_nOab, &
+                 cc_spin_v_ovoo)
+
+END_PROVIDER
+
 ! oovo
 
 BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_nOa)]
@@ -339,6 +363,17 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_n
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, cc_spin_v_oovo, (cc_nOab,cc_nOab,cc_nVab,cc_nOab)]
+ implicit none
+
+ call gen_v_spin(cc_nO_m,cc_nO_m,cc_nV_m,cc_nO_m, &
+                 cc_nO_S,cc_nO_S,cc_nV_S,cc_nO_S, &
+                 cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, &
+                 cc_nOab,cc_nOab,cc_nVab,cc_nOab, &
+                 cc_spin_v_oovo)
+END_PROVIDER
+
+
 ! ooov
 
 BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_nVa)]
@@ -373,6 +408,17 @@ BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_n
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, cc_spin_v_ooov, (cc_nOab,cc_nOab,cc_nOab,cc_nVab)]
+ implicit none
+
+ call gen_v_spin(cc_nO_m,cc_nO_m,cc_nO_m,cc_nV_m, &
+                 cc_nO_S,cc_nO_S,cc_nO_S,cc_nV_S, &
+                 cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, &
+                 cc_nOab,cc_nOab,cc_nOab,cc_nVab, &
+                 cc_spin_v_ooov)
+END_PROVIDER
+
+
 ! vvoo
 
 BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)]
@@ -416,6 +462,18 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n
 
 END_PROVIDER
 
+
+BEGIN_PROVIDER [ double precision, cc_spin_v_vvoo, (cc_nVab,cc_nVab,cc_nOab,cc_nOab)]
+ implicit none
+
+ call gen_v_spin(cc_nV_m,cc_nV_m,cc_nO_m,cc_nO_m, &
+                 cc_nV_S,cc_nV_S,cc_nO_S,cc_nO_S, &
+                 cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, &
+                 cc_nVab,cc_nVab,cc_nOab,cc_nOab, &
+                 cc_spin_v_vvoo)
+END_PROVIDER
+
+
 ! vovo
 
 BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_nOa)]
@@ -527,6 +585,16 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_n
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, cc_spin_v_ovvo, (cc_nOab,cc_nVab,cc_nVab,cc_nOab) ]
+ implicit none
+
+ call gen_v_spin(cc_nO_m,cc_nV_m,cc_nV_m,cc_nO_m, &
+                 cc_nO_S,cc_nV_S,cc_nV_S,cc_nO_S, &
+                 cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, &
+                 cc_nOab,cc_nVab,cc_nVab,cc_nOab, &
+                 cc_spin_v_ovvo)
+END_PROVIDER
+
 ! ovov
 
 BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_nVa)]
@@ -561,6 +629,16 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_n
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, cc_spin_v_ovov, (cc_nOab,cc_nVab,cc_nOab,cc_nVab) ]
+ implicit none
+
+ call gen_v_spin(cc_nO_m,cc_nV_m,cc_nO_m,cc_nV_m, &
+                 cc_nO_S,cc_nV_S,cc_nO_S,cc_nV_S, &
+                 cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, &
+                 cc_nOab,cc_nVab,cc_nOab,cc_nVab, &
+                 cc_spin_v_ovov)
+END_PROVIDER
+
 ! oovv
 
 BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)]
@@ -595,6 +673,16 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, cc_spin_v_oovv, (cc_nOab,cc_nOab,cc_nVab,cc_nVab) ]
+ implicit none
+
+ call gen_v_spin(cc_nO_m,cc_nO_m,cc_nV_m,cc_nV_m, &
+                 cc_nO_S,cc_nO_S,cc_nV_S,cc_nV_S, &
+                 cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, &
+                 cc_nOab,cc_nOab,cc_nVab,cc_nVab, &
+                 cc_spin_v_oovv)
+END_PROVIDER
+
 ! vvvo
 
 BEGIN_PROVIDER [double precision, cc_space_v_vvvo, (cc_nVa, cc_nVa, cc_nVa, cc_nOa)]
@@ -625,6 +713,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovv, (cc_nVa, cc_nOa, cc_nVa, cc_n
 
 END_PROVIDER
 
+
 ! ovvv
 
 BEGIN_PROVIDER [double precision, cc_space_v_ovvv, (cc_nOa, cc_nVa, cc_nVa, cc_nVa)]
@@ -677,6 +766,18 @@ BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_mo_num, cc_nOa,
 
 END_PROVIDER
 
+BEGIN_PROVIDER [double precision, cc_spin_v_oo_chol, (cholesky_mo_num, cc_nOab, cc_nOab)]
+
+  implicit none
+  integer :: list_occ(cc_nOab)
+
+  list_occ(1:cc_nOa) = cc_list_occ_spin(1:cc_nOa,1)
+  list_occ(cc_nOa+1:cc_nOab) = cc_list_occ_spin(1:cc_nOb,2)
+  call gen_v_space_chol(cc_nOab, cc_nOab, list_occ, list_occ, &
+         cc_spin_v_oo_chol, cholesky_mo_num)
+
+END_PROVIDER
+
 ! ppqq
 
 BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)]

From 243ee0ed1457d0bb3ea725a2676620584e2b27db Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Wed, 5 Feb 2025 13:59:37 +0100
Subject: [PATCH 16/59] Introduced GPU arrays

---
 src/ccsd/ccsd_space_orb_sub.irp.f |  5 +-
 src/ccsd/ccsd_spin_orb_sub.irp.f  | 89 ++++++++++++++++++-------------
 2 files changed, 55 insertions(+), 39 deletions(-)

diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f
index 7cd4c50c..af7e9285 100644
--- a/src/ccsd/ccsd_space_orb_sub.irp.f
+++ b/src/ccsd/ccsd_space_orb_sub.irp.f
@@ -177,9 +177,11 @@ subroutine run_ccsd_space_orb
   call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,h_t2)
   call gpu_upload(h_t2, t2)
 
+  deallocate(h_t1, h_t2)
 
-  call update_tau_space(nO,nV,h_t1,t1,t2,tau)
+  call update_tau_space(nO,nV,t1%f,t1,t2,tau)
   call update_tau_x_space(nO,nV,tau,tau_x)
+
   call det_energy(psi_det(1,1,cc_ref),uncorr_energy)
   print*,'Det energy', uncorr_energy
 
@@ -310,7 +312,6 @@ subroutine run_ccsd_space_orb
 
   call save_energy(uncorr_energy + energy, e_t)
 
-  deallocate(h_t1, h_t2)
   if (do_mo_cholesky) then
     call gpu_deallocate(d_cc_space_v_oo_chol)
     call gpu_deallocate(d_cc_space_v_ov_chol)
diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f
index fa0983cd..16062356 100644
--- a/src/ccsd/ccsd_spin_orb_sub.irp.f
+++ b/src/ccsd/ccsd_spin_orb_sub.irp.f
@@ -1,6 +1,5 @@
-! Code
-
 subroutine run_ccsd_spin_orb
+  use gpu
 
   implicit none
 
@@ -8,8 +7,6 @@ subroutine run_ccsd_spin_orb
   ! CCSD in spin orbitals
   END_DOC
 
-  double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:)
-  double precision, allocatable :: r1(:,:), r2(:,:,:,:)
   double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:)
   double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:) !, cW_vvvv(:,:,:,:)
 
@@ -23,6 +20,9 @@ subroutine run_ccsd_spin_orb
   integer                       :: nb_iter, i,j,a,b
   double precision              :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi
 
+  type(gpu_double4) :: t2, r2, tau, tau_t
+  type(gpu_double2) :: t1, r1
+
   if (do_mo_cholesky) then
     PROVIDE cholesky_mo_transp
     FREE cholesky_ao
@@ -34,13 +34,18 @@ subroutine run_ccsd_spin_orb
   call print_det(psi_det(1,1,cc_ref),N_int)
 
   ! Allocation
-  allocate(t1(cc_nOab,cc_nVab), t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau_t(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
-  allocate(r1(cc_nOab,cc_nVab), r2(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
   allocate(cF_oo(cc_nOab,cc_nOab), cF_ov(cc_nOab,cc_nVab), cF_vv(cc_nVab,cc_nVab))
   allocate(cW_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab), cW_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))!, cW_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab))
 
   allocate(f_o(cc_nOab), f_v(cc_nVab))
 
+  call gpu_allocate(t1, cc_nOab,cc_nVab)
+  call gpu_allocate(r1, cc_nOab,cc_nVab)
+  call gpu_allocate(t2, cc_nOab,cc_nOab,cc_nVab,cc_nVab)
+  call gpu_allocate(r2, cc_nOab,cc_nOab,cc_nVab,cc_nVab)
+  call gpu_allocate(tau,   cc_nOab,cc_nOab,cc_nVab,cc_nVab)
+  call gpu_allocate(tau_t, cc_nOab,cc_nOab,cc_nVab,cc_nVab)
+
   ! Allocation for the diis
   if (cc_update_method == 'diis') then
     allocate(all_err(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth), all_t(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth))
@@ -58,23 +63,29 @@ subroutine run_ccsd_spin_orb
 
 
   ! Init of T
-  t1 = 0d0
-  call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,t1)
-  call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,t2)
-  call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau)
-  call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t)
+  double precision, allocatable :: h_t1(:,:),  h_t2(:,:,:,:)
+  allocate(h_t1(cc_nOab,cc_nVab), h_t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab))
+  h_t1 = 0d0
+  call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,h_t1)
+  call gpu_upload(h_t1, t1)
+
+  call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,h_t2)
+  call gpu_upload(h_t2, t2)
+
+  deallocate(h_t1,h_t2)
+
+  call compute_tau_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f)
+  call compute_tau_t_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau_t%f)
 
   call det_energy(psi_det(1,1,cc_ref),uncorr_energy)
   print*,'Det energy', uncorr_energy
 
-  call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy)
+  call ccsd_energy_spin(cc_nOab,cc_nVab,t1%f,t2%f,cc_spin_F_ov,cc_spin_v_oovv,energy)
   print*,'guess energy', uncorr_energy+energy, energy
 
   ! Loop init
   nb_iter = 0
   not_converged = .True.
-  r1 = 0d0
-  r2 = 0d0
   max_r1 = 0d0
   max_r2 = 0d0
 
@@ -88,46 +99,46 @@ subroutine run_ccsd_spin_orb
   do while (not_converged)
 
     ! Intermediates
-    call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo)
-    call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,cc_spin_v_oovv,cF_ov)
-    call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv)
+    call compute_cF_oo(cc_nOab,cc_nVab,t1%f,tau_t%f,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo)
+    call compute_cF_ov(cc_nOab,cc_nVab,t1%f,cc_spin_F_ov,cc_spin_v_oovv,cF_ov)
+    call compute_cF_vv(cc_nOab,cc_nVab,t1%f,tau_t%f,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv)
 
-    call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo)
-    call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo)
+    call compute_cW_oooo(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo)
+    call compute_cW_ovvo(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo)
 
     ! Residuals
-    call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1)
-    call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2)
+    call compute_r1_spin(cc_nOab,cc_nVab,t1%f,t2%f,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1%f)
+    call compute_r2_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2%f)
 
     ! Max elements in the residuals
-    max_r1 = maxval(abs(r1(:,:)))
-    max_r2 = maxval(abs(r2(:,:,:,:)))
+    max_r1 = maxval(abs(r1%f(:,:)))
+    max_r2 = maxval(abs(r2%f(:,:,:,:)))
     max_r  = max(max_r1,max_r2)
 
     call wall_time(ti)
     ! Update
     if (cc_update_method == 'diis') then
-      !call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
-      !call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
-      call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t)
+      !call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err1,all_err2,all_t1%f,all_t2)
+      !call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err1,all_err2,all_t1%f,all_t2)
+      call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err,all_t)
 
     ! Standard update as T = T - Delta
     elseif (cc_update_method == 'none') then
-      call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1,t1)
-      call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2,t2)
+      call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1%f,t1%f)
+      call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2%f,t2%f)
     else
       print*,'Unkonw cc_method_method: '//cc_update_method
     endif
 
-    call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau)
-    call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t)
+    call compute_tau_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f)
+    call compute_tau_t_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau_t%f)
     call wall_time(tf)
     if (cc_dev) then
       print*,'Update:',tf-ti,'s'
     endif
 
     ! Print
-    call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy)
+    call ccsd_energy_spin(cc_nOab,cc_nVab,t1%f,t2%f,cc_spin_F_ov,cc_spin_v_oovv,energy)
     call wall_time(tfi)
 
     write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', &
@@ -159,8 +170,8 @@ subroutine run_ccsd_spin_orb
   print*,''
 
   if (write_amplitudes) then
-    call write_t1(cc_nOab,cc_nVab,t1)
-    call write_t2(cc_nOab,cc_nVab,t2)
+    call write_t1(cc_nOab,cc_nVab,t1%f)
+    call write_t2(cc_nOab,cc_nVab,t2%f)
     call ezfio_set_utils_cc_io_amplitudes('Read')
   endif
 
@@ -168,8 +179,6 @@ subroutine run_ccsd_spin_orb
   if (cc_update_method == 'diis') then
      deallocate(all_err,all_t)
   endif
-  deallocate(tau,tau_t)
-  deallocate(r1,r2)
   deallocate(cF_oo,cF_ov,cF_vv)
   deallocate(cW_oooo,cW_ovvo)!,cW_vvvv)
 
@@ -178,7 +187,7 @@ subroutine run_ccsd_spin_orb
   if (cc_par_t .and. elec_alpha_num  +elec_beta_num > 2) then
     print*,'CCSD(T) calculation...'
     call wall_time(ta)
-    call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr)
+    call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1%f,t2%f,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr)
     call wall_time(tb)
     print*,'Done'
     print*,'Time: ',tb-ta, ' s'
@@ -192,7 +201,13 @@ subroutine run_ccsd_spin_orb
   call save_energy(uncorr_energy + energy, t_corr)
 
   deallocate(f_o,f_v)
-  deallocate(t1,t2)
+
+  call gpu_deallocate(t1)
+  call gpu_deallocate(r1)
+  call gpu_deallocate(t2)
+  call gpu_deallocate(r2)
+  call gpu_deallocate(tau)
+  call gpu_deallocate(tau_t)
 
 end
 

From 6b597c5ceeb7ccb12e32f477c36989c957c7189a Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 6 Feb 2025 11:49:42 +0100
Subject: [PATCH 17/59] Optimization in CASSCF

---
 src/casscf_cipsi/chol_bielec.irp.f | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/casscf_cipsi/chol_bielec.irp.f b/src/casscf_cipsi/chol_bielec.irp.f
index f69832c1..a22ad7f3 100644
--- a/src/casscf_cipsi/chol_bielec.irp.f
+++ b/src/casscf_cipsi/chol_bielec.irp.f
@@ -191,10 +191,15 @@ double precision function bielec_PQxx_no(i_mo, j_mo, i_ca, j_ca)
  END_DOC
  integer, intent(in) :: i_ca, j_ca, i_mo, j_mo
  integer :: ii_ca, jj_ca
- double precision :: bielec_no_basis
  ii_ca = list_core_inact_act(i_ca)
  jj_ca = list_core_inact_act(j_ca)
- bielec_PQxx_no = bielec_no_basis(i_mo,j_mo,ii_ca,jj_ca)
+! double precision :: bielec_no_basis
+! bielec_PQxx_no = bielec_no_basis(i_mo,j_mo,ii_ca,jj_ca)
+ integer :: i
+ bielec_PQxx_no = 0.d0
+ do i = 1, cholesky_mo_num
+  bielec_PQxx_no = bielec_PQxx_no + cholesky_no_total_transp(i,i_mo, j_mo) * cholesky_no_total_transp(i,ii_ca,jj_ca)
+ enddo
 end
 
 double precision function bielec_PxxQ_no(i_mo, j_ca, i_ca, j_mo)
@@ -206,10 +211,15 @@ double precision function bielec_PxxQ_no(i_mo, j_ca, i_ca, j_mo)
   END_DOC
  integer, intent(in) :: i_ca, j_ca, i_mo, j_mo
  integer :: ii_ca, jj_ca
- double precision :: bielec_no_basis
  ii_ca = list_core_inact_act(i_ca)
  jj_ca = list_core_inact_act(j_ca)
- bielec_PxxQ_no = bielec_no_basis(i_mo, jj_ca, ii_ca, j_mo)
+ double precision :: bielec_no_basis
+! bielec_PxxQ_no = bielec_no_basis(i_mo, jj_ca, ii_ca, j_mo)
+ integer :: i
+ bielec_PxxQ_no = 0.d0
+ do i = 1, cholesky_mo_num
+  bielec_PxxQ_no = bielec_PxxQ_no + cholesky_no_total_transp(i,i_mo, jj_ca) * cholesky_no_total_transp(i,ii_ca,j_mo)
+ enddo
 
 end
 

From 8c7184fb774706add1e56ed536cb3f18126aa79d Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 6 Feb 2025 11:50:11 +0100
Subject: [PATCH 18/59] Speed up cache in integrals

---
 src/davidson/u0_hs2_u0.irp.f          | 2 +-
 src/mo_two_e_ints/map_integrals.irp.f | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/davidson/u0_hs2_u0.irp.f b/src/davidson/u0_hs2_u0.irp.f
index f2ce7aa9..dd5e01eb 100644
--- a/src/davidson/u0_hs2_u0.irp.f
+++ b/src/davidson/u0_hs2_u0.irp.f
@@ -158,7 +158,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,
   double precision, intent(out)  :: v_t(N_st,sze), s_t(N_st,sze)
 
 
-  PROVIDE ref_bitmask_energy N_int
+  PROVIDE ref_bitmask_energy N_int all_mo_integrals
 
   select case (N_int)
     case (1)
diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f
index 5b233899..b5f78b7b 100644
--- a/src/mo_two_e_ints/map_integrals.irp.f
+++ b/src/mo_two_e_ints/map_integrals.irp.f
@@ -81,11 +81,15 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:mo_integrals_cache_s
  integer(key_kind)              :: idx
  real(integral_kind)            :: integral
  FREE ao_integrals_cache
+
  if (do_mo_cholesky) then
 
    call set_multiple_levels_omp(.False.)
-   !$OMP PARALLEL DO PRIVATE (k,l,ii)
+
+
+   !$OMP PARALLEL DO PRIVATE(k,l,ii) SCHEDULE(dynamic)
    do l=mo_integrals_cache_min,mo_integrals_cache_max
+     print *, l
      do k=mo_integrals_cache_min,mo_integrals_cache_max
          ii = int(l-mo_integrals_cache_min,8)
          ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8))
@@ -101,7 +105,7 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:mo_integrals_cache_s
    !$OMP END PARALLEL DO
 
  else
-   !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral)
+   !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) SCHEDULE(dynamic)
    do l=mo_integrals_cache_min,mo_integrals_cache_max
      do k=mo_integrals_cache_min,mo_integrals_cache_max
        do j=mo_integrals_cache_min,mo_integrals_cache_max

From 4792e86e4d724fedb20dea047d9f7ce0107f40d2 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 6 Feb 2025 12:31:31 +0100
Subject: [PATCH 19/59] DGEMM in mcscf_fock

---
 src/casscf_cipsi/mcscf_fock.irp.f | 118 +++++++++++++++++++-----------
 1 file changed, 74 insertions(+), 44 deletions(-)

diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f
index 82b710a7..738dee2c 100644
--- a/src/casscf_cipsi/mcscf_fock.irp.f
+++ b/src/casscf_cipsi/mcscf_fock.irp.f
@@ -3,37 +3,52 @@ BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ]
    ! the inactive Fock matrix, in molecular orbitals
    END_DOC
    implicit none
-   integer                        :: p,q,k,kk,t,tt,u,uu
-   double precision :: bielec_pxxq_no, bielec_pqxx_no
-   
+   integer                        :: i,p,q,k,kk,t,tt,u,uu
+
    do q=1,mo_num
      do p=1,mo_num
        Fipq(p,q)=one_ints_no(p,q)
      end do
    end do
-   
+
    ! the inactive Fock matrix
    do k=1,n_core_inact_orb
-     kk=list_core_inact(k)
-     do q=1,mo_num
-       do p=1,mo_num
-         Fipq(p,q)+=2.D0*bielec_pqxx_no(p,q,k,k) -bielec_pxxq_no(p,k,k,q)
-       end do
-     end do
+     kk=list_core_inact_act(k)
+!     do q=1,mo_num
+!       do p=1,mo_num
+!         do i=1,cholesky_mo_num
+!           Fipq(p,q) = Fipq(p,q) + 2.d0* cholesky_no_total_transp(i,p,q) * cholesky_no_total_transp(i,kk,kk)
+!         enddo
+!       end do
+!     end do
+     call dgemm('T','N', mo_num*mo_num, 1, cholesky_mo_num, 2.d0, &
+                 cholesky_no_total_transp, cholesky_mo_num, &
+                 cholesky_no_total_transp(1,kk,kk), cholesky_mo_num, 1.d0, &
+                 Fipq, mo_num*mo_num)
+!     do q=1,mo_num
+!       do p=1,mo_num
+!         do i=1,cholesky_mo_num
+!           Fipq(p,q) = Fipq(p,q) - cholesky_no_total_transp(i,p,kk) * cholesky_no_total_transp(i,kk,q)
+!         enddo
+!       end do
+!     end do
+     call dgemm('T','N', mo_num, mo_num, cholesky_mo_num, -1.d0, &
+                 cholesky_no_total_transp(1,1,kk), cholesky_mo_num, &
+                 cholesky_no_total_transp(1,kk,1), cholesky_mo_num*mo_num, 1.d0, &
+                 Fipq, mo_num)
    end do
-   
+
    if (bavard) then
-     integer                        :: i
      write(6,*)
      write(6,*) ' the diagonal of the inactive effective Fock matrix '
      write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num)
      write(6,*)
    end if
-   
-   
+
+
 END_PROVIDER
- 
- 
+
+
 BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ]
    BEGIN_DOC
    ! the active active Fock matrix, in molecular orbitals
@@ -45,27 +60,42 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ]
    END_DOC
    implicit none
    integer                        :: p,q,k,kk,t,tt,u,uu
-   double precision :: bielec_pxxq_no, bielec_pqxx_no
-   
+
    Fapq = 0.d0
-   
+
    ! the active Fock matrix, D0tu is diagonal
    do t=1,n_act_orb
      tt=list_act(t)
-     do q=1,mo_num
-       do p=1,mo_num
-         Fapq(p,q)+=occnum(tt)                                       &
-             *(bielec_pqxx_no(p,q,tt,tt)-0.5D0*bielec_pxxq_no(p,tt,tt,q))
-       end do
-     end do
+!     do q=1,mo_num
+!       do p=1,mo_num
+!         do i=1,cholesky_mo_num
+!           Fapq(p,q) = Fapq(p,q) + occnum(tt)* cholesky_no_total_transp(i,p,q) * cholesky_no_total_transp(i,tt,tt)
+!         enddo
+!       end do
+!     end do
+     call dgemm('T','N', mo_num*mo_num, 1, cholesky_mo_num, occnum(tt), &
+                 cholesky_no_total_transp, cholesky_mo_num, &
+                 cholesky_no_total_transp(1,tt,tt), cholesky_mo_num, 1.d0, &
+                 Fapq, mo_num*mo_num)
+!     do q=1,mo_num
+!       do p=1,mo_num
+!         do i=1,cholesky_mo_num
+!           Fapq(p,q) = Fapq(p,q) - 0.5d0*occnum(tt)*cholesky_no_total_transp(i,p,tt) * cholesky_no_total_transp(i,tt,q)
+!         enddo
+!       end do
+!     end do
+     call dgemm('T','N', mo_num, mo_num, cholesky_mo_num, -0.5d0*occnum(tt), &
+                 cholesky_no_total_transp(1,1,tt), cholesky_mo_num, &
+                 cholesky_no_total_transp(1,tt,1), cholesky_mo_num*mo_num, 1.d0, &
+                 Fapq, mo_num)
    end do
-   
+
    if (bavard) then
      integer                        :: i
      write(6,*)
      write(6,*) ' the effective Fock matrix over MOs'
      write(6,*)
-     
+
      write(6,*)
      write(6,*) ' the diagonal of the inactive effective Fock matrix '
      write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num)
@@ -75,35 +105,35 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ]
      write(6,'(5(i3,F12.5))') (i,Fapq(i,i),i=1,mo_num)
      write(6,*)
    end if
-   
-   
+
+
 END_PROVIDER
- 
- BEGIN_PROVIDER [ double precision, mcscf_fock_alpha_ao, (ao_num, ao_num)] 
-&BEGIN_PROVIDER [ double precision, mcscf_fock_beta_ao, (ao_num, ao_num)] 
+
+ BEGIN_PROVIDER [ double precision, mcscf_fock_alpha_ao, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, mcscf_fock_beta_ao, (ao_num, ao_num)]
  implicit none
  BEGIN_DOC
-  ! mcscf_fock_alpha_ao are set to usual Fock like operator but computed with the MCSCF densities on the AO basis 
+  ! mcscf_fock_alpha_ao are set to usual Fock like operator but computed with the MCSCF densities on the AO basis
  END_DOC
  SCF_density_matrix_ao_alpha = D0tu_alpha_ao
  SCF_density_matrix_ao_beta = D0tu_beta_ao
- soft_touch SCF_density_matrix_ao_alpha SCF_density_matrix_ao_beta 
+ soft_touch SCF_density_matrix_ao_alpha SCF_density_matrix_ao_beta
  mcscf_fock_beta_ao = fock_matrix_ao_beta
  mcscf_fock_alpha_ao = fock_matrix_ao_alpha
-END_PROVIDER 
+END_PROVIDER
 
 
- BEGIN_PROVIDER [ double precision, mcscf_fock_alpha_mo, (mo_num, mo_num)] 
-&BEGIN_PROVIDER [ double precision, mcscf_fock_beta_mo, (mo_num, mo_num)] 
+ BEGIN_PROVIDER [ double precision, mcscf_fock_alpha_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, mcscf_fock_beta_mo, (mo_num, mo_num)]
  implicit none
  BEGIN_DOC
-  ! Mo_mcscf_fock_alpha are set to usual Fock like operator but computed with the MCSCF densities on the MO basis 
+  ! Mo_mcscf_fock_alpha are set to usual Fock like operator but computed with the MCSCF densities on the MO basis
  END_DOC
 
  call ao_to_mo(mcscf_fock_alpha_ao,ao_num,mcscf_fock_alpha_mo,mo_num)
  call ao_to_mo(mcscf_fock_beta_ao,ao_num,mcscf_fock_beta_mo,mo_num)
 
-END_PROVIDER 
+END_PROVIDER
 
  BEGIN_PROVIDER [ double precision, mcscf_fock_mo, (mo_num,mo_num) ]
 &BEGIN_PROVIDER [ double precision, mcscf_fock_diag_mo, (mo_num)]
@@ -118,13 +148,13 @@ END_PROVIDER
    !       |-----------------------|
    !       |  Fcv  |  F^a  |  Rvv  |
    !
-   ! C: Core, O: Open, V: Virtual 
-   ! 
+   ! C: Core, O: Open, V: Virtual
+   !
    ! Rcc = Acc Fcc^a + Bcc Fcc^b
    ! Roo = Aoo Foo^a + Boo Foo^b
    ! Rvv = Avv Fvv^a + Bvv Fvv^b
    ! Fcv = (F^a + F^b)/2
-   ! 
+   !
    ! F^a: Fock matrix alpha (MO), F^b: Fock matrix beta (MO)
    ! A,B: Coupling parameters
    !
@@ -133,7 +163,7 @@ END_PROVIDER
    !       cc   oo   vv
    !  A  -0.5  0.5  1.5
    !  B   1.5  0.5 -0.5
-   ! 
+   !
    END_DOC
    integer                        :: i,j,n
    if (elec_alpha_num == elec_beta_num) then
@@ -194,4 +224,4 @@ END_PROVIDER
  do i = 1, mo_num
   mcscf_fock_diag_mo(i) = mcscf_fock_mo(i,i)
  enddo
-END_PROVIDER 
+END_PROVIDER

From 077f32836b294f4c17d340a48528e1fa2082b9da Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 6 Feb 2025 12:52:33 +0100
Subject: [PATCH 20/59] DGEMV in mcscf_fock

---
 src/casscf_cipsi/mcscf_fock.irp.f | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f
index 738dee2c..87cc062c 100644
--- a/src/casscf_cipsi/mcscf_fock.irp.f
+++ b/src/casscf_cipsi/mcscf_fock.irp.f
@@ -13,7 +13,7 @@ BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ]
 
    ! the inactive Fock matrix
    do k=1,n_core_inact_orb
-     kk=list_core_inact_act(k)
+     kk=list_core_inact(k)
 !     do q=1,mo_num
 !       do p=1,mo_num
 !         do i=1,cholesky_mo_num
@@ -21,10 +21,11 @@ BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ]
 !         enddo
 !       end do
 !     end do
-     call dgemm('T','N', mo_num*mo_num, 1, cholesky_mo_num, 2.d0, &
+     call dgemv('T', cholesky_mo_num, mo_num*mo_num, 2.d0, &
                  cholesky_no_total_transp, cholesky_mo_num, &
-                 cholesky_no_total_transp(1,kk,kk), cholesky_mo_num, 1.d0, &
-                 Fipq, mo_num*mo_num)
+                 cholesky_no_total_transp(1,kk,kk), 1, 1.d0, &
+                 Fipq, 1)
+
 !     do q=1,mo_num
 !       do p=1,mo_num
 !         do i=1,cholesky_mo_num
@@ -73,10 +74,10 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ]
 !         enddo
 !       end do
 !     end do
-     call dgemm('T','N', mo_num*mo_num, 1, cholesky_mo_num, occnum(tt), &
+     call dgemv('T', cholesky_mo_num, mo_num*mo_num, occnum(tt), &
                  cholesky_no_total_transp, cholesky_mo_num, &
-                 cholesky_no_total_transp(1,tt,tt), cholesky_mo_num, 1.d0, &
-                 Fapq, mo_num*mo_num)
+                 cholesky_no_total_transp(1,tt,tt), 1, 1.d0, &
+                 Fapq, 1)
 !     do q=1,mo_num
 !       do p=1,mo_num
 !         do i=1,cholesky_mo_num

From 410ed1d5625c2f7837f9b94ea8ac455c8a70bf29 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 6 Feb 2025 14:18:02 +0100
Subject: [PATCH 21/59] Fast 4idx in CASSCF

---
 src/casscf_cipsi/gradient.irp.f       |  59 ++++++++-------
 src/casscf_cipsi/hessian.irp.f        |  75 +++++++++----------
 src/casscf_cipsi/natorb.irp.f         | 102 ++++++--------------------
 src/mo_two_e_ints/map_integrals.irp.f |   2 -
 4 files changed, 92 insertions(+), 146 deletions(-)

diff --git a/src/casscf_cipsi/gradient.irp.f b/src/casscf_cipsi/gradient.irp.f
index 961d260d..cd608fb3 100644
--- a/src/casscf_cipsi/gradient.irp.f
+++ b/src/casscf_cipsi/gradient.irp.f
@@ -14,8 +14,8 @@ END_PROVIDER
   implicit none
   n_c_a_prov = n_core_inact_orb * n_act_orb
   n_c_v_prov = n_core_inact_orb * n_virt_orb
-  n_a_v_prov = n_act_orb * n_virt_orb 
- END_PROVIDER 
+  n_a_v_prov = n_act_orb * n_virt_orb
+ END_PROVIDER
 
  BEGIN_PROVIDER [integer, excit, (2,nMonoEx)]
 &BEGIN_PROVIDER [character*3, excit_class, (nMonoEx)]
@@ -28,7 +28,7 @@ END_PROVIDER
   BEGIN_DOC
   ! a list of the orbitals involved in the excitation
   END_DOC
-  
+
   implicit none
   integer                        :: i,t,a,ii,tt,aa,indx,indx_tmp
   indx=0
@@ -48,7 +48,7 @@ END_PROVIDER
       mat_idx_c_a(ii,tt) = indx
     end do
   end do
-  
+
   indx_tmp = 0
   do ii=1,n_core_inact_orb
     i=list_core_inact(ii)
@@ -61,11 +61,11 @@ END_PROVIDER
       indx_tmp += 1
       list_idx_c_v(1,indx_tmp) = indx
       list_idx_c_v(2,indx_tmp) = ii
-      list_idx_c_v(3,indx_tmp) = aa 
+      list_idx_c_v(3,indx_tmp) = aa
       mat_idx_c_v(ii,aa) = indx
     end do
   end do
-  
+
   indx_tmp = 0
   do tt=1,n_act_orb
     t=list_act(tt)
@@ -82,7 +82,7 @@ END_PROVIDER
       mat_idx_a_v(tt,aa) = indx
     end do
   end do
-  
+
   if (bavard) then
     write(6,*) ' Filled the table of the Monoexcitations '
     do indx=1,nMonoEx
@@ -90,7 +90,7 @@ END_PROVIDER
           ,excit(2,indx),'  ',excit_class(indx)
     end do
   end if
-  
+
 END_PROVIDER
 
  BEGIN_PROVIDER [real*8, gradvec2, (nMonoEx)]
@@ -104,7 +104,7 @@ END_PROVIDER
   implicit none
   integer                        :: i,t,a,indx
   real*8                         :: gradvec_it,gradvec_ia,gradvec_ta
-  
+
   indx=0
   norm_grad_vec2_tab = 0.d0
   do i=1,n_core_inact_orb
@@ -114,7 +114,7 @@ END_PROVIDER
       norm_grad_vec2_tab(1) += gradvec2(indx)*gradvec2(indx)
     end do
   end do
-  
+
   do i=1,n_core_inact_orb
     do a=1,n_virt_orb
       indx+=1
@@ -122,7 +122,7 @@ END_PROVIDER
       norm_grad_vec2_tab(2) += gradvec2(indx)*gradvec2(indx)
     end do
   end do
-  
+
   do t=1,n_act_orb
     do a=1,n_virt_orb
       indx+=1
@@ -130,7 +130,7 @@ END_PROVIDER
       norm_grad_vec2_tab(3) += gradvec2(indx)*gradvec2(indx)
     end do
   end do
-  
+
   norm_grad_vec2=0.d0
   do indx=1,nMonoEx
     norm_grad_vec2+=gradvec2(indx)*gradvec2(indx)
@@ -144,7 +144,7 @@ END_PROVIDER
    write(6,*) ' Norm of the orbital gradient (via D, P and integrals): ', norm_grad_vec2
    write(6,*)
   endif
-  
+
 END_PROVIDER
 
 real*8 function gradvec_it(i,t)
@@ -154,23 +154,30 @@ real*8 function gradvec_it(i,t)
   END_DOC
   implicit none
   integer                        :: i,t
-  
+
   integer                        :: ii,tt,v,vv,x,y
   integer                        :: x3,y3
   double precision :: bielec_PQxx_no
-  
+
   ii=list_core_inact(i)
   tt=list_act(t)
   gradvec_it=2.D0*(Fipq(tt,ii)+Fapq(tt,ii))
   gradvec_it-=occnum(tt)*Fipq(ii,tt)
-  do v=1,n_act_orb ! active 
-    vv=list_act(v)
-    do x=1,n_act_orb ! active 
-      x3=x+n_core_inact_orb ! list_act(x) 
-      do y=1,n_act_orb ! active
-        y3=y+n_core_inact_orb ! list_act(y)
+  do y=1,n_act_orb ! active
+!   y3=y+n_core_inact_orb ! list_act(y)
+    do x=1,n_act_orb ! active
+!      x3=x+n_core_inact_orb ! list_act(x)
+      do v=1,n_act_orb ! active
+        vv=list_act(v)
         !                Gamma(2)  a a a a      1/r12      i  a  a  a
-        gradvec_it-=2.D0*P0tuvx_no(t,v,x,y)*bielec_PQxx_no(ii,vv,x3,y3)
+!        gradvec_it-=2.D0*P0tuvx_no(t,v,x,y)*bielec_PQxx_no(ii,vv,x3,y3)
+         integer :: ichol
+         double precision :: tmp
+         tmp = 0.d0
+         do ichol=1,cholesky_mo_num
+           tmp = tmp + cholesky_no_total_transp(ichol,vv,ii) * cholesky_no_total_transp(ichol,list_act(x),list_act(y))
+         enddo
+         gradvec_it = gradvec_it - 2.D0*P0tuvx_no(t,v,x,y)*tmp
       end do
     end do
   end do
@@ -183,12 +190,12 @@ real*8 function gradvec_ia(i,a)
   END_DOC
   implicit none
   integer                        :: i,a,ii,aa
-  
+
   ii=list_core_inact(i)
   aa=list_virt(a)
   gradvec_ia=2.D0*(Fipq(aa,ii)+Fapq(aa,ii))
   gradvec_ia*=2.D0
-  
+
 end function gradvec_ia
 
 real*8 function gradvec_ta(t,a)
@@ -198,7 +205,7 @@ real*8 function gradvec_ta(t,a)
   END_DOC
   implicit none
   integer                        :: t,a,tt,aa,v,vv,x,y
-  
+
   tt=list_act(t)
   aa=list_virt(a)
   gradvec_ta=0.D0
@@ -211,6 +218,6 @@ real*8 function gradvec_ta(t,a)
     end do
   end do
   gradvec_ta*=2.D0
-  
+
 end function gradvec_ta
 
diff --git a/src/casscf_cipsi/hessian.irp.f b/src/casscf_cipsi/hessian.irp.f
index 1ee073d2..431a6979 100644
--- a/src/casscf_cipsi/hessian.irp.f
+++ b/src/casscf_cipsi/hessian.irp.f
@@ -11,13 +11,14 @@ real*8 function hessmat_itju(i,t,j,u)
   integer                        :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj
   real*8                         :: term,t2
   double precision :: bielec_pqxx_no,bielec_pxxq_no
-  
+
   ii=list_core_inact(i)
   tt=list_act(t)
   if (i.eq.j) then
     if (t.eq.u) then
       ! diagonal element
-      term=occnum(tt)*Fipq(ii,ii)+2.D0*(Fipq(tt,tt)+Fapq(tt,tt))     &
+      term = occnum(tt)*Fipq(ii,ii) +      &
+           2.D0*(Fipq(tt,tt)+Fapq(tt,tt))     &
           -2.D0*(Fipq(ii,ii)+Fapq(ii,ii))
       term+=2.D0*(3.D0*bielec_pxxq_no(tt,i,i,tt)-bielec_pqxx_no(tt,tt,i,i))
       term-=2.D0*occnum(tt)*(3.D0*bielec_pxxq_no(tt,i,i,tt)             &
@@ -83,10 +84,10 @@ real*8 function hessmat_itju(i,t,j,u)
       end do
     end do
   end if
-  
+
   term*=2.D0
   hessmat_itju=term
-  
+
 end function hessmat_itju
 
 real*8 function hessmat_itja(i,t,j,a)
@@ -97,7 +98,7 @@ real*8 function hessmat_itja(i,t,j,a)
   integer                        :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y
   real*8                         :: term
   double precision :: bielec_pqxx_no,bielec_pxxq_no
-  
+
   ! it/ja
   ii=list_core_inact(i)
   tt=list_act(t)
@@ -120,7 +121,7 @@ real*8 function hessmat_itja(i,t,j,a)
   end if
   term*=2.D0
   hessmat_itja=term
-  
+
 end function hessmat_itja
 
 real*8 function hessmat_itua(i,t,u,a)
@@ -131,7 +132,7 @@ real*8 function hessmat_itua(i,t,u,a)
   integer                        :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3
   real*8                         :: term
   double precision :: bielec_pqxx_no,bielec_pxxq_no
-  
+
   ii=list_core_inact(i)
   tt=list_act(t)
   t3=t+n_core_inact_orb
@@ -162,7 +163,7 @@ real*8 function hessmat_itua(i,t,u,a)
   end if
   term*=2.D0
   hessmat_itua=term
-  
+
 end function hessmat_itua
 
 real*8 function hessmat_iajb(i,a,j,b)
@@ -173,7 +174,7 @@ real*8 function hessmat_iajb(i,a,j,b)
   integer                        :: i,a,j,b,ii,aa,jj,bb
   real*8                         :: term
   double precision :: bielec_pqxx_no,bielec_pxxq_no
-  
+
   ii=list_core_inact(i)
   aa=list_virt(a)
   if (i.eq.j) then
@@ -199,7 +200,7 @@ real*8 function hessmat_iajb(i,a,j,b)
   end if
   term*=2.D0
   hessmat_iajb=term
-  
+
 end function hessmat_iajb
 
 real*8 function hessmat_iatb(i,a,t,b)
@@ -210,7 +211,7 @@ real*8 function hessmat_iatb(i,a,t,b)
   integer                        :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3
   real*8                         :: term
   double precision :: bielec_pqxx_no,bielec_pxxq_no
-  
+
   ii=list_core_inact(i)
   aa=list_virt(a)
   tt=list_act(t)
@@ -231,7 +232,7 @@ real*8 function hessmat_iatb(i,a,t,b)
   end if
   term*=2.D0
   hessmat_iatb=term
-  
+
 end function hessmat_iatb
 
 real*8 function hessmat_taub(t,a,u,b)
@@ -243,7 +244,7 @@ real*8 function hessmat_taub(t,a,u,b)
   integer                        :: v3,x3
   real*8                         :: term,t1,t2,t3
   double precision :: bielec_pqxx_no,bielec_pxxq_no
-  
+
   tt=list_act(t)
   aa=list_virt(a)
   if (t == u) then
@@ -311,12 +312,12 @@ real*8 function hessmat_taub(t,a,u,b)
         end do
       end do
     end if
-    
+
   end if
-  
+
   term*=2.D0
   hessmat_taub=term
-  
+
 end function hessmat_taub
 
 BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)]
@@ -326,7 +327,7 @@ BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)]
   implicit none
   integer                        :: i,t,a,indx,indx_shift
   real*8                         :: hessmat_itju,hessmat_iajb,hessmat_taub
-  
+
   !$OMP PARALLEL DEFAULT(NONE) &
   !$OMP SHARED(hessdiag,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) &
   !$OMP PRIVATE(i,indx,t,a,indx_shift)
@@ -339,9 +340,9 @@ BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)]
     end do
   end do
   !$OMP END DO NOWAIT
-  
+
   indx_shift = n_core_inact_orb*n_act_orb
-  !$OMP DO 
+  !$OMP DO
   do a=1,n_virt_orb
     do i=1,n_core_inact_orb
       indx = a + (i-1)*n_virt_orb + indx_shift
@@ -349,9 +350,9 @@ BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)]
     end do
   end do
   !$OMP END DO NOWAIT
-  
+
   indx_shift += n_core_inact_orb*n_virt_orb
-  !$OMP DO 
+  !$OMP DO
   do a=1,n_virt_orb
     do t=1,n_act_orb
       indx = a + (t-1)*n_virt_orb + indx_shift
@@ -360,7 +361,7 @@ BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)]
   end do
   !$OMP END DO
   !$OMP END PARALLEL
-  
+
 END_PROVIDER
 
 
@@ -377,7 +378,7 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)]
  real*8                         :: hessmat_taub
  !       c-a c-v a-v
  !  c-a | X   X  X
- !  c-v |     X  X 
+ !  c-v |     X  X
  !  a-v |        X
 
   provide all_mo_integrals
@@ -390,12 +391,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)]
 
  !$OMP DO
 !!!! < Core-active| H |Core-active >
- ! Core-active excitations 
+ ! Core-active excitations
  do indx_tmp = 1, n_c_a_prov
   indx = list_idx_c_a(1,indx_tmp)
   i    = list_idx_c_a(2,indx_tmp)
   t    = list_idx_c_a(3,indx_tmp)
-  ! Core-active excitations 
+  ! Core-active excitations
   do j = 1, n_core_inact_orb
    if (i.eq.j) then
      ustart=t
@@ -418,12 +419,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)]
 
  !$OMP DO
 !!!! < Core-active| H |Core-VIRTUAL >
- ! Core-active excitations 
+ ! Core-active excitations
  do indx_tmp = 1, n_c_a_prov
   indx = list_idx_c_a(1,indx_tmp)
   i    = list_idx_c_a(2,indx_tmp)
   t    = list_idx_c_a(3,indx_tmp)
-  ! Core-VIRTUAL excitations 
+  ! Core-VIRTUAL excitations
   do jndx_tmp = 1, n_c_v_prov
    jndx = list_idx_c_v(1,jndx_tmp)
    j    = list_idx_c_v(2,jndx_tmp)
@@ -441,12 +442,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)]
 
  !$OMP DO
 !!!! < Core-active| H |ACTIVE-VIRTUAL >
- ! Core-active excitations 
+ ! Core-active excitations
  do indx_tmp = 1, n_c_a_prov
   indx = list_idx_c_a(1,indx_tmp)
   i    = list_idx_c_a(2,indx_tmp)
   t    = list_idx_c_a(3,indx_tmp)
-  ! ACTIVE-VIRTUAL excitations 
+  ! ACTIVE-VIRTUAL excitations
   do jndx_tmp = 1, n_a_v_prov
    jndx = list_idx_a_v(1,jndx_tmp)
    u    = list_idx_a_v(2,jndx_tmp)
@@ -466,12 +467,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)]
  !$OMP PRIVATE(indx_tmp,indx,i,a,j,b,bstart,jndx)
   !$OMP DO
 !!!!! < Core-VIRTUAL | H |Core-VIRTUAL >
-  ! Core-VIRTUAL excitations 
+  ! Core-VIRTUAL excitations
   do indx_tmp = 1, n_c_v_prov
    indx = list_idx_c_v(1,indx_tmp)
    i    = list_idx_c_v(2,indx_tmp)
    a    = list_idx_c_v(3,indx_tmp)
-   ! Core-VIRTUAL excitations 
+   ! Core-VIRTUAL excitations
    do j = 1, n_core_inact_orb
     if (i.eq.j) then
       bstart=a
@@ -485,7 +486,7 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)]
     enddo
    enddo
   enddo
- 
+
   !$OMP END DO NOWAIT
   !$OMP END PARALLEL
  endif
@@ -496,12 +497,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)]
 
  !$OMP DO
 !!!! < Core-VIRTUAL | H |Active-VIRTUAL >
- ! Core-VIRTUAL excitations 
+ ! Core-VIRTUAL excitations
  do indx_tmp = 1, n_c_v_prov
   indx = list_idx_c_v(1,indx_tmp)
   i    = list_idx_c_v(2,indx_tmp)
   a    = list_idx_c_v(3,indx_tmp)
-  ! Active-VIRTUAL excitations 
+  ! Active-VIRTUAL excitations
   do jndx_tmp = 1, n_a_v_prov
    jndx = list_idx_a_v(1,jndx_tmp)
    t    = list_idx_a_v(2,jndx_tmp)
@@ -520,12 +521,12 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)]
 
  !$OMP DO
 !!!! < Active-VIRTUAL | H |Active-VIRTUAL >
- ! Active-VIRTUAL excitations 
+ ! Active-VIRTUAL excitations
  do indx_tmp = 1, n_a_v_prov
   indx = list_idx_a_v(1,indx_tmp)
   t    = list_idx_a_v(2,indx_tmp)
   a    = list_idx_a_v(3,indx_tmp)
-  ! Active-VIRTUAL excitations 
+  ! Active-VIRTUAL excitations
   do u=t,n_act_orb
    if (t.eq.u) then
      bstart=a
@@ -542,4 +543,4 @@ BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)]
   !$OMP END DO NOWAIT
   !$OMP END PARALLEL
 
-END_PROVIDER 
+END_PROVIDER
diff --git a/src/casscf_cipsi/natorb.irp.f b/src/casscf_cipsi/natorb.irp.f
index 9ce90304..6376308d 100644
--- a/src/casscf_cipsi/natorb.irp.f
+++ b/src/casscf_cipsi/natorb.irp.f
@@ -72,84 +72,27 @@ BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)]
   BEGIN_DOC
   ! 4-index transformation of 2part matrices
   END_DOC
-  integer                        :: i,j,k,l,p,q
-  real*8                         :: d(n_act_orb)
 
-  ! index per index
-  ! first quarter
-  P0tuvx_no(:,:,:,:) = P0tuvx(:,:,:,:)
+  double precision, allocatable :: tmp(:,:,:,:)
+  allocate(tmp(n_act_orb,n_act_orb,n_act_orb,n_act_orb))
 
-  do j=1,n_act_orb
-    do k=1,n_act_orb
-      do l=1,n_act_orb
-        do p=1,n_act_orb
-          d(p)=0.D0
-        end do
-        do p=1,n_act_orb
-          do q=1,n_act_orb
-            d(p)+=P0tuvx_no(q,j,k,l)*natorbsCI(q,p)
-          end do
-        end do
-        do p=1,n_act_orb
-          P0tuvx_no(p,j,k,l)=d(p)
-        end do
-      end do
-    end do
-  end do
-  ! 2nd quarter
-  do j=1,n_act_orb
-    do k=1,n_act_orb
-      do l=1,n_act_orb
-        do p=1,n_act_orb
-          d(p)=0.D0
-        end do
-        do p=1,n_act_orb
-          do q=1,n_act_orb
-            d(p)+=P0tuvx_no(j,q,k,l)*natorbsCI(q,p)
-          end do
-        end do
-        do p=1,n_act_orb
-          P0tuvx_no(j,p,k,l)=d(p)
-        end do
-      end do
-    end do
-  end do
-  ! 3rd quarter
-  do j=1,n_act_orb
-    do k=1,n_act_orb
-      do l=1,n_act_orb
-        do p=1,n_act_orb
-          d(p)=0.D0
-        end do
-        do p=1,n_act_orb
-          do q=1,n_act_orb
-            d(p)+=P0tuvx_no(j,k,q,l)*natorbsCI(q,p)
-          end do
-        end do
-        do p=1,n_act_orb
-          P0tuvx_no(j,k,p,l)=d(p)
-        end do
-      end do
-    end do
-  end do
-  ! 4th quarter
-  do j=1,n_act_orb
-    do k=1,n_act_orb
-      do l=1,n_act_orb
-        do p=1,n_act_orb
-          d(p)=0.D0
-        end do
-        do p=1,n_act_orb
-          do q=1,n_act_orb
-            d(p)+=P0tuvx_no(j,k,l,q)*natorbsCI(q,p)
-          end do
-        end do
-        do p=1,n_act_orb
-          P0tuvx_no(j,k,l,p)=d(p)
-        end do
-      end do
-    end do
-  end do
+  call dgemm('T','N',(n_act_orb*n_act_orb*n_act_orb), n_act_orb, n_act_orb, 1.d0, &
+          P0tuvx, n_act_orb, natorbsCI, n_act_orb, 0.d0, &
+          tmp, (n_act_orb*n_act_orb*n_act_orb))
+
+  call dgemm('T','N',(n_act_orb*n_act_orb*n_act_orb), n_act_orb, n_act_orb, 1.d0, &
+          tmp, n_act_orb, natorbsCI, n_act_orb, 0.d0, &
+          P0tuvx_no, (n_act_orb*n_act_orb*n_act_orb))
+
+  call dgemm('T','N',(n_act_orb*n_act_orb*n_act_orb), n_act_orb, n_act_orb, 1.d0, &
+          P0tuvx_no, n_act_orb, natorbsCI, n_act_orb, 0.d0, &
+          tmp, (n_act_orb*n_act_orb*n_act_orb))
+
+  call dgemm('T','N',(n_act_orb*n_act_orb*n_act_orb), n_act_orb, n_act_orb, 1.d0, &
+          tmp, n_act_orb, natorbsCI, n_act_orb, 0.d0, &
+          P0tuvx_no, (n_act_orb*n_act_orb*n_act_orb))
+
+  deallocate(tmp)
 
 END_PROVIDER
 
@@ -160,6 +103,7 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)]
   BEGIN_DOC
   ! Transformed one-e integrals
   END_DOC
+
   integer :: i,j, p, q
   real*8 :: d(n_act_orb)
   one_ints_no(:,:)=mo_one_e_integrals(:,:)
@@ -168,10 +112,8 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)]
   do j=1,mo_num
     do p=1,n_act_orb
       d(p)=0.D0
-    end do
-    do p=1,n_act_orb
       do q=1,n_act_orb
-        d(p)+=one_ints_no(list_act(q),j)*natorbsCI(q,p)
+        d(p) = d(p) + one_ints_no(list_act(q),j)*natorbsCI(q,p)
       end do
     end do
     do p=1,n_act_orb
@@ -183,8 +125,6 @@ BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)]
   do j=1,mo_num
     do p=1,n_act_orb
       d(p)=0.D0
-    end do
-    do p=1,n_act_orb
       do q=1,n_act_orb
         d(p)+=one_ints_no(j,list_act(q))*natorbsCI(q,p)
       end do
diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f
index b5f78b7b..5a005d7b 100644
--- a/src/mo_two_e_ints/map_integrals.irp.f
+++ b/src/mo_two_e_ints/map_integrals.irp.f
@@ -86,10 +86,8 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:mo_integrals_cache_s
 
    call set_multiple_levels_omp(.False.)
 
-
    !$OMP PARALLEL DO PRIVATE(k,l,ii) SCHEDULE(dynamic)
    do l=mo_integrals_cache_min,mo_integrals_cache_max
-     print *, l
      do k=mo_integrals_cache_min,mo_integrals_cache_max
          ii = int(l-mo_integrals_cache_min,8)
          ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8))

From 8975617bf27e55f54d33c473a4c2ab927b6a9719 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 6 Feb 2025 15:45:26 +0100
Subject: [PATCH 22/59] Dgemmized hessmat_taub

---
 src/casscf_cipsi/hessian.irp.f | 173 ++++++++++++++++++++++++++-------
 1 file changed, 138 insertions(+), 35 deletions(-)

diff --git a/src/casscf_cipsi/hessian.irp.f b/src/casscf_cipsi/hessian.irp.f
index 431a6979..1bcb64b0 100644
--- a/src/casscf_cipsi/hessian.irp.f
+++ b/src/casscf_cipsi/hessian.irp.f
@@ -241,73 +241,176 @@ real*8 function hessmat_taub(t,a,u,b)
   END_DOC
   implicit none
   integer                        :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y
-  integer                        :: v3,x3
-  real*8                         :: term,t1,t2,t3
+  integer                        :: v3,x3, ichol
+  real*8                         :: term,t1,t2,t3, tmp
   double precision :: bielec_pqxx_no,bielec_pxxq_no
 
+  double precision, allocatable :: tmp1(:), tmp2(:,:)
+  allocate(tmp1(n_act_orb))
+  allocate(tmp2(n_act_orb,n_act_orb))
+
   tt=list_act(t)
   aa=list_virt(a)
+
   if (t == u) then
     if (a == b) then
       ! ta/ta
-      t1=occnum(tt)*Fipq(aa,aa)
+      t1=occnum(tt)*Fipq(aa,aa) - occnum(tt)*Fipq(tt,tt)
+
       t2=0.D0
-      t3=0.D0
-      t1-=occnum(tt)*Fipq(tt,tt)
+!      do x=1,n_act_orb
+!        x3=x+n_core_inact_orb
+!        do v=1,n_act_orb
+!          v3=v+n_core_inact_orb
+!          tmp = 0.d0
+!          do ichol = 1, cholesky_mo_num
+!            tmp = tmp + cholesky_no_total_transp(ichol,aa,aa) * cholesky_no_total_transp(ichol,v3,x3)
+!          enddo
+!          t2 = t2 + 2.D0*P0tuvx_no(t,t,v,x)*tmp
+!        enddo
+!      enddo
+
+      do x=1,n_act_orb
+        x3=x+n_core_inact_orb
+        call dgemv('T', cholesky_mo_num, n_act_orb, 2.d0, &
+                 cholesky_no_total_transp(1,n_core_inact_orb+1,x3), cholesky_mo_num, &
+                 cholesky_no_total_transp(1,aa,aa), 1, 0.d0, &
+                 tmp1, 1)
+        do v=1,n_act_orb
+          t2 = t2 + P0tuvx_no(t,t,v,x)*tmp1(v)
+        enddo
+      enddo
+!      do v=1,n_act_orb
+!        v3=v+n_core_inact_orb
+!        do x=1,n_act_orb
+!          x3=x+n_core_inact_orb
+!          tmp = 0.d0
+!          do ichol = 1, cholesky_mo_num
+!            tmp = tmp + cholesky_no_total_transp(ichol,aa,x3) * cholesky_no_total_transp(ichol,v3,aa)
+!          enddo
+!          t2 = t2 + 2.d0*(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))*tmp
+!        end do
+!      end do
+      call dgemm('T','N', n_act_orb, n_act_orb, cholesky_mo_num, 2.d0, &
+                 cholesky_no_total_transp(1,n_core_inact_orb+1,aa), cholesky_mo_num, &
+                 cholesky_no_total_transp(1,n_core_inact_orb+1,aa), cholesky_mo_num, 0.d0, &
+                 tmp2, n_act_orb)
       do v=1,n_act_orb
-        vv=list_act(v)
-        v3=v+n_core_inact_orb
         do x=1,n_act_orb
-          xx=list_act(x)
-          x3=x+n_core_inact_orb
-          t2+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,aa,v3,x3)      &
-              +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))*              &
-              bielec_pxxq_no(aa,x3,v3,aa))
-          do y=1,n_act_orb
-            t3-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx)
+          t2 = t2 + P0tuvx_no(t,x,v,t)*tmp2(x,v) + P0tuvx_no(t,x,t,v)*tmp2(x,v)
+        enddo
+      enddo
+
+      t3=0.D0
+      do x=1,n_act_orb
+        xx=list_act(x)
+        do y=1,n_act_orb
+          do v=1,n_act_orb
+            t3 = t3 - P0tuvx_no(t,v,x,y)*bielecCI_no(v,t,y,xx)
           end do
         end do
       end do
-      term=t1+t2+t3
+      term=t1+t2+t3*2.d0
+
     else
+
       bb=list_virt(b)
       ! ta/tb b/=a
       term=occnum(tt)*Fipq(aa,bb)
+!      do v=1,n_act_orb
+!        vv=list_act(v)
+!        v3=v+n_core_inact_orb
+!        do x=1,n_act_orb
+!          xx=list_act(x)
+!          x3=x+n_core_inact_orb
+!          term+=2.D0*P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,bb,v3,x3)
+!        end do
+!      end do
+      do x=1,n_act_orb
+        x3=x+n_core_inact_orb
+        call dgemv('T', cholesky_mo_num, n_act_orb, 2.d0, &
+                 cholesky_no_total_transp(1,n_core_inact_orb+1,x3), cholesky_mo_num, &
+                 cholesky_no_total_transp(1,aa,bb), 1, 0.d0, &
+                 tmp1, 1)
+        do v=1,n_act_orb
+          term = term + P0tuvx_no(t,t,v,x)*tmp1(v)
+        enddo
+      enddo
+
+!      do v=1,n_act_orb
+!        vv=list_act(v)
+!        v3=v+n_core_inact_orb
+!        do x=1,n_act_orb
+!          xx=list_act(x)
+!          x3=x+n_core_inact_orb
+!          term+=2.d0*(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))*bielec_pxxq_no(aa,x3,v3,bb)
+!        end do
+!      end do
+      call dgemm('T','N', n_act_orb, n_act_orb, cholesky_mo_num, 2.d0, &
+                 cholesky_no_total_transp(1,n_core_inact_orb+1,aa), cholesky_mo_num, &
+                 cholesky_no_total_transp(1,n_core_inact_orb+1,bb), cholesky_mo_num, 0.d0, &
+                 tmp2, n_act_orb)
       do v=1,n_act_orb
-        vv=list_act(v)
-        v3=v+n_core_inact_orb
         do x=1,n_act_orb
-          xx=list_act(x)
-          x3=x+n_core_inact_orb
-          term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,bb,v3,x3)    &
-              +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))               &
-              *bielec_pxxq_no(aa,x3,v3,bb))
-        end do
-      end do
+          term = term + P0tuvx_no(t,x,v,t)*tmp2(x,v) + P0tuvx_no(t,x,t,v)*tmp2(x,v)
+        enddo
+      enddo
+
     end if
+
   else
+
     ! ta/ub t/=u
     uu=list_act(u)
     bb=list_virt(b)
+
     term=0.D0
+!    do v=1,n_act_orb
+!      vv=list_act(v)
+!      v3=v+n_core_inact_orb
+!      do x=1,n_act_orb
+!        xx=list_act(x)
+!        x3=x+n_core_inact_orb
+!        term+=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,bb,v3,x3)
+!      end do
+!    end do
+    do x=1,n_act_orb
+      x3=x+n_core_inact_orb
+      call dgemv('T', cholesky_mo_num, n_act_orb, 2.d0, &
+               cholesky_no_total_transp(1,n_core_inact_orb+1,x3), cholesky_mo_num, &
+               cholesky_no_total_transp(1,aa,bb), 1, 0.d0, &
+               tmp1, 1)
+      do v=1,n_act_orb
+        term = term + P0tuvx_no(t,u,v,x)*tmp1(v)
+      enddo
+    enddo
+
+!    do v=1,n_act_orb
+!      vv=list_act(v)
+!      v3=v+n_core_inact_orb
+!      do x=1,n_act_orb
+!        xx=list_act(x)
+!        x3=x+n_core_inact_orb
+!        term+=2.D0*(P0tuvx_no(t,x,v,u)+P0tuvx_no(t,x,u,v))*bielec_pxxq_no(aa,x3,v3,bb)
+!      end do
+!    end do
+    call dgemm('T','N', n_act_orb, n_act_orb, cholesky_mo_num, 2.d0, &
+                 cholesky_no_total_transp(1,n_core_inact_orb+1,aa), cholesky_mo_num, &
+                 cholesky_no_total_transp(1,n_core_inact_orb+1,bb), cholesky_mo_num, 0.d0, &
+                 tmp2, n_act_orb)
     do v=1,n_act_orb
-      vv=list_act(v)
-      v3=v+n_core_inact_orb
       do x=1,n_act_orb
-        xx=list_act(x)
-        x3=x+n_core_inact_orb
-        term+=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,bb,v3,x3)      &
-            +(P0tuvx_no(t,x,v,u)+P0tuvx_no(t,x,u,v))                 &
-            *bielec_pxxq_no(aa,x3,v3,bb))
-      end do
-    end do
+        term = term + P0tuvx_no(t,x,v,u)*tmp2(x,v)+P0tuvx_no(t,x,u,v)*tmp2(x,v)
+      enddo
+    enddo
+
     if (a.eq.b) then
       term-=0.5D0*(occnum(tt)*Fipq(uu,tt)+occnum(uu)*Fipq(tt,uu))
       do v=1,n_act_orb
         do y=1,n_act_orb
           do x=1,n_act_orb
-            term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,uu)
-            term-=P0tuvx_no(u,v,x,y)*bielecCI_no(x,y,v,tt)
+            term = term - P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,uu) &
+                        - P0tuvx_no(u,v,x,y)*bielecCI_no(x,y,v,tt)
           end do
         end do
       end do

From 4ecb15a727373d6a29f122dc165babaed237ffe9 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 6 Feb 2025 19:17:43 +0100
Subject: [PATCH 23/59] Trying to fix Davidson in CASSCF

---
 src/dav_general_mat/dav_general.irp.f | 41 ++++++++++++---------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f
index a277d9ef..114476c2 100644
--- a/src/dav_general_mat/dav_general.irp.f
+++ b/src/dav_general_mat/dav_general.irp.f
@@ -82,7 +82,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
   nproc_target = nproc
   double precision :: rss
   integer :: maxab
-  maxab = sze 
+  maxab = sze
 
   m=1
   disk_based = .False.
@@ -204,7 +204,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
       u_in(i,k) = r1*dcos(r2)
     enddo
   enddo
-  ! Normalize all states 
+  ! Normalize all states
   do k=1,N_st_diag
     call normalize(u_in(:,k),sze)
   enddo
@@ -228,20 +228,13 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
       shift  = N_st_diag*(iter-1)
       shift2 = N_st_diag*iter
 
-      if ((iter > 1).or.(itertot == 1)) then
-        ! Compute |W_k> = \sum_i |i><i|H|u_k>
-        ! -----------------------------------
+      ! Compute |W_k> = \sum_i |i><i|H|u_k>
+      ! -----------------------------------
 
-         ! Gram-Smitt to orthogonalize all new guess with the previous vectors 
-          call ortho_qr(U,size(U,1),sze,shift2)
-          call ortho_qr(U,size(U,1),sze,shift2)
+       ! Gram-Smitt to orthogonalize all new guess with the previous vectors
+      call ortho_qr(U,size(U,1),sze,shift2)
 
-!            call H_S2_u_0_nstates_openmp(W(:,shift+1),U(:,shift+1),N_st_diag,sze)
-            call hpsi(W(:,shift+1),U(:,shift+1),N_st_diag,sze,h_mat)
-      else
-         ! Already computed in update below
-         continue
-      endif
+      call hpsi(W(:,shift+1),U(:,shift+1),N_st_diag,sze,h_mat)
 
       ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
       ! -------------------------------------------
@@ -311,12 +304,12 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
         do i=1,sze
           U(i,shift2+k) =  &
             (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
-              /max(H_jj(i) - lambda (k),1.d-2)
+              /max(dabs(H_jj(i) - lambda (k)),1.d-2) * dsign(1d0,H_jj(i) - lambda (k))
         enddo
 
         if (k <= N_st) then
           residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
-          to_print(1,k) = lambda(k) 
+          to_print(1,k) = lambda(k)
           to_print(2,k) = residual_norm(k)
         endif
       enddo
@@ -324,7 +317,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
 
 
       if ((itertot>1).and.(iter == 1)) then
-        !don't print 
+        !don't print
         continue
       else
         write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st)
@@ -333,11 +326,11 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
       ! Check convergence
       if (iter > 1) then
           converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson
-      endif   
-      
+      endif
+
 
       do k=1,N_st
-        if (residual_norm(k) > 1.e8) then
+        if (residual_norm(k) > 1.d8) then
           print *, 'Davidson failed'
           stop -1
         endif
@@ -365,13 +358,15 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
 
     call dgemm('N','N', sze, N_st_diag, shift2, 1.d0,      &
         U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
+
     do k=1,N_st_diag
       do i=1,sze
         U(i,k) = u_in(i,k)
       enddo
     enddo
-      call ortho_qr(U,size(U,1),sze,N_st_diag)
-      call ortho_qr(U,size(U,1),sze,N_st_diag)
+
+    call ortho_qr(U,size(U,1),sze,N_st_diag)
+
     do j=1,N_st_diag
       k=1
       do while ((k<sze).and.(U(k,j) == 0.d0))
@@ -412,7 +407,7 @@ subroutine hpsi(v,u,N_st,sze,h_mat)
   use bitmasks
   implicit none
   BEGIN_DOC
-  ! Computes $v = H | u \rangle$ and 
+  ! Computes $v = H | u \rangle$ and
   END_DOC
   integer, intent(in)              :: N_st,sze
   double precision, intent(in)     :: u(sze,N_st),h_mat(sze,sze)

From 25e98da2d6ce1a296379bd311d017bf1b08eb165 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 7 Feb 2025 18:38:52 +0100
Subject: [PATCH 24/59] added mo_in_r on extra grid

---
 src/dft_utils_in_r/ao_in_r.irp.f |  6 ++---
 src/dft_utils_in_r/mo_in_r.irp.f | 41 ++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f
index e9c003d4..c8822776 100644
--- a/src/dft_utils_in_r/ao_in_r.irp.f
+++ b/src/dft_utils_in_r/ao_in_r.irp.f
@@ -185,9 +185,9 @@ END_PROVIDER
  END_PROVIDER
 
  BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)]
- implicit none                                                                                                                                                                                             
+ implicit none
  BEGIN_DOC
- ! aos_in_r_array_extra(i,j)        = value of the ith ao on the jth grid point
+ ! aos_in_r_array_extra(i,j)        = value of the ith ao on the jth grid point of the EXTRA grid 
  END_DOC
  integer :: i,j
  double precision :: aos_array(ao_num), r(3)
@@ -214,7 +214,7 @@ END_PROVIDER
 BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)]
 
   BEGIN_DOC
-  ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point
+  ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point of the EXTRA grid 
   END_DOC
  
   implicit none
diff --git a/src/dft_utils_in_r/mo_in_r.irp.f b/src/dft_utils_in_r/mo_in_r.irp.f
index ad931402..623de4f8 100644
--- a/src/dft_utils_in_r/mo_in_r.irp.f
+++ b/src/dft_utils_in_r/mo_in_r.irp.f
@@ -181,3 +181,44 @@
  END_PROVIDER
 
 
+
+!!!!!EXTRA GRID 
+
+ BEGIN_PROVIDER[double precision, mos_in_r_array_extra_omp, (mo_num,n_points_extra_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! mos_in_r_array_extra(i,j)        = value of the ith mo on the jth grid point on the EXTRA GRID 
+ END_DOC
+ integer :: i,j
+ double precision :: mos_array_extra(mo_num), r(3)
+ print*,'coucou'
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i,r,mos_array_extra,j) & 
+ !$OMP SHARED(mos_in_r_array_extra_omp,n_points_extra_final_grid,mo_num,final_grid_points_extra)
+ do i = 1, n_points_extra_final_grid
+  r(1) = final_grid_points_extra(1,i)
+  r(2) = final_grid_points_extra(2,i)
+  r(3) = final_grid_points_extra(3,i)
+  call give_all_mos_at_r(r,mos_array_extra)
+  do j = 1, mo_num
+   mos_in_r_array_extra_omp(j,i) = mos_array_extra(j)
+  enddo
+ enddo
+ !$OMP END PARALLEL DO
+ print*,'coucou fin'
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER[double precision, mos_in_r_array_extra_transp,(n_points_extra_final_grid,mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! mos_in_r_array_extra_transp(i,j) = value of the jth mo on the ith grid point
+ END_DOC
+ integer :: i,j
+ do i = 1, n_points_extra_final_grid
+  do j = 1, mo_num
+   mos_in_r_array_extra_transp(i,j) = mos_in_r_array_extra_omp(j,i) 
+  enddo
+ enddo
+ END_PROVIDER

From fe9ea75ae79a5c7b641733c27d0801aeb4bf3af5 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 11 Feb 2025 19:02:00 +0100
Subject: [PATCH 25/59] Added Spd module in OCaml

---
 bin/qp_convert_output_to_ezfio        |  3 +--
 ocaml/Angmom.ml                       | 31 ++++++++++++++++++++++++---
 ocaml/Angmom.mli                      | 25 +++++++++++++++++++--
 ocaml/Long_basis.ml                   |  4 ++--
 src/mo_two_e_ints/map_integrals.irp.f |  1 -
 src/scf_utils/roothaan_hall_scf.irp.f |  2 +-
 6 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/bin/qp_convert_output_to_ezfio b/bin/qp_convert_output_to_ezfio
index 6f2d02d0..95822770 100755
--- a/bin/qp_convert_output_to_ezfio
+++ b/bin/qp_convert_output_to_ezfio
@@ -154,8 +154,7 @@ def write_ezfio(res, filename):
     prim_num_max = ezfio.get_ao_basis_ao_prim_num_max()
 
     for i in range(len(res.basis)):
-        coefficient[
-            i] += [0. for j in range(len(coefficient[i]), prim_num_max)]
+        coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)]
         exponent[i] += [0. for j in range(len(exponent[i]), prim_num_max)]
 
     coefficient = reduce(lambda x, y: x + y, coefficient, [])
diff --git a/ocaml/Angmom.ml b/ocaml/Angmom.ml
index 2da09340..4e315a95 100644
--- a/ocaml/Angmom.ml
+++ b/ocaml/Angmom.ml
@@ -77,6 +77,7 @@ module Xyz = struct
   type t = { x: Positive_int.t ;
              y: Positive_int.t ;
              z: Positive_int.t } [@@deriving sexp]
+
   type state_type = Null | X | Y | Z
 
   (** Builds an XYZ triplet from a string.
@@ -147,8 +148,8 @@ module Xyz = struct
    in Positive_int.of_int (x+y+z)
 
 
- (** Returns a list of XYZ powers for a given symmetry *)
- let of_symmetry sym =
+ (** Returns a list of XYZ powers for a given angular momentum *)
+ let of_angmom sym =
    let l = Positive_int.to_int (to_l sym) in
    let create_z xyz =
     { x=xyz.x ;
@@ -179,7 +180,31 @@ module Xyz = struct
    |> List.rev
 
 
- (** Returns the symmetry corresponding to the XYZ triplet *)
+ (** Returns the angular momentum corresponding to the XYZ triplet *)
+ let to_symmetry sym = of_l (get_l sym)
+
+end
+
+module Spd = struct
+  type t = { l: st; m: int } [@@deriving sexp]
+
+  let to_string { l ; m } =
+    (to_string l) ^ " " ^ (if m > 0 then "+" else "") ^ (string_of_int m)
+
+  let of_string s = match String_ext.lsplit2 ~on:' ' s with
+    | Some (l, m) -> { l=of_string l ; m=int_of_string m }
+    | _ -> failwith ("Invalid Spd: "^s)
+
+ (** Returns the l quantum number from a XYZ powers triplet *)
+  let get_l { l ; _ } = to_l l
+
+ (** Returns a list of XYZ powers for a given angular momentum *)
+ let of_angmom sym =
+   let l = Positive_int.to_int (to_l sym) in
+   Array.init (2*l+1) (fun i -> { l=sym ; m=i-l })
+   |> Array.to_list
+
+ (** Returns the angular momentum corresponding to the XYZ triplet *)
  let to_symmetry sym = of_l (get_l sym)
 
 end
diff --git a/ocaml/Angmom.mli b/ocaml/Angmom.mli
index 2ab63003..006717aa 100644
--- a/ocaml/Angmom.mli
+++ b/ocaml/Angmom.mli
@@ -28,9 +28,30 @@ module Xyz :
     val get_l : t -> Qptypes.Positive_int.t
 
     (** Returns a list of XYZ powers for a given symmetry *)
-    val of_symmetry : st -> t list
+    val of_angmom : st -> t list
 
     (** Returns the symmetry corresponding to the XYZ powers *)
     val to_symmetry : t -> st
 
-  end
+end
+
+
+module Spd :
+  sig
+    type t = { l: st; m: int } [@@deriving sexp]
+
+    (** The string format contains the l and m quantum numbers *)
+
+    val of_string : string -> t
+    val to_string : t -> string
+
+    (** Returns the quantum number l *)
+    val get_l : t -> Qptypes.Positive_int.t
+
+    (** Returns a list of XYZ powers for a given symmetry *)
+    val of_angmom : st -> t list
+
+    (** Returns the symmetry corresponding to the XYZ powers *)
+    val to_symmetry : t -> st
+
+end
diff --git a/ocaml/Long_basis.ml b/ocaml/Long_basis.ml
index a8ea3c66..96f7566b 100644
--- a/ocaml/Long_basis.ml
+++ b/ocaml/Long_basis.ml
@@ -10,7 +10,7 @@ let of_basis b =
     | (g,n)::tail ->
         begin
           let new_accu =
-            Angmom.Xyz.of_symmetry g.Gto.sym
+            Angmom.Xyz.of_angmom g.Gto.sym
             |> List.rev_map (fun x-> (x,g,n))
           in
           do_work (new_accu@accu) tail
@@ -25,7 +25,7 @@ let to_basis b =
   | [] -> List.rev accu
   | (s,g,n)::tail ->
     let first_sym =
-      Angmom.Xyz.of_symmetry g.Gto.sym
+      Angmom.Xyz.of_angmom g.Gto.sym
       |> List.hd
     in
     let new_accu =
diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f
index b5f78b7b..c95a8f44 100644
--- a/src/mo_two_e_ints/map_integrals.irp.f
+++ b/src/mo_two_e_ints/map_integrals.irp.f
@@ -89,7 +89,6 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:mo_integrals_cache_s
 
    !$OMP PARALLEL DO PRIVATE(k,l,ii) SCHEDULE(dynamic)
    do l=mo_integrals_cache_min,mo_integrals_cache_max
-     print *, l
      do k=mo_integrals_cache_min,mo_integrals_cache_max
          ii = int(l-mo_integrals_cache_min,8)
          ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8))
diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f
index 9e2ca4bc..4ba0964a 100644
--- a/src/scf_utils/roothaan_hall_scf.irp.f
+++ b/src/scf_utils/roothaan_hall_scf.irp.f
@@ -70,7 +70,7 @@ END_DOC
 
     dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
 
-    if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-6) )  then
+    if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-10) )  then
 
       ! Store Fock and error matrices at each iteration
       do j=1,ao_num

From 9d2a2ee754b69972fe38ae8e4190bfa2df268ea5 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Wed, 12 Feb 2025 12:01:04 +0100
Subject: [PATCH 26/59] Introducing trexion convention

---
 bin/qp_convert_output_to_ezfio          |  2 +
 external/ezfio                          |  2 +-
 ocaml/qp_create_ezfio.ml                |  1 +
 scripts/qp_import_trexio.py             | 16 +++--
 src/ao_basis/aos.irp.f                  | 92 ++++++++++++++++++-------
 src/basis/EZFIO.cfg                     |  1 -
 src/basis/basis.irp.f                   | 69 -------------------
 src/trexio/export_trexio_routines.irp.f | 35 ++++++----
 8 files changed, 101 insertions(+), 117 deletions(-)

diff --git a/bin/qp_convert_output_to_ezfio b/bin/qp_convert_output_to_ezfio
index 95822770..9c0441b8 100755
--- a/bin/qp_convert_output_to_ezfio
+++ b/bin/qp_convert_output_to_ezfio
@@ -44,6 +44,7 @@ def write_ezfio(res, filename):
 
     res.clean_uncontractions()
     ezfio.set_file(filename)
+    ezfio.set_ezfio_files_ezfio_convention(20250211)
 
     #  _
     # |_ |  _   _ _|_ ._ _  ._   _
@@ -172,6 +173,7 @@ def write_ezfio(res, filename):
     # ~#~#~#~#~ #
 
     ezfio.set_ao_basis_ao_coef(coef)
+    ezfio.set_basis_ao_normalized(True)
     ezfio.set_ao_basis_ao_expo(expo)
     ezfio.set_ao_basis_ao_basis("Read by resultsFile")
 
diff --git a/external/ezfio b/external/ezfio
index dba01c4f..d02132ea 160000
--- a/external/ezfio
+++ b/external/ezfio
@@ -1 +1 @@
-Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3
+Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091
diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml
index 4e17c0ad..ad8d44ca 100644
--- a/ocaml/qp_create_ezfio.ml
+++ b/ocaml/qp_create_ezfio.ml
@@ -265,6 +265,7 @@ let run ?o b au c d m p cart xyz_file =
   let write_file () =
       (* Create EZFIO *)
       Ezfio.set_file ezfio_file;
+      Ezfio.set_files_ezfio_ezfio_convention 20250211;
 
       (* Write Pseudo *)
       let pseudo =
diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py
index 23f48eef..a515efba 100755
--- a/scripts/qp_import_trexio.py
+++ b/scripts/qp_import_trexio.py
@@ -84,6 +84,7 @@ def write_ezfio(trexio_filename, filename):
 
     ezfio.set_file(filename)
     ezfio.set_trexio_trexio_file(trexio_filename)
+    ezfio.set_ezfio_files_ezfio_convention(20250211)
 
     print("Nuclei\t\t...\t", end=' ')
 
@@ -315,8 +316,8 @@ def write_ezfio(trexio_filename, filename):
                 power_x.append(x)
                 power_y.append(y)
                 power_z.append(z)
-                coefficient.append(coef[i])
-                exponent.append(expo[i])
+                coefficient.append(list(coef[i]))
+                exponent.append(list(expo[i]))
                 num_prim.append(num_prim0[i])
 
         assert (len(coefficient) == ao_num)
@@ -326,15 +327,15 @@ def write_ezfio(trexio_filename, filename):
 
         prim_num_max = max( [ len(x) for x in coefficient ] )
 
-        ao_normalization = trexio.read_ao_normalization(trexio_file_cart)
-        for i, coef in enumerate(coefficient):
-           for j in range(len(coef)):
-              coef[j] *= ao_normalization[i]
-
         for i in range(ao_num):
             coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)]
             exponent   [i] += [0. for j in range(len(exponent[i]), prim_num_max)]
 
+        ao_normalization = trexio.read_ao_normalization(trexio_file_cart)
+        for i in range(ao_num):
+           for j in range(prim_num_max):
+               coefficient[i][j] *= ao_normalization[i]
+
         coefficient = reduce(lambda x, y: x + y, coefficient, [])
         exponent    = reduce(lambda x, y: x + y, exponent   , [])
 
@@ -345,6 +346,7 @@ def write_ezfio(trexio_filename, filename):
                 coef.append(coefficient[j])
                 expo.append(exponent[j])
 
+
         ezfio.set_ao_basis_ao_coef(coef)
         ezfio.set_ao_basis_ao_expo(expo)
 
diff --git a/src/ao_basis/aos.irp.f b/src/ao_basis/aos.irp.f
index d718e935..440cc865 100644
--- a/src/ao_basis/aos.irp.f
+++ b/src/ao_basis/aos.irp.f
@@ -53,42 +53,84 @@ END_PROVIDER
   C_A(3) = 0.d0
   ao_coef_normalized = 0.d0
 
-  do i=1,ao_num
+  if (primitives_normalized) then
 
-    powA(1) = ao_power(i,1)
-    powA(2) = ao_power(i,2)
-    powA(3) = ao_power(i,3)
+    if (ezfio_convention >= 20250211) then
+      ! Same primitive normalization factors for all AOs of the same shell, or read from trexio file
 
-    ! Normalization of the primitives
-    if (primitives_normalized) then
-      do j=1,ao_prim_num(i)
-        call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,j), &
-           powA,powA,overlap_x,overlap_y,overlap_z,norm,nz)
-        ao_coef_normalized(i,j) = ao_coef(i,j)/dsqrt(norm)
+      do i=1,ao_num
+        k=1
+        do while (k<=prim_num .and. shell_index(k) /= ao_shell(i))
+          k = k+1
+        end do
+        do j=1,ao_prim_num(i)
+          ao_coef_normalized(i,j) = ao_coef(i,j)*prim_normalization_factor(k+j-1)
+        enddo
       enddo
+
     else
+        ! GAMESS convention for primitive factors
+
+      do i=1,ao_num
+        powA(1) = ao_power(i,1)
+        powA(2) = ao_power(i,2)
+        powA(3) = ao_power(i,3)
+
+        do j=1,ao_prim_num(i)
+          call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,j), &
+             powA,powA,overlap_x,overlap_y,overlap_z,norm,nz)
+          ao_coef_normalized(i,j) = ao_coef(i,j)/dsqrt(norm)
+        enddo
+      enddo
+
+    endif
+
+  else
+
+    do i=1,ao_num
       do j=1,ao_prim_num(i)
         ao_coef_normalized(i,j) = ao_coef(i,j)
       enddo
-    endif
-
-    ! Normalization of the contracted basis functions
-    norm = 0.d0
-    do j=1,ao_prim_num(i)
-      do k=1,ao_prim_num(i)
-        call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,k),powA,powA,overlap_x,overlap_y,overlap_z,c,nz)
-        norm = norm+c*ao_coef_normalized(i,j)*ao_coef_normalized(i,k)
-      enddo
     enddo
-    ao_coef_normalization_factor(i) = 1.d0/dsqrt(norm)
 
-    if (ao_normalized) then
-      do j=1,ao_prim_num(i)
-        ao_coef_normalized(i,j) = ao_coef_normalized(i,j) * ao_coef_normalization_factor(i)
+  endif
+
+  double precision, allocatable :: self_overlap(:)
+  allocate(self_overlap(ao_num))
+
+  do i=1,ao_num
+    powA(1) = ao_power(i,1)
+    powA(2) = ao_power(i,2)
+    powA(3) = ao_power(i,3)
+    self_overlap(i) = 0.d0
+    do j=1,ao_prim_num(i)
+      do k=1,j-1
+        call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,k),powA,powA,overlap_x,overlap_y,overlap_z,c,nz)
+        self_overlap(i) = self_overlap(i) + 2.d0*c*ao_coef_normalized(i,j)*ao_coef_normalized(i,k)
       enddo
-    else
+      call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,j),powA,powA,overlap_x,overlap_y,overlap_z,c,nz)
+      self_overlap(i) = self_overlap(i) +c*ao_coef_normalized(i,j)*ao_coef_normalized(i,j)
+    enddo
+  enddo
+
+  if (ao_normalized) then
+
+    do i=1,ao_num
+      ao_coef_normalization_factor(i) = 1.d0/dsqrt(self_overlap(i))
+    enddo
+
+  else
+
+    do i=1,ao_num
       ao_coef_normalization_factor(i) = 1.d0
-    endif
+    enddo
+
+  endif
+
+  do i=1,ao_num
+    do j=1,ao_prim_num(i)
+      ao_coef_normalized(i,j) = ao_coef_normalized(i,j) * ao_coef_normalization_factor(i)
+    enddo
   enddo
 
 END_PROVIDER
diff --git a/src/basis/EZFIO.cfg b/src/basis/EZFIO.cfg
index 03e224e4..a9ec2c1b 100644
--- a/src/basis/EZFIO.cfg
+++ b/src/basis/EZFIO.cfg
@@ -84,4 +84,3 @@ type: logical
 doc: If true, normalize the basis functions
 interface: ezfio, provider, ocaml
 default: false
-
diff --git a/src/basis/basis.irp.f b/src/basis/basis.irp.f
index 5374e5be..81282cb5 100644
--- a/src/basis/basis.irp.f
+++ b/src/basis/basis.irp.f
@@ -1,72 +1,3 @@
-BEGIN_PROVIDER [ double precision, shell_normalization_factor , (shell_num) ]
-  implicit none
-  BEGIN_DOC
-  ! Number of primitives per |AO|
-  END_DOC
-
-  logical                        :: has
-  PROVIDE ezfio_filename
-  if (.not.ao_normalized) then
-    shell_normalization_factor = 1.d0
-    return
-  endif
-
-  if (mpi_master) then
-    if (size(shell_normalization_factor) == 0) return
-
-    call ezfio_has_basis_shell_normalization_factor(has)
-    if (has) then
-      write(6,'(A)') '.. >>>>> [ IO READ: shell_normalization_factor ] <<<<< ..'
-      call ezfio_get_basis_shell_normalization_factor(shell_normalization_factor)
-    else
-
-      double precision               :: norm,overlap_x,overlap_y,overlap_z,C_A(3), c
-      integer                        :: l, powA(3), nz
-      integer                        :: i,j,k
-      nz=100
-      C_A(1) = 0.d0
-      C_A(2) = 0.d0
-      C_A(3) = 0.d0
-
-      do i=1,shell_num
-
-        powA(1) = shell_ang_mom(i)
-        powA(2) = 0
-        powA(3) = 0
-
-        norm = 0.d0
-        do k=1, prim_num
-          if (shell_index(k) /= i) cycle
-          do j=1, prim_num
-            if (shell_index(j) /= i) cycle
-            call overlap_gaussian_xyz(C_A,C_A,prim_expo(j),prim_expo(k), &
-              powA,powA,overlap_x,overlap_y,overlap_z,c,nz)
-            norm = norm+c*prim_coef(j)*prim_coef(k) * prim_normalization_factor(j) * prim_normalization_factor(k)
-          enddo
-        enddo
-        shell_normalization_factor(i) = 1.d0/dsqrt(norm)
-      enddo
-
-    endif
-  endif
-  IRP_IF MPI_DEBUG
-  print *,  irp_here, mpi_rank
-  call MPI_BARRIER(MPI_COMM_WORLD, ierr)
-  IRP_ENDIF
-  IRP_IF MPI
-  include 'mpif.h'
-  integer                        :: ierr
-  call MPI_BCAST( shell_normalization_factor, (shell_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
-  if (ierr /= MPI_SUCCESS) then
-    stop 'Unable to read shell_normalization_factor with MPI'
-  endif
-  IRP_ENDIF
-
-  call write_time(6)
-
-END_PROVIDER
-
-
 BEGIN_PROVIDER [ double precision, prim_normalization_factor , (prim_num) ]
   implicit none
   BEGIN_DOC
diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f
index c60b1aa0..6391233a 100644
--- a/src/trexio/export_trexio_routines.irp.f
+++ b/src/trexio/export_trexio_routines.irp.f
@@ -15,6 +15,8 @@ subroutine export_trexio(update,full_path)
 
   integer, external :: getunitandopen
 
+  integer :: i,j,l
+
   if (full_path) then
     fp = trexio_filename
     call system('realpath '//trim(fp)//' > '//trim(fp)//'.tmp')
@@ -271,7 +273,7 @@ subroutine export_trexio(update,full_path)
      call trexio_assert(rc, TREXIO_SUCCESS)
 
      allocate(factor(shell_num))
-     factor(1:shell_num) = shell_normalization_factor(1:shell_num)
+     factor(1:shell_num) = 1.d0
      rc = trexio_write_basis_shell_factor(f(1), factor)
      call trexio_assert(rc, TREXIO_SUCCESS)
 
@@ -312,22 +314,27 @@ subroutine export_trexio(update,full_path)
     rc = trexio_write_ao_shell(f(1), ao_shell)
     call trexio_assert(rc, TREXIO_SUCCESS)
 
-    integer :: i, pow0(3), powA(3), j, l, nz
-    double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c
-    nz=100
+    if (ezfio_convention >= 20250211) then
+      rc = trexio_write_ao_normalization(f(1), ao_coef_normalization_factor)
+      print *, ao_coef_normalization_factor(:)
+    else
+      integer :: pow0(3), powA(3), nz
+      double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c
+      nz=100
 
-    C_A(1) = 0.d0
-    C_A(2) = 0.d0
-    C_A(3) = 0.d0
+      C_A(1) = 0.d0
+      C_A(2) = 0.d0
+      C_A(3) = 0.d0
 
-    allocate(factor(ao_num))
-    do i=1,ao_num
-      l = ao_first_of_shell(ao_shell(i))
-      factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0))
-    enddo
-    rc = trexio_write_ao_normalization(f(1), factor)
+      allocate(factor(ao_num))
+      do i=1,ao_num
+        l = ao_first_of_shell(ao_shell(i))
+        factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0))
+      enddo
+      rc = trexio_write_ao_normalization(f(1), factor)
+      deallocate(factor)
+    endif
     call trexio_assert(rc, TREXIO_SUCCESS)
-    deallocate(factor)
 
   endif
 

From 74d56370e8c6e8758fc22b50f02618d7bbb17d7a Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Wed, 12 Feb 2025 12:02:04 +0100
Subject: [PATCH 27/59] Added 1 to unsigned shift to avoid zero index

---
 external/ezfio                          | 2 +-
 src/determinants/spindeterminants.irp.f | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/external/ezfio b/external/ezfio
index dba01c4f..d02132ea 160000
--- a/external/ezfio
+++ b/external/ezfio
@@ -1 +1 @@
-Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3
+Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091
diff --git a/src/determinants/spindeterminants.irp.f b/src/determinants/spindeterminants.irp.f
index 87c5d360..01979c02 100644
--- a/src/determinants/spindeterminants.irp.f
+++ b/src/determinants/spindeterminants.irp.f
@@ -14,7 +14,7 @@ integer*8 function spin_det_search_key(det,Nint)
   END_DOC
   integer, intent(in) :: Nint
   integer(bit_kind), intent(in) :: det(Nint)
-  integer(bit_kind), parameter :: unsigned_shift = -huge(1_bit_kind) ! 100...00
+  integer(bit_kind), parameter :: unsigned_shift = 1_bit_kind-huge(1_bit_kind) ! 100...00
   integer :: i
   spin_det_search_key = det(1)
   do i=2,Nint

From 4a8551be153a7ef1937ed887a33dfb0b870be196 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Wed, 12 Feb 2025 13:26:36 +0100
Subject: [PATCH 28/59] Fixing previous commit

---
 ocaml/Angmom.ml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ocaml/Angmom.ml b/ocaml/Angmom.ml
index 4e315a95..0f1658f0 100644
--- a/ocaml/Angmom.ml
+++ b/ocaml/Angmom.ml
@@ -2,6 +2,7 @@ open Qptypes
 open Sexplib.Std
 
 type t = S|P|D|F|G|H|I|J|K|L [@@deriving sexp]
+type st = t [@@deriving sexp]
 
 let to_string = function
   | S -> "S"
@@ -70,9 +71,6 @@ let of_l i =
   | x -> raise (Failure ("Angmom should be S|P|D|F|G|H|I|J|K|L"))
 
 
-type st = t
-
-
 module Xyz = struct
   type t = { x: Positive_int.t ;
              y: Positive_int.t ;

From eb6e1d4339fe6342da40c56184119c0545b97507 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 13 Feb 2025 10:23:52 +0100
Subject: [PATCH 29/59] Introduced qp_bug.irp.f

---
 src/determinants/spindeterminants.irp.f | 22 ++++++++++++++++------
 src/utils/bug.irp.f                     | 23 +++++++++++++++++++++++
 2 files changed, 39 insertions(+), 6 deletions(-)
 create mode 100644 src/utils/bug.irp.f

diff --git a/src/determinants/spindeterminants.irp.f b/src/determinants/spindeterminants.irp.f
index 01979c02..2f497bd7 100644
--- a/src/determinants/spindeterminants.irp.f
+++ b/src/determinants/spindeterminants.irp.f
@@ -197,7 +197,9 @@ integer function get_index_in_psi_det_alpha_unique(key,Nint)
   enddo
   i += 1
 
-  ASSERT (i <= N_det_alpha_unique)
+  if (i> N_det_alpha_unique) then
+    call qp_bug(irp_here, i, 'i> N_det_alpha_unique')
+  endif
 
   !DIR$ FORCEINLINE
   do while (spin_det_search_key(psi_det_alpha_unique(1,i),Nint) == det_ref)
@@ -219,12 +221,15 @@ integer function get_index_in_psi_det_alpha_unique(key,Nint)
     endif
     i += 1
     if (i > N_det_alpha_unique) then
-      ASSERT (get_index_in_psi_det_alpha_unique > 0)
-      return
+      exit
     endif
 
   enddo
 
+  if (get_index_in_psi_det_alpha_unique <= 0) then
+    call qp_bug(irp_here, get_index_in_psi_det_alpha_unique, 'get_index_in_psi_det_alpha_unique <= 0')
+  endif
+
 end
 
 integer function get_index_in_psi_det_beta_unique(key,Nint)
@@ -277,7 +282,9 @@ integer function get_index_in_psi_det_beta_unique(key,Nint)
   enddo
   i += 1
 
-  ASSERT (i <= N_det_beta_unique)
+  if (i > N_det_beta_unique) then
+    call qp_bug(irp_here, i, 'i> N_det_beta_unique')
+  endif
 
   !DIR$ FORCEINLINE
   do while (spin_det_search_key(psi_det_beta_unique(1,i),Nint) == det_ref)
@@ -299,12 +306,15 @@ integer function get_index_in_psi_det_beta_unique(key,Nint)
     endif
     i += 1
     if (i > N_det_beta_unique) then
-      ASSERT (get_index_in_psi_det_beta_unique > 0)
-      return
+      exit
     endif
 
   enddo
 
+  if (get_index_in_psi_det_beta_unique <= 0) then
+    call qp_bug(irp_here, i, 'get_index_in_psi_det_beta_unique <= 0')
+  endif
+
 end
 
 
diff --git a/src/utils/bug.irp.f b/src/utils/bug.irp.f
new file mode 100644
index 00000000..0e2ad551
--- /dev/null
+++ b/src/utils/bug.irp.f
@@ -0,0 +1,23 @@
+subroutine qp_bug(from, code, message)
+  implicit none
+  BEGIN_DOC
+! This routine prints a bug report
+  END_DOC
+  character*(*) :: from
+  integer :: code
+  character*(*) :: message
+
+  print *, ''
+  print *, '======================='
+  print *, 'Bug in Quantum Package!'
+  print *, '======================='
+  print *, ''
+  print *, '   from: ', trim(from)
+  print *, '   code: ', code
+  print *, '   info: ', trim(message)
+  print *, ''
+  print *, 'Please report this bug at https://github.com/QuantumPackage/qp2/issues'
+  print *, 'with your output file attached.'
+  print *, ''
+  stop -1
+end subroutine qp_bug

From 70ad9f31b390b5931392a52ae375fab7c1e9f241 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 13 Feb 2025 11:58:46 +0100
Subject: [PATCH 30/59] Accelerated PT2-cholesky with dgemm

---
 external/ezfio                        |  2 +-
 src/cipsi/selection.irp.f             | 28 +++++++++++++------
 src/mo_two_e_ints/map_integrals.irp.f | 40 ++++++++++++++++++++++-----
 3 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/external/ezfio b/external/ezfio
index d02132ea..dba01c4f 160000
--- a/external/ezfio
+++ b/external/ezfio
@@ -1 +1 @@
-Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091
+Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3
diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f
index af7420c8..8c22ec85 100644
--- a/src/cipsi/selection.irp.f
+++ b/src/cipsi/selection.irp.f
@@ -1478,17 +1478,21 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
 
   integer, parameter :: bant=1
 
-  double precision, allocatable :: hij_cache1(:), hij_cache2(:)
-  allocate (hij_cache1(mo_num),hij_cache2(mo_num))
+!  double precision, allocatable :: hij_cache1(:), hij_cache2(:)
+  double precision, allocatable :: hij_cache1(:,:), hij_cache2(:,:)
+!  allocate (hij_cache1(mo_num),hij_cache2(mo_num))
 
   PROVIDE mo_integrals_threshold
 
   if(sp == 3) then ! AB
+
+    allocate(hij_cache1(mo_num,mo_num))
+
     h1 = p(1,1)
     h2 = p(1,2)
+    call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache1,mo_integrals_map)
     do p1=1, mo_num
       if(bannedOrb(p1, 1)) cycle
-      call get_mo_two_e_integrals(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map)
       do p2=1, mo_num
         if(bannedOrb(p2,2)) cycle
         if(banned(p1, p2, bant)) cycle ! rentable?
@@ -1497,7 +1501,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           call i_h_j(gen, det, N_int, hij)
         else
           phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
-          hij = hij_cache1(p2) * phase
+          hij = hij_cache1(p2,p1) * phase
         end if
         if (dabs(hij) < mo_integrals_threshold) cycle
         !DIR$ LOOP COUNT AVG(4)
@@ -1507,13 +1511,18 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
       end do
     end do
 
+    deallocate(hij_cache1)
+
   else ! AA BB
+
+    allocate(hij_cache1(mo_num,mo_num),hij_cache2(mo_num,mo_num))
+
     p1 = p(1,sp)
     p2 = p(2,sp)
+    call get_mo_two_e_integrals_ij(p2,p1,mo_num,hij_cache1,mo_integrals_map)
+    call get_mo_two_e_integrals_ij(p1,p2,mo_num,hij_cache2,mo_integrals_map)
     do puti=1, mo_num
       if (bannedOrb(puti, sp)) cycle
-      call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map)
-      call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map)
       do putj=puti+1, mo_num
         if(bannedOrb(putj, sp)) cycle
         if(banned(puti, putj, bant)) cycle ! rentable?
@@ -1522,7 +1531,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           call i_h_j(gen, det, N_int, hij)
           if (dabs(hij) < mo_integrals_threshold) cycle
         else
-          hij = hij_cache1(putj) - hij_cache2(putj)
+          hij = hij_cache1(putj,puti) - hij_cache2(putj,puti)
           if (dabs(hij) < mo_integrals_threshold) cycle
           hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
         end if
@@ -1532,9 +1541,12 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
         enddo
       end do
     end do
+
+    deallocate(hij_cache1,hij_cache2)
+
   end if
 
-  deallocate(hij_cache1,hij_cache2)
+!  deallocate(hij_cache1,hij_cache2)
 end
 
 
diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f
index 5a005d7b..8f3c72d0 100644
--- a/src/mo_two_e_ints/map_integrals.irp.f
+++ b/src/mo_two_e_ints/map_integrals.irp.f
@@ -262,8 +262,14 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map)
             out_val, 1)
         else
           integer :: isplit
-          out_val = 0.d0
-          do isplit=1,4
+          call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+              mo_integrals_cache_min-1, 1., &
+              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
+              out_val_sp, 1)
+            out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1)
+          out_val(1:mo_integrals_cache_min-1) = out_val_sp(1:mo_integrals_cache_min-1)
+          do isplit=2,4
             call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
               mo_integrals_cache_min-1, 1., &
               cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
@@ -442,16 +448,36 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map)
   double precision, intent(out)  :: out_array(sze,sze)
   type(map_type), intent(inout)  :: map
   integer                        :: j
-  real(integral_kind), allocatable :: tmp_val(:)
 
   if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max<mo_num) ) then
 
     if (do_mo_cholesky) then
 
-      call dgemm('T', 'N', mo_num, mo_num, cholesky_mo_num, 1.d0, &
-         cholesky_mo_transp(1,1,k), cholesky_mo_num, &
-         cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, &
-         out_array, sze)
+      if (mo_cholesky_double) then
+          call dgemm('T', 'N', mo_num, mo_num, cholesky_mo_num, 1.d0, &
+             cholesky_mo_transp(1,1,k), cholesky_mo_num, &
+             cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, &
+             out_array, sze)
+      else
+          integer :: isplit
+          double precision, allocatable :: out_array_sp(:,:)
+          allocate(out_array_sp(sze,sze))
+          call sgemm('T', 'N', mo_num, mo_num, &
+             cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), 1.d0, &
+             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,l), cholesky_mo_num, 0.d0, &
+             out_array_sp, sze)
+          out_array(1:sze,1:sze) = out_array_sp(1:sze,1:sze)
+          do isplit=2,4
+            call sgemm('T', 'N', mo_num, mo_num, &
+             cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), 1.d0, &
+             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,l), cholesky_mo_num, 0.d0, &
+             out_array_sp, sze)
+            out_array(1:sze,1:sze) = out_array(1:sze,1:sze) + out_array_sp(1:sze,1:sze)
+         enddo
+         deallocate(out_array_sp)
+      endif
 
     else
 

From 96e2783d1e169b295b0cb14ce0a9ee4195e5160f Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 13 Feb 2025 15:05:27 +0100
Subject: [PATCH 31/59] Add possibility of single precision cholesky vectors

---
 src/ao_two_e_ints/cholesky.irp.f      |   2 +-
 src/mo_two_e_ints/EZFIO.cfg           |   6 +
 src/mo_two_e_ints/cholesky.irp.f      |   6 +-
 src/mo_two_e_ints/map_integrals.irp.f | 193 +++++++++++++++-----------
 4 files changed, 121 insertions(+), 86 deletions(-)

diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f
index eb2f092a..d15ebdc3 100644
--- a/src/ao_two_e_ints/cholesky.irp.f
+++ b/src/ao_two_e_ints/cholesky.irp.f
@@ -469,7 +469,7 @@ END_PROVIDER
      !$OMP PARALLEL DO PRIVATE(k,j)
      do k=1,rank
        do j=1,ao_num
-           cholesky_ao(1:ao_num,j,rank-k+1) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,rank-k+1)
+           cholesky_ao(1:ao_num,j,k) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,rank-k+1)
        enddo
      enddo
      !$OMP END PARALLEL DO
diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg
index da9d8fc9..f26bfb61 100644
--- a/src/mo_two_e_ints/EZFIO.cfg
+++ b/src/mo_two_e_ints/EZFIO.cfg
@@ -29,4 +29,10 @@ doc: Read/Write MO integrals with the long range interaction from/to disk [    W
 interface: ezfio,provider,ocaml
 default: None
 
+[mo_cholesky_double]
+type: logical
+doc: Use double precision to build integrals from Cholesky vectors
+interface: ezfio,provider,ocaml
+default: True
+
 
diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f
index 835110de..062e52e2 100644
--- a/src/mo_two_e_ints/cholesky.irp.f
+++ b/src/mo_two_e_ints/cholesky.irp.f
@@ -175,9 +175,9 @@ BEGIN_PROVIDER [ real, cholesky_mo_transp_sp, (cholesky_mo_num, mo_num, mo_num)
 
  integer :: i,j,k
  !$OMP PARALLEL DO PRIVATE(k)
- do k=1,cholesky_mo_num
-  do j=1,mo_num
-    do i=1,mo_num
+ do j=1,mo_num
+  do i=1,mo_num
+   do k=1,cholesky_mo_num
       cholesky_mo_transp_sp(k,i,j) = cholesky_mo_transp(k,i,j)
     enddo
   enddo
diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f
index 8f3c72d0..bc83fbdd 100644
--- a/src/mo_two_e_ints/map_integrals.irp.f
+++ b/src/mo_two_e_ints/map_integrals.irp.f
@@ -9,15 +9,6 @@ BEGIN_PROVIDER [ logical, all_mo_integrals ]
   PROVIDE mo_two_e_integrals_in_map mo_integrals_cache mo_two_e_integrals_jj_exchange mo_two_e_integrals_jj_anti mo_two_e_integrals_jj big_array_exchange_integrals big_array_coulomb_integrals mo_one_e_integrals
 END_PROVIDER
 
-BEGIN_PROVIDER [ logical, mo_cholesky_double ]
-  implicit none
-  BEGIN_DOC
-! If true, use double precision to compute integrals from cholesky vectors
-  END_DOC
-  mo_cholesky_double = .True.
-END_PROVIDER
-
-
 !! MO Map
 !! ======
 
@@ -193,13 +184,14 @@ double precision function get_two_e_integral(i,j,k,l,map)
       if (mo_cholesky_double) then
         get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1)
       else
-        get_two_e_integral = 0.d0
-        do isplit=1,4
-          get_two_e_integral = get_two_e_integral + &
-                               sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
-                                    cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
-                                    cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1)
-        enddo
+        get_two_e_integral = sdot(cholesky_mo_num, cholesky_mo_transp_sp(1,i,k), 1, cholesky_mo_transp_sp(1,j,l), 1)
+!        get_two_e_integral = 0.d0
+!        do isplit=1,4
+!          get_two_e_integral = get_two_e_integral + &
+!                               sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+!                                    cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
+!                                    cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1)
+!        enddo
       endif
 
     else
@@ -244,7 +236,7 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map)
   ii = ior(ii, j-mo_integrals_cache_min)
 
   if (do_mo_cholesky.and. .not.mo_cholesky_double) then
-    allocate(out_val_sp(sze))
+    allocate(out_val_sp(mo_num))
   endif
 
   if (iand(ii, -mo_integrals_cache_size) == 0) then
@@ -261,22 +253,27 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map)
             cholesky_mo_transp(1,j,l), 1, 0.d0, &
             out_val, 1)
         else
-          integer :: isplit
-          call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
-              mo_integrals_cache_min-1, 1., &
-              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
-              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
-              out_val_sp, 1)
-            out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1)
+          call sgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1., &
+            cholesky_mo_transp_sp(1,1,k), cholesky_mo_num, &
+            cholesky_mo_transp_sp(1,j,l), 1, 0., &
+            out_val_sp, 1)
           out_val(1:mo_integrals_cache_min-1) = out_val_sp(1:mo_integrals_cache_min-1)
-          do isplit=2,4
-            call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
-              mo_integrals_cache_min-1, 1., &
-              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
-              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
-              out_val_sp, 1)
-            out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1)
-          enddo
+!          integer :: isplit
+!          isplit=1
+!          call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+!              mo_integrals_cache_min-1, 1., &
+!              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+!              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
+!              out_val_sp, 1)
+!          out_val(1:mo_integrals_cache_min-1) = out_val_sp(1:mo_integrals_cache_min-1)
+!          do isplit=2,4
+!              call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+!                mo_integrals_cache_min-1, 1., &
+!                cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+!                cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
+!                out_val_sp, 1)
+!              out_val(1:mo_integrals_cache_min-1) += out_val_sp(1:mo_integrals_cache_min-1)
+!          enddo
         endif
 
       else
@@ -318,15 +315,26 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map)
              cholesky_mo_transp(1,j,l), 1, 0.d0, &
              out_val(mo_integrals_cache_max+1), 1)
         else
-          out_val = 0.d0
-          do isplit=1,4
-            call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
-              mo_num-mo_integrals_cache_max, 1., &
-              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),mo_integrals_cache_max+1,k), cholesky_mo_num, &
-              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
-              out_val_sp(mo_integrals_cache_max+1), 1)
-            out_val(mo_integrals_cache_max+1:sze) += out_val_sp(mo_integrals_cache_max+1:sze)
-          enddo
+          call sgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1., &
+             cholesky_mo_transp_sp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, &
+             cholesky_mo_transp_sp(1,j,l), 1, 0., &
+             out_val_sp(mo_integrals_cache_max+1), 1)
+          out_val(mo_integrals_cache_max+1:sze) = out_val_sp(mo_integrals_cache_max+1:sze)
+!          isplit=1
+!          call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+!            mo_num-mo_integrals_cache_max, 1., &
+!            cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),mo_integrals_cache_max+1,k), cholesky_mo_num, &
+!            cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
+!            out_val_sp(mo_integrals_cache_max+1), 1)
+!          out_val(mo_integrals_cache_max+1:sze) = out_val_sp(mo_integrals_cache_max+1:sze)
+!          do isplit=2,4
+!            call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+!              mo_num-mo_integrals_cache_max, 1., &
+!              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),mo_integrals_cache_max+1,k), cholesky_mo_num, &
+!              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
+!              out_val_sp(mo_integrals_cache_max+1), 1)
+!            out_val(mo_integrals_cache_max+1:sze) += out_val_sp(mo_integrals_cache_max+1:sze)
+!          enddo
         endif
 
       else
@@ -366,15 +374,26 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map)
                cholesky_mo_transp(1,j,l), 1, 0.d0, &
                out_val, 1)
       else
-          out_val = 0.d0
-          do isplit=1,4
-            call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
-              sze, 1., &
-              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
-              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
-              out_val_sp, 1)
-            out_val(1:sze) += out_val_sp(1:sze)
-          enddo
+          call sgemv('T', cholesky_mo_num, sze, 1., &
+               cholesky_mo_transp_sp(1,1,k), cholesky_mo_num, &
+               cholesky_mo_transp_sp(1,j,l), 1, 0., &
+               out_val_sp, 1)
+          out_val(1:sze) = out_val_sp(1:sze)
+!          isplit=1
+!          call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+!            sze, 1., &
+!            cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+!            cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
+!            out_val_sp, 1)
+!          out_val(1:sze) = out_val_sp(1:sze)
+!          do isplit=2,4
+!            call sgemv('T', cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+!              sze, 1., &
+!              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+!              cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),j,l), 1, 0., &
+!              out_val_sp, 1)
+!            out_val(1:sze) += out_val_sp(1:sze)
+!          enddo
       endif
 
     else
@@ -459,23 +478,30 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map)
              cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, &
              out_array, sze)
       else
-          integer :: isplit
-          double precision, allocatable :: out_array_sp(:,:)
+          real, allocatable :: out_array_sp(:,:)
           allocate(out_array_sp(sze,sze))
-          call sgemm('T', 'N', mo_num, mo_num, &
-             cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), 1.d0, &
-             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
-             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,l), cholesky_mo_num, 0.d0, &
+          call sgemm('T', 'N', mo_num, mo_num, cholesky_mo_num, 1.0, &
+             cholesky_mo_transp_sp(1,1,k), cholesky_mo_num, &
+             cholesky_mo_transp_sp(1,1,l), cholesky_mo_num, 0.0, &
              out_array_sp, sze)
           out_array(1:sze,1:sze) = out_array_sp(1:sze,1:sze)
-          do isplit=2,4
-            call sgemm('T', 'N', mo_num, mo_num, &
-             cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), 1.d0, &
-             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
-             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,l), cholesky_mo_num, 0.d0, &
-             out_array_sp, sze)
-            out_array(1:sze,1:sze) = out_array(1:sze,1:sze) + out_array_sp(1:sze,1:sze)
-         enddo
+!
+!          isplit=1
+!          call sgemm('T', 'N', mo_num, mo_num, &
+!             cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), 1., &
+!             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+!             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,l), cholesky_mo_num, 0., &
+!             out_array_sp, sze)
+!          out_array(1:sze,1:sze) = out_array_sp(1:sze,1:sze)
+!          integer :: isplit
+!          do isplit=2,4
+!            call sgemm('T', 'N', mo_num, mo_num, &
+!             cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), 1., &
+!             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,k), cholesky_mo_num, &
+!             cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),1,l), cholesky_mo_num, 0., &
+!             out_array_sp, sze)
+!            out_array(1:sze,1:sze) = out_array(1:sze,1:sze) + out_array_sp(1:sze,1:sze)
+!         enddo
          deallocate(out_array_sp)
       endif
 
@@ -631,12 +657,13 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map)
 
           integer :: isplit
           do i=1,mo_integrals_cache_min-1
-             out_val(i) = 0.d0
-             do isplit=1,4
-               out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
-                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
-                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1)
-             enddo
+             out_val(i) = sdot(cholesky_mo_num, cholesky_mo_transp_sp(1,i,k), 1, cholesky_mo_transp_sp(1,i,l), 1)
+!             out_val(i) = 0.d0
+!             do isplit=1,4
+!               out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+!                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
+!                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1)
+!             enddo
           enddo
 
           do i=mo_integrals_cache_min,mo_integrals_cache_max
@@ -644,12 +671,13 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map)
           enddo
 
           do i=mo_integrals_cache_max, sze
-             out_val(i) = 0.d0
-             do isplit=1,4
-               out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
-                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
-                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1)
-             enddo
+             out_val(i) = sdot(cholesky_mo_num, cholesky_mo_transp_sp(1,i,k), 1, cholesky_mo_transp_sp(1,i,l), 1)
+!             out_val(i) = 0.d0
+!             do isplit=1,4
+!               out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+!                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
+!                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1)
+!             enddo
           enddo
 
         endif
@@ -663,12 +691,13 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map)
           enddo
         else
           do i=1,sze
-             out_val(i) = 0.d0
-             do isplit=1,4
-               out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
-                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
-                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1)
-             enddo
+             out_val(i) = sdot(cholesky_mo_num, cholesky_mo_transp_sp(1,i,k), 1, cholesky_mo_transp_sp(1,i,l), 1)
+!             out_val(i) = 0.d0
+!             do isplit=1,4
+!               out_val(i) += sdot(cholesky_mo_num_split(isplit+1) - cholesky_mo_num_split(isplit), &
+!                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,k), 1, &
+!                                          cholesky_mo_transp_sp(cholesky_mo_num_split(isplit),i,l), 1)
+!             enddo
           enddo
         endif
 

From bce68e7461440405f2cdaf8462a97163ed89899a Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 13 Feb 2025 16:06:05 +0100
Subject: [PATCH 32/59] Simplified hij_cache in pt2

---
 src/cipsi/selection.irp.f | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f
index 8c22ec85..99bc7013 100644
--- a/src/cipsi/selection.irp.f
+++ b/src/cipsi/selection.irp.f
@@ -1478,19 +1478,17 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
 
   integer, parameter :: bant=1
 
-!  double precision, allocatable :: hij_cache1(:), hij_cache2(:)
-  double precision, allocatable :: hij_cache1(:,:), hij_cache2(:,:)
-!  allocate (hij_cache1(mo_num),hij_cache2(mo_num))
+  double precision, allocatable :: hij_cache(:,:)
 
   PROVIDE mo_integrals_threshold
 
-  if(sp == 3) then ! AB
+  allocate(hij_cache(mo_num,mo_num))
 
-    allocate(hij_cache1(mo_num,mo_num))
+  if(sp == 3) then ! AB
 
     h1 = p(1,1)
     h2 = p(1,2)
-    call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache1,mo_integrals_map)
+    call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache,mo_integrals_map)
     do p1=1, mo_num
       if(bannedOrb(p1, 1)) cycle
       do p2=1, mo_num
@@ -1501,7 +1499,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           call i_h_j(gen, det, N_int, hij)
         else
           phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
-          hij = hij_cache1(p2,p1) * phase
+          hij = hij_cache(p2,p1) * phase
         end if
         if (dabs(hij) < mo_integrals_threshold) cycle
         !DIR$ LOOP COUNT AVG(4)
@@ -1511,16 +1509,11 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
       end do
     end do
 
-    deallocate(hij_cache1)
-
   else ! AA BB
 
-    allocate(hij_cache1(mo_num,mo_num),hij_cache2(mo_num,mo_num))
-
     p1 = p(1,sp)
     p2 = p(2,sp)
-    call get_mo_two_e_integrals_ij(p2,p1,mo_num,hij_cache1,mo_integrals_map)
-    call get_mo_two_e_integrals_ij(p1,p2,mo_num,hij_cache2,mo_integrals_map)
+    call get_mo_two_e_integrals_ij(p2,p1,mo_num,hij_cache,mo_integrals_map)
     do puti=1, mo_num
       if (bannedOrb(puti, sp)) cycle
       do putj=puti+1, mo_num
@@ -1531,7 +1524,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           call i_h_j(gen, det, N_int, hij)
           if (dabs(hij) < mo_integrals_threshold) cycle
         else
-          hij = hij_cache1(putj,puti) - hij_cache2(putj,puti)
+          hij = hij_cache(putj,puti) - hij_cache(puti,putj)
           if (dabs(hij) < mo_integrals_threshold) cycle
           hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
         end if
@@ -1542,11 +1535,10 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
       end do
     end do
 
-    deallocate(hij_cache1,hij_cache2)
-
   end if
 
-!  deallocate(hij_cache1,hij_cache2)
+  deallocate(hij_cache)
+
 end
 
 

From ed47408948436dd67146c49ddb59c8bb12e9b484 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 14 Feb 2025 11:26:06 +0100
Subject: [PATCH 33/59] Forgot EZFIO.cfg in ezfio_files

---
 src/ezfio_files/EZFIO.cfg | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 src/ezfio_files/EZFIO.cfg

diff --git a/src/ezfio_files/EZFIO.cfg b/src/ezfio_files/EZFIO.cfg
new file mode 100644
index 00000000..a65cda1a
--- /dev/null
+++ b/src/ezfio_files/EZFIO.cfg
@@ -0,0 +1,9 @@
+[ezfio_convention]
+type: integer
+doc: Version of the EZFIO conventions
+interface: ezfio, provider, ocaml
+default: 20210101
+
+# EZFIO conventions
+# 20210101: Old conventions
+# 20250211: Changed normalization of AOs: Moved GAMESS convention from primitives to AOs for compatibility with trexio.

From 7738a8ab8138301e953c4872b0a536c3c77ec643 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 14 Feb 2025 12:42:49 +0100
Subject: [PATCH 34/59] Fixed  9a840ae

---
 ocaml/qp_create_ezfio.ml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml
index ad8d44ca..9e58a18f 100644
--- a/ocaml/qp_create_ezfio.ml
+++ b/ocaml/qp_create_ezfio.ml
@@ -265,7 +265,7 @@ let run ?o b au c d m p cart xyz_file =
   let write_file () =
       (* Create EZFIO *)
       Ezfio.set_file ezfio_file;
-      Ezfio.set_files_ezfio_ezfio_convention 20250211;
+      Ezfio.set_ezfio_files_ezfio_convention 20250211;
 
       (* Write Pseudo *)
       let pseudo =

From a1ff1a3efc97e9d8a3a86c4dd0b7227aa5820196 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 17 Feb 2025 14:07:12 +0100
Subject: [PATCH 35/59] Fixed qp_create for cc-pcvnz basis

---
 external/ezfio           | 2 +-
 ocaml/qp_create_ezfio.ml | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/external/ezfio b/external/ezfio
index d02132ea..dba01c4f 160000
--- a/external/ezfio
+++ b/external/ezfio
@@ -1 +1 @@
-Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091
+Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3
diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml
index 9e58a18f..bc36593e 100644
--- a/ocaml/qp_create_ezfio.ml
+++ b/ocaml/qp_create_ezfio.ml
@@ -131,7 +131,8 @@ let run ?o b au c d m p cart xyz_file =
             let key =
               Element.to_string elem.Atom.element
             in
-            Hashtbl.add basis_table key new_channel
+            if not (Hashtbl.mem basis_table key) then
+              Hashtbl.add basis_table key new_channel
           ) nuclei
         end
       | Some (key, basis) -> (*Aux basis *)

From d04232bb7bf964896f355be29c114d7f959ac387 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 18 Feb 2025 11:36:56 +0100
Subject: [PATCH 36/59] Foce ao_normalized=true for qp_create

---
 ocaml/qp_create_ezfio.ml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml
index bc36593e..ba0e2b7b 100644
--- a/ocaml/qp_create_ezfio.ml
+++ b/ocaml/qp_create_ezfio.ml
@@ -267,6 +267,7 @@ let run ?o b au c d m p cart xyz_file =
       (* Create EZFIO *)
       Ezfio.set_file ezfio_file;
       Ezfio.set_ezfio_files_ezfio_convention 20250211;
+      Ezfio.set_basis_ao_normalized true ;
 
       (* Write Pseudo *)
       let pseudo =

From d20ac13c4f3d5b4d7d2007253aaee689a6b121b5 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 18 Feb 2025 11:55:25 +0100
Subject: [PATCH 37/59] Added del orbs in reorder_casscf

---
 src/casscf_cipsi/reorder_orb.irp.f | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/casscf_cipsi/reorder_orb.irp.f b/src/casscf_cipsi/reorder_orb.irp.f
index 3cb90522..865edd65 100644
--- a/src/casscf_cipsi/reorder_orb.irp.f
+++ b/src/casscf_cipsi/reorder_orb.irp.f
@@ -26,6 +26,11 @@ subroutine reorder_orbitals_for_casscf
   array(iorb) = 3 * mo_num + i
  enddo
 
+ do i = 1, n_del_orb
+  iorb = list_del(i)
+  array(iorb) = 4 * mo_num + i
+ enddo
+
  do i = 1, mo_num
   iorder(i) = i
  enddo

From cbad838a341ab98e1f6397951553d47caa9154a3 Mon Sep 17 00:00:00 2001
From: Antoine MARIE <amarie@irsamc.ups-tlse.fr>
Date: Tue, 25 Feb 2025 16:06:01 +0100
Subject: [PATCH 38/59] increase hard coded max rank of cholesky for
 ao_two_e_int

---
 src/ao_two_e_ints/cholesky.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f
index d15ebdc3..36061ef0 100644
--- a/src/ao_two_e_ints/cholesky.irp.f
+++ b/src/ao_two_e_ints/cholesky.irp.f
@@ -178,7 +178,7 @@ END_PROVIDER
      rank_max = np
      ! Avoid too large arrays when there are many electrons
      if (elec_num > 10) then
-       rank_max = min(np,20*elec_num*elec_num)
+       rank_max = min(np,25*elec_num*elec_num)
      endif
 
      call mmap_create_d('', (/ ndim8, rank_max /), .False., .True., map)

From ed1253f62956c10866ea83c707cf532b7f3d4d67 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 25 Feb 2025 18:09:57 +0100
Subject: [PATCH 39/59] Sort indices for faster access in RDM

---
 external/ezfio                                |   2 +-
 .../basis_correction/print_routine.irp.f      |  97 ++++++++-------
 src/ao_two_e_ints/two_e_integrals.irp.f       |   1 +
 src/cas_based_on_top/on_top_cas_prov.irp.f    |  13 +-
 src/dft_utils_in_r/ao_in_r.irp.f              | 112 ++++++++----------
 src/dft_utils_in_r/mo_in_r.irp.f              |  15 +--
 src/mu_of_r/mu_of_r_conditions.irp.f          |  85 ++++++++++++-
 src/two_rdm_routines/davidson_like_2rdm.irp.f |  88 ++++++++------
 8 files changed, 248 insertions(+), 165 deletions(-)

diff --git a/external/ezfio b/external/ezfio
index dba01c4f..d02132ea 160000
--- a/external/ezfio
+++ b/external/ezfio
@@ -1 +1 @@
-Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3
+Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091
diff --git a/plugins/local/basis_correction/print_routine.irp.f b/plugins/local/basis_correction/print_routine.irp.f
index b3b38673..8879fd5d 100644
--- a/plugins/local/basis_correction/print_routine.irp.f
+++ b/plugins/local/basis_correction/print_routine.irp.f
@@ -22,53 +22,58 @@ subroutine print_basis_correction
  print*, '****************************************'
  print*, '****************************************'
  print*, 'mu_of_r_potential = ',mu_of_r_potential
- if(mu_of_r_potential.EQ."hf".or.mu_of_r_potential.EQ."hf_old".or.mu_of_r_potential.EQ."hf_sparse")then
-   print*, ''
-   print*,'Using a HF-like two-body density to define mu(r)'
-   print*,'This assumes that HF is a qualitative representation of the wave function '
-   print*,'********************************************'
-   print*,'Functionals more suited for weak correlation'
-   print*,'********************************************'
-   print*,'+) LDA Ecmd functional     : purely based on the UEG (JCP,149,194301,1-15 (2018)) '
-   do istate = 1, N_states
-    write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD LDA           , state ',istate,' = ',ecmd_lda_mu_of_r(istate)
-   enddo
-   print*,'+) PBE-UEG Ecmd functional : PBE at mu=0, UEG ontop pair density at large mu (JPCL, 10, 2931-2937 (2019))'
-   do istate = 1, N_states
-    write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD PBE-UEG       , state ',istate,' = ',ecmd_pbe_ueg_mu_of_r(istate)
-   enddo
+ if(mu_of_r_potential.EQ."hf".or. &
+    mu_of_r_potential.EQ."hf_old".or.&
+    mu_of_r_potential.EQ."hf_sparse".or.&
+    mu_of_r_potential.EQ."proj")then
+     print*, ''
+     print*,'Using a HF-like two-body density to define mu(r)'
+     print*,'This assumes that HF is a qualitative representation of the wave function '
+     print*,'********************************************'
+     print*,'Functionals more suited for weak correlation'
+     print*,'********************************************'
+     print*,'+) LDA Ecmd functional     : purely based on the UEG (JCP,149,194301,1-15 (2018)) '
+     do istate = 1, N_states
+      write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD LDA           , state ',istate,' = ',ecmd_lda_mu_of_r(istate)
+     enddo
+     print*,'+) PBE-UEG Ecmd functional : PBE at mu=0, UEG ontop pair density at large mu (JPCL, 10, 2931-2937 (2019))'
+     do istate = 1, N_states
+      write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD PBE-UEG       , state ',istate,' = ',ecmd_pbe_ueg_mu_of_r(istate)
+     enddo
 
-  else if(mu_of_r_potential.EQ."cas_full".or.mu_of_r_potential.EQ."cas_truncated".or.mu_of_r_potential.EQ."pure_act")then
-   print*, ''
-   print*,'Using a CAS-like two-body density to define mu(r)'
-   print*,'This assumes that the CAS is a qualitative representation of the wave function '
-   print*,'********************************************'
-   print*,'Functionals more suited for weak correlation'
-   print*,'********************************************'
-   print*,'+) LDA Ecmd functional     : purely based on the UEG (JCP,149,194301,1-15 (2018)) '
-   do istate = 1, N_states
-    write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD LDA           , state ',istate,' = ',ecmd_lda_mu_of_r(istate)
-   enddo
-   print*,'+) PBE-UEG Ecmd functional : PBE at mu=0, UEG ontop pair density at large mu (JPCL, 10, 2931-2937 (2019))'
-   do istate = 1, N_states
-    write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD PBE-UEG       , state ',istate,' = ',ecmd_pbe_ueg_mu_of_r(istate)
-   enddo
-   print*,''
-   print*,'********************************************'
-   print*,'********************************************'
-   print*,'+) PBE-on-top Ecmd functional : JCP, 152, 174104 (2020) '
-   print*,'PBE at mu=0, extrapolated ontop pair density at large mu, usual spin-polarization'
-   do istate = 1, N_states
-    write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD PBE-OT        , state ',istate,' = ',ecmd_pbe_on_top_mu_of_r(istate)
-   enddo
-   print*,''
-   print*,'********************************************'
-   print*,'+) PBE-on-top no spin polarization Ecmd functional : JCP, 152, 174104 (2020)'
-   print*,'PBE at mu=0, extrapolated ontop pair density at large mu, and ZERO SPIN POLARIZATION'
-   do istate = 1, N_states
-    write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD SU-PBE-OT     , state ',istate,' = ',ecmd_pbe_on_top_su_mu_of_r(istate)
-   enddo
-   print*,''
+  else if(mu_of_r_potential.EQ."cas_full".or. &
+          mu_of_r_potential.EQ."cas_truncated".or. &
+          mu_of_r_potential.EQ."pure_act") then
+     print*, ''
+     print*,'Using a CAS-like two-body density to define mu(r)'
+     print*,'This assumes that the CAS is a qualitative representation of the wave function '
+     print*,'********************************************'
+     print*,'Functionals more suited for weak correlation'
+     print*,'********************************************'
+     print*,'+) LDA Ecmd functional     : purely based on the UEG (JCP,149,194301,1-15 (2018)) '
+     do istate = 1, N_states
+      write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD LDA           , state ',istate,' = ',ecmd_lda_mu_of_r(istate)
+     enddo
+     print*,'+) PBE-UEG Ecmd functional : PBE at mu=0, UEG ontop pair density at large mu (JPCL, 10, 2931-2937 (2019))'
+     do istate = 1, N_states
+      write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD PBE-UEG       , state ',istate,' = ',ecmd_pbe_ueg_mu_of_r(istate)
+     enddo
+     print*,''
+     print*,'********************************************'
+     print*,'********************************************'
+     print*,'+) PBE-on-top Ecmd functional : JCP, 152, 174104 (2020) '
+     print*,'PBE at mu=0, extrapolated ontop pair density at large mu, usual spin-polarization'
+     do istate = 1, N_states
+      write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD PBE-OT        , state ',istate,' = ',ecmd_pbe_on_top_mu_of_r(istate)
+     enddo
+     print*,''
+     print*,'********************************************'
+     print*,'+) PBE-on-top no spin polarization Ecmd functional : JCP, 152, 174104 (2020)'
+     print*,'PBE at mu=0, extrapolated ontop pair density at large mu, and ZERO SPIN POLARIZATION'
+     do istate = 1, N_states
+      write(*, '(A29,X,I3,X,A3,X,F16.10)') '  ECMD SU-PBE-OT     , state ',istate,' = ',ecmd_pbe_on_top_su_mu_of_r(istate)
+     enddo
+     print*,''
 
   endif
   print*,''
diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f
index fb376ce1..1cb7617e 100644
--- a/src/ao_two_e_ints/two_e_integrals.irp.f
+++ b/src/ao_two_e_ints/two_e_integrals.irp.f
@@ -54,6 +54,7 @@ double precision function ao_two_e_integral(i, j, k, l)
   else if (use_only_lr) then
 
     ao_two_e_integral = ao_two_e_integral_erf(i, j, k, l)
+    return
 
   else if (do_schwartz_accel(i,j,k,l)) then
 
diff --git a/src/cas_based_on_top/on_top_cas_prov.irp.f b/src/cas_based_on_top/on_top_cas_prov.irp.f
index dd93ed40..9f9a1f06 100644
--- a/src/cas_based_on_top/on_top_cas_prov.irp.f
+++ b/src/cas_based_on_top/on_top_cas_prov.irp.f
@@ -15,14 +15,17 @@
  pure_act_on_top_of_r = 0.d0
   do l = 1, n_act_orb
    phi_l = act_mos_in_r_array(l,ipoint)
+   if (dabs(phi_l) < 1.d-12) cycle
    do k = 1, n_act_orb
-    phi_k = act_mos_in_r_array(k,ipoint)
+    phi_k = act_mos_in_r_array(k,ipoint) * phi_l
+    if (dabs(phi_k) < 1.d-12) cycle
      do j = 1, n_act_orb
-      phi_j = act_mos_in_r_array(j,ipoint)
+      phi_j = act_mos_in_r_array(j,ipoint) * phi_k
+      if (dabs(phi_j) < 1.d-12) cycle
       do i = 1, n_act_orb
-       phi_i = act_mos_in_r_array(i,ipoint)
-       !                                       1 2 1 2
-       pure_act_on_top_of_r += act_2_rdm_ab_mo(i,j,k,l,istate) * phi_i * phi_j * phi_k * phi_l
+       phi_i = act_mos_in_r_array(i,ipoint) * phi_j
+       !                                                             1 2 1 2
+       pure_act_on_top_of_r = pure_act_on_top_of_r + act_2_rdm_ab_mo(i,j,k,l,istate) * phi_i !* phi_j * phi_k * phi_l
      enddo
     enddo
    enddo
diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f
index c8822776..ffd8c9d5 100644
--- a/src/dft_utils_in_r/ao_in_r.irp.f
+++ b/src/dft_utils_in_r/ao_in_r.irp.f
@@ -8,21 +8,14 @@ BEGIN_PROVIDER[double precision, aos_in_r_array, (ao_num,n_points_final_grid)]
   END_DOC
 
   implicit none
-  integer          :: i, j
-  double precision :: tmp_array(ao_num), r(3)
+  integer          :: i
 
   !$OMP PARALLEL DO               &
   !$OMP DEFAULT (NONE)            &
-  !$OMP PRIVATE (i,r,tmp_array,j) & 
-  !$OMP SHARED(aos_in_r_array,n_points_final_grid,ao_num,final_grid_points)
+  !$OMP PRIVATE (i) &
+  !$OMP SHARED(aos_in_r_array,n_points_final_grid,final_grid_points)
   do i = 1, n_points_final_grid
-    r(1) = final_grid_points(1,i)
-    r(2) = final_grid_points(2,i)
-    r(3) = final_grid_points(3,i)
-    call give_all_aos_at_r(r, tmp_array)
-    do j = 1, ao_num
-      aos_in_r_array(j,i) = tmp_array(j)
-    enddo
+    call give_all_aos_at_r(final_grid_points(1,i), aos_in_r_array(1,i))
   enddo
   !$OMP END PARALLEL DO
 
@@ -42,7 +35,7 @@ BEGIN_PROVIDER[double precision, aos_in_r_array_transp, (n_points_final_grid,ao_
 
   do i = 1, n_points_final_grid
     do j = 1, ao_num
-      aos_in_r_array_transp(i,j) = aos_in_r_array(j,i) 
+      aos_in_r_array_transp(i,j) = aos_in_r_array(j,i)
     enddo
   enddo
 
@@ -62,27 +55,29 @@ BEGIN_PROVIDER[double precision, aos_grad_in_r_array, (ao_num,n_points_final_gri
 
   implicit none
   integer          :: i, j, m
-  double precision :: aos_array(ao_num), r(3)
-  double precision :: aos_grad_array(3,ao_num)
+  double precision :: r(3)
+  double precision, allocatable :: aos_grad_array(:,:), aos_array(:)
 
-  !$OMP PARALLEL DO                                &
+  !$OMP PARALLEL                                   &
   !$OMP DEFAULT (NONE)                             &
-  !$OMP PRIVATE (i,j,m,r,aos_array,aos_grad_array) & 
+  !$OMP PRIVATE (i,j,m,r,aos_array,aos_grad_array) &
   !$OMP SHARED(aos_grad_in_r_array,n_points_final_grid,ao_num,final_grid_points)
+  allocate(aos_grad_array(3,ao_num), aos_array(ao_num))
+
+  !$OMP DO
   do i = 1, n_points_final_grid
-    r(1) = final_grid_points(1,i)
-    r(2) = final_grid_points(2,i)
-    r(3) = final_grid_points(3,i)
-    call give_all_aos_and_grad_at_r(r,aos_array,aos_grad_array)
+    call give_all_aos_and_grad_at_r(final_grid_points(1,i),aos_array,aos_grad_array)
     do m = 1, 3
       do j = 1, ao_num
         aos_grad_in_r_array(j,i,m) = aos_grad_array(m,j)
       enddo
     enddo
   enddo
-  !$OMP END PARALLEL DO
+  !$OMP END DO
+  deallocate(aos_grad_array,aos_array)
+  !$OMP END PARALLEL
 
-END_PROVIDER 
+END_PROVIDER
 
 ! ---
 
@@ -116,7 +111,7 @@ END_PROVIDER
    enddo
   enddo
  enddo
- END_PROVIDER 
+ END_PROVIDER
 
  BEGIN_PROVIDER [double precision, aos_lapl_in_r_array, (3,ao_num,n_points_final_grid)]
  implicit none
@@ -126,32 +121,32 @@ END_PROVIDER
  ! k = 1 : x, k= 2, y, k  3, z
  END_DOC
  integer :: i,j,m
- double precision :: aos_array(ao_num), r(3)
- double precision :: aos_grad_array(3,ao_num)
- double precision :: aos_lapl_array(3,ao_num)
- !$OMP PARALLEL DO &
+ double precision, allocatable :: aos_lapl_array(:,:), aos_grad_array(:,:), aos_array(:)
+
+ !$OMP PARALLEL        &
  !$OMP DEFAULT (NONE)  &
- !$OMP PRIVATE (i,r,aos_array,aos_grad_array,aos_lapl_array,j,m) & 
+ !$OMP PRIVATE (i,aos_array,aos_grad_array,aos_lapl_array,j,m) &
  !$OMP SHARED(aos_lapl_in_r_array,n_points_final_grid,ao_num,final_grid_points)
+ allocate( aos_array(ao_num), aos_grad_array(3,ao_num), aos_lapl_array(3,ao_num))
+ !$OMP DO 
  do i = 1, n_points_final_grid
-  r(1) = final_grid_points(1,i)
-  r(2) = final_grid_points(2,i)
-  r(3) = final_grid_points(3,i)
-  call give_all_aos_and_grad_and_lapl_at_r(r,aos_array,aos_grad_array,aos_lapl_array)
+  call give_all_aos_and_grad_and_lapl_at_r(final_grid_points(1,i),aos_array,aos_grad_array,aos_lapl_array)
   do j = 1, ao_num
    do m = 1, 3
     aos_lapl_in_r_array(m,j,i) = aos_lapl_array(m,j)
    enddo
   enddo
  enddo
- !$OMP END PARALLEL DO
+ !$OMP END DO
+ deallocate( aos_array, aos_grad_array, aos_lapl_array)
+ !$OMP END PARALLEL
  END_PROVIDER
 
  BEGIN_PROVIDER[double precision, aos_grad_in_r_array_transp_bis, (n_points_final_grid,ao_num,3)]
  implicit none
  BEGIN_DOC
-! Transposed gradients 
-! 
+! Transposed gradients
+!
  END_DOC
  integer :: i,j,m
  double precision :: aos_array(ao_num), r(3)
@@ -169,8 +164,8 @@ END_PROVIDER
  BEGIN_PROVIDER[double precision, aos_grad_in_r_array_transp_3, (3,n_points_final_grid,ao_num)]
  implicit none
  BEGIN_DOC
-! Transposed gradients 
-! 
+! Transposed gradients
+!
  END_DOC
  integer :: i,j,m
  double precision :: aos_array(ao_num), r(3)
@@ -187,22 +182,14 @@ END_PROVIDER
  BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)]
  implicit none
  BEGIN_DOC
- ! aos_in_r_array_extra(i,j)        = value of the ith ao on the jth grid point of the EXTRA grid 
+ ! aos_in_r_array_extra(i,j)        = value of the ith ao on the jth grid point of the EXTRA grid
  END_DOC
- integer :: i,j
- double precision :: aos_array(ao_num), r(3)
+ integer :: i
  !$OMP PARALLEL DO &
- !$OMP DEFAULT (NONE)  &
- !$OMP PRIVATE (i,r,aos_array,j) & 
- !$OMP SHARED(aos_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra)
+ !$OMP DEFAULT (NONE)  PRIVATE (i) &
+ !$OMP SHARED(aos_in_r_array_extra,n_points_extra_final_grid,final_grid_points_extra)
  do i = 1, n_points_extra_final_grid
-  r(1) = final_grid_points_extra(1,i)
-  r(2) = final_grid_points_extra(2,i)
-  r(3) = final_grid_points_extra(3,i)
-  call give_all_aos_at_r(r,aos_array)
-  do j = 1, ao_num
-   aos_in_r_array_extra(j,i) = aos_array(j)
-  enddo
+  call give_all_aos_at_r(final_grid_points_extra(1,i),aos_in_r_array_extra(1,i))
  enddo
  !$OMP END PARALLEL DO
 
@@ -214,9 +201,9 @@ END_PROVIDER
 BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)]
 
   BEGIN_DOC
-  ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point of the EXTRA grid 
+  ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point of the EXTRA grid
   END_DOC
- 
+
   implicit none
   integer          :: i, j
   double precision :: aos_array(ao_num), r(3)
@@ -235,27 +222,28 @@ BEGIN_PROVIDER[double precision, aos_grad_in_r_array_extra, (ao_num,n_points_ext
 
   implicit none
   integer          :: i, j, m
-  double precision :: aos_array(ao_num), r(3)
-  double precision :: aos_grad_array(3,ao_num)
+  double precision, allocatable :: aos_array(:), aos_grad_array(:,:)
 
-  !$OMP PARALLEL DO                                &
+
+  !$OMP PARALLEL                                   &
   !$OMP DEFAULT (NONE)                             &
-  !$OMP PRIVATE (i,j,m,r,aos_array,aos_grad_array) & 
+  !$OMP PRIVATE (i,j,m,aos_array,aos_grad_array) &
   !$OMP SHARED(aos_grad_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra)
+  allocate(aos_array(ao_num), aos_grad_array(3,ao_num))
+  !$OMP DO
   do i = 1, n_points_extra_final_grid
-    r(1) = final_grid_points_extra(1,i)
-    r(2) = final_grid_points_extra(2,i)
-    r(3) = final_grid_points_extra(3,i)
-    call give_all_aos_and_grad_at_r(r, aos_array, aos_grad_array)
+    call give_all_aos_and_grad_at_r(final_grid_points_extra(1,i), aos_array, aos_grad_array)
     do m = 1, 3
       do j = 1, ao_num
         aos_grad_in_r_array_extra(j,i,m) = aos_grad_array(m,j)
       enddo
     enddo
   enddo
-  !$OMP END PARALLEL DO
+  !$OMP END DO
+  deallocate(aos_array,aos_grad_array)
+  !$OMP END PARALLEL
 
-END_PROVIDER 
+END_PROVIDER
 
 ! ---
 
diff --git a/src/dft_utils_in_r/mo_in_r.irp.f b/src/dft_utils_in_r/mo_in_r.irp.f
index 623de4f8..f3cc5559 100644
--- a/src/dft_utils_in_r/mo_in_r.irp.f
+++ b/src/dft_utils_in_r/mo_in_r.irp.f
@@ -21,20 +21,11 @@
  BEGIN_DOC
  ! mos_in_r_array(i,j)        = value of the ith mo on the jth grid point
  END_DOC
- integer :: i,j
- double precision :: mos_array(mo_num), r(3)
- !$OMP PARALLEL DO &
- !$OMP DEFAULT (NONE)  &
- !$OMP PRIVATE (i,r,mos_array,j) & 
+ integer :: i
+ !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE (i) & 
  !$OMP SHARED(mos_in_r_array_omp,n_points_final_grid,mo_num,final_grid_points)
  do i = 1, n_points_final_grid
-  r(1) = final_grid_points(1,i)
-  r(2) = final_grid_points(2,i)
-  r(3) = final_grid_points(3,i)
-  call give_all_mos_at_r(r,mos_array)
-  do j = 1, mo_num
-   mos_in_r_array_omp(j,i) = mos_array(j)
-  enddo
+  call give_all_mos_at_r(final_grid_points(1,i),mos_in_r_array_omp(1,i))
  enddo
  !$OMP END PARALLEL DO
  END_PROVIDER
diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f
index 88dad8c3..3ad52a05 100644
--- a/src/mu_of_r/mu_of_r_conditions.irp.f
+++ b/src/mu_of_r/mu_of_r_conditions.irp.f
@@ -22,22 +22,32 @@
  endif
 
  do istate = 1, N_states
-  do ipoint = 1, n_points_final_grid
    if(mu_of_r_potential.EQ."hf")then
-    mu_of_r_prov(ipoint,istate) =  mu_of_r_hf(ipoint)
+     do ipoint = 1, n_points_final_grid
+       mu_of_r_prov(ipoint,istate) =  mu_of_r_hf(ipoint)
+     enddo
    else if(mu_of_r_potential.EQ."hf_old")then
-    mu_of_r_prov(ipoint,istate) =  mu_of_r_hf_old(ipoint)
+     do ipoint = 1, n_points_final_grid
+       mu_of_r_prov(ipoint,istate) =  mu_of_r_hf_old(ipoint)
+     enddo
    else if(mu_of_r_potential.EQ."hf_sparse")then
-    mu_of_r_prov(ipoint,istate) =  mu_of_r_hf_sparse(ipoint)
+     do ipoint = 1, n_points_final_grid
+       mu_of_r_prov(ipoint,istate) =  mu_of_r_hf_sparse(ipoint)
+     enddo
    else if(mu_of_r_potential.EQ."cas_full".or.mu_of_r_potential.EQ."cas_truncated".or.mu_of_r_potential.EQ."pure_act")then
-    mu_of_r_prov(ipoint,istate) =  mu_of_r_psi_cas(ipoint,istate)
+     do ipoint = 1, n_points_final_grid
+       mu_of_r_prov(ipoint,istate) =  mu_of_r_psi_cas(ipoint,istate)
+     enddo
+   else if(mu_of_r_potential.EQ."proj")then
+     do ipoint = 1, n_points_final_grid
+       mu_of_r_prov(ipoint,istate) =  mu_of_r_projector_mo(ipoint)
+     enddo
    else
     print*,'you requested the following mu_of_r_potential'
     print*,mu_of_r_potential
     print*,'which does not correspond to any of the options for such keyword'
     stop
    endif
-  enddo
  enddo
 
  if (write_mu_of_r) then
@@ -225,3 +235,66 @@
  enddo
  END_PROVIDER
 
+
+BEGIN_PROVIDER [double precision, mu_of_r_projector_mo, (n_points_final_grid) ]
+ implicit none
+ BEGIN_DOC
+ ! mu(r) computed with the projector onto the atomic basis
+ !  P_B(\mathbf{r},\mathbf{r}') = \sum_{ij} |
+ !  \chi_{i} \rangle \left[S^{-1}\right]_{ij} \langle \chi_{j} |
+ !  \] where $i$ and $j$ denote all atomic orbitals.
+ END_DOC
+
+ double precision, parameter :: factor = dsqrt(2.d0*dacos(-1.d0))
+ double precision, allocatable :: tmp(:,:)
+ integer :: ipoint
+
+
+ do ipoint=1,n_points_final_grid
+   mu_of_r_projector_mo(ipoint) = 0.d0
+   integer :: i,j
+   do j=1,n_inact_act_orb
+     i = list_inact_act(j)
+     mu_of_r_projector_mo(ipoint) = mu_of_r_projector_mo(ipoint) + &
+         mos_in_r_array_omp(i,ipoint) * mos_in_r_array_omp(i,ipoint)
+   enddo
+   do j=1,n_virt_orb
+     i = list_virt(j)
+     mu_of_r_projector_mo(ipoint) = mu_of_r_projector_mo(ipoint) + &
+         mos_in_r_array_omp(i,ipoint) * mos_in_r_array_omp(i,ipoint)
+   enddo
+ enddo
+
+ do ipoint=1,n_points_final_grid
+   ! epsilon
+   mu_of_r_projector_mo(ipoint) = 1.d0/(2.d0*dacos(-1.d0) * mu_of_r_projector_mo(ipoint)**(2.d0/3.d0))
+   ! mu
+   mu_of_r_projector_mo(ipoint) = 1.d0/dsqrt( mu_of_r_projector_mo(ipoint) )
+ enddo
+END_PROVIDER
+
+
+
+BEGIN_PROVIDER [double precision, mu_average_proj, (N_states)]
+ implicit none
+ BEGIN_DOC
+ ! average value of mu(r) weighted with the total one-e density and divided by the number of electrons
+ !
+ ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals
+ !
+ ! in the one- and two-body density matrix are excluded
+ END_DOC
+ integer :: ipoint,istate
+ double precision :: weight,density
+ do istate = 1, N_states
+  mu_average_proj(istate) = 0.d0
+  do ipoint = 1, n_points_final_grid
+   weight =final_weight_at_r_vector(ipoint)
+   density = one_e_dm_and_grad_alpha_in_r(4,ipoint,istate) &
+           + one_e_dm_and_grad_beta_in_r(4,ipoint,istate)
+   mu_average_proj(istate) += mu_of_r_projector_mo(ipoint) * weight * density
+  enddo
+  mu_average_proj(istate) = mu_average_proj(istate) / elec_num_grid_becke(istate)
+ enddo
+END_PROVIDER
+
diff --git a/src/two_rdm_routines/davidson_like_2rdm.irp.f b/src/two_rdm_routines/davidson_like_2rdm.irp.f
index 09436663..f0b40459 100644
--- a/src/two_rdm_routines/davidson_like_2rdm.irp.f
+++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f
@@ -139,6 +139,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
 
    call list_to_bitstring( orb_bitmask, list_orb, norb, N_int)
    sze_buff = 6 * norb + elec_alpha_num * elec_alpha_num * 60
+   sze_buff = sze_buff*100
    list_orb_reverse = -1000
    do i = 1, norb
     list_orb_reverse(list_orb(i)) = i
@@ -191,7 +192,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
    ASSERT (istart > 0)
    ASSERT (istep  > 0)
 
-    !$OMP DO SCHEDULE(dynamic,64)
+    !$OMP DO SCHEDULE(dynamic)
    do k_a=istart+ishift,iend,istep
 
      krow = psi_bilinear_matrix_rows(k_a)
@@ -272,14 +273,14 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
        enddo
       endif
 
-      call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-      nkeys = 0
      enddo
+!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!     nkeys = 0
 
    enddo
      !$OMP END DO
 
-     !$OMP DO SCHEDULE(dynamic,64)
+     !$OMP DO SCHEDULE(dynamic)
    do k_a=istart+ishift,iend,istep
 
 
@@ -343,24 +344,24 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
          c_1(l) = u_t(l,l_a) * u_t(l,k_a)
        enddo
        if(alpha_beta.or.spin_trace.or.alpha_alpha)then
-       ! increment the alpha/beta part for single excitations
-       if (nkeys+ 2 * elec_alpha_num .ge. sze_buff) then
-         call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-         nkeys = 0
-       endif
-       call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
-       ! increment the alpha/alpha part for single excitations
-       if (nkeys+4 * elec_alpha_num .ge. sze_buff ) then
-         call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-         nkeys = 0
-       endif
-       call orb_range_off_diag_single_to_all_states_aa_dm_buffer(tmp_det,tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
+         ! increment the alpha/beta part for single excitations
+         if (nkeys+ 2 * elec_alpha_num .ge. sze_buff) then
+           call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+           nkeys = 0
+         endif
+         call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
+         ! increment the alpha/alpha part for single excitations
+         if (nkeys+4 * elec_alpha_num .ge. sze_buff ) then
+           call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+           nkeys = 0
+         endif
+         call orb_range_off_diag_single_to_all_states_aa_dm_buffer(tmp_det,tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
        endif
 
      enddo
 
-     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-     nkeys = 0
+!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!     nkeys = 0
 
      ! Compute Hij for all alpha doubles
      ! ----------------------------------
@@ -383,8 +384,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
         call orb_range_off_diag_double_to_all_states_aa_dm_buffer(tmp_det(1,1),psi_det_alpha_unique(1, lrow),c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
       enddo
      endif
-     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-     nkeys = 0
+!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!     nkeys = 0
 
 
      ! Single and double beta excitations
@@ -459,8 +460,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
         call orb_range_off_diag_single_to_all_states_bb_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
        endif
      enddo
-     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-     nkeys = 0
+!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!     nkeys = 0
 
      ! Compute Hij for all beta doubles
      ! ----------------------------------
@@ -487,8 +488,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
 
       enddo
      endif
-     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-     nkeys = 0
+!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!     nkeys = 0
 
 
      ! Diagonal contribution
@@ -550,22 +551,43 @@ subroutine update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,loc
 
  integer :: istate
  integer :: i,h1,h2,p1,p2
- call omp_set_lock(lock_2rdm)
+ integer, allocatable :: iorder(:)
+ integer*8, allocatable :: to_sort(:)
+
+ allocate(iorder(nkeys))
+ do i=1,nkeys
+   iorder(i) = i
+ enddo
+
+ ! If the lock is already taken, sort the keys while waiting for a faster access
+ if (.not.omp_test_lock(lock_2rdm)) then
+   allocate(to_sort(nkeys))
+   do i=1,nkeys
+     h1 = keys(1,iorder(i))
+     h2 = keys(2,iorder(i))-1
+     p1 = keys(3,iorder(i))-1
+     p2 = keys(4,iorder(i))-1
+     to_sort(i) = int(h1,8) + int(dim1,8)*(int(h2,8) + int(dim1,8)*(int(p1,8) + int(dim1,8)*int(p2,8)))
+   enddo
+   call i8sort(to_sort, iorder, nkeys)
+   deallocate(to_sort)
+   call omp_set_lock(lock_2rdm)
+ endif
 
 ! print*,'*************'
 ! print*,'updating'
 ! print*,'nkeys',nkeys
- do i = 1, nkeys
-   h1 = keys(1,i)
-   h2 = keys(2,i)
-   p1 = keys(3,i)
-   p2 = keys(4,i)
-   do istate = 1, N_st
-!    print*,h1,h2,p1,p2,values(istate,i)
-    big_array(h1,h2,p1,p2,istate) += values(istate,i)
+ do istate = 1, N_st
+   do i = 1, nkeys
+    h1 = keys(1,iorder(i))
+    h2 = keys(2,iorder(i))
+    p1 = keys(3,iorder(i))
+    p2 = keys(4,iorder(i))
+    big_array(h1,h2,p1,p2,istate) = big_array(h1,h2,p1,p2,istate) + values(istate,iorder(i))
    enddo
  enddo
  call omp_unset_lock(lock_2rdm)
+ deallocate(iorder)
 
 end
 

From 62c13860bae511e8641208b880d71c6176129325 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Wed, 26 Feb 2025 14:42:39 +0100
Subject: [PATCH 40/59] update two_rdm_routines

---
 src/bitmask/bitmasks_routines.irp.f           |   39 +-
 src/two_body_rdm/act_2_rdm.irp.f              |    1 +
 src/two_rdm_routines/davidson_like_2rdm.irp.f |    2 +
 src/two_rdm_routines/update_rdm.irp.f         | 1647 +++++++++--------
 4 files changed, 854 insertions(+), 835 deletions(-)

diff --git a/src/bitmask/bitmasks_routines.irp.f b/src/bitmask/bitmasks_routines.irp.f
index 9c6f4f0c..be0a2dd1 100644
--- a/src/bitmask/bitmasks_routines.irp.f
+++ b/src/bitmask/bitmasks_routines.irp.f
@@ -283,33 +283,16 @@ subroutine print_det_one_dimension(string,Nint)
 
 end
 
-logical function is_integer_in_string(bite,string,Nint)
-  use bitmasks
+logical function is_integer_in_string(orb,bitmask,Nint)
+ use bitmasks
  implicit none
- integer, intent(in) :: bite,Nint
- integer(bit_kind), intent(in) :: string(Nint)
- integer(bit_kind) :: string_bite(Nint)
- integer   :: i,itot,itot_and
- character*(2048)                :: output(1)
- string_bite = 0_bit_kind
- call set_bit_to_integer(bite,string_bite,Nint)
- itot = 0
- itot_and = 0
- is_integer_in_string = .False.
-!print*,''
-!print*,''
-!print*,'bite = ',bite
-!call bitstring_to_str( output(1), string_bite, Nint )
-! print *,  trim(output(1))
-!call bitstring_to_str( output(1), string, Nint )
-! print *,  trim(output(1))
- do i = 1, Nint
-  itot += popcnt(string(i))
-  itot_and += popcnt(ior(string(i),string_bite(i)))
- enddo
-!print*,'itot,itot_and',itot,itot_and
- if(itot == itot_and)then
-  is_integer_in_string = .True. 
- endif
-!pause 
+ BEGIN_DOC
+! Checks is the orbital orb is set to 1 in the bit string
+ END_DOC
+ integer, intent(in) :: orb, Nint
+ integer(bit_kind), intent(in) :: bitmask(Nint)
+ integer :: j, k
+ k = ishft(orb-1,-bit_kind_shift)+1
+ j = orb-ishft(k-1,bit_kind_shift)-1
+ is_integer_in_string = iand(bitmask(k), ibset(0_bit_kind, j)) /= 0_bit_kind
 end
diff --git a/src/two_body_rdm/act_2_rdm.irp.f b/src/two_body_rdm/act_2_rdm.irp.f
index 9e2ea018..6cad72be 100644
--- a/src/two_body_rdm/act_2_rdm.irp.f
+++ b/src/two_body_rdm/act_2_rdm.irp.f
@@ -145,6 +145,7 @@
  print*,''
  print*,'Providing act_2_rdm_spin_trace_mo '
  character*(128) :: name_file
+ PROVIDE all_mo_integrals
  name_file = 'act_2_rdm_spin_trace_mo'
  ispin = 4
  act_2_rdm_spin_trace_mo = 0.d0
diff --git a/src/two_rdm_routines/davidson_like_2rdm.irp.f b/src/two_rdm_routines/davidson_like_2rdm.irp.f
index f0b40459..0e6899f8 100644
--- a/src/two_rdm_routines/davidson_like_2rdm.irp.f
+++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f
@@ -194,6 +194,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
 
     !$OMP DO SCHEDULE(dynamic)
    do k_a=istart+ishift,iend,istep
+!print *, 'aa', k_a, '/', iend
 
      krow = psi_bilinear_matrix_rows(k_a)
      ASSERT (krow <= N_det_alpha_unique)
@@ -282,6 +283,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
 
      !$OMP DO SCHEDULE(dynamic)
    do k_a=istart+ishift,iend,istep
+!print *, 'ab', k_a, '/', iend
 
 
      ! Single and double alpha exitations
diff --git a/src/two_rdm_routines/update_rdm.irp.f b/src/two_rdm_routines/update_rdm.irp.f
index 8aeb0699..978f6e33 100644
--- a/src/two_rdm_routines/update_rdm.irp.f
+++ b/src/two_rdm_routines/update_rdm.irp.f
@@ -1,762 +1,897 @@
-  subroutine orb_range_diag_to_all_states_2_rdm_dm_buffer(det_1,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
+logical function is_integer_in_string_local(orb,bitmask,Nint)
+  use bitmasks
+  implicit none
+  integer, intent(in)            :: orb, Nint
+  integer(bit_kind), intent(in)  :: bitmask(Nint)
+  integer                        :: j, k
+  k = ishft(orb-1,-bit_kind_shift)+1
+  j = orb-ishft(k-1,bit_kind_shift)-1
+  is_integer_in_string_local = iand(bitmask(k), ibset(0_bit_kind, j)) /= 0_bit_kind
+end
+
+subroutine orb_range_diag_to_all_states_2_rdm_dm_buffer(det_1,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
   use bitmasks
   BEGIN_DOC
- !  routine that update the DIAGONAL PART of the two body rdms in a specific range of orbitals for a given determinant det_1
- !  
- !  c_1 is the array of the contributions to the rdm for all states 
- !  
- !  orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
- ! 
- !  ispin determines which spin-spin component of the two-rdm you will update 
- ! 
- !  ispin   == 1 :: alpha/ alpha
- !  ispin   == 2 :: beta / beta
- !  ispin   == 3 :: alpha/ beta
- !  ispin   == 4 :: spin traced <=> total two-rdm 
- END_DOC
- implicit none
- integer, intent(in) :: ispin,sze_buff,N_st
- integer, intent(in) :: list_orb_reverse(mo_num)
- integer(bit_kind), intent(in)  :: det_1(N_int,2)
- integer(bit_kind), intent(in)  :: orb_bitmask(N_int)
- double precision, intent(in)   :: c_1(N_st)
- double precision, intent(out)  :: values(N_st,sze_buff)
- integer         , intent(out)  :: keys(4,sze_buff)
- integer         , intent(inout):: nkeys
-
- integer                        :: occ(N_int*bit_kind_size,2)
- integer                        :: n_occ_ab(2)
- integer :: i,j,h1,h2
- integer(bit_kind) :: det_1_act(N_int,2)
- logical                        :: alpha_alpha,beta_beta,alpha_beta,spin_trace
- do i = 1, N_int
-  det_1_act(i,1) =   iand(det_1(i,1),orb_bitmask(i)) 
-  det_1_act(i,2) =   iand(det_1(i,2),orb_bitmask(i)) 
- enddo
- 
- alpha_alpha = .False.   
- beta_beta   = .False.
- alpha_beta  = .False.
- spin_trace  = .False.
- if(     ispin == 1)then
-  alpha_alpha = .True.
- else if(ispin == 2)then
-  beta_beta   = .True.
- else if(ispin == 3)then
-  alpha_beta  = .True.
- else if(ispin == 4)then
-  spin_trace  = .True.
- endif
- call bitstring_to_list_ab(det_1_act, occ, n_occ_ab, N_int)
- logical :: is_integer_in_string
- integer :: i1,i2,istate
- if(alpha_beta)then
-   do i = 1, n_occ_ab(1)
-    i1 = occ(i,1)
-    do j = 1, n_occ_ab(2)
-     i2 = occ(j,2)
-     h1 = list_orb_reverse(i1)
-     h2 = list_orb_reverse(i2)
-      ! If alpha/beta, electron 1 is alpha, electron 2 is beta
-      ! Therefore you don't necessayr have symmetry between electron 1 and 2 
-     nkeys += 1
-     do istate = 1, N_st
-      values(istate,nkeys) = 0.5d0 * c_1(istate)
-     enddo
-     keys(1,nkeys) = h1
-     keys(2,nkeys) = h2
-     keys(3,nkeys) = h1
-     keys(4,nkeys) = h2
-
-     nkeys += 1
-     do istate = 1, N_st
-      values(istate,nkeys) = 0.5d0 * c_1(istate)
-     enddo
-     keys(1,nkeys) = h2
-     keys(2,nkeys) = h1
-     keys(3,nkeys) = h2
-     keys(4,nkeys) = h1
-    enddo 
-   enddo
-
- else if (alpha_alpha)then
-  do i = 1, n_occ_ab(1)
-   i1 = occ(i,1)
-   do j = 1, n_occ_ab(1)
-    i2 = occ(j,1)
-    h1 = list_orb_reverse(i1)
-    h2 = list_orb_reverse(i2)
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = 0.5d0 * c_1(istate)
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = h1
-    keys(4,nkeys) = h2
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = -0.5d0 * c_1(istate) 
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = h2
-    keys(4,nkeys) = h1
-   enddo
-  enddo
- else if (beta_beta)then
-  do i = 1, n_occ_ab(2)
-   i1 = occ(i,2)
-   do j = 1, n_occ_ab(2)
-    i2 = occ(j,2)
-    h1 = list_orb_reverse(i1)
-    h2 = list_orb_reverse(i2)
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = 0.5d0 * c_1(istate)
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = h1
-    keys(4,nkeys) = h2
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = -0.5d0 * c_1(istate)
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = h2
-    keys(4,nkeys) = h1
-   enddo
-  enddo
- else if(spin_trace)then
-  ! 0.5 * (alpha beta + beta alpha)
-  do i = 1, n_occ_ab(1)
-   i1 = occ(i,1)
-   do j = 1, n_occ_ab(2)
-    i2 = occ(j,2)
-    h1 = list_orb_reverse(i1)
-    h2 = list_orb_reverse(i2)
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = 0.5d0 * c_1(istate)
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = h1
-    keys(4,nkeys) = h2
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = 0.5d0 * c_1(istate)
-    enddo
-    keys(1,nkeys) = h2
-    keys(2,nkeys) = h1
-    keys(3,nkeys) = h2
-    keys(4,nkeys) = h1
-   enddo 
-  enddo
-  do i = 1, n_occ_ab(1)
-   i1 = occ(i,1)
-   do j = 1, n_occ_ab(1)
-    i2 = occ(j,1)
-    h1 = list_orb_reverse(i1)
-    h2 = list_orb_reverse(i2)
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = 0.5d0 * c_1(istate)
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = h1
-    keys(4,nkeys) = h2
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = -0.5d0 * c_1(istate)
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = h2
-    keys(4,nkeys) = h1
-   enddo
-  enddo
-  do i = 1, n_occ_ab(2)
-   i1 = occ(i,2)
-   do j = 1, n_occ_ab(2)
-    i2 = occ(j,2)
-    h1 = list_orb_reverse(i1)
-    h2 = list_orb_reverse(i2)
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = 0.5d0 * c_1(istate)
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = h1
-    keys(4,nkeys) = h2
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = -0.5d0 * c_1(istate)
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = h2
-    keys(4,nkeys) = h1
-   enddo
-  enddo
- endif
- end
-
-
- subroutine orb_range_off_diag_double_to_all_states_ab_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
- use bitmasks
- BEGIN_DOC
-! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for 
-!
-! a given couple of determinant det_1, det_2 being a alpha/beta DOUBLE excitation with respect to one another
-! 
-!  c_1 is the array of the contributions to the rdm for all states 
-! 
-! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
-!
-! ispin determines which spin-spin component of the two-rdm you will update 
-!
-! ispin   == 1 :: alpha/ alpha
-! ispin   == 2 :: beta / beta
-! ispin   == 3 :: alpha/ beta
-! ispin   == 4 :: spin traced <=> total two-rdm 
-!
-! here, only ispin == 3 or 4 will do something
- END_DOC
- implicit none
- integer, intent(in) :: ispin,sze_buff,N_st
- integer(bit_kind), intent(in)  :: det_1(N_int,2),det_2(N_int,2)
- integer, intent(in) :: list_orb_reverse(mo_num)
- double precision, intent(in)   :: c_1(N_st)
- double precision, intent(out)  :: values(N_st,sze_buff)
- integer         , intent(out)  :: keys(4,sze_buff)
- integer         , intent(inout):: nkeys
- integer :: i,j,h1,h2,p1,p2,istate
- integer                        :: exc(0:2,2,2)
- double precision               :: phase
- logical                        :: alpha_alpha,beta_beta,alpha_beta,spin_trace
- logical :: is_integer_in_string
- alpha_alpha = .False.   
- beta_beta   = .False.
- alpha_beta  = .False.
- spin_trace  = .False.
- if(     ispin == 1)then
-  alpha_alpha = .True.
- else if(ispin == 2)then
-  beta_beta   = .True.
- else if(ispin == 3)then
-  alpha_beta  = .True.
- else if(ispin == 4)then
-  spin_trace  = .True.
- endif
- call get_double_excitation(det_1,det_2,exc,phase,N_int)
- h1 = exc(1,1,1) 
- if(list_orb_reverse(h1).lt.0)return
- h1 = list_orb_reverse(h1)
- h2 = exc(1,1,2) 
- if(list_orb_reverse(h2).lt.0)return
- h2 = list_orb_reverse(h2)
- p1 = exc(1,2,1)
- if(list_orb_reverse(p1).lt.0)return
- p1 = list_orb_reverse(p1)
- p2 = exc(1,2,2)
- if(list_orb_reverse(p2).lt.0)return
- p2 = list_orb_reverse(p2)
- if(alpha_beta)then
-   nkeys += 1
-   do istate = 1, N_st
-    values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-   enddo
-   keys(1,nkeys) = h1
-   keys(2,nkeys) = h2
-   keys(3,nkeys) = p1
-   keys(4,nkeys) = p2
-   nkeys += 1
-   do istate = 1, N_st
-    values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-   enddo
-   keys(1,nkeys) = h2
-   keys(2,nkeys) = h1
-   keys(3,nkeys) = p2
-   keys(4,nkeys) = p1 
- else if(spin_trace)then
-   nkeys += 1
-   do istate = 1, N_st
-    values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-   enddo
-   keys(1,nkeys) = h1
-   keys(2,nkeys) = h2
-   keys(3,nkeys) = p1
-   keys(4,nkeys) = p2
-   nkeys += 1
-   do istate = 1, N_st
-    values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-   enddo
-   keys(1,nkeys) = h2
-   keys(2,nkeys) = h1
-   keys(3,nkeys) = p2
-   keys(4,nkeys) = p1 
- endif
- end
-
-  subroutine orb_range_off_diag_single_to_all_states_ab_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
-  use bitmasks
-  BEGIN_DOC
- ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for 
- !
- ! a given couple of determinant det_1, det_2 being a SINGLE excitation with respect to one another
- ! 
- ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1
- ! 
- ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation 
- !
- ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
- !
- ! ispin determines which spin-spin component of the two-rdm you will update 
- !
- ! ispin   == 1 :: alpha/ alpha
- ! ispin   == 2 :: beta / beta
- ! ispin   == 3 :: alpha/ beta
- ! ispin   == 4 :: spin traced <=> total two-rdm 
- !
- ! here, only ispin == 3 or 4 will do something
+  !  routine that update the DIAGONAL PART of the two body rdms in a specific range of orbitals for a given determinant det_1
+  !
+  !  c_1 is the array of the contributions to the rdm for all states
+  !
+  !  orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
+  !
+  !  ispin determines which spin-spin component of the two-rdm you will update
+  !
+  !  ispin   == 1                :: alpha/ alpha
+  !  ispin   == 2                :: beta / beta
+  !  ispin   == 3                :: alpha/ beta
+  !  ispin   == 4                :: spin traced <=> total two-rdm
   END_DOC
   implicit none
- integer, intent(in) :: ispin,sze_buff,N_st
- integer(bit_kind), intent(in)  :: det_1(N_int,2),det_2(N_int,2)
- integer, intent(in) :: list_orb_reverse(mo_num)
- integer(bit_kind), intent(in)  :: orb_bitmask(N_int)
- double precision, intent(in)   :: c_1(N_st)
- double precision, intent(out)  :: values(N_st,sze_buff)
- integer         , intent(out)  :: keys(4,sze_buff)
- integer         , intent(inout):: nkeys
- 
+  integer, intent(in)            :: ispin,sze_buff,N_st
+  integer, intent(in)            :: list_orb_reverse(mo_num)
+  integer(bit_kind), intent(in)  :: det_1(N_int,2)
+  integer(bit_kind), intent(in)  :: orb_bitmask(N_int)
+  double precision, intent(in)   :: c_1(N_st)
+  double precision, intent(out)  :: values(N_st,sze_buff)
+  integer         , intent(out)  :: keys(4,sze_buff)
+  integer         , intent(inout) :: nkeys
+
   integer                        :: occ(N_int*bit_kind_size,2)
   integer                        :: n_occ_ab(2)
-  integer :: i,j,h1,h2,p1,istate
-  integer                        :: exc(0:2,2,2)
-  double precision               :: phase
- 
+  integer                        :: i,j,h1,h2
+  integer(bit_kind)              :: det_1_act(N_int,2)
   logical                        :: alpha_alpha,beta_beta,alpha_beta,spin_trace
-  logical :: is_integer_in_string
-  alpha_alpha = .False.   
+  do i = 1, N_int
+    det_1_act(i,1) =   iand(det_1(i,1),orb_bitmask(i))
+    det_1_act(i,2) =   iand(det_1(i,2),orb_bitmask(i))
+  enddo
+
+  alpha_alpha = .False.
   beta_beta   = .False.
   alpha_beta  = .False.
   spin_trace  = .False.
   if(     ispin == 1)then
-   alpha_alpha = .True.
+    alpha_alpha = .True.
   else if(ispin == 2)then
-   beta_beta   = .True.
+    beta_beta   = .True.
   else if(ispin == 3)then
-   alpha_beta  = .True.
+    alpha_beta  = .True.
   else if(ispin == 4)then
-   spin_trace  = .True.
+    spin_trace  = .True.
   endif
- 
+
+  call bitstring_to_list_ab(det_1_act, occ, n_occ_ab, N_int)
+  logical                        :: is_integer_in_string_local
+  integer                        :: i1,i2,istate
+  if(alpha_beta)then
+    do i = 1, n_occ_ab(1)
+      i1 = occ(i,1)
+      h1 = list_orb_reverse(i1)
+      do j = 1, n_occ_ab(2)
+        i2 = occ(j,2)
+        h2 = list_orb_reverse(i2)
+        ! If alpha/beta, electron 1 is alpha, electron 2 is beta
+        ! Therefore you don't necessayr have symmetry between electron 1 and 2
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = 0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h1
+        keys(4,nkeys) = h2
+
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = 0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = h1
+      enddo
+    enddo
+
+  else if (alpha_alpha)then
+
+    do i = 1, n_occ_ab(1)
+      i1 = occ(i,1)
+      h1 = list_orb_reverse(i1)
+      do j = 1, n_occ_ab(1)
+        i2 = occ(j,1)
+        h2 = list_orb_reverse(i2)
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = 0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h1
+        keys(4,nkeys) = h2
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = -0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = h1
+      enddo
+    enddo
+
+  else if (beta_beta)then
+
+    do i = 1, n_occ_ab(2)
+      i1 = occ(i,2)
+      h1 = list_orb_reverse(i1)
+      do j = 1, n_occ_ab(2)
+        i2 = occ(j,2)
+        h2 = list_orb_reverse(i2)
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = 0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h1
+        keys(4,nkeys) = h2
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = -0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = h1
+      enddo
+    enddo
+
+  else if(spin_trace)then
+
+    ! 0.5 * (alpha beta + beta alpha)
+    do i = 1, n_occ_ab(1)
+      i1 = occ(i,1)
+      h1 = list_orb_reverse(i1)
+      do j = 1, n_occ_ab(2)
+        i2 = occ(j,2)
+        h2 = list_orb_reverse(i2)
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = 0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h1
+        keys(4,nkeys) = h2
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = 0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = h1
+      enddo
+      do j = 1, n_occ_ab(1)
+        i2 = occ(j,1)
+        h2 = list_orb_reverse(i2)
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = 0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h1
+        keys(4,nkeys) = h2
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = -0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = h1
+      enddo
+    enddo
+    do i = 1, n_occ_ab(2)
+      i1 = occ(i,2)
+      h1 = list_orb_reverse(i1)
+      do j = 1, n_occ_ab(2)
+        i2 = occ(j,2)
+        h2 = list_orb_reverse(i2)
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = 0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h1
+        keys(4,nkeys) = h2
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = -0.5d0 * c_1(istate)
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = h1
+      enddo
+    enddo
+  endif
+end
+
+
+subroutine orb_range_off_diag_double_to_all_states_ab_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
+  use bitmasks
+  BEGIN_DOC
+  ! routine that updates the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for
+  !
+  ! a given couple of determinant det_1, det_2 being a alpha/beta DOUBLE excitation with respect to one another
+  !
+  !  c_1 is the array of the contributions to the rdm for all states
+  !
+  ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
+  !
+  ! ispin determines which spin-spin component of the two-rdm you will update
+  !
+  ! ispin   == 1                 :: alpha/ alpha
+  ! ispin   == 2                 :: beta / beta
+  ! ispin   == 3                 :: alpha/ beta
+  ! ispin   == 4                 :: spin traced <=> total two-rdm
+  !
+  ! here, only ispin == 3 or 4 will do something
+  END_DOC
+  implicit none
+  integer, intent(in)            :: ispin,sze_buff,N_st
+  integer(bit_kind), intent(in)  :: det_1(N_int,2),det_2(N_int,2)
+  integer, intent(in)            :: list_orb_reverse(mo_num)
+  double precision, intent(in)   :: c_1(N_st)
+  double precision, intent(out)  :: values(N_st,sze_buff)
+  integer         , intent(out)  :: keys(4,sze_buff)
+  integer         , intent(inout) :: nkeys
+  integer                        :: i,j,h1,h2,p1,p2,istate
+  integer                        :: exc(0:2,2,2)
+  double precision               :: phase
+  logical                        :: alpha_alpha,beta_beta,alpha_beta,spin_trace
+  logical                        :: is_integer_in_string_local
+  if (ispin <= 2) return
+
+!  alpha_alpha = .False.
+!  beta_beta   = .False.
+!  alpha_beta  = .False.
+!  spin_trace  = .False.
+!  if(     ispin == 1)then
+!    alpha_alpha = .True.
+!  else if(ispin == 2)then
+!    beta_beta   = .True.
+!  else if(ispin == 3)then
+!    alpha_beta  = .True.
+!  else if(ispin == 4)then
+!    spin_trace  = .True.
+!  endif
+  call get_double_excitation(det_1,det_2,exc,phase,N_int)
+
+  h1 = exc(1,1,1)
+  if(list_orb_reverse(h1).lt.0)return
+  h1 = list_orb_reverse(h1)
+  h2 = exc(1,1,2)
+  if(list_orb_reverse(h2).lt.0)return
+  h2 = list_orb_reverse(h2)
+  p1 = exc(1,2,1)
+  if(list_orb_reverse(p1).lt.0)return
+  p1 = list_orb_reverse(p1)
+  p2 = exc(1,2,2)
+  if(list_orb_reverse(p2).lt.0)return
+  p2 = list_orb_reverse(p2)
+!  if(alpha_beta)then
+    nkeys += 1
+  phase = phase * 0.5d0
+    do istate = 1, N_st
+    values(istate,nkeys) = c_1(istate) * phase
+    enddo
+    keys(1,nkeys) = h1
+    keys(2,nkeys) = h2
+    keys(3,nkeys) = p1
+    keys(4,nkeys) = p2
+    nkeys += 1
+    do istate = 1, N_st
+    values(istate,nkeys) = c_1(istate) * phase
+    enddo
+    keys(1,nkeys) = h2
+    keys(2,nkeys) = h1
+    keys(3,nkeys) = p2
+    keys(4,nkeys) = p1
+!  else if(spin_trace)then
+!    nkeys += 1
+!    do istate = 1, N_st
+!      values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
+!    enddo
+!    keys(1,nkeys) = h1
+!    keys(2,nkeys) = h2
+!    keys(3,nkeys) = p1
+!    keys(4,nkeys) = p2
+!    nkeys += 1
+!    do istate = 1, N_st
+!      values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
+!    enddo
+!    keys(1,nkeys) = h2
+!    keys(2,nkeys) = h1
+!    keys(3,nkeys) = p2
+!    keys(4,nkeys) = p1
+!  endif
+end
+
+subroutine orb_range_off_diag_single_to_all_states_ab_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
+  use bitmasks
+  BEGIN_DOC
+  ! routine that updates the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for
+  !
+  ! a given couple of determinant det_1, det_2 being a SINGLE excitation with respect to one another
+  !
+  ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1
+  !
+  ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation
+  !
+  ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
+  !
+  ! ispin determines which spin-spin component of the two-rdm you will update
+  !
+  ! ispin   == 1                 :: alpha/ alpha
+  ! ispin   == 2                 :: beta / beta
+  ! ispin   == 3                 :: alpha/ beta
+  ! ispin   == 4                 :: spin traced <=> total two-rdm
+  !
+  ! here, only ispin == 3 or 4 will do something
+  END_DOC
+  implicit none
+  integer, intent(in)            :: ispin,sze_buff,N_st
+  integer(bit_kind), intent(in)  :: det_1(N_int,2),det_2(N_int,2)
+  integer, intent(in)            :: list_orb_reverse(mo_num)
+  integer(bit_kind), intent(in)  :: orb_bitmask(N_int)
+  double precision, intent(in)   :: c_1(N_st)
+  double precision, intent(out)  :: values(N_st,sze_buff)
+  integer         , intent(out)  :: keys(4,sze_buff)
+  integer         , intent(inout) :: nkeys
+
+  integer                        :: occ(N_int*bit_kind_size,2)
+  integer                        :: n_occ_ab(2)
+  integer                        :: i,j,h1,h2,p1,istate
+  integer                        :: exc(0:2,2,2)
+  double precision               :: phase
+
+  logical                        :: alpha_alpha,beta_beta,alpha_beta,spin_trace
+  logical                        :: is_integer_in_string_local
+  if (ispin <= 2) return
+
+  alpha_beta  = .False.
+  spin_trace  = .False.
+  if(ispin == 3)then
+    alpha_beta  = .True.
+  else if(ispin == 4)then
+    spin_trace  = .True.
+  endif
+
   call bitstring_to_list_ab(det_1, occ, n_occ_ab, N_int)
   call get_single_excitation(det_1,det_2,exc,phase,N_int)
   if(alpha_beta)then
-   if (exc(0,1,1) == 1) then
-    ! Mono alpha
-    h1 = exc(1,1,1)
-    if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return
-    h1 = list_orb_reverse(h1)
-    p1 = exc(1,2,1)
-    if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return
-    p1 = list_orb_reverse(p1)
-     do i = 1, n_occ_ab(2)
-      h2 = occ(i,2)
-      if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle
-      h2 = list_orb_reverse(h2)
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
+
+    if (exc(0,1,1) == 1) then
+      ! Mono alpha
+      h1 = exc(1,1,1)
+      if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return
+      p1 = exc(1,2,1)
+      if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return
+
+      h1 = list_orb_reverse(h1)
+      p1 = list_orb_reverse(p1)
+
+      phase = 0.5d0 * phase
+      do i = 1, n_occ_ab(2)
+        h2 = occ(i,2)
+        if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle
+        h2 = list_orb_reverse(h2)
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = p1
+        keys(4,nkeys) = h2
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = p1
       enddo
-      keys(1,nkeys) = h1
-      keys(2,nkeys) = h2
-      keys(3,nkeys) = p1
-      keys(4,nkeys) = h2
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
+    else
+      ! Mono beta
+      h1 = exc(1,1,2)
+      if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return
+      p1 = exc(1,2,2)
+      if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return
+
+      h1 = list_orb_reverse(h1)
+      p1 = list_orb_reverse(p1)
+      phase = 0.5d0 * phase
+      do i = 1, n_occ_ab(1)
+        h2 = occ(i,1)
+        if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle
+        h2 = list_orb_reverse(h2)
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = p1
+        keys(4,nkeys) = h2
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = p1
       enddo
-      keys(1,nkeys) = h2
-      keys(2,nkeys) = h1
-      keys(3,nkeys) = h2
-      keys(4,nkeys) = p1
-     enddo 
-   else 
-    ! Mono beta
-    h1 = exc(1,1,2)
-    if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return
-    h1 = list_orb_reverse(h1)
-    p1 = exc(1,2,2)
-    if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return
-    p1 = list_orb_reverse(p1)
-     do i = 1, n_occ_ab(1)
-      h2 = occ(i,1)
-      if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle
-      h2 = list_orb_reverse(h2)
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-      enddo
-      keys(1,nkeys) = h1
-      keys(2,nkeys) = h2
-      keys(3,nkeys) = p1
-      keys(4,nkeys) = h2
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-      enddo
-      keys(1,nkeys) = h2
-      keys(2,nkeys) = h1
-      keys(3,nkeys) = h2
-      keys(4,nkeys) = p1
-     enddo 
-   endif
+    endif
+
   else if(spin_trace)then
-   if (exc(0,1,1) == 1) then
-    ! Mono alpha
-    h1 = exc(1,1,1)
-    if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return
-    h1 = list_orb_reverse(h1)
-    p1 = exc(1,2,1)
-    if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return
-    p1 = list_orb_reverse(p1)
-     do i = 1, n_occ_ab(2)
-      h2 = occ(i,2)
-      if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle
-      h2 = list_orb_reverse(h2)
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-      enddo
-      keys(1,nkeys) = h1
-      keys(2,nkeys) = h2
-      keys(3,nkeys) = p1
-      keys(4,nkeys) = h2
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-      enddo
-      keys(1,nkeys) = h2
-      keys(2,nkeys) = h1
-      keys(3,nkeys) = h2
-      keys(4,nkeys) = p1
-     enddo 
-   else 
-    ! Mono beta
-    h1 = exc(1,1,2)
-    if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return
-    h1 = list_orb_reverse(h1)
-    p1 = exc(1,2,2)
-    if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return
-    p1 = list_orb_reverse(p1)
-     do i = 1, n_occ_ab(1)
-      h2 = occ(i,1)
-      if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle
-      h2 = list_orb_reverse(h2)
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-      enddo
-      keys(1,nkeys) = h1
-      keys(2,nkeys) = h2
-      keys(3,nkeys) = p1
-      keys(4,nkeys) = h2
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-      enddo
-      keys(1,nkeys) = h2
-      keys(2,nkeys) = h1
-      keys(3,nkeys) = h2
-      keys(4,nkeys) = p1
-     enddo 
-   endif
-  endif
-  end
 
-  subroutine orb_range_off_diag_single_to_all_states_aa_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
+    if (exc(0,1,1) == 1) then
+      ! Mono alpha
+      h1 = exc(1,1,1)
+      if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return
+      p1 = exc(1,2,1)
+      if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return
+
+      h1 = list_orb_reverse(h1)
+      p1 = list_orb_reverse(p1)
+      phase = 0.5d0 * phase
+      do i = 1, n_occ_ab(2)
+        h2 = occ(i,2)
+        if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle
+        h2 = list_orb_reverse(h2)
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = p1
+        keys(4,nkeys) = h2
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = p1
+      enddo
+
+    else
+
+      ! Mono beta
+      h1 = exc(1,1,2)
+      if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return
+      p1 = exc(1,2,2)
+      if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return
+
+      h1 = list_orb_reverse(h1)
+      p1 = list_orb_reverse(p1)
+
+      phase = 0.5d0 * phase
+      do i = 1, n_occ_ab(1)
+        h2 = occ(i,1)
+        if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle
+        h2 = list_orb_reverse(h2)
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = p1
+        keys(4,nkeys) = h2
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = p1
+      enddo
+    endif
+
+  endif
+end
+
+subroutine orb_range_off_diag_single_to_all_states_aa_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
   BEGIN_DOC
- ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for 
- !
- ! a given couple of determinant det_1, det_2 being a ALPHA SINGLE excitation with respect to one another
- ! 
- ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1
- ! 
- ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation 
- !
- ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
- !
- ! ispin determines which spin-spin component of the two-rdm you will update 
- !
- ! ispin   == 1 :: alpha/ alpha
- ! ispin   == 2 :: beta / beta
- ! ispin   == 3 :: alpha/ beta
- ! ispin   == 4 :: spin traced <=> total two-rdm 
- !
- ! here, only ispin == 1 or 4 will do something
+  ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for
+  !
+  ! a given couple of determinant det_1, det_2 being a ALPHA SINGLE excitation with respect to one another
+  !
+  ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1
+  !
+  ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation
+  !
+  ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
+  !
+  ! ispin determines which spin-spin component of the two-rdm you will update
+  !
+  ! ispin   == 1                 :: alpha/ alpha
+  ! ispin   == 2                 :: beta / beta
+  ! ispin   == 3                 :: alpha/ beta
+  ! ispin   == 4                 :: spin traced <=> total two-rdm
+  !
+  ! here, only ispin == 1 or 4 will do something
   END_DOC
   use bitmasks
   implicit none
- integer, intent(in) :: ispin,sze_buff,N_st
- integer(bit_kind), intent(in)  :: det_1(N_int,2),det_2(N_int,2)
- integer, intent(in) :: list_orb_reverse(mo_num)
- integer(bit_kind), intent(in)  :: orb_bitmask(N_int)
- double precision, intent(in)   :: c_1(N_st)
- double precision, intent(out)  :: values(N_st,sze_buff)
- integer         , intent(out)  :: keys(4,sze_buff)
- integer         , intent(inout):: nkeys
- 
+  integer, intent(in)            :: ispin,sze_buff,N_st
+  integer(bit_kind), intent(in)  :: det_1(N_int,2),det_2(N_int,2)
+  integer, intent(in)            :: list_orb_reverse(mo_num)
+  integer(bit_kind), intent(in)  :: orb_bitmask(N_int)
+  double precision, intent(in)   :: c_1(N_st)
+  double precision, intent(out)  :: values(N_st,sze_buff)
+  integer         , intent(out)  :: keys(4,sze_buff)
+  integer         , intent(inout) :: nkeys
+
   integer                        :: occ(N_int*bit_kind_size,2)
   integer                        :: n_occ_ab(2)
-  integer :: i,j,h1,h2,p1,istate
+  integer                        :: i,j,h1,h2,p1,istate
   integer                        :: exc(0:2,2,2)
   double precision               :: phase
- 
+
   logical                        :: alpha_alpha,beta_beta,alpha_beta,spin_trace
-  logical :: is_integer_in_string
-  alpha_alpha = .False.   
-  beta_beta   = .False.
-  alpha_beta  = .False.
+  logical                        :: is_integer_in_string_local
+  alpha_alpha = .False.
+!  beta_beta   = .False.
+!  alpha_beta  = .False.
   spin_trace  = .False.
   if(     ispin == 1)then
-   alpha_alpha = .True.
-  else if(ispin == 2)then
-   beta_beta   = .True.
-  else if(ispin == 3)then
-   alpha_beta  = .True.
+    alpha_alpha = .True.
   else if(ispin == 4)then
-   spin_trace  = .True.
-  endif
- 
-  call bitstring_to_list_ab(det_1, occ, n_occ_ab, N_int)
-  call get_single_excitation(det_1,det_2,exc,phase,N_int)
-  if(alpha_alpha.or.spin_trace)then
-   if (exc(0,1,1) == 1) then
-    ! Mono alpha
-    h1 = exc(1,1,1)
-    if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return
-    h1 = list_orb_reverse(h1)
-    p1 = exc(1,2,1)
-    if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return
-    p1 = list_orb_reverse(p1)
-     do i = 1, n_occ_ab(1)
-      h2 = occ(i,1)
-      if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle
-      h2 = list_orb_reverse(h2)
-
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-      enddo
-      keys(1,nkeys) = h1
-      keys(2,nkeys) = h2
-      keys(3,nkeys) = p1
-      keys(4,nkeys) = h2
-
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase
-      enddo
-      keys(1,nkeys) = h1
-      keys(2,nkeys) = h2
-      keys(3,nkeys) = h2
-      keys(4,nkeys) = p1
-  
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-      enddo
-      keys(1,nkeys) = h2
-      keys(2,nkeys) = h1
-      keys(3,nkeys) = h2
-      keys(4,nkeys) = p1
-
-      nkeys += 1
-      do istate = 1, N_st
-       values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase
-      enddo
-      keys(1,nkeys) = h2
-      keys(2,nkeys) = h1
-      keys(3,nkeys) = p1
-      keys(4,nkeys) = h2
-     enddo 
-   else 
+    spin_trace  = .True.
+  else
     return
-   endif
   endif
-  end
 
-  subroutine orb_range_off_diag_single_to_all_states_bb_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
+!  if(alpha_alpha.or.spin_trace)then
+    call get_single_excitation(det_1,det_2,exc,phase,N_int)
+    if (exc(0,1,1) == 1) then
+      ! Mono alpha
+      h1 = exc(1,1,1)
+      if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return
+      p1 = exc(1,2,1)
+      if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return
+
+      call bitstring_to_list_ab(det_1, occ, n_occ_ab, N_int)
+
+      h1 = list_orb_reverse(h1)
+      p1 = list_orb_reverse(p1)
+
+      phase = 0.5d0*phase
+      do i = 1, n_occ_ab(1)
+        h2 = occ(i,1)
+        if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle
+        h2 = list_orb_reverse(h2)
+
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = p1
+        keys(4,nkeys) = h2
+
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = - c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = p1
+
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = p1
+
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = - c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = p1
+        keys(4,nkeys) = h2
+      enddo
+    else
+      return
+    endif
+!  endif
+end
+
+subroutine orb_range_off_diag_single_to_all_states_bb_dm_buffer(det_1,det_2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
   use bitmasks
   BEGIN_DOC
- ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for 
- !
- ! a given couple of determinant det_1, det_2 being a BETA  SINGLE excitation with respect to one another
- ! 
- ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1
- ! 
- ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation 
- !
- ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
- !
- ! ispin determines which spin-spin component of the two-rdm you will update 
- !
- ! ispin   == 1 :: alpha/ alpha
- ! ispin   == 2 :: beta / beta
- ! ispin   == 3 :: alpha/ beta
- ! ispin   == 4 :: spin traced <=> total two-rdm 
- !
- ! here, only ispin == 2 or 4 will do something
+  ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for
+  !
+  ! a given couple of determinant det_1, det_2 being a BETA  SINGLE excitation with respect to one another
+  !
+  ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1
+  !
+  ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation
+  !
+  ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
+  !
+  ! ispin determines which spin-spin component of the two-rdm you will update
+  !
+  ! ispin   == 1                 :: alpha/ alpha
+  ! ispin   == 2                 :: beta / beta
+  ! ispin   == 3                 :: alpha/ beta
+  ! ispin   == 4                 :: spin traced <=> total two-rdm
+  !
+  ! here, only ispin == 2 or 4 will do something
   END_DOC
   implicit none
- integer, intent(in) :: ispin,sze_buff,N_st
- integer(bit_kind), intent(in)  :: det_1(N_int,2),det_2(N_int,2)
- integer, intent(in) :: list_orb_reverse(mo_num)
- integer(bit_kind), intent(in)  :: orb_bitmask(N_int)
- double precision, intent(in)   :: c_1(N_st)
- double precision, intent(out)  :: values(N_st,sze_buff)
- integer         , intent(out)  :: keys(4,sze_buff)
- integer         , intent(inout):: nkeys
- 
+  integer, intent(in)            :: ispin,sze_buff,N_st
+  integer(bit_kind), intent(in)  :: det_1(N_int,2),det_2(N_int,2)
+  integer, intent(in)            :: list_orb_reverse(mo_num)
+  integer(bit_kind), intent(in)  :: orb_bitmask(N_int)
+  double precision, intent(in)   :: c_1(N_st)
+  double precision, intent(out)  :: values(N_st,sze_buff)
+  integer         , intent(out)  :: keys(4,sze_buff)
+  integer         , intent(inout) :: nkeys
+
   integer                        :: occ(N_int*bit_kind_size,2)
   integer                        :: n_occ_ab(2)
-  integer :: i,j,h1,h2,p1,istate
+  integer                        :: i,j,h1,h2,p1,istate
   integer                        :: exc(0:2,2,2)
   double precision               :: phase
   logical                        :: alpha_alpha,beta_beta,alpha_beta,spin_trace
-  logical :: is_integer_in_string
-  alpha_alpha = .False.   
+  logical                        :: is_integer_in_string_local
+!  alpha_alpha = .False.
   beta_beta   = .False.
-  alpha_beta  = .False.
+!  alpha_beta  = .False.
   spin_trace  = .False.
-  if(     ispin == 1)then
-   alpha_alpha = .True.
-  else if(ispin == 2)then
-   beta_beta   = .True.
-  else if(ispin == 3)then
-   alpha_beta  = .True.
+  if(ispin == 2)then
+    beta_beta   = .True.
   else if(ispin == 4)then
-   spin_trace  = .True.
-  endif
- 
- 
-  call bitstring_to_list_ab(det_1, occ, n_occ_ab, N_int)
-  call get_single_excitation(det_1,det_2,exc,phase,N_int)
-  if(beta_beta.or.spin_trace)then
-   if (exc(0,1,1) == 1) then
+    spin_trace  = .True.
+  else
     return
-   else
-    ! Mono beta
-    h1 =  exc(1,1,2)
-    if(.not.is_integer_in_string(h1,orb_bitmask,N_int))return
-    h1 = list_orb_reverse(h1)
-    p1 =  exc(1,2,2)
-    if(.not.is_integer_in_string(p1,orb_bitmask,N_int))return
-    p1 = list_orb_reverse(p1)
-    do i = 1, n_occ_ab(2)
-     h2 = occ(i,2)
-     if(.not.is_integer_in_string(h2,orb_bitmask,N_int))cycle
-     h2 = list_orb_reverse(h2)
-     nkeys += 1
-     do istate = 1, N_st
-      values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-     enddo
-     keys(1,nkeys) = h1
-     keys(2,nkeys) = h2
-     keys(3,nkeys) = p1
-     keys(4,nkeys) = h2
-
-     nkeys += 1
-     do istate = 1, N_st
-      values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase
-     enddo
-     keys(1,nkeys) = h1
-     keys(2,nkeys) = h2
-     keys(3,nkeys) = h2
-     keys(4,nkeys) = p1
-  
-     nkeys += 1
-     do istate = 1, N_st
-      values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-     enddo
-     keys(1,nkeys) = h2
-     keys(2,nkeys) = h1
-     keys(3,nkeys) = h2
-     keys(4,nkeys) = p1
-
-     nkeys += 1
-     do istate = 1, N_st
-      values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase
-     enddo
-     keys(1,nkeys) = h2
-     keys(2,nkeys) = h1
-     keys(3,nkeys) = p1
-     keys(4,nkeys) = h2
-    enddo 
-   endif
   endif
-  end
 
 
-  subroutine orb_range_off_diag_double_to_all_states_aa_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
+!  if(beta_beta.or.spin_trace)then
+    call get_single_excitation(det_1,det_2,exc,phase,N_int)
+    if (exc(0,1,1) == 1) then
+      return
+    else
+      ! Mono beta
+      h1 =  exc(1,1,2)
+      if(.not.is_integer_in_string_local(h1,orb_bitmask,N_int))return
+      p1 =  exc(1,2,2)
+      if(.not.is_integer_in_string_local(p1,orb_bitmask,N_int))return
+
+      call bitstring_to_list_ab(det_1, occ, n_occ_ab, N_int)
+      h1 = list_orb_reverse(h1)
+      p1 = list_orb_reverse(p1)
+
+      phase = 0.5d0*phase
+      do i = 1, n_occ_ab(2)
+        h2 = occ(i,2)
+        if(.not.is_integer_in_string_local(h2,orb_bitmask,N_int))cycle
+        h2 = list_orb_reverse(h2)
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = p1
+        keys(4,nkeys) = h2
+
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = - c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h1
+        keys(2,nkeys) = h2
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = p1
+
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = h2
+        keys(4,nkeys) = p1
+
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = - c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = p1
+        keys(4,nkeys) = h2
+      enddo
+    endif
+!  endif
+end
+
+
+subroutine orb_range_off_diag_double_to_all_states_aa_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
   use bitmasks
   BEGIN_DOC
- ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for 
- !
- ! a given couple of determinant det_1, det_2 being a ALPHA/ALPHA DOUBLE excitation with respect to one another
- ! 
- ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1
- ! 
- ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation 
- !
- ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
- !
- ! ispin determines which spin-spin component of the two-rdm you will update 
- !
- ! ispin   == 1 :: alpha/ alpha
- ! ispin   == 2 :: beta / beta
- ! ispin   == 3 :: alpha/ beta
- ! ispin   == 4 :: spin traced <=> total two-rdm 
- !
- ! here, only ispin == 1 or 4 will do something
+  ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for
+  !
+  ! a given couple of determinant det_1, det_2 being a ALPHA/ALPHA DOUBLE excitation with respect to one another
+  !
+  ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1
+  !
+  ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation
+  !
+  ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
+  !
+  ! ispin determines which spin-spin component of the two-rdm you will update
+  !
+  ! ispin   == 1                 :: alpha/ alpha
+  ! ispin   == 2                 :: beta / beta
+  ! ispin   == 3                 :: alpha/ beta
+  ! ispin   == 4                 :: spin traced <=> total two-rdm
+  !
+  ! here, only ispin == 1 or 4 will do something
   END_DOC
   implicit none
- integer, intent(in) :: ispin,sze_buff,N_st
- integer(bit_kind), intent(in)  :: det_1(N_int),det_2(N_int)
- integer, intent(in) :: list_orb_reverse(mo_num)
- double precision, intent(in)   :: c_1(N_st)
- double precision, intent(out)  :: values(N_st,sze_buff)
- integer         , intent(out)  :: keys(4,sze_buff)
- integer         , intent(inout):: nkeys
- 
- 
-  integer :: i,j,h1,h2,p1,p2,istate
+  integer, intent(in)            :: ispin,sze_buff,N_st
+  integer(bit_kind), intent(in)  :: det_1(N_int),det_2(N_int)
+  integer, intent(in)            :: list_orb_reverse(mo_num)
+  double precision, intent(in)   :: c_1(N_st)
+  double precision, intent(out)  :: values(N_st,sze_buff)
+  integer         , intent(out)  :: keys(4,sze_buff)
+  integer         , intent(inout) :: nkeys
+
+
+  integer                        :: i,j,h1,h2,p1,p2,istate
   integer                        :: exc(0:2,2)
   double precision               :: phase
- 
+
   logical                        :: alpha_alpha,beta_beta,alpha_beta,spin_trace
-  logical :: is_integer_in_string
-  alpha_alpha = .False.   
-  beta_beta   = .False.
-  alpha_beta  = .False.
+  logical                        :: is_integer_in_string_local
+  alpha_alpha = .False.
+!  beta_beta   = .False.
+!  alpha_beta  = .False.
   spin_trace  = .False.
   if(     ispin == 1)then
-   alpha_alpha = .True.
-  else if(ispin == 2)then
-   beta_beta   = .True.
-  else if(ispin == 3)then
-   alpha_beta  = .True.
+    alpha_alpha = .True.
   else if(ispin == 4)then
-   spin_trace  = .True.
+    spin_trace  = .True.
+  else
+    return
   endif
+  call get_double_excitation_spin(det_1,det_2,exc,phase,N_int)
+  h1 =exc(1,1)
+  if(list_orb_reverse(h1).lt.0)return
+  h2 =exc(2,1)
+  if(list_orb_reverse(h2).lt.0)return
+  p1 =exc(1,2)
+  if(list_orb_reverse(p1).lt.0)return
+  p2 =exc(2,2)
+  if(list_orb_reverse(p2).lt.0)return
+
+  h1 = list_orb_reverse(h1)
+  h2 = list_orb_reverse(h2)
+  p1 = list_orb_reverse(p1)
+  p2 = list_orb_reverse(p2)
+
+  phase = 0.5d0*phase
+!  if(alpha_alpha.or.spin_trace)then
+    nkeys += 1
+
+    do istate = 1, N_st
+      values(istate,nkeys) = c_1(istate) * phase
+    enddo
+    keys(1,nkeys) = h1
+    keys(2,nkeys) = h2
+    keys(3,nkeys) = p1
+    keys(4,nkeys) = p2
+
+    nkeys += 1
+    do istate = 1, N_st
+      values(istate,nkeys) = - c_1(istate) * phase
+    enddo
+    keys(1,nkeys) = h1
+    keys(2,nkeys) = h2
+    keys(3,nkeys) = p2
+    keys(4,nkeys) = p1
+
+    nkeys += 1
+    do istate = 1, N_st
+      values(istate,nkeys) = c_1(istate) * phase
+    enddo
+    keys(1,nkeys) = h2
+    keys(2,nkeys) = h1
+    keys(3,nkeys) = p2
+    keys(4,nkeys) = p1
+
+    nkeys += 1
+    do istate = 1, N_st
+      values(istate,nkeys) = - c_1(istate) * phase
+    enddo
+    keys(1,nkeys) = h2
+    keys(2,nkeys) = h1
+    keys(3,nkeys) = p1
+    keys(4,nkeys) = p2
+!  endif
+end
+
+subroutine orb_range_off_diag_double_to_all_states_bb_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
+  use bitmasks
+  BEGIN_DOC
+  ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for
+  !
+  ! a given couple of determinant det_1, det_2 being a BETA /BETA  DOUBLE excitation with respect to one another
+  !
+  ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1
+  !
+  ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation
+  !
+  ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
+  !
+  ! ispin determines which spin-spin component of the two-rdm you will update
+  !
+  ! ispin   == 1                 :: alpha/ alpha
+  ! ispin   == 2                 :: beta / beta
+  ! ispin   == 3                 :: alpha/ beta
+  ! ispin   == 4                 :: spin traced <=> total two-rdm
+  !
+  ! here, only ispin == 2 or 4 will do something
+  END_DOC
+  implicit none
+
+  integer, intent(in)            :: ispin,sze_buff,N_st
+  integer(bit_kind), intent(in)  :: det_1(N_int),det_2(N_int)
+  integer, intent(in)            :: list_orb_reverse(mo_num)
+  double precision, intent(in)   :: c_1(N_st)
+  double precision, intent(out)  :: values(N_st,sze_buff)
+  integer         , intent(out)  :: keys(4,sze_buff)
+  integer         , intent(inout) :: nkeys
+
+  integer                        :: i,j,h1,h2,p1,p2,istate
+  integer                        :: exc(0:2,2)
+  double precision               :: phase
+  logical                        :: alpha_alpha,beta_beta,alpha_beta,spin_trace
+  logical                        :: is_integer_in_string_local
+!  alpha_alpha = .False.
+  beta_beta   = .False.
+!  alpha_beta  = .False.
+  spin_trace  = .False.
+  if(ispin == 2)then
+    beta_beta   = .True.
+  else if(ispin == 4)then
+    spin_trace  = .True.
+  else
+    return
+  endif
+
   call get_double_excitation_spin(det_1,det_2,exc,phase,N_int)
   h1 =exc(1,1)
   if(list_orb_reverse(h1).lt.0)return
@@ -770,11 +905,12 @@
   p2 =exc(2,2)
   if(list_orb_reverse(p2).lt.0)return
   p2 = list_orb_reverse(p2)
-  if(alpha_alpha.or.spin_trace)then
+
+!  if(beta_beta.or.spin_trace)then
+    phase = 0.5d0*phase
     nkeys += 1
-    
     do istate = 1, N_st
-     values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
+      values(istate,nkeys) = c_1(istate) * phase
     enddo
     keys(1,nkeys) = h1
     keys(2,nkeys) = h2
@@ -783,16 +919,16 @@
 
     nkeys += 1
     do istate = 1, N_st
-     values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase
+      values(istate,nkeys) = - c_1(istate) * phase
     enddo
     keys(1,nkeys) = h1
     keys(2,nkeys) = h2
     keys(3,nkeys) = p2
     keys(4,nkeys) = p1
-                                          
+
     nkeys += 1
     do istate = 1, N_st
-     values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
+      values(istate,nkeys) = c_1(istate) * phase
     enddo
     keys(1,nkeys) = h2
     keys(2,nkeys) = h1
@@ -801,115 +937,12 @@
 
     nkeys += 1
     do istate = 1, N_st
-     values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase
+      values(istate,nkeys) = - c_1(istate) * phase
     enddo
     keys(1,nkeys) = h2
     keys(2,nkeys) = h1
     keys(3,nkeys) = p1
     keys(4,nkeys) = p2
-  endif
-  end
-
-  subroutine orb_range_off_diag_double_to_all_states_bb_dm_buffer(det_1,det_2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
-  use bitmasks
-  BEGIN_DOC
- ! routine that update the OFF DIAGONAL PART of the two body rdms in a specific range of orbitals for 
- !
- ! a given couple of determinant det_1, det_2 being a BETA /BETA  DOUBLE excitation with respect to one another
- ! 
- ! c_1 is supposed to be a scalar quantity, such as state averaged coef of the determinant det_1
- ! 
- ! big_array(dim1,dim1,dim1,dim1) is the two-body rdm to be updated in physicist notation 
- !
- ! orb_bitmask(N_int) is the bitmask for the orbital range, list_orb_reverse(mo_num) is the inverse range of orbitals
- !
- ! ispin determines which spin-spin component of the two-rdm you will update 
- !
- ! ispin   == 1 :: alpha/ alpha
- ! ispin   == 2 :: beta / beta
- ! ispin   == 3 :: alpha/ beta
- ! ispin   == 4 :: spin traced <=> total two-rdm 
- !
- ! here, only ispin == 2 or 4 will do something
-  END_DOC
-  implicit none
- 
- integer, intent(in) :: ispin,sze_buff,N_st
- integer(bit_kind), intent(in)  :: det_1(N_int),det_2(N_int)
- integer, intent(in) :: list_orb_reverse(mo_num)
- double precision, intent(in)   :: c_1(N_st)
- double precision, intent(out)  :: values(N_st,sze_buff)
- integer         , intent(out)  :: keys(4,sze_buff)
- integer         , intent(inout):: nkeys
- 
-  integer :: i,j,h1,h2,p1,p2,istate
-  integer                        :: exc(0:2,2)
-  double precision               :: phase
-  logical                        :: alpha_alpha,beta_beta,alpha_beta,spin_trace
-  logical :: is_integer_in_string
-  alpha_alpha = .False.   
-  beta_beta   = .False.
-  alpha_beta  = .False.
-  spin_trace  = .False.
-  if(     ispin == 1)then
-   alpha_alpha = .True.
-  else if(ispin == 2)then
-   beta_beta   = .True.
-  else if(ispin == 3)then
-   alpha_beta  = .True.
-  else if(ispin == 4)then
-   spin_trace  = .True.
-  endif
- 
-  call get_double_excitation_spin(det_1,det_2,exc,phase,N_int)
-  h1 =exc(1,1)
-  if(list_orb_reverse(h1).lt.0)return
-  h1 = list_orb_reverse(h1)
-  h2 =exc(2,1)
-  if(list_orb_reverse(h2).lt.0)return
-  h2 = list_orb_reverse(h2)
-  p1 =exc(1,2)
-  if(list_orb_reverse(p1).lt.0)return
-  p1 = list_orb_reverse(p1)
-  p2 =exc(2,2)
-  if(list_orb_reverse(p2).lt.0)return
-  p2 = list_orb_reverse(p2)
-  if(beta_beta.or.spin_trace)then
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = p1
-    keys(4,nkeys) = p2
-
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase
-    enddo
-    keys(1,nkeys) = h1
-    keys(2,nkeys) = h2
-    keys(3,nkeys) = p2
-    keys(4,nkeys) = p1
-                                          
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = 0.5d0 * c_1(istate) * phase
-    enddo
-    keys(1,nkeys) = h2
-    keys(2,nkeys) = h1
-    keys(3,nkeys) = p2
-    keys(4,nkeys) = p1
-
-    nkeys += 1
-    do istate = 1, N_st
-     values(istate,nkeys) = - 0.5d0 * c_1(istate) * phase
-    enddo
-    keys(1,nkeys) = h2
-    keys(2,nkeys) = h1
-    keys(3,nkeys) = p1
-    keys(4,nkeys) = p2
-  endif
-  end
+!  endif
+end
 

From 2655f996228808592ab2e146f98eb52aa65292c5 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Wed, 26 Feb 2025 15:14:51 +0100
Subject: [PATCH 41/59] Accelerated 2rdm

---
 src/two_rdm_routines/davidson_like_2rdm.irp.f | 156 ++++++++++++------
 1 file changed, 103 insertions(+), 53 deletions(-)

diff --git a/src/two_rdm_routines/davidson_like_2rdm.irp.f b/src/two_rdm_routines/davidson_like_2rdm.irp.f
index 0e6899f8..342b4318 100644
--- a/src/two_rdm_routines/davidson_like_2rdm.irp.f
+++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f
@@ -13,7 +13,7 @@ subroutine orb_range_2_rdm_openmp(big_array,dim1,norb,list_orb,ispin,u_0,N_st,sz
    END_DOC
    integer, intent(in)             :: N_st,sze
    integer, intent(in)             :: dim1,norb,list_orb(norb),ispin
-   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1)
+   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st)
    double precision, intent(in)    :: u_0(sze,N_st)
 
    integer                        :: k
@@ -50,7 +50,7 @@ subroutine orb_range_2_rdm_openmp_work(big_array,dim1,norb,list_orb,ispin,u_t,N_
    END_DOC
    integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
    integer, intent(in)             :: dim1,norb,list_orb(norb),ispin
-   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1)
+   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st)
    double precision, intent(in)   :: u_t(N_st,N_det)
 
    integer :: k
@@ -91,7 +91,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
    integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
    double precision, intent(in)   :: u_t(N_st,N_det)
    integer, intent(in)            :: dim1,norb,list_orb(norb),ispin
-   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1)
+   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st)
 
    integer(omp_lock_kind)          :: lock_2rdm
    integer                        :: i,j,k,l
@@ -155,6 +155,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
    ! Prepare the array of all alpha single excitations
    ! -------------------------------------------------
 
+   double precision, allocatable :: big_array_local(:,:,:,:,:)
+
    PROVIDE N_int nthreads_davidson elec_alpha_num
     !$OMP PARALLEL DEFAULT(NONE) NUM_THREADS(nthreads_davidson)      &
             !$OMP   SHARED(psi_bilinear_matrix_rows, N_det,lock_2rdm,&
@@ -174,7 +176,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
             !$OMP          buffer, doubles, n_doubles,               &
             !$OMP          tmp_det2, idx, l, kcol_prev,              &
             !$OMP          singles_a, n_singles_a, singles_b,        &
-            !$OMP          n_singles_b, nkeys, keys, values)
+            !$OMP          n_singles_b, nkeys, keys, values, big_array_local)
 
    ! Alpha/Beta double excitations
    ! =============================
@@ -185,6 +187,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
        singles_b(maxab),                                             &
        doubles(maxab),                                               &
        idx(maxab))
+   allocate( big_array_local(N_states,dim1, dim1, dim1, dim1) )
+   big_array_local(:,:,:,:,:) = 0.d0
 
    kcol_prev=-1
 
@@ -256,30 +260,32 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
          do l= 1, N_states
            c_1(l) = u_t(l,l_a) * u_t(l,k_a)
          enddo
-         if(alpha_beta)then
-          ! only ONE contribution
-          if (nkeys+1 .ge. sze_buff) then
-            call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-            nkeys = 0
-          endif
-         else if (spin_trace)then
-          ! TWO contributions
+!         if(alpha_beta)then
+!          ! only ONE contribution
+!          if (nkeys+1 .ge. sze_buff) then
+!            call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!            nkeys = 0
+!          endif
+!         else if (spin_trace)then
+!          ! TWO contributions
           if (nkeys+2 .ge. sze_buff) then
-            call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!            call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+            call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
             nkeys = 0
           endif
-         endif
+!         endif
          call orb_range_off_diag_double_to_all_states_ab_dm_buffer(tmp_det,tmp_det2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
 
        enddo
       endif
 
      enddo
-!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-!     nkeys = 0
 
    enddo
-     !$OMP END DO
+     !$OMP END DO NOWAIT
+!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+     call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
+     nkeys = 0
 
      !$OMP DO SCHEDULE(dynamic)
    do k_a=istart+ishift,iend,istep
@@ -334,33 +340,36 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
      ! ----------------------------------
 
      tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
-     do i=1,n_singles_a
-       l_a = singles_a(i)
-       ASSERT (l_a <= N_det)
+     if(alpha_beta.or.spin_trace.or.alpha_alpha)then
+       do i=1,n_singles_a
+         l_a = singles_a(i)
+         ASSERT (l_a <= N_det)
 
-       lrow = psi_bilinear_matrix_rows(l_a)
-       ASSERT (lrow <= N_det_alpha_unique)
+         lrow = psi_bilinear_matrix_rows(l_a)
+         ASSERT (lrow <= N_det_alpha_unique)
+
+         tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+         do l= 1, N_states
+           c_1(l) = u_t(l,l_a) * u_t(l,k_a)
+         enddo
 
-       tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
-       do l= 1, N_states
-         c_1(l) = u_t(l,l_a) * u_t(l,k_a)
-       enddo
-       if(alpha_beta.or.spin_trace.or.alpha_alpha)then
          ! increment the alpha/beta part for single excitations
          if (nkeys+ 2 * elec_alpha_num .ge. sze_buff) then
-           call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!           call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+           call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
            nkeys = 0
          endif
          call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
          ! increment the alpha/alpha part for single excitations
          if (nkeys+4 * elec_alpha_num .ge. sze_buff ) then
-           call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!           call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+           call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
            nkeys = 0
          endif
          call orb_range_off_diag_single_to_all_states_aa_dm_buffer(tmp_det,tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
-       endif
 
-     enddo
+       enddo
+     endif
 
 !     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
 !     nkeys = 0
@@ -380,7 +389,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
           c_1(l) = u_t(l,l_a) * u_t(l,k_a)
         enddo
         if (nkeys+4 .ge. sze_buff) then
-          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+          call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
           nkeys = 0
         endif
         call orb_range_off_diag_double_to_all_states_aa_dm_buffer(tmp_det(1,1),psi_det_alpha_unique(1, lrow),c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
@@ -435,33 +445,37 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
      ! ----------------------------------
 
      tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
-     do i=1,n_singles_b
-       l_b = singles_b(i)
-       ASSERT (l_b <= N_det)
+     if(alpha_beta.or.spin_trace.or.beta_beta)then
+      do i=1,n_singles_b
+        l_b = singles_b(i)
+        ASSERT (l_b <= N_det)
 
-       lcol = psi_bilinear_matrix_transp_columns(l_b)
-       ASSERT (lcol <= N_det_beta_unique)
+        lcol = psi_bilinear_matrix_transp_columns(l_b)
+        ASSERT (lcol <= N_det_beta_unique)
+
+        tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
+        l_a = psi_bilinear_matrix_transp_order(l_b)
+        do l= 1, N_states
+          c_1(l) = u_t(l,l_a) * u_t(l,k_a)
+        enddo
 
-       tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
-       l_a = psi_bilinear_matrix_transp_order(l_b)
-       do l= 1, N_states
-         c_1(l) = u_t(l,l_a) * u_t(l,k_a)
-       enddo
-       if(alpha_beta.or.spin_trace.or.beta_beta)then
         ! increment the alpha/beta  part for single excitations
         if (nkeys+2 * elec_alpha_num .ge. sze_buff ) then
-          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+          call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
           nkeys = 0
         endif
          call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
         ! increment the beta /beta  part for single excitations
         if (nkeys+4 * elec_alpha_num .ge. sze_buff) then
-          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+          call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
           nkeys = 0
         endif
         call orb_range_off_diag_single_to_all_states_bb_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
-       endif
-     enddo
+       enddo
+     endif
+
 !     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
 !     nkeys = 0
 
@@ -481,7 +495,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
           c_1(l) = u_t(l,l_a) * u_t(l,k_a)
         enddo
         if (nkeys+4 .ge. sze_buff) then
-          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+          call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
           nkeys = 0
         endif
         call orb_range_off_diag_double_to_all_states_bb_dm_buffer(tmp_det(1,2),psi_det_beta_unique(1, lcol),c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
@@ -517,16 +532,28 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
        c_1(l) = u_t(l,k_a) * u_t(l,k_a)
      enddo
 
-     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-     nkeys = 0
+     if (nkeys+elec_alpha_num*elec_alpha_num .ge. sze_buff) then
+!       call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+       call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
+       nkeys = 0
+     endif
+
      call orb_range_diag_to_all_states_2_rdm_dm_buffer(tmp_det,c_1,N_states,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
-     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+
+!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+     call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
      nkeys = 0
 
    end do
-    !$OMP END DO
+    !$OMP END DO NOWAIT
    deallocate(buffer, singles_a, singles_b, doubles, idx, keys, values)
-    !$OMP END PARALLEL
+   !$OMP CRITICAL
+   do i=1,N_states
+     big_array(:,:,:,:,i) = big_array(:,:,:,:,i) + big_array_local(i,:,:,:,:)
+   enddo
+   !$OMP END CRITICAL
+   deallocate(big_array_local)
+   !$OMP END PARALLEL
 
 end
 
@@ -593,3 +620,26 @@ subroutine update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,loc
 
 end
 
+subroutine update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
+ use omp_lib
+ implicit none
+ integer, intent(in) :: n_st,nkeys,dim1
+ integer, intent(in) :: keys(4,nkeys)
+ double precision, intent(in) :: values(n_st,nkeys)
+ double precision, intent(inout) :: big_array_local(n_st,dim1,dim1,dim1,dim1)
+
+ integer :: istate
+ integer :: i,h1,h2,p1,p2
+
+ do i = 1, nkeys
+   do istate = 1, N_st
+    h1 = keys(1,i)
+    h2 = keys(2,i)
+    p1 = keys(3,i)
+    p2 = keys(4,i)
+    big_array_local(istate,h1,h2,p1,p2) = big_array_local(istate,h1,h2,p1,p2) + values(istate,i)
+   enddo
+ enddo
+
+end
+

From 1d5f2f37351775a51ac61404985ef07a9901b188 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Wed, 26 Feb 2025 15:26:22 +0100
Subject: [PATCH 42/59] Optim in 2RDM

---
 src/two_rdm_routines/davidson_like_2rdm.irp.f |  8 +--
 src/two_rdm_routines/update_rdm.irp.f         | 72 +++++++++----------
 2 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/src/two_rdm_routines/davidson_like_2rdm.irp.f b/src/two_rdm_routines/davidson_like_2rdm.irp.f
index 342b4318..d92f1924 100644
--- a/src/two_rdm_routines/davidson_like_2rdm.irp.f
+++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f
@@ -632,11 +632,11 @@ subroutine update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_arr
  integer :: i,h1,h2,p1,p2
 
  do i = 1, nkeys
+   h1 = keys(1,i)
+   h2 = keys(2,i)
+   p1 = keys(3,i)
+   p2 = keys(4,i)
    do istate = 1, N_st
-    h1 = keys(1,i)
-    h2 = keys(2,i)
-    p1 = keys(3,i)
-    p2 = keys(4,i)
     big_array_local(istate,h1,h2,p1,p2) = big_array_local(istate,h1,h2,p1,p2) + values(istate,i)
    enddo
  enddo
diff --git a/src/two_rdm_routines/update_rdm.irp.f b/src/two_rdm_routines/update_rdm.irp.f
index 978f6e33..f01706f0 100644
--- a/src/two_rdm_routines/update_rdm.irp.f
+++ b/src/two_rdm_routines/update_rdm.irp.f
@@ -592,6 +592,15 @@ subroutine orb_range_off_diag_single_to_all_states_aa_dm_buffer(det_1,det_2,c_1,
         keys(3,nkeys) = p1
         keys(4,nkeys) = h2
 
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = - c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = p1
+        keys(4,nkeys) = h2
+
         nkeys += 1
         do istate = 1, N_st
           values(istate,nkeys) = - c_1(istate) * phase
@@ -609,15 +618,6 @@ subroutine orb_range_off_diag_single_to_all_states_aa_dm_buffer(det_1,det_2,c_1,
         keys(2,nkeys) = h1
         keys(3,nkeys) = h2
         keys(4,nkeys) = p1
-
-        nkeys += 1
-        do istate = 1, N_st
-          values(istate,nkeys) = - c_1(istate) * phase
-        enddo
-        keys(1,nkeys) = h2
-        keys(2,nkeys) = h1
-        keys(3,nkeys) = p1
-        keys(4,nkeys) = h2
       enddo
     else
       return
@@ -706,6 +706,15 @@ subroutine orb_range_off_diag_single_to_all_states_bb_dm_buffer(det_1,det_2,c_1,
         keys(3,nkeys) = p1
         keys(4,nkeys) = h2
 
+        nkeys += 1
+        do istate = 1, N_st
+          values(istate,nkeys) = - c_1(istate) * phase
+        enddo
+        keys(1,nkeys) = h2
+        keys(2,nkeys) = h1
+        keys(3,nkeys) = p1
+        keys(4,nkeys) = h2
+
         nkeys += 1
         do istate = 1, N_st
           values(istate,nkeys) = - c_1(istate) * phase
@@ -723,15 +732,6 @@ subroutine orb_range_off_diag_single_to_all_states_bb_dm_buffer(det_1,det_2,c_1,
         keys(2,nkeys) = h1
         keys(3,nkeys) = h2
         keys(4,nkeys) = p1
-
-        nkeys += 1
-        do istate = 1, N_st
-          values(istate,nkeys) = - c_1(istate) * phase
-        enddo
-        keys(1,nkeys) = h2
-        keys(2,nkeys) = h1
-        keys(3,nkeys) = p1
-        keys(4,nkeys) = h2
       enddo
     endif
 !  endif
@@ -814,6 +814,15 @@ subroutine orb_range_off_diag_double_to_all_states_aa_dm_buffer(det_1,det_2,c_1,
     keys(3,nkeys) = p1
     keys(4,nkeys) = p2
 
+    nkeys += 1
+    do istate = 1, N_st
+      values(istate,nkeys) = - c_1(istate) * phase
+    enddo
+    keys(1,nkeys) = h2
+    keys(2,nkeys) = h1
+    keys(3,nkeys) = p1
+    keys(4,nkeys) = p2
+
     nkeys += 1
     do istate = 1, N_st
       values(istate,nkeys) = - c_1(istate) * phase
@@ -831,15 +840,6 @@ subroutine orb_range_off_diag_double_to_all_states_aa_dm_buffer(det_1,det_2,c_1,
     keys(2,nkeys) = h1
     keys(3,nkeys) = p2
     keys(4,nkeys) = p1
-
-    nkeys += 1
-    do istate = 1, N_st
-      values(istate,nkeys) = - c_1(istate) * phase
-    enddo
-    keys(1,nkeys) = h2
-    keys(2,nkeys) = h1
-    keys(3,nkeys) = p1
-    keys(4,nkeys) = p2
 !  endif
 end
 
@@ -917,6 +917,15 @@ subroutine orb_range_off_diag_double_to_all_states_bb_dm_buffer(det_1,det_2,c_1,
     keys(3,nkeys) = p1
     keys(4,nkeys) = p2
 
+    nkeys += 1
+    do istate = 1, N_st
+      values(istate,nkeys) = - c_1(istate) * phase
+    enddo
+    keys(1,nkeys) = h2
+    keys(2,nkeys) = h1
+    keys(3,nkeys) = p1
+    keys(4,nkeys) = p2
+
     nkeys += 1
     do istate = 1, N_st
       values(istate,nkeys) = - c_1(istate) * phase
@@ -934,15 +943,6 @@ subroutine orb_range_off_diag_double_to_all_states_bb_dm_buffer(det_1,det_2,c_1,
     keys(2,nkeys) = h1
     keys(3,nkeys) = p2
     keys(4,nkeys) = p1
-
-    nkeys += 1
-    do istate = 1, N_st
-      values(istate,nkeys) = - c_1(istate) * phase
-    enddo
-    keys(1,nkeys) = h2
-    keys(2,nkeys) = h1
-    keys(3,nkeys) = p1
-    keys(4,nkeys) = p2
 !  endif
 end
 

From db48267259b6d8c627bb6987925714945c590895 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 27 Feb 2025 13:07:59 +0100
Subject: [PATCH 43/59] 1/sqrt(2) in basis correction proj

---
 external/ezfio                       | 2 +-
 src/mu_of_r/mu_of_r_conditions.irp.f | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/external/ezfio b/external/ezfio
index d02132ea..dba01c4f 160000
--- a/external/ezfio
+++ b/external/ezfio
@@ -1 +1 @@
-Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091
+Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3
diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f
index 3ad52a05..ba3675d4 100644
--- a/src/mu_of_r/mu_of_r_conditions.irp.f
+++ b/src/mu_of_r/mu_of_r_conditions.irp.f
@@ -269,7 +269,7 @@ BEGIN_PROVIDER [double precision, mu_of_r_projector_mo, (n_points_final_grid) ]
    ! epsilon
    mu_of_r_projector_mo(ipoint) = 1.d0/(2.d0*dacos(-1.d0) * mu_of_r_projector_mo(ipoint)**(2.d0/3.d0))
    ! mu
-   mu_of_r_projector_mo(ipoint) = 1.d0/dsqrt( mu_of_r_projector_mo(ipoint) )
+   mu_of_r_projector_mo(ipoint) = 1.d0/dsqrt( 2.d0*mu_of_r_projector_mo(ipoint) )
  enddo
 END_PROVIDER
 

From de26e8494ec59afdbae0e8f3c70337d94f498e1d Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 28 Feb 2025 11:39:43 +0100
Subject: [PATCH 44/59] Added do_mo_cholesky keyword

---
 src/mo_two_e_ints/EZFIO.cfg      | 6 ++++++
 src/mo_two_e_ints/cholesky.irp.f | 9 ---------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg
index f26bfb61..8844b8a6 100644
--- a/src/mo_two_e_ints/EZFIO.cfg
+++ b/src/mo_two_e_ints/EZFIO.cfg
@@ -1,3 +1,9 @@
+[do_mo_cholesky]
+type: logical
+doc: Use Cholesky decomposition of MO integrals in CI calculations
+interface: ezfio,provider,ocaml
+default: False
+
 [io_mo_cholesky]
 type: Disk_access
 doc: Read/Write |MO| Cholesky integrals from/to disk [ Write | Read | None ]
diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f
index 062e52e2..f4e390af 100644
--- a/src/mo_two_e_ints/cholesky.irp.f
+++ b/src/mo_two_e_ints/cholesky.irp.f
@@ -1,12 +1,3 @@
-BEGIN_PROVIDER [ logical, do_mo_cholesky ]
- implicit none
- BEGIN_DOC
- ! If True, use Cholesky vectors for MO integrals
- END_DOC
- do_mo_cholesky = do_ao_cholesky
-! do_mo_cholesky = .False.
-END_PROVIDER
-
  BEGIN_PROVIDER [ integer, cholesky_mo_num ]
 &BEGIN_PROVIDER [ integer, cholesky_mo_num_split, (1:5)]
  implicit none

From 61ddf14ab9665ac8af4f2bd0aac07b4d071a5c69 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 28 Feb 2025 18:04:16 +0100
Subject: [PATCH 45/59] Added average mu(r) with rho^2

---
 .../basis_correction/print_routine.irp.f      |  3 +-
 src/mu_of_r/mu_of_r_conditions.irp.f          | 30 +++++++++++++++++--
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/plugins/local/basis_correction/print_routine.irp.f b/plugins/local/basis_correction/print_routine.irp.f
index 8879fd5d..cc9744f2 100644
--- a/plugins/local/basis_correction/print_routine.irp.f
+++ b/plugins/local/basis_correction/print_routine.irp.f
@@ -79,7 +79,8 @@ subroutine print_basis_correction
   print*,''
   print*,'**************'
   do istate = 1, N_states
-    write(*, '(A29,X,I3,X,A3,X,F16.10)') '  Average mu(r)      , state ',istate,' = ',mu_average_prov(istate)
+    write(*, '(A29,X,I3,X,A3,X,F16.10)') '  Average mu(r) [rho  ], state ',istate,' = ',mu_average_prov(istate)
+    write(*, '(A29,X,I3,X,A3,X,F16.10)') '  Average mu(r) [rho^2], state ',istate,' = ',mu_average_prov2(istate)
   enddo
 
 end
diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f
index ba3675d4..d13605f6 100644
--- a/src/mu_of_r/mu_of_r_conditions.irp.f
+++ b/src/mu_of_r/mu_of_r_conditions.irp.f
@@ -211,7 +211,7 @@
  END_PROVIDER
 
 
- BEGIN_PROVIDER [double precision, mu_average_prov, (N_states)]
+BEGIN_PROVIDER [double precision, mu_average_prov, (N_states)]
  implicit none
  BEGIN_DOC
  ! average value of mu(r) weighted with the total one-e density and divided by the number of electrons
@@ -233,7 +233,33 @@
   enddo
   mu_average_prov(istate) = mu_average_prov(istate) / elec_num_grid_becke(istate)
  enddo
- END_PROVIDER
+END_PROVIDER
+
+BEGIN_PROVIDER [double precision, mu_average_prov2, (N_states)]
+ implicit none
+ BEGIN_DOC
+ ! average value of mu(r) weighted with square of the total one-e density
+ !
+ ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals
+ !
+ ! in the one- and two-body density matrix are excluded
+ END_DOC
+ integer :: ipoint,istate
+ double precision :: weight,density,norm
+ mu_average_prov2 = 0.d0
+ do istate = 1, N_states
+  norm = 0.d0
+  do ipoint = 1, n_points_final_grid
+   weight =final_weight_at_r_vector(ipoint)
+   density = one_e_dm_and_grad_alpha_in_r(4,ipoint,istate) &
+           + one_e_dm_and_grad_beta_in_r(4,ipoint,istate)
+   if(mu_of_r_prov(ipoint,istate).gt.1.d+09)cycle
+   mu_average_prov2(istate) += mu_of_r_prov(ipoint,istate) * weight * density*density
+   norm = norm + density*density*weight
+  enddo
+  mu_average_prov2(istate) = mu_average_prov2(istate) / norm
+ enddo
+END_PROVIDER
 
 
 BEGIN_PROVIDER [double precision, mu_of_r_projector_mo, (n_points_final_grid) ]

From 7d940958a3978b6dee271a3487e14d4a6a8d076f Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 6 Mar 2025 18:36:40 +0100
Subject: [PATCH 46/59] Update irpf90

---
 external/ezfio  | 2 +-
 external/irpf90 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/external/ezfio b/external/ezfio
index dba01c4f..d02132ea 160000
--- a/external/ezfio
+++ b/external/ezfio
@@ -1 +1 @@
-Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3
+Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091
diff --git a/external/irpf90 b/external/irpf90
index 4ab1b175..bd35d581 160000
--- a/external/irpf90
+++ b/external/irpf90
@@ -1 +1 @@
-Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6
+Subproject commit bd35d581d917c452054c074b88667fe4b5e7ca27

From fa75be002a1fe17c9fd68041e2429084f609c94d Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 6 Mar 2025 18:45:46 +0100
Subject: [PATCH 47/59] Fix irpf90

---
 external/irpf90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/irpf90 b/external/irpf90
index bd35d581..43160c60 160000
--- a/external/irpf90
+++ b/external/irpf90
@@ -1 +1 @@
-Subproject commit bd35d581d917c452054c074b88667fe4b5e7ca27
+Subproject commit 43160c60d88d9f61fb97cc0b35477c8eb0df862b

From 1775df957c53f5139076e79f963e2191ca8c4d64 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 7 Mar 2025 15:31:48 +0100
Subject: [PATCH 48/59] Added geometry optimization script

---
 scripts/qp_cipsi_rsh         |   1 +
 scripts/qp_cipsi_rsh_mu_of_r |   1 +
 scripts/qp_geom_opt.py       | 136 +++++++++++++++++++++++++++++++++++
 3 files changed, 138 insertions(+)
 create mode 120000 scripts/qp_cipsi_rsh
 create mode 120000 scripts/qp_cipsi_rsh_mu_of_r
 create mode 100755 scripts/qp_geom_opt.py

diff --git a/scripts/qp_cipsi_rsh b/scripts/qp_cipsi_rsh
new file mode 120000
index 00000000..c3d4376b
--- /dev/null
+++ b/scripts/qp_cipsi_rsh
@@ -0,0 +1 @@
+/home/scemama/qp2/plugins/qp_plugins_lct/stable/rsdft_cipsi/qp_cipsi_rsh
\ No newline at end of file
diff --git a/scripts/qp_cipsi_rsh_mu_of_r b/scripts/qp_cipsi_rsh_mu_of_r
new file mode 120000
index 00000000..feef7119
--- /dev/null
+++ b/scripts/qp_cipsi_rsh_mu_of_r
@@ -0,0 +1 @@
+/home/scemama/qp2/plugins/qp_plugins_lct/stable/rsdft_cipsi/qp_cipsi_rsh_mu_of_r
\ No newline at end of file
diff --git a/scripts/qp_geom_opt.py b/scripts/qp_geom_opt.py
new file mode 100755
index 00000000..240a411f
--- /dev/null
+++ b/scripts/qp_geom_opt.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Usage:
+       qp_geom_opt [-s state] [-r executable] [-f] [-t tolerance] <EZFIO_FILE>
+
+Options:
+      -s --state=<state>       Excited state to optimize 
+      -f --scf                 Perform an SCF after each geomety change
+      -r --qp_run=executable   Excited state to optimize 
+      -t --tol=tolerance       Convergence criterion on the energy
+"""                  
+
+
+try:
+    from docopt import docopt
+    from module_handler import ModuleHandler, get_dict_child
+    from module_handler import get_l_module_descendant
+    from qp_path import QP_SRC, QP_PLUGINS, QP_DATA, QP_ROOT
+except ImportError:
+    print("Please check if you have sourced the ${QP_ROOT}/quantum_package.rc")
+    print("(`source ${QP_ROOT}/quantum_package.rc`)")
+    print(sys.exit(1))
+
+    
+import numpy as np
+import subprocess
+from scipy.optimize import minimize
+from ezfio import ezfio
+
+import sys
+
+
+def set_unbuffered_output():
+    """Ensure sys.stdout is unbuffered or line-buffered in a portable way."""
+    if hasattr(sys.stdout, "reconfigure"):  # Python 3.7+
+        sys.stdout.reconfigure(line_buffering=True)
+    else:
+        sys.stdout = open(sys.stdout.fileno(), mode='w', buffering=1)
+
+set_unbuffered_output()
+
+
+
+
+def get_energy(file, state, arguments):
+    """Compute the energy of the given state by calling Quantum Package."""
+    if not arguments["--qp_run"]:
+        raise ValueError("--qp_run option missing")
+
+    if arguments["--scf"]:
+        executable = "scf"
+    else:
+        executable = "save_ortho_mos"
+
+    result = subprocess.run(f"qp_run {executable} {file} > {file}.energy.out",
+                            shell=True, capture_output=True, text=True, check=True
+    )
+
+    executable = arguments["--qp_run"]
+    result = subprocess.run( f"qp_run {executable} {file} > {file}.energy.out",
+                             shell=True)
+    
+    energy = None
+    with open(f"{file}.energy.out", 'r') as f:
+        for line in f:
+            if "Energy of state" in line and f"{state}" in line:
+                energy = float(line.split()[-1])  # Extracts the energy value
+    
+    return energy
+    raise ValueError("Energy not found in Quantum Package output. Update script {sys.argv[0]}")
+
+def set_coordinates(coord):
+    """Update the nuclear coordinates in EZFIO."""
+    ezfio.set_nuclei_nucl_coord(coord)
+
+
+def get_coordinates():
+    """Retrieve the current nuclear coordinates from EZFIO."""
+    return np.array(ezfio.get_nuclei_nucl_coord())
+
+
+def energy_function(coord, file, state, arguments):
+    """Wrapper for the energy calculation, ensuring coordinates are updated."""
+    set_coordinates(coord)
+    energy = get_energy(file, state, arguments)
+
+    label = ezfio.get_nuclei_nucl_label()
+    num_atoms = len(label)
+    coord = coord.reshape(3, num_atoms).T  # Reshape into (num_atoms, 3)
+    coord_angstrom = coord * 0.529177  # Convert atomic units to angstroms
+    
+    print(num_atoms)
+    print(f"Energy: {energy:15.10f}")
+    for i, (x, y, z) in enumerate(coord_angstrom):
+        print(f"{label[i]:3s} {x:15.8f} {y:15.8f} {z:15.8f}")  # Replace 'X' with actual atomic symbols
+    return energy
+
+
+def optimize_geometry(file, state, arguments):
+    """Perform geometry optimization using SciPy's minimize function."""
+
+    x0 = get_coordinates().flatten()
+
+    if arguments["--tol"]:
+        tolerance = float(tol=arguments["--tol"])
+    else:
+        tolerance = 1.e-3
+
+    result = minimize(energy_function, x0, args=(file, state, arguments), method='BFGS', jac=None, tol=tolerance, options={'eps': 1.e-3})
+    
+    if result.success:
+        print("Optimization successful!")
+        print("Final energy:", result.fun)
+        print("Optimized coordinates:", result.x)
+    else:
+        print("Optimization failed:", result.message)
+    
+    set_coordinates(result.x)  # Store the optimized geometry
+    return result
+
+
+def main(arguments):
+    if arguments["--state"]:
+        state=arguments["--state"]
+    else:
+        state=1
+    ezfio_filename = arguments["<EZFIO_FILE>"]
+    ezfio.set_file(ezfio_filename)
+
+    optimize_geometry(ezfio_filename, state, arguments)
+
+
+if __name__ == "__main__":
+    ARG = docopt(__doc__)
+    main(ARG)

From ad0c807a53c909b7c59b6fcae6d92d3fdcb8bf23 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 7 Mar 2025 15:41:18 +0100
Subject: [PATCH 49/59] Change for Powell in geom_opt

---
 scripts/qp_geom_opt.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/scripts/qp_geom_opt.py b/scripts/qp_geom_opt.py
index 240a411f..73c8ce8e 100755
--- a/scripts/qp_geom_opt.py
+++ b/scripts/qp_geom_opt.py
@@ -107,7 +107,16 @@ def optimize_geometry(file, state, arguments):
     else:
         tolerance = 1.e-3
 
-    result = minimize(energy_function, x0, args=(file, state, arguments), method='BFGS', jac=None, tol=tolerance, options={'eps': 1.e-3})
+    result = minimize(energy_function, x0, args=(file, state, arguments),
+                      method='Powell',
+                      tol=tolerance,
+                      options={'xtol': tolerance, 'ftol': tolerance})
+    
+#    result = minimize(energy_function, x0, args=(file, state, arguments),
+#                      method='BFGS',
+#                      jac=None,
+#                      tol=tolerance,
+#                      options={'eps': 1.e-3})
     
     if result.success:
         print("Optimization successful!")

From 69d5811d14dbaebce578aedd5ebf3d81b179ae22 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 7 Mar 2025 15:54:55 +0100
Subject: [PATCH 50/59] Change for Powell in geom_opt

---
 scripts/qp_geom_opt.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/qp_geom_opt.py b/scripts/qp_geom_opt.py
index 73c8ce8e..c089a5d8 100755
--- a/scripts/qp_geom_opt.py
+++ b/scripts/qp_geom_opt.py
@@ -80,10 +80,16 @@ def get_coordinates():
     return np.array(ezfio.get_nuclei_nucl_coord())
 
 
+memo_energy = {}
 def energy_function(coord, file, state, arguments):
     """Wrapper for the energy calculation, ensuring coordinates are updated."""
+    h = np.array_str(coord)
+    if h in memo_energy:
+        return memo_energy[h]
+
     set_coordinates(coord)
     energy = get_energy(file, state, arguments)
+    memo_energy[h] = energy
 
     label = ezfio.get_nuclei_nucl_label()
     num_atoms = len(label)

From 3552856ca79f40687f331a885dbb603857fcc38b Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 11 Mar 2025 18:37:00 +0100
Subject: [PATCH 51/59] working on extra_basis_int

---
 external/ezfio                                |  2 +-
 .../extra_basis_int/extra_basis_int.irp.f     | 22 ++++++++++----
 .../local/extra_basis_int/pot_ao_ints.irp.f   | 29 +++++++++++++++++++
 src/ao_extra_basis/qp_copy_extra_basis        |  2 +-
 4 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/external/ezfio b/external/ezfio
index dba01c4f..d02132ea 160000
--- a/external/ezfio
+++ b/external/ezfio
@@ -1 +1 @@
-Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3
+Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091
diff --git a/plugins/local/extra_basis_int/extra_basis_int.irp.f b/plugins/local/extra_basis_int/extra_basis_int.irp.f
index 1d35b1c2..13c31f36 100644
--- a/plugins/local/extra_basis_int/extra_basis_int.irp.f
+++ b/plugins/local/extra_basis_int/extra_basis_int.irp.f
@@ -11,18 +11,28 @@ program extra_basis_int
 ! call routine_pot_ne
 ! call routine_test_pot_ne_extra_mixed
 ! call routine_test_coul_1s
- call print_v_ne_extra_basis
- call print_v_ne_basis
+! call print_v_ne_extra_basis
+! call print_v_ne_basis
+ call test_v_ne_a_extra_basis
  
 end
 
+subroutine test_v_ne_a_extra_basis
+ implicit none
+ integer :: i,j
+ do i = 1, ao_extra_num
+  write(*,'(100(F16.10,X))')pot_vne_A_extra_basis(1:ao_extra_num,i)
+ enddo
+end
+
+
 subroutine test_overlap
  implicit none
   integer :: i,j
-  do i = 1, ao_extra_num
-   do j = 1, ao_extra_num 
-    write(33,*)ao_extra_overlap(j,i)
-   enddo
+  do i = 1, ao_num
+!   do j = 1, ao_num 
+    write(33,'(100(F16.10,X))')ao_extra_overlap_mixed(i,1:ao_extra_num)
+!   enddo
   enddo
 end
 
diff --git a/plugins/local/extra_basis_int/pot_ao_ints.irp.f b/plugins/local/extra_basis_int/pot_ao_ints.irp.f
index 5f3af244..1828f980 100644
--- a/plugins/local/extra_basis_int/pot_ao_ints.irp.f
+++ b/plugins/local/extra_basis_int/pot_ao_ints.irp.f
@@ -1,3 +1,32 @@
+BEGIN_PROVIDER [ double precision, pot_vne_A_extra_basis, (ao_extra_num,ao_extra_num)]
+ implicit none
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  ! $\sum_{R in nuclei} -Z <chi_i|1/|r-R||chi_j>$ 
+  !
+  !
+  ! where $\chi_i(r)$ AND $\chi_j(r)$ belongs to the extra basis 
+  END_DOC
+  integer :: mu,nu,k_nucl
+  double precision :: mu_in, R_nucl(3),charge_nucl, integral
+  double precision :: NAI_pol_mult_erf_ao_extra
+  mu_in = 10.d0**10
+  pot_vne_A_extra_basis = 0.d0
+  do mu = 1, ao_extra_num
+   do nu = 1, ao_extra_num
+    do k_nucl = 1, nucl_num
+     R_nucl(1:3) = nucl_coord_transp(1:3,k_nucl)
+     charge_nucl = nucl_charge(k_nucl)
+     integral = NAI_pol_mult_erf_ao_extra(mu, nu, mu_in, R_nucl)
+     pot_vne_A_extra_basis(nu,mu) += -integral * charge_nucl
+    enddo
+   enddo
+  enddo
+
+END_PROVIDER 
+
+
 
 double precision function NAI_pol_mult_erf_ao_extra(i_ao, j_ao, mu_in, C_center)
 
diff --git a/src/ao_extra_basis/qp_copy_extra_basis b/src/ao_extra_basis/qp_copy_extra_basis
index cb435e18..6d0e17b8 100755
--- a/src/ao_extra_basis/qp_copy_extra_basis
+++ b/src/ao_extra_basis/qp_copy_extra_basis
@@ -58,7 +58,7 @@ do
 done
 i=primitives_normalized
 newfile=primitives_normalized_extra
-cp ${EZFIO_extra}/ao_basis/$i ${EZFIO_target}/ao_extra_basis/$newfile
+cp ${EZFIO_extra}/basis/$i ${EZFIO_target}/ao_extra_basis/$newfile
 
 echo "COPYING ALL DATA FROM "$EZFIO_extra"/aux_quantities/ to "${EZFIO_target}"/ao_extra_basis/"
 i=data_one_e_dm_tot_ao.gz

From a9a91f5a9fae8e34de7eed39b70d6a957607072f Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 11 Mar 2025 23:51:57 +0100
Subject: [PATCH 52/59] modified fit_1s_basis.irp.f

---
 external/irpf90                               |  2 +-
 plugins/local/extra_basis_int/coul_1s.irp.f   | 10 +++++
 .../local/extra_basis_int/pot_ao_ints.irp.f   | 44 ++++++++++++++-----
 .../local/extra_basis_int/v_mixed_extra.irp.f | 27 ++++++++++++
 src/ao_extra_basis/fit_1s_basis.irp.f         |  1 +
 5 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/external/irpf90 b/external/irpf90
index 43160c60..4ab1b175 160000
--- a/external/irpf90
+++ b/external/irpf90
@@ -1 +1 @@
-Subproject commit 43160c60d88d9f61fb97cc0b35477c8eb0df862b
+Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6
diff --git a/plugins/local/extra_basis_int/coul_1s.irp.f b/plugins/local/extra_basis_int/coul_1s.irp.f
index 964ed36a..62ec4331 100644
--- a/plugins/local/extra_basis_int/coul_1s.irp.f
+++ b/plugins/local/extra_basis_int/coul_1s.irp.f
@@ -11,6 +11,11 @@ double precision function coul_full_ao_pq_r_1s(p,q,R,R_p,R_q)
  double precision, intent(in) :: R(3),R_p(3),R_q(3)
  integer, intent(in) :: p,q
  double precision :: coef,dist,P_pq(3),coefaos
+ if(.not.ao_extra_only_1s)then
+   print*,'You are using a function assuming that the extra basis is fitted on 1s functions'
+   print*,'But this is not the case apparently ... stopping'
+   stop
+ endif
  coefaos= ao_extra_coef_normalized(p,1) * ao_extra_coef_normalized(q,1) 
  coef = inv_pi_gamma_pq_3_2_ao_extra(p,q) * E_pq_ao_extra(p,q)
  P_pq = ao_extra_expo(p,1) * R_p + ao_extra_expo(q,1) * R_q
@@ -40,6 +45,11 @@ double precision function coul_pq_r_1s(p,q,R,R_p,R_q)
  double precision, intent(in) :: R(3),R_p(3),R_q(3)
  integer, intent(in) :: p,q
  double precision :: dist,P_pq(3)
+ if(.not.ao_extra_only_1s)then
+   print*,'You are using a function assuming that the extra basis is fitted on 1s functions'
+   print*,'But this is not the case apparently ... stopping'
+   stop
+ endif
  P_pq = ao_extra_expo(p,1) * R_p + ao_extra_expo(q,1) * R_q
  P_pq = P_pq * inv_gamma_pq_ao_extra(q,p)
  dist = (P_pq(1)-R(1)) * (P_pq(1)-R(1))
diff --git a/plugins/local/extra_basis_int/pot_ao_ints.irp.f b/plugins/local/extra_basis_int/pot_ao_ints.irp.f
index 1828f980..5a7d7580 100644
--- a/plugins/local/extra_basis_int/pot_ao_ints.irp.f
+++ b/plugins/local/extra_basis_int/pot_ao_ints.irp.f
@@ -3,23 +3,45 @@ BEGIN_PROVIDER [ double precision, pot_vne_A_extra_basis, (ao_extra_num,ao_extra
   BEGIN_DOC
   !
   ! Computes the following integral :
-  ! $\sum_{R in nuclei} -Z <chi_i|1/|r-R||chi_j>$ 
+  ! $\sum_{R in the USUAL nuclei} -Z <chi_i|1/|r-R||chi_j>$ 
   !
   !
-  ! where $\chi_i(r)$ AND $\chi_j(r)$ belongs to the extra basis 
+  ! where $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis 
   END_DOC
-  integer :: mu,nu,k_nucl
-  double precision :: mu_in, R_nucl(3),charge_nucl, integral
-  double precision :: NAI_pol_mult_erf_ao_extra
-  mu_in = 10.d0**10
+  integer :: mu,nu
+  double precision :: v_nucl_extra_ao
   pot_vne_A_extra_basis = 0.d0
   do mu = 1, ao_extra_num
    do nu = 1, ao_extra_num
-    do k_nucl = 1, nucl_num
-     R_nucl(1:3) = nucl_coord_transp(1:3,k_nucl)
-     charge_nucl = nucl_charge(k_nucl)
-     integral = NAI_pol_mult_erf_ao_extra(mu, nu, mu_in, R_nucl)
-     pot_vne_A_extra_basis(nu,mu) += -integral * charge_nucl
+    pot_vne_A_extra_basis(nu,mu)= v_nucl_extra_ao(mu,nu)
+   enddo
+  enddo
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, pot_vne_extra_basis, (ao_num,ao_num)]
+ implicit none
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  ! $\sum_{R in EXTRA nuclei} -Z <chi_i|1/|r-R||chi_j>$ 
+  !
+  !
+  ! where $\chi_i(r)$ AND $\chi_j(r)$ belongs to the USUAL basis 
+  END_DOC
+  integer :: mu,nu,k_nucl
+  double precision :: mu_in, R_nucl(3),charge_nucl, integral
+  double precision :: NAI_pol_mult_erf_ao
+  mu_in = 10.d0**10
+  pot_vne_extra_basis = 0.d0
+  do mu = 1, ao_num
+   do nu = 1, ao_num
+    do k_nucl = 1, extra_nucl_num
+     R_nucl(1:3) = extra_nucl_coord_transp(1:3,k_nucl)
+     charge_nucl = extra_nucl_charge(k_nucl)
+     integral = NAI_pol_mult_erf_ao(mu, nu, mu_in, R_nucl)
+     pot_vne_extra_basis(nu,mu) += -integral * charge_nucl
     enddo
    enddo
   enddo
diff --git a/plugins/local/extra_basis_int/v_mixed_extra.irp.f b/plugins/local/extra_basis_int/v_mixed_extra.irp.f
index ac856692..15958ab5 100644
--- a/plugins/local/extra_basis_int/v_mixed_extra.irp.f
+++ b/plugins/local/extra_basis_int/v_mixed_extra.irp.f
@@ -1,3 +1,6 @@
+!!! TODO:: optimize when "ao_extra_only_1s" is True
+
+
 double precision function v_extra_nucl_extra_ao(i_ao,j_ao)
  implicit none
   BEGIN_DOC
@@ -23,6 +26,30 @@ double precision function v_extra_nucl_extra_ao(i_ao,j_ao)
  enddo
 end
 
+double precision function v_extra_nucl_ao(i_ao,j_ao)
+ implicit none
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) v_ne(r)$.
+  !
+  !
+  ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the REGULAR basis 
+  !
+  ! and v_ne(r) is the Coulomb potential coming from the EXTRA nuclei
+  END_DOC
+ integer, intent(in) ::i_ao,j_ao
+ integer :: i
+ double precision :: mu_in, coord(3),charge, integral
+ double precision :: NAI_pol_mult_erf_ao
+ mu_in = 1.d+10
+ do i = 1, extra_nucl_num
+  coord(1:3) = extra_nucl_coord_transp(1:3,i)
+  charge = extra_nucl_charge(i)
+  v_extra_nucl_ao += -NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, coord) * charge
+ enddo
+end
+
 
 double precision function v_nucl_extra_ao(i_ao,j_ao)
  implicit none
diff --git a/src/ao_extra_basis/fit_1s_basis.irp.f b/src/ao_extra_basis/fit_1s_basis.irp.f
index ef09d5b2..9c788173 100644
--- a/src/ao_extra_basis/fit_1s_basis.irp.f
+++ b/src/ao_extra_basis/fit_1s_basis.irp.f
@@ -31,6 +31,7 @@ program fit_1s_basis
  call ezfio_set_extra_nuclei_extra_nucl_label(new_nucl_label_1s)
 !
  call ezfio_set_ao_extra_basis_ao_extra_num(n_func_tot)
+ call ezfio_set_ao_extra_basis_ao_extra_only_1s(.True.)
  call ezfio_set_ao_extra_basis_ao_extra_center(ao_extra_center)
  call ezfio_set_ao_extra_basis_ao_extra_nucl(new_ao_nucl_1s)
  call ezfio_set_ao_extra_basis_ao_extra_prim_num(new_ao_prim_num_1s)

From a6091ef989993ea447c63d5dda4a56bf33ecec7c Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 12 Mar 2025 11:18:35 +0100
Subject: [PATCH 53/59] two_e_int.irp.f is OK

---
 .../extra_basis_int/extra_basis_int.irp.f     | 19 ++++-
 .../extra_basis_int/ref_extra_basis.irp.f     | 19 ++++-
 plugins/local/extra_basis_int/two_e_int.irp.f | 71 +++++++++++++++++++
 3 files changed, 107 insertions(+), 2 deletions(-)
 create mode 100644 plugins/local/extra_basis_int/two_e_int.irp.f

diff --git a/plugins/local/extra_basis_int/extra_basis_int.irp.f b/plugins/local/extra_basis_int/extra_basis_int.irp.f
index 13c31f36..1f2919ba 100644
--- a/plugins/local/extra_basis_int/extra_basis_int.irp.f
+++ b/plugins/local/extra_basis_int/extra_basis_int.irp.f
@@ -13,7 +13,8 @@ program extra_basis_int
 ! call routine_test_coul_1s
 ! call print_v_ne_extra_basis
 ! call print_v_ne_basis
- call test_v_ne_a_extra_basis
+! call test_v_ne_a_extra_basis
+ call print_v_ee_mixed_direct
  
 end
 
@@ -199,3 +200,19 @@ subroutine print_v_ne_basis
  print*,'accu = ',accu
 
 end
+
+subroutine print_v_ee_mixed_direct
+ implicit none
+ integer :: i,j,k,l
+ double precision :: ao_two_e_integral_mixed_direct
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do k = 1, ao_extra_num
+    do l = 1, ao_extra_num
+     write(34,*)ao_two_e_integral_mixed_direct(i, j, k, l)
+    enddo
+   enddo
+  enddo
+ enddo
+
+end
diff --git a/plugins/local/extra_basis_int/ref_extra_basis.irp.f b/plugins/local/extra_basis_int/ref_extra_basis.irp.f
index 39055fd0..8265bb9c 100644
--- a/plugins/local/extra_basis_int/ref_extra_basis.irp.f
+++ b/plugins/local/extra_basis_int/ref_extra_basis.irp.f
@@ -6,7 +6,8 @@ program pouet
 ! call routine_pot_ne_extra
 ! call ref_pot_ne_mixed
 ! call ref_pot_ne
- call ref_pot_ne_extra_mixed
+! call ref_pot_ne_extra_mixed
+ call ref_v_ee_mixed_direct
 
 end
 
@@ -113,3 +114,19 @@ subroutine ref_pot_ne_extra_mixed
   enddo
  enddo
 end
+
+subroutine ref_v_ee_mixed_direct
+ implicit none
+ integer :: i,j,k,l
+ double precision :: ao_two_e_integral
+ do i = 1, 15
+  do j = 1, 15
+   do k = 16, ao_num
+    do l = 16, ao_num
+     write(33,*)ao_two_e_integral(i, j, k, l)
+    enddo
+   enddo
+  enddo
+ enddo
+
+end
diff --git a/plugins/local/extra_basis_int/two_e_int.irp.f b/plugins/local/extra_basis_int/two_e_int.irp.f
new file mode 100644
index 00000000..a055c437
--- /dev/null
+++ b/plugins/local/extra_basis_int/two_e_int.irp.f
@@ -0,0 +1,71 @@
+double precision function ao_two_e_integral_mixed_direct(i, j, k, l)
+
+  BEGIN_DOC
+  !  integral of the AO basis <ik|jl> or (ij|kl)
+  !     i(r1) j(r1) 1/r12 k(r2) l(r2)
+  !
+  ! where i,j belong to the REGULAR AO basis and k,l to the EXTRA basis
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer, intent(in)            :: i, j, k, l
+
+  integer                        :: p, q, r, s
+  integer                        :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3)
+  integer                        :: iorder_p(3), iorder_q(3)
+  double precision               :: I_center(3), J_center(3), K_center(3), L_center(3)
+  double precision               :: integral
+  double precision               :: P_new(0:max_dim,3),P_center(3),fact_p,pp
+  double precision               :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq
+  double precision               :: general_primitive_integral
+
+  dim1 = n_pt_max_integrals
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_extra_nucl(k)
+  num_l = ao_extra_nucl(l)
+  ao_two_e_integral_mixed_direct = 0.d0
+
+  do p = 1, 3
+    I_power(p) = ao_power(i,p)
+    J_power(p) = ao_power(j,p)
+    K_power(p) = ao_extra_power(k,p)
+    L_power(p) = ao_extra_power(l,p)
+    I_center(p) = nucl_coord(num_i,p)
+    J_center(p) = nucl_coord(num_j,p)
+    K_center(p) = extra_nucl_coord(num_k,p)
+    L_center(p) = extra_nucl_coord(num_l,p)
+  enddo
+
+  double precision               :: coef1, coef2, coef3, coef4
+  double precision               :: p_inv,q_inv
+
+  do p = 1, ao_prim_num(i)
+    coef1 = ao_coef_normalized_ordered_transp(p,i)
+    do q = 1, ao_prim_num(j)
+      coef2 = coef1*ao_coef_normalized_ordered_transp(q,j)
+      call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,&
+          ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j),                 &
+          I_power,J_power,I_center,J_center,dim1)
+      p_inv = 1.d0/pp
+      do r = 1, ao_extra_prim_num(k)
+        coef3 = coef2*ao_extra_coef_normalized_ordered_transp(r,k)
+        do s = 1, ao_extra_prim_num(l)
+          coef4 = coef3*ao_extra_coef_normalized_ordered_transp(s,l)
+          call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,&
+              ao_extra_expo_ordered_transp(r,k),ao_extra_expo_ordered_transp(s,l),             &
+              K_power,L_power,K_center,L_center,dim1)
+          q_inv = 1.d0/qq
+          integral = general_primitive_integral(dim1,              &
+              P_new,P_center,fact_p,pp,p_inv,iorder_p,             &
+              Q_new,Q_center,fact_q,qq,q_inv,iorder_q)
+          ao_two_e_integral_mixed_direct = ao_two_e_integral_mixed_direct +  coef4 * integral
+        enddo ! s
+      enddo  ! r
+    enddo   ! q
+  enddo    ! p
+
+end

From 52fdf18e74bf7551280299aeee51cfd18d59d2e8 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 12 Mar 2025 11:25:27 +0100
Subject: [PATCH 54/59] added exchange integrals

---
 .../extra_basis_int/extra_basis_int.irp.f     | 19 ++++-
 .../extra_basis_int/ref_extra_basis.irp.f     | 19 ++++-
 plugins/local/extra_basis_int/two_e_int.irp.f | 76 ++++++++++++++++++-
 3 files changed, 111 insertions(+), 3 deletions(-)

diff --git a/plugins/local/extra_basis_int/extra_basis_int.irp.f b/plugins/local/extra_basis_int/extra_basis_int.irp.f
index 1f2919ba..59b90374 100644
--- a/plugins/local/extra_basis_int/extra_basis_int.irp.f
+++ b/plugins/local/extra_basis_int/extra_basis_int.irp.f
@@ -14,7 +14,8 @@ program extra_basis_int
 ! call print_v_ne_extra_basis
 ! call print_v_ne_basis
 ! call test_v_ne_a_extra_basis
- call print_v_ee_mixed_direct
+! call print_v_ee_mixed_direct
+ call print_v_ee_mixed_exchange
  
 end
 
@@ -216,3 +217,19 @@ subroutine print_v_ee_mixed_direct
  enddo
 
 end
+
+subroutine print_v_ee_mixed_exchange
+ implicit none
+ integer :: i,j,k,l
+ double precision :: ao_two_e_integral_mixed_exchange
+ do i = 1, ao_num
+  do j = 1, ao_extra_num
+   do k = 1, ao_num
+    do l = 1, ao_extra_num
+     write(34,*)ao_two_e_integral_mixed_exchange(i, j, k, l)
+    enddo
+   enddo
+  enddo
+ enddo
+
+end
diff --git a/plugins/local/extra_basis_int/ref_extra_basis.irp.f b/plugins/local/extra_basis_int/ref_extra_basis.irp.f
index 8265bb9c..70d77733 100644
--- a/plugins/local/extra_basis_int/ref_extra_basis.irp.f
+++ b/plugins/local/extra_basis_int/ref_extra_basis.irp.f
@@ -7,7 +7,8 @@ program pouet
 ! call ref_pot_ne_mixed
 ! call ref_pot_ne
 ! call ref_pot_ne_extra_mixed
- call ref_v_ee_mixed_direct
+! call ref_v_ee_mixed_direct
+ call ref_v_ee_mixed_exchange
 
 end
 
@@ -130,3 +131,19 @@ subroutine ref_v_ee_mixed_direct
  enddo
 
 end
+
+subroutine ref_v_ee_mixed_exchange
+ implicit none
+ integer :: i,j,k,l
+ double precision :: ao_two_e_integral
+ do i = 1, 15
+  do j = 16, ao_num
+   do k = 1, 15
+    do l = 16, ao_num
+     write(33,*)ao_two_e_integral(i, j, k, l)
+    enddo
+   enddo
+  enddo
+ enddo
+
+end
diff --git a/plugins/local/extra_basis_int/two_e_int.irp.f b/plugins/local/extra_basis_int/two_e_int.irp.f
index a055c437..2cde4153 100644
--- a/plugins/local/extra_basis_int/two_e_int.irp.f
+++ b/plugins/local/extra_basis_int/two_e_int.irp.f
@@ -3,8 +3,9 @@ double precision function ao_two_e_integral_mixed_direct(i, j, k, l)
   BEGIN_DOC
   !  integral of the AO basis <ik|jl> or (ij|kl)
   !     i(r1) j(r1) 1/r12 k(r2) l(r2)
+  !     A     A           B     B
   !
-  ! where i,j belong to the REGULAR AO basis and k,l to the EXTRA basis
+  ! where i,j belong to the REGULAR AO basis (system A) and k,l to the EXTRA basis (system B)
   END_DOC
 
   implicit none
@@ -69,3 +70,76 @@ double precision function ao_two_e_integral_mixed_direct(i, j, k, l)
   enddo    ! p
 
 end
+
+double precision function ao_two_e_integral_mixed_exchange(i, j, k, l)
+
+  BEGIN_DOC
+  !  integral of the AO basis <ik|jl> or (ij|kl)
+  !     i(r1) j(r1) 1/r12 k(r2) l(r2)
+  !     A     B           A     B
+  !
+  ! where i,k belong to the REGULAR AO basis (system A) and j,l to the EXTRA basis (system B)
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer, intent(in)            :: i, j, k, l
+
+  integer                        :: p, q, r, s
+  integer                        :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3)
+  integer                        :: iorder_p(3), iorder_q(3)
+  double precision               :: I_center(3), J_center(3), K_center(3), L_center(3)
+  double precision               :: integral
+  double precision               :: P_new(0:max_dim,3),P_center(3),fact_p,pp
+  double precision               :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq
+  double precision               :: general_primitive_integral
+
+  dim1 = n_pt_max_integrals
+
+  num_i = ao_nucl(i)
+  num_j = ao_extra_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_extra_nucl(l)
+  ao_two_e_integral_mixed_exchange = 0.d0
+
+  do p = 1, 3
+    I_power(p) = ao_power(i,p)
+    J_power(p) = ao_extra_power(j,p)
+    K_power(p) = ao_power(k,p)
+    L_power(p) = ao_extra_power(l,p)
+    I_center(p) = nucl_coord(num_i,p)
+    J_center(p) = extra_nucl_coord(num_j,p)
+    K_center(p) = nucl_coord(num_k,p)
+    L_center(p) = extra_nucl_coord(num_l,p)
+  enddo
+
+  double precision               :: coef1, coef2, coef3, coef4
+  double precision               :: p_inv,q_inv
+
+  do p = 1, ao_prim_num(i)
+    coef1 = ao_coef_normalized_ordered_transp(p,i)
+    do q = 1, ao_extra_prim_num(j)
+      coef2 = coef1*ao_extra_coef_normalized_ordered_transp(q,j)
+      call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,&
+          ao_expo_ordered_transp(p,i),ao_extra_expo_ordered_transp(q,j),                 &
+          I_power,J_power,I_center,J_center,dim1)
+      p_inv = 1.d0/pp
+      do r = 1, ao_prim_num(k)
+        coef3 = coef2*ao_coef_normalized_ordered_transp(r,k)
+        do s = 1, ao_extra_prim_num(l)
+          coef4 = coef3*ao_extra_coef_normalized_ordered_transp(s,l)
+          call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,&
+              ao_expo_ordered_transp(r,k),ao_extra_expo_ordered_transp(s,l),             &
+              K_power,L_power,K_center,L_center,dim1)
+          q_inv = 1.d0/qq
+          integral = general_primitive_integral(dim1,              &
+              P_new,P_center,fact_p,pp,p_inv,iorder_p,             &
+              Q_new,Q_center,fact_q,qq,q_inv,iorder_q)
+          ao_two_e_integral_mixed_exchange = ao_two_e_integral_mixed_exchange +  coef4 * integral
+        enddo ! s
+      enddo  ! r
+    enddo   ! q
+  enddo    ! p
+
+end

From ffe7a8485f56c15a0b099a04c680a1fbd9eb57e9 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 12 Mar 2025 12:25:09 +0100
Subject: [PATCH 55/59] added direct, exchange between extra_basis and usual
 basis, together with a tuto in ao_extra_basis

---
 .../local/extra_basis_int/ao_overlap.irp.f    |  6 +++--
 .../local/extra_basis_int/pot_ao_ints.irp.f   |  1 -
 .../local/extra_basis_int/v_mixed_extra.irp.f | 12 ++++-----
 src/ao_extra_basis/README.rst                 |  5 +++-
 src/ao_extra_basis/tuto/He_A.xyz              |  3 +++
 src/ao_extra_basis/tuto/example_copy.sh       | 26 +++++++++++++++++++
 src/ao_extra_basis/tuto/h2o.xyz               |  7 +++++
 7 files changed, 50 insertions(+), 10 deletions(-)
 create mode 100644 src/ao_extra_basis/tuto/He_A.xyz
 create mode 100755 src/ao_extra_basis/tuto/example_copy.sh
 create mode 100644 src/ao_extra_basis/tuto/h2o.xyz

diff --git a/plugins/local/extra_basis_int/ao_overlap.irp.f b/plugins/local/extra_basis_int/ao_overlap.irp.f
index 4f8debb6..9e45e690 100644
--- a/plugins/local/extra_basis_int/ao_overlap.irp.f
+++ b/plugins/local/extra_basis_int/ao_overlap.irp.f
@@ -4,7 +4,7 @@
  BEGIN_PROVIDER [double precision, ao_extra_overlap  , (ao_extra_num, ao_extra_num)]
 
   BEGIN_DOC
-  ! Overlap between atomic basis functions:
+  ! Overlap between atomic basis functions belonging to the EXTRA BASIS 
   !
   ! :math:`\int \chi_i(r) \chi_j(r) dr`
   END_DOC
@@ -69,7 +69,9 @@ END_PROVIDER
  BEGIN_PROVIDER [double precision, ao_extra_overlap_mixed  , (ao_num, ao_extra_num)]
 
   BEGIN_DOC
-  ! Overlap between atomic basis functions:
+  ! Overlap between atomic basis functions:  
+  !
+  ! first index belongs to the REGULAR AO basis, second to the EXTRA basis 
   !
   ! <AO_i|AO_j extra basis>
   END_DOC
diff --git a/plugins/local/extra_basis_int/pot_ao_ints.irp.f b/plugins/local/extra_basis_int/pot_ao_ints.irp.f
index 5a7d7580..f52e7579 100644
--- a/plugins/local/extra_basis_int/pot_ao_ints.irp.f
+++ b/plugins/local/extra_basis_int/pot_ao_ints.irp.f
@@ -5,7 +5,6 @@ BEGIN_PROVIDER [ double precision, pot_vne_A_extra_basis, (ao_extra_num,ao_extra
   ! Computes the following integral :
   ! $\sum_{R in the USUAL nuclei} -Z <chi_i|1/|r-R||chi_j>$ 
   !
-  !
   ! where $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis 
   END_DOC
   integer :: mu,nu
diff --git a/plugins/local/extra_basis_int/v_mixed_extra.irp.f b/plugins/local/extra_basis_int/v_mixed_extra.irp.f
index 15958ab5..8b8ce92e 100644
--- a/plugins/local/extra_basis_int/v_mixed_extra.irp.f
+++ b/plugins/local/extra_basis_int/v_mixed_extra.irp.f
@@ -9,9 +9,9 @@ double precision function v_extra_nucl_extra_ao(i_ao,j_ao)
   ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) v_ne^{extra}(r)$.
   !
   !
-  ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis 
+  ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis (system B)
   !
-  ! and v_ne^{extra}(r) is the Coulomb potential coming from the EXTRA nuclei
+  ! and v_ne^{extra}(r) is the Coulomb potential coming from the EXTRA nuclei (system B)
   END_DOC
  integer, intent(in) ::i_ao,j_ao
  double precision :: mu_in,charge,coord(3)
@@ -34,9 +34,9 @@ double precision function v_extra_nucl_ao(i_ao,j_ao)
   ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) v_ne(r)$.
   !
   !
-  ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the REGULAR basis 
+  ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the REGULAR basis (system A)
   !
-  ! and v_ne(r) is the Coulomb potential coming from the EXTRA nuclei
+  ! and v_ne(r) is the Coulomb potential coming from the EXTRA nuclei (system B)
   END_DOC
  integer, intent(in) ::i_ao,j_ao
  integer :: i
@@ -59,9 +59,9 @@ double precision function v_nucl_extra_ao(i_ao,j_ao)
   ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) v_ne(r)$.
   !
   !
-  ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis 
+  ! where BOTH $\chi_i(r)$ AND $\chi_j(r)$ belongs to the EXTRA basis (system B)
   !
-  ! and v_ne(r) is the Coulomb potential coming from the REGULAR nuclei
+  ! and v_ne(r) is the Coulomb potential coming from the REGULAR nuclei (system A)
   END_DOC
  integer, intent(in) ::i_ao,j_ao
  double precision :: mu_in,charge,coord(3)
diff --git a/src/ao_extra_basis/README.rst b/src/ao_extra_basis/README.rst
index f60d71c0..5f850255 100644
--- a/src/ao_extra_basis/README.rst
+++ b/src/ao_extra_basis/README.rst
@@ -5,7 +5,9 @@ extra_basis
 Plugin to handle an extra basis, which is attached to the extra_nuclei.
 It is essentially a duplication of all important quantities (coefficients, exponents and so on) of the usual |AO| basis. 
 
-An interesting feature is the possibility to fit any basis made at most with "p" functions onto a purely "s" basis. 
+Check in the directory "tuto" for a simple example of how to create a fictious system "B" attached independently to a system "A"
+
+Another interesting feature is the possibility to fit any basis made at most with "p" functions onto a purely "s" basis. 
 This is done with the various scripts here: 
  
  - qp_fit_1s_basis : script that creates an |EZFIO| folder corresponding to an .xyz file and a basis fitted with only "s" functions
@@ -13,3 +15,4 @@ This is done with the various scripts here:
  
 Ex: 
 qp_add_extra_fit_system LiH.ezfio/ h2o.xyz # takes the EZFIO folder "LiH.ezfio" and creates all necessary additional basis and nuclei based on h2o.xyz, but only with 1s functions. 
+
diff --git a/src/ao_extra_basis/tuto/He_A.xyz b/src/ao_extra_basis/tuto/He_A.xyz
new file mode 100644
index 00000000..d5285ade
--- /dev/null
+++ b/src/ao_extra_basis/tuto/He_A.xyz
@@ -0,0 +1,3 @@
+1
+He atom "A"
+He 0. 0. 0.
diff --git a/src/ao_extra_basis/tuto/example_copy.sh b/src/ao_extra_basis/tuto/example_copy.sh
new file mode 100755
index 00000000..0677b183
--- /dev/null
+++ b/src/ao_extra_basis/tuto/example_copy.sh
@@ -0,0 +1,26 @@
+source ~/qp2/quantum_package.rc
+## Example of how to generate an additional h2o molecule, stored as a extra basis/nuclei etc .. to an He
+
+sys_B=h2o.xyz
+basis_B=sto-3g
+output_B=${sys_B%.xyz}_${basis_B}
+
+sys_A=He_A.xyz
+basis_A=cc-pvtz
+output_A=${sys_A%.xyz}_${basis_A}_extra_${output_B}
+
+# we create the system "B" that will be attached as an "extra system" to the syste "A"
+qp create_ezfio -b $basis_B $sys_B -o ${output_B}
+# we perform an HF calculation to obtain the AO density matrix 
+qp run scf 
+# we save the density matrix in the EZFIO
+qp run save_one_e_dm 
+# we create the system "A" 
+qp create_ezfio -b $basis_A $sys_A -o ${output_A}
+# We perform an SCF calculation
+qp run scf 
+# we copy the system "B" information as extra nuclei/basis etc in the EZFIO of system "A"
+qp_copy_extra_basis ${output_B} ${output_A}
+
+# we execute an example of progra that prints a lot of useful integrals/information on the A-B interaction
+qp run test_extra_basis | tee ${output_A}.test_extra_basis
diff --git a/src/ao_extra_basis/tuto/h2o.xyz b/src/ao_extra_basis/tuto/h2o.xyz
new file mode 100644
index 00000000..d3928214
--- /dev/null
+++ b/src/ao_extra_basis/tuto/h2o.xyz
@@ -0,0 +1,7 @@
+3
+
+O      0.000000    -0.399441  3.000000   
+H      0.761232     0.199721  3.000000   
+H     -0.761232     0.199721  3.000000   
+
+

From 2037939abb877d983991d9a15dfff361549a6972 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 12 Mar 2025 12:29:29 +0100
Subject: [PATCH 56/59] corrected a bug in
 qp2/plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f

---
 plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f | 1 +
 1 file changed, 1 insertion(+)

diff --git a/plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f b/plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f
index e71d49fc..68d965cb 100644
--- a/plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f
+++ b/plugins/local/tuto_plugins/tuto_I/traces_one_e.irp.f
@@ -30,6 +30,7 @@ BEGIN_PROVIDER [ double precision, trace_ao_one_e_ints]
 !          have the same number of functions
  END_DOC
  integer :: i,j
+ double precision :: accu
  double precision, allocatable :: inv_overlap_times_integrals(:,:) ! = h S^{-1}
  allocate(inv_overlap_times_integrals(ao_num,ao_num))
  ! routine that computes the product of two matrices, you can check it with 

From d84edaad5f386a316280e02a9deb2c7821ac6057 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 13 Mar 2025 14:36:24 +0100
Subject: [PATCH 57/59] Fix ao_sphe_overlap

---
 etc/openmp.rc                              |  2 +-
 src/ao_one_e_ints/ao_ortho_canonical.irp.f | 49 +++++++++++-----------
 src/mo_basis/mos.irp.f                     |  9 ++++
 3 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/etc/openmp.rc b/etc/openmp.rc
index 7f71c9b8..a8ced5a0 100644
--- a/etc/openmp.rc
+++ b/etc/openmp.rc
@@ -1 +1 @@
-export OMP_NESTED=True
+#export OMP_NESTED=True
diff --git a/src/ao_one_e_ints/ao_ortho_canonical.irp.f b/src/ao_one_e_ints/ao_ortho_canonical.irp.f
index eff7e7be..747d9183 100644
--- a/src/ao_one_e_ints/ao_ortho_canonical.irp.f
+++ b/src/ao_one_e_ints/ao_ortho_canonical.irp.f
@@ -1,5 +1,5 @@
  BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num,ao_num)]
-&BEGIN_PROVIDER [ integer, ao_cart_to_sphe_num ]
+&BEGIN_PROVIDER [ integer, ao_sphe_num ]
   implicit none
   BEGIN_DOC
 ! Coefficients to go from cartesian to spherical coordinates in the current
@@ -13,23 +13,23 @@
   ao_cart_to_sphe_coef(:,:) = 0.d0
   ! Assume order provided by ao_power_index
   i = 1
-  ao_cart_to_sphe_num = 0
+  ao_sphe_num = 0
   do while (i <= ao_num)
     select case ( ao_l(i) )
       case (0)
-        ao_cart_to_sphe_num += 1
-        ao_cart_to_sphe_coef(i,ao_cart_to_sphe_num) = 1.d0
+        ao_sphe_num += 1
+        ao_cart_to_sphe_coef(i,ao_sphe_num) = 1.d0
         i += 1
       BEGIN_TEMPLATE
       case ($SHELL)
         if (ao_power(i,1) == $SHELL) then
           do k=1,size(cart_to_sphe_$SHELL,2)
             do j=1,size(cart_to_sphe_$SHELL,1)
-              ao_cart_to_sphe_coef(i+j-1,ao_cart_to_sphe_num+k) = cart_to_sphe_$SHELL(j,k)
+              ao_cart_to_sphe_coef(i+j-1,ao_sphe_num+k) = cart_to_sphe_$SHELL(j,k)
             enddo
           enddo
           i += size(cart_to_sphe_$SHELL,1)
-          ao_cart_to_sphe_num += size(cart_to_sphe_$SHELL,2)
+          ao_sphe_num += size(cart_to_sphe_$SHELL,2)
         endif
       SUBST [ SHELL ]
         1;;
@@ -49,36 +49,36 @@
 
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_overlap, (ao_cart_to_sphe_num,ao_cart_to_sphe_num) ]
+BEGIN_PROVIDER [ double precision, ao_sphe_overlap, (ao_sphe_num,ao_sphe_num) ]
  implicit none
  BEGIN_DOC
  ! |AO| overlap matrix in the spherical basis set
  END_DOC
- double precision, allocatable :: S(:,:)
- allocate (S(ao_cart_to_sphe_num,ao_num))
+ double precision, allocatable :: tmp(:,:)
+ allocate (tmp(ao_sphe_num,ao_num))
 
- call dgemm('T','N',ao_cart_to_sphe_num,ao_num,ao_num, 1.d0, &
+ call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, &
    ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1), &
-   ao_overlap,size(ao_overlap,1), 0.d0, &
-   S, size(S,1))
+   S_inv,size(ao_overlap,1), 0.d0, &
+   tmp, size(tmp,1))
 
- call dgemm('N','N',ao_cart_to_sphe_num,ao_cart_to_sphe_num,ao_num, 1.d0, &
-   S, size(S,1), &
+ call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, &
+   tmp, size(tmp,1), &
    ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1), 0.d0, &
-   ao_cart_to_sphe_overlap,size(ao_cart_to_sphe_overlap,1))
+   ao_sphe_overlap,size(ao_sphe_overlap,1))
 
- deallocate(S)
+ deallocate(tmp)
 
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_inv, (ao_cart_to_sphe_num,ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_inv, (ao_sphe_num,ao_num) ]
  implicit none
  BEGIN_DOC
  ! Inverse of :c:data:`ao_cart_to_sphe_coef`
  END_DOC
 
  call get_pseudo_inverse(ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1),&
-   ao_num,ao_cart_to_sphe_num, &
+   ao_num,ao_sphe_num, &
    ao_cart_to_sphe_inv, size(ao_cart_to_sphe_inv,1), lin_dep_cutoff)
 END_PROVIDER
 
@@ -120,17 +120,17 @@ END_PROVIDER
 
     double precision, allocatable :: S(:,:)
 
-    allocate(S(ao_cart_to_sphe_num,ao_cart_to_sphe_num))
+    allocate(S(ao_sphe_num,ao_sphe_num))
     S = 0.d0
-    do i=1,ao_cart_to_sphe_num
+    do i=1,ao_sphe_num
       S(i,i) = 1.d0
     enddo
 
-    ao_ortho_canonical_num = ao_cart_to_sphe_num
-    call ortho_canonical(ao_cart_to_sphe_overlap, size(ao_cart_to_sphe_overlap,1), &
-      ao_cart_to_sphe_num, S, size(S,1), ao_ortho_canonical_num, lin_dep_cutoff)
+    ao_ortho_canonical_num = ao_sphe_num
+    call ortho_canonical(ao_sphe_overlap, size(ao_sphe_overlap,1), &
+      ao_sphe_num, S, size(S,1), ao_ortho_canonical_num, lin_dep_cutoff)
 
-    call dgemm('N','N', ao_num, ao_ortho_canonical_num, ao_cart_to_sphe_num, 1.d0, &
+    call dgemm('N','N', ao_num, ao_ortho_canonical_num, ao_sphe_num, 1.d0, &
       ao_cart_to_sphe_coef, size(ao_cart_to_sphe_coef,1), &
       S, size(S,1), &
       0.d0, ao_ortho_canonical_coef, size(ao_ortho_canonical_coef,1))
@@ -167,3 +167,4 @@ BEGIN_PROVIDER [double precision, ao_ortho_canonical_overlap, (ao_ortho_canonica
     enddo
   enddo
 END_PROVIDER
+
diff --git a/src/mo_basis/mos.irp.f b/src/mo_basis/mos.irp.f
index 1ed859ee..a8b5441d 100644
--- a/src/mo_basis/mos.irp.f
+++ b/src/mo_basis/mos.irp.f
@@ -346,3 +346,12 @@ subroutine ao_ortho_cano_to_ao(A_ao,LDA_ao,A,LDA)
   deallocate(T)
 end
 
+
+BEGIN_PROVIDER [ double precision, mo_coef_spherical]
+ implicit none
+ BEGIN_DOC
+ ! MO coefficients in the basis of spherical harmonics AOs.
+ END_DOC
+ 
+END_PROVIDER
+

From 8ae207b3432f8ac3aab44b21614e1bb6ee213c51 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 14 Mar 2025 18:11:37 +0100
Subject: [PATCH 58/59] Implemented cartesian -> spherical AO conversion

---
 scripts/qp_import_trexio.py                |   5 +-
 src/ao_basis/aos.irp.f                     |  25 +++++
 src/ao_basis/spherical_to_cartesian.irp.f  | 120 +++++++++++++++++++--
 src/ao_one_e_ints/ao_one_e_ints.irp.f      |  16 +++
 src/ao_one_e_ints/ao_ortho_canonical.irp.f |  54 +++++-----
 src/ao_one_e_ints/ao_overlap.irp.f         |  23 ++++
 src/ao_one_e_ints/kin_ao_ints.irp.f        |  22 ++++
 src/ao_one_e_ints/pot_ao_ints.irp.f        |  22 ++++
 src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f |  64 +++++++++++
 src/mo_basis/mos.irp.f                     |  12 ++-
 src/trexio/EZFIO.cfg                       |   7 ++
 src/trexio/export_trexio_routines.irp.f    | 114 +++++++++++++-------
 src/utils/linear_algebra.irp.f             |   9 --
 13 files changed, 407 insertions(+), 86 deletions(-)

diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py
index a515efba..b8d0d8ed 100755
--- a/scripts/qp_import_trexio.py
+++ b/scripts/qp_import_trexio.py
@@ -271,12 +271,11 @@ def write_ezfio(trexio_filename, filename):
     if basis_type.lower() == "gaussian" and not cartesian:
         try:
           import trexio_tools
-          fd, tmp = tempfile.mkstemp()
-          os.close(fd)
+          tmp = "cartesian_"+trexio_filename
           retcode = subprocess.call(["trexio", "convert-to", "-t", "cartesian", "-o", tmp, trexio_filename])
           trexio_file_cart = trexio.File(tmp,mode='r',back_end=trexio.TREXIO_AUTO)
           cartesian = trexio.read_ao_cartesian(trexio_file_cart)
-          os.unlink(tmp)
+          ezfio.set_trexio_trexio_file(tmp)
         except:
           pass
 
diff --git a/src/ao_basis/aos.irp.f b/src/ao_basis/aos.irp.f
index 440cc865..d9381015 100644
--- a/src/ao_basis/aos.irp.f
+++ b/src/ao_basis/aos.irp.f
@@ -20,7 +20,22 @@ BEGIN_PROVIDER [ integer, ao_shell, (ao_num) ]
      ao_shell(k) = i
    enddo
  enddo
+END_PROVIDER
 
+BEGIN_PROVIDER [ integer, ao_sphe_shell, (ao_sphe_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Index of the shell to which the AO corresponds
+ END_DOC
+ integer :: i, j, k, n
+ k=0
+ do i=1,shell_num
+   n = shell_ang_mom(i)
+   do j=-n,n
+     k = k+1
+     ao_sphe_shell(k) = i
+   enddo
+ enddo
 END_PROVIDER
 
 BEGIN_PROVIDER [ integer, ao_first_of_shell, (shell_num) ]
@@ -133,6 +148,16 @@ END_PROVIDER
     enddo
   enddo
 
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, ao_sphe_coef_normalization_factor, (ao_sphe_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Normalization factor in spherical AO basis
+ END_DOC
+
+ ao_sphe_coef_normalization_factor(:) = 1.d0
+
 END_PROVIDER
 
  BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered, (ao_num,ao_prim_num_max) ]
diff --git a/src/ao_basis/spherical_to_cartesian.irp.f b/src/ao_basis/spherical_to_cartesian.irp.f
index 336161f8..53a60413 100644
--- a/src/ao_basis/spherical_to_cartesian.irp.f
+++ b/src/ao_basis/spherical_to_cartesian.irp.f
@@ -4,7 +4,8 @@
 ! First index is the index of the cartesian AO, obtained by ao_power_index
 ! Second index is the index of the spherical AO
 
-BEGIN_PROVIDER [ double precision, cart_to_sphe_0, (1,1) ]
+ BEGIN_PROVIDER [ double precision, cart_to_sphe_0, (1,1) ]
+&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_0, (1) ]
   implicit none
   BEGIN_DOC
 ! Spherical -> Cartesian Transformation matrix for l=0
@@ -12,10 +13,12 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_0, (1,1) ]
   cart_to_sphe_0 = 0.d0
 
   cart_to_sphe_0 ( 1, 1) = 1.0d0
+  cart_to_sphe_norm_0 (1) = 1.d0
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, cart_to_sphe_1, (3,3) ]
+ BEGIN_PROVIDER [ double precision, cart_to_sphe_1, (3,3) ]
+&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_1, (3) ]
   implicit none
   BEGIN_DOC
 ! Spherical -> Cartesian Transformation matrix for l=1
@@ -25,10 +28,14 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_1, (3,3) ]
   cart_to_sphe_1 ( 3, 1) = 1.0d0
   cart_to_sphe_1 ( 1, 2) = 1.0d0
   cart_to_sphe_1 ( 2, 3) = 1.0d0
+  cart_to_sphe_norm_1 (1) = 1.d0
+  cart_to_sphe_norm_1 (2) = 1.d0
+  cart_to_sphe_norm_1 (3) = 1.d0
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, cart_to_sphe_2, (6,5) ]
+ BEGIN_PROVIDER [ double precision, cart_to_sphe_2, (6,5) ]
+&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_2, (6) ]
   implicit none
   BEGIN_DOC
 ! Spherical -> Cartesian Transformation matrix for l=2
@@ -43,10 +50,14 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_2, (6,5) ]
   cart_to_sphe_2 ( 1, 4) = 0.86602540378443864676d0
   cart_to_sphe_2 ( 4, 4) = -0.86602540378443864676d0
   cart_to_sphe_2 ( 2, 5) = 1.0d0
+
+  cart_to_sphe_norm_2 = (/ 1.0d0, 1.7320508075688772d0, 1.7320508075688772d0, 1.0d0, &
+                           1.7320508075688772d0, 1.0d0 /)
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, cart_to_sphe_3, (10,7) ]
+ BEGIN_PROVIDER [ double precision, cart_to_sphe_3, (10,7) ]
+&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_3, (10) ]
   implicit none
   BEGIN_DOC
 ! Spherical -> Cartesian Transformation matrix for l=3
@@ -69,10 +80,15 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_3, (10,7) ]
   cart_to_sphe_3 ( 4, 6) = -1.0606601717798212866d0
   cart_to_sphe_3 ( 2, 7) = 1.0606601717798212866d0
   cart_to_sphe_3 ( 7, 7) = -0.790569415042094833d0
+
+  cart_to_sphe_norm_3 = (/ 1.0d0, 2.23606797749979d0, 2.23606797749979d0, &
+  2.23606797749979d0, 3.872983346207417d0, 2.23606797749979d0, 1.0d0, 2.23606797749979d0, &
+  2.23606797749979d0, 1.d00 /)
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, cart_to_sphe_4, (15,9) ]
+ BEGIN_PROVIDER [ double precision, cart_to_sphe_4, (15,9) ]
+&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_4, (15) ]
   implicit none
   BEGIN_DOC
 ! Spherical -> Cartesian Transformation matrix for l=4
@@ -107,10 +123,18 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_4, (15,9) ]
   cart_to_sphe_4 (11, 8) = 0.73950997288745200532d0
   cart_to_sphe_4 ( 2, 9) = 1.1180339887498948482d0
   cart_to_sphe_4 ( 7, 9) = -1.1180339887498948482d0
+
+  cart_to_sphe_norm_4 = (/ 1.0d0, 2.6457513110645907d0, 2.6457513110645907d0, &
+  3.4156502553198664d0, 5.916079783099616d0, 3.415650255319866d0, &
+  2.6457513110645907d0, 5.916079783099616d0, 5.916079783099616d0, &
+  2.6457513110645907d0, 1.0d0, 2.6457513110645907d0, 3.415650255319866d0, &
+  2.6457513110645907d0, 1.d00 /)
+
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, cart_to_sphe_5, (21,11) ]
+ BEGIN_PROVIDER [ double precision, cart_to_sphe_5, (21,11) ]
+&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_5, (21) ]
   implicit none
   BEGIN_DOC
 ! Spherical -> Cartesian Transformation matrix for l=5
@@ -163,10 +187,18 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_5, (21,11) ]
   cart_to_sphe_5 ( 2,11) = 1.169267933366856683d0
   cart_to_sphe_5 ( 7,11) = -1.5309310892394863114d0
   cart_to_sphe_5 (16,11) = 0.7015607600201140098d0
+
+  cart_to_sphe_norm_5 = (/ 1.0d0, 3.0d0, 3.0d0, 4.58257569495584d0, &
+  7.937253933193773d0, 4.58257569495584d0, 4.58257569495584d0, &
+  10.246950765959598d0, 10.246950765959598d0, 4.582575694955841d0, 3.0d0, &
+  7.937253933193773d0, 10.246950765959598d0, 7.937253933193773d0, 3.0d0, 1.0d0, &
+  3.0d0, 4.58257569495584d0, 4.582575694955841d0, 3.0d0, 1.d00 /)
+
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, cart_to_sphe_6, (28,13) ]
+ BEGIN_PROVIDER [ double precision, cart_to_sphe_6, (28,13) ]
+&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_6, (28) ]
   implicit none
   BEGIN_DOC
 ! Spherical -> Cartesian Transformation matrix for l=6
@@ -243,10 +275,22 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_6, (28,13) ]
   cart_to_sphe_6 ( 2,13) = 1.2151388809514737933d0
   cart_to_sphe_6 ( 7,13) = -1.9764235376052370825d0
   cart_to_sphe_6 (16,13) = 1.2151388809514737933d0
+
+  cart_to_sphe_norm_6 = (/ 1.0d0, 3.3166247903554003d0, 3.3166247903554003d0, &
+  5.744562646538029d0, 9.949874371066201d0, 5.744562646538029d0, &
+  6.797058187186571d0, 15.198684153570666d0, 15.198684153570664d0, &
+  6.797058187186572d0, 5.744562646538029d0, 15.198684153570666d0, &
+  19.621416870348583d0, 15.198684153570666d0, 5.744562646538029d0, &
+  3.3166247903554003d0, 9.949874371066201d0, 15.198684153570664d0, &
+  15.198684153570666d0, 9.9498743710662d0, 3.3166247903554003d0, 1.0d0, &
+  3.3166247903554003d0, 5.744562646538029d0, 6.797058187186572d0, &
+  5.744562646538029d0, 3.3166247903554003d0, 1.d00 /)
+
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, cart_to_sphe_7, (36,15) ]
+ BEGIN_PROVIDER [ double precision, cart_to_sphe_7, (36,15) ]
+&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_7, (36) ]
   implicit none
   BEGIN_DOC
 ! Spherical -> Cartesian Transformation matrix for l=7
@@ -355,10 +399,25 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_7, (36,15) ]
   cart_to_sphe_7 ( 7,15) = -2.4456993503903949804d0
   cart_to_sphe_7 (16,15) = 1.96875d0
   cart_to_sphe_7 (29,15) = -0.64725984928774934788d0
+
+  cart_to_sphe_norm_7 = (/ 1.0d0, 3.6055512754639896d0, 3.605551275463989d0, &
+  6.904105059069327d0, 11.958260743101398d0, 6.904105059069326d0, &
+  9.26282894152753d0, 20.712315177207984d0, 20.71231517720798d0, &
+  9.26282894152753d0, 9.26282894152753d0, 24.507141816213494d0, &
+  31.63858403911275d0, 24.507141816213494d0, 9.262828941527529d0, &
+  6.904105059069327d0, 20.712315177207984d0, 31.63858403911275d0, &
+  31.63858403911275d0, 20.71231517720798d0, 6.904105059069327d0, &
+  3.6055512754639896d0, 11.958260743101398d0, 20.71231517720798d0, &
+  24.507141816213494d0, 20.71231517720798d0, 11.958260743101398d0, &
+  3.6055512754639896d0, 1.0d0, 3.605551275463989d0, 6.904105059069326d0, &
+  9.26282894152753d0, 9.262828941527529d0, 6.904105059069327d0, &
+  3.6055512754639896d0, 1.d00 /)
+
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, cart_to_sphe_8, (45,17) ]
+ BEGIN_PROVIDER [ double precision, cart_to_sphe_8, (45,17) ]
+&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_8, (45) ]
   implicit none
   BEGIN_DOC
 ! Spherical -> Cartesian Transformation matrix for l=8
@@ -506,10 +565,28 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_8, (45,17) ]
   cart_to_sphe_8 ( 7,17) = -2.9348392204684739765d0
   cart_to_sphe_8 (16,17) = 2.9348392204684739765d0
   cart_to_sphe_8 (29,17) = -1.2945196985754986958d0
+
+  cart_to_sphe_norm_8 = (/ 1.0d0, 3.872983346207417d0, 3.872983346207417d0, &
+  8.062257748298551d0, 13.964240043768942d0, 8.06225774829855d0, &
+  11.958260743101398d0, 26.739483914241877d0, 26.739483914241877d0, &
+  11.958260743101398d0, 13.55939315961975d0, 35.874782229304195d0, &
+  46.31414470763765d0, 35.874782229304195d0, 13.55939315961975d0, &
+  11.958260743101398d0, 35.874782229304195d0, 54.79963503528103d0, &
+  54.79963503528103d0, 35.874782229304195d0, 11.958260743101398d0, &
+  8.062257748298551d0, 26.739483914241877d0, 46.31414470763765d0, &
+  54.79963503528103d0, 46.314144707637645d0, 26.739483914241877d0, &
+  8.06225774829855d0, 3.872983346207417d0, 13.964240043768942d0, &
+  26.739483914241877d0, 35.874782229304195d0, 35.874782229304195d0, &
+  26.739483914241877d0, 13.96424004376894d0, 3.8729833462074166d0, 1.0d0, &
+  3.872983346207417d0, 8.06225774829855d0, 11.958260743101398d0, &
+  13.55939315961975d0, 11.958260743101398d0, 8.06225774829855d0, &
+  3.8729833462074166d0, 1.d0 /)
+
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, cart_to_sphe_9, (55,19) ]
+ BEGIN_PROVIDER [ double precision, cart_to_sphe_9, (55,19) ]
+&BEGIN_PROVIDER [ double precision, cart_to_sphe_norm_9, (55) ]
   implicit none
   BEGIN_DOC
 ! Spherical -> Cartesian Transformation matrix for l=9
@@ -703,5 +780,28 @@ BEGIN_PROVIDER [ double precision, cart_to_sphe_9, (55,19) ]
   cart_to_sphe_9 (16,19) = 4.1179360680974030877d0
   cart_to_sphe_9 (29,19) = -2.3781845426185916576d0
   cart_to_sphe_9 (46,19) = 0.60904939217552380708d0
+
+  cart_to_sphe_norm_9 = (/ 1.0d0, 4.1231056256176615d0, 4.1231056256176615d0, &
+  9.219544457292889d0, 15.968719422671313d0, 9.219544457292889d0, &
+  14.86606874731851d0, 33.24154027718933d0, 33.24154027718933d0, &
+  14.866068747318508d0, 18.635603405463275d0, 49.30517214248421d0, &
+  63.652703529910404d0, 49.30517214248421d0, 18.635603405463275d0, &
+  18.635603405463275d0, 55.90681021638982d0, 85.39906322671229d0, &
+  85.39906322671229d0, 55.90681021638983d0, 18.635603405463275d0, &
+  14.86606874731851d0, 49.30517214248421d0, 85.39906322671229d0, &
+  101.04553429023969d0, 85.3990632267123d0, 49.30517214248421d0, &
+  14.866068747318508d0, 9.219544457292889d0, 33.24154027718933d0, &
+  63.652703529910404d0, 85.39906322671229d0, 85.3990632267123d0, &
+  63.65270352991039d0, 33.24154027718933d0, 9.219544457292887d0, &
+  4.1231056256176615d0, 15.968719422671313d0, 33.24154027718933d0, &
+  49.30517214248421d0, 55.90681021638983d0, 49.30517214248421d0, &
+  33.24154027718933d0, 15.968719422671313d0, 4.1231056256176615d0, 1.0d0, &
+  4.1231056256176615d0, 9.219544457292889d0, 14.866068747318508d0, &
+  18.635603405463275d0, 18.635603405463275d0, 14.866068747318508d0, &
+  9.219544457292887d0, 4.1231056256176615d0, 1.d0 /)
+
 END_PROVIDER
 
+
+
+
diff --git a/src/ao_one_e_ints/ao_one_e_ints.irp.f b/src/ao_one_e_ints/ao_one_e_ints.irp.f
index 2c6b8e7e..8b0edfbc 100644
--- a/src/ao_one_e_ints/ao_one_e_ints.irp.f
+++ b/src/ao_one_e_ints/ao_one_e_ints.irp.f
@@ -45,3 +45,19 @@ BEGIN_PROVIDER [ double precision, ao_one_e_integrals_imag,(ao_num,ao_num)]
 
 END_PROVIDER
 
+ BEGIN_PROVIDER [ double precision, ao_sphe_one_e_integrals,(ao_sphe_num,ao_sphe_num)]
+&BEGIN_PROVIDER [ double precision, ao_sphe_one_e_integrals_diag,(ao_sphe_num)]
+  implicit none
+  integer :: i,j,n,l
+  BEGIN_DOC
+ ! One-electron Hamiltonian in the spherical |AO| basis.
+  END_DOC
+
+  ao_sphe_one_e_integrals = ao_sphe_integrals_n_e + ao_sphe_kinetic_integrals
+
+  do j = 1, ao_num
+    ao_sphe_one_e_integrals_diag(j) = ao_sphe_one_e_integrals(j,j)
+  enddo
+
+END_PROVIDER
+
diff --git a/src/ao_one_e_ints/ao_ortho_canonical.irp.f b/src/ao_one_e_ints/ao_ortho_canonical.irp.f
index 747d9183..4e4d33f0 100644
--- a/src/ao_one_e_ints/ao_ortho_canonical.irp.f
+++ b/src/ao_one_e_ints/ao_ortho_canonical.irp.f
@@ -1,9 +1,12 @@
  BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num,ao_num)]
+&BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_normalization, (ao_num)]
 &BEGIN_PROVIDER [ integer, ao_sphe_num ]
   implicit none
   BEGIN_DOC
 ! Coefficients to go from cartesian to spherical coordinates in the current
 ! basis set
+!
+!  S_cart^-1 <cart|sphe>
   END_DOC
   integer :: i
   integer, external              :: ao_power_index
@@ -11,6 +14,7 @@
   integer                        :: prev
   prev = 0
   ao_cart_to_sphe_coef(:,:) = 0.d0
+  ao_cart_to_sphe_normalization(:) = 1.d0
   ! Assume order provided by ao_power_index
   i = 1
   ao_sphe_num = 0
@@ -19,6 +23,7 @@
       case (0)
         ao_sphe_num += 1
         ao_cart_to_sphe_coef(i,ao_sphe_num) = 1.d0
+        ao_cart_to_sphe_normalization(i) = 1.d0
         i += 1
       BEGIN_TEMPLATE
       case ($SHELL)
@@ -28,6 +33,9 @@
               ao_cart_to_sphe_coef(i+j-1,ao_sphe_num+k) = cart_to_sphe_$SHELL(j,k)
             enddo
           enddo
+          do j=1,size(cart_to_sphe_$SHELL,1)
+            ao_cart_to_sphe_normalization(i+j-1) = cart_to_sphe_norm_$SHELL(j)
+          enddo
           i += size(cart_to_sphe_$SHELL,1)
           ao_sphe_num += size(cart_to_sphe_$SHELL,2)
         endif
@@ -47,28 +55,7 @@
     end select
   enddo
 
-END_PROVIDER
-
-BEGIN_PROVIDER [ double precision, ao_sphe_overlap, (ao_sphe_num,ao_sphe_num) ]
- implicit none
- BEGIN_DOC
- ! |AO| overlap matrix in the spherical basis set
- END_DOC
- double precision, allocatable :: tmp(:,:)
- allocate (tmp(ao_sphe_num,ao_num))
-
- call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, &
-   ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1), &
-   S_inv,size(ao_overlap,1), 0.d0, &
-   tmp, size(tmp,1))
-
- call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, &
-   tmp, size(tmp,1), &
-   ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1), 0.d0, &
-   ao_sphe_overlap,size(ao_sphe_overlap,1))
-
- deallocate(tmp)
-
+print *, ao_cart_to_sphe_normalization(:)
 END_PROVIDER
 
 BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_inv, (ao_sphe_num,ao_num) ]
@@ -77,9 +64,26 @@ BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_inv, (ao_sphe_num,ao_num) ]
  ! Inverse of :c:data:`ao_cart_to_sphe_coef`
  END_DOC
 
- call get_pseudo_inverse(ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1),&
-   ao_num,ao_sphe_num, &
-   ao_cart_to_sphe_inv, size(ao_cart_to_sphe_inv,1), lin_dep_cutoff)
+  ! Normalize
+  integer :: m,k
+  double precision, allocatable :: S(:,:), R(:,:), Rinv(:,:), Sinv(:,:)
+
+  k = size(ao_cart_to_sphe_coef,1)
+  m = size(ao_cart_to_sphe_coef,2)
+
+  allocate(S(k,k), R(k,m), Rinv(m,k), Sinv(k,k))
+
+  R(:,:) = ao_cart_to_sphe_coef(:,:)
+
+  call dgemm('N','T', m, m, k, 1.d0, R, k, R, k, 0.d0, S, m)
+  call get_pseudo_inverse(S, k, k, m, Sinv, k, 1.d-20)
+  call dgemm('T','T', m, m, k, 1.d0, R, k, Sinv, k, 0.d0, Rinv, m)
+
+  integer :: i
+  do i=1,ao_num
+    ao_cart_to_sphe_inv(:,i) = Rinv(:,i)  !/ ao_cart_to_sphe_normalization(i)
+  enddo
+
 END_PROVIDER
 
 
diff --git a/src/ao_one_e_ints/ao_overlap.irp.f b/src/ao_one_e_ints/ao_overlap.irp.f
index e5341f6a..cc676c4b 100644
--- a/src/ao_one_e_ints/ao_overlap.irp.f
+++ b/src/ao_one_e_ints/ao_overlap.irp.f
@@ -308,3 +308,26 @@ BEGIN_PROVIDER [ double precision, S_half, (ao_num,ao_num)  ]
 
 END_PROVIDER
 
+
+BEGIN_PROVIDER [ double precision, ao_sphe_overlap, (ao_sphe_num,ao_sphe_num) ]
+ implicit none
+ BEGIN_DOC
+ ! |AO| overlap matrix in the spherical basis set
+ END_DOC
+ double precision, allocatable :: tmp(:,:)
+ allocate (tmp(ao_sphe_num,ao_num))
+
+ call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, &
+   ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), &
+   ao_overlap,size(ao_overlap,1), 0.d0, &
+   tmp, size(tmp,1))
+
+ call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, &
+   tmp, size(tmp,1), &
+   ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), 0.d0, &
+   ao_sphe_overlap,size(ao_sphe_overlap,1))
+
+ deallocate(tmp)
+
+END_PROVIDER
+
diff --git a/src/ao_one_e_ints/kin_ao_ints.irp.f b/src/ao_one_e_ints/kin_ao_ints.irp.f
index 49eb56ad..f450721d 100644
--- a/src/ao_one_e_ints/kin_ao_ints.irp.f
+++ b/src/ao_one_e_ints/kin_ao_ints.irp.f
@@ -190,3 +190,25 @@ BEGIN_PROVIDER [double precision, ao_kinetic_integrals_imag, (ao_num,ao_num)]
   endif
 END_PROVIDER
 
+
+BEGIN_PROVIDER [ double precision, ao_sphe_kinetic_integrals, (ao_sphe_num,ao_sphe_num) ]
+ implicit none
+ BEGIN_DOC
+ ! |AO| kinetic inntegrals matrix in the spherical basis set
+ END_DOC
+ double precision, allocatable :: tmp(:,:)
+ allocate (tmp(ao_sphe_num,ao_num))
+
+ call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, &
+   ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), &
+   ao_kinetic_integrals,size(ao_kinetic_integrals,1), 0.d0, &
+   tmp, size(tmp,1))
+
+ call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, &
+   tmp, size(tmp,1), &
+   ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), 0.d0, &
+   ao_sphe_kinetic_integrals,size(ao_sphe_kinetic_integrals,1))
+
+ deallocate(tmp)
+
+END_PROVIDER
diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f
index 776b5ec0..c1544a5d 100644
--- a/src/ao_one_e_ints/pot_ao_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_ints.irp.f
@@ -609,3 +609,25 @@ double precision function V_r(n,alpha)
 end
 
 
+
+BEGIN_PROVIDER [ double precision, ao_sphe_integrals_n_e, (ao_sphe_num,ao_sphe_num) ]
+ implicit none
+ BEGIN_DOC
+ ! |AO| VneVne  inntegrals matrix in the spherical basis set
+ END_DOC
+ double precision, allocatable :: tmp(:,:)
+ allocate (tmp(ao_sphe_num,ao_num))
+
+ call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, &
+   ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), &
+   ao_integrals_n_e,size(ao_integrals_n_e,1), 0.d0, &
+   tmp, size(tmp,1))
+
+ call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, &
+   tmp, size(tmp,1), &
+   ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), 0.d0, &
+   ao_sphe_integrals_n_e,size(ao_sphe_integrals_n_e,1))
+
+ deallocate(tmp)
+
+END_PROVIDER
diff --git a/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f b/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f
index e75ca056..851f26d8 100644
--- a/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f
@@ -296,3 +296,67 @@ END_PROVIDER
  enddo
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, ao_sphe_pseudo_integrals_local, (ao_sphe_num,ao_sphe_num) ]
+ implicit none
+ BEGIN_DOC
+ ! |AO| pseudo_integrals_local matrix in the spherical basis set
+ END_DOC
+ double precision, allocatable :: tmp(:,:)
+ allocate (tmp(ao_sphe_num,ao_num))
+
+ call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, &
+   ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), &
+   ao_pseudo_integrals_local,size(ao_pseudo_integrals_local,1), 0.d0, &
+   tmp, size(tmp,1))
+
+ call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, &
+   tmp, size(tmp,1), &
+   ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), 0.d0, &
+   ao_sphe_pseudo_integrals_local,size(ao_sphe_pseudo_integrals_local,1))
+
+ deallocate(tmp)
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, ao_sphe_pseudo_integrals_non_local, (ao_sphe_num,ao_sphe_num) ]
+ implicit none
+ BEGIN_DOC
+ ! |AO| pseudo_integrals_non_local matrix in the spherical basis set
+ END_DOC
+ double precision, allocatable :: tmp(:,:)
+ allocate (tmp(ao_sphe_num,ao_num))
+
+ call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, &
+   ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), &
+   ao_pseudo_integrals_non_local,size(ao_pseudo_integrals_non_local,1), 0.d0, &
+   tmp, size(tmp,1))
+
+ call dgemm('N','N',ao_sphe_num,ao_sphe_num,ao_num, 1.d0, &
+   tmp, size(tmp,1), &
+   ao_cart_to_sphe_inv,size(ao_cart_to_sphe_inv,1), 0.d0, &
+   ao_sphe_pseudo_integrals_non_local,size(ao_sphe_pseudo_integrals_non_local,1))
+
+ deallocate(tmp)
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, ao_sphe_pseudo_integrals, (ao_sphe_num,ao_sphe_num)]
+  implicit none
+  BEGIN_DOC
+  ! Pseudo-potential integrals in the |AO| basis set.
+  END_DOC
+
+  ao_sphe_pseudo_integrals = 0.d0
+  if (do_pseudo) then
+    if (pseudo_klocmax > 0) then
+      ao_sphe_pseudo_integrals += ao_sphe_pseudo_integrals_local
+    endif
+    if (pseudo_kmax > 0) then
+      ao_sphe_pseudo_integrals += ao_sphe_pseudo_integrals_non_local
+    endif
+  endif
+
+END_PROVIDER
+
diff --git a/src/mo_basis/mos.irp.f b/src/mo_basis/mos.irp.f
index a8b5441d..1eecca6c 100644
--- a/src/mo_basis/mos.irp.f
+++ b/src/mo_basis/mos.irp.f
@@ -347,11 +347,19 @@ subroutine ao_ortho_cano_to_ao(A_ao,LDA_ao,A,LDA)
 end
 
 
-BEGIN_PROVIDER [ double precision, mo_coef_spherical]
+BEGIN_PROVIDER [ double precision, mo_sphe_coef, (ao_sphe_num, mo_num) ]
  implicit none
  BEGIN_DOC
  ! MO coefficients in the basis of spherical harmonics AOs.
  END_DOC
- 
+ double precision, allocatable :: tmp(:,:)
+ allocate (tmp(ao_sphe_num,ao_num))
+
+ call dgemm('T','N',ao_sphe_num,ao_num,ao_num, 1.d0, &
+   ao_cart_to_sphe_coef,ao_num, &
+   mo_coef,size(mo_coef,1), 0.d0, &
+   mo_sphe_coef, size(mo_sphe_coef,1))
+
+ deallocate (tmp)
 END_PROVIDER
 
diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg
index 88828520..22dbea00 100644
--- a/src/trexio/EZFIO.cfg
+++ b/src/trexio/EZFIO.cfg
@@ -22,6 +22,13 @@ doc: If True, export MO coefficients
 interface: ezfio, ocaml, provider
 default: True
 
+[export_cartesian]
+type: logical
+doc: If False, export everything in the spherical AO basis
+interface: ezfio, ocaml, provider
+default: True
+
+
 [export_ao_one_e_ints]
 type: logical
 doc: If True, export one-electron integrals in AO basis
diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f
index cf1327b6..53b21dc9 100644
--- a/src/trexio/export_trexio_routines.irp.f
+++ b/src/trexio/export_trexio_routines.irp.f
@@ -77,6 +77,7 @@ subroutine export_trexio(update,full_path)
     rc = trexio_read_metadata_code_num(f(k), code_num)
     if (rc == TREXIO_ATTR_MISSING) then
       i = 1
+      code_num = 0
       code(:) = ""
     else
       rc = trexio_read_metadata_code(f(k), code, 64)
@@ -97,6 +98,7 @@ subroutine export_trexio(update,full_path)
     rc = trexio_read_metadata_author_num(f(k), author_num)
     if (rc == TREXIO_ATTR_MISSING) then
       i = 1
+      author_num = 0
       author(:) = ""
     else
       rc = trexio_read_metadata_author(f(k), author, 64)
@@ -305,35 +307,46 @@ subroutine export_trexio(update,full_path)
 
     print *, 'AOs'
 
-    rc = trexio_write_ao_num(f(1), ao_num)
-    call trexio_assert(rc, TREXIO_SUCCESS)
+    if (export_cartesian) then
+      rc = trexio_write_ao_cartesian(f(1), 1)
+      call trexio_assert(rc, TREXIO_SUCCESS)
 
-    rc = trexio_write_ao_cartesian(f(1), 1)
-    call trexio_assert(rc, TREXIO_SUCCESS)
+      rc = trexio_write_ao_num(f(1), ao_num)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+
+      rc = trexio_write_ao_shell(f(1), ao_shell)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+
+      if (ezfio_convention >= 20250211) then
+        rc = trexio_write_ao_normalization(f(1), ao_coef_normalization_factor)
+      else
+
+        allocate(factor(ao_num))
+        do i=1,ao_num
+          l = ao_first_of_shell(ao_shell(i))
+          factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0))
+        enddo
+        rc = trexio_write_ao_normalization(f(1), factor)
+        deallocate(factor)
+      endif
+
+      call trexio_assert(rc, TREXIO_SUCCESS)
 
-    rc = trexio_write_ao_shell(f(1), ao_shell)
-    call trexio_assert(rc, TREXIO_SUCCESS)
 
-    if (ezfio_convention >= 20250211) then
-      rc = trexio_write_ao_normalization(f(1), ao_coef_normalization_factor)
     else
-      integer :: pow0(3), powA(3), nz
-      double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c
-      nz=100
+      rc = trexio_write_ao_cartesian(f(1), 0)
+      call trexio_assert(rc, TREXIO_SUCCESS)
 
-      C_A(1) = 0.d0
-      C_A(2) = 0.d0
-      C_A(3) = 0.d0
+      rc = trexio_write_ao_num(f(1), ao_sphe_num)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+
+      rc = trexio_write_ao_shell(f(1), ao_sphe_shell)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+
+      rc = trexio_write_ao_normalization(f(1), ao_sphe_coef_normalization_factor)
+      call trexio_assert(rc, TREXIO_SUCCESS)
 
-      allocate(factor(ao_num))
-      do i=1,ao_num
-        l = ao_first_of_shell(ao_shell(i))
-        factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0))
-      enddo
-      rc = trexio_write_ao_normalization(f(1), factor)
-      deallocate(factor)
     endif
-    call trexio_assert(rc, TREXIO_SUCCESS)
 
   endif
 
@@ -341,23 +354,45 @@ subroutine export_trexio(update,full_path)
 ! ------------------
 
   if (export_ao_one_e_ints) then
-    print *, 'AO one-e integrals'
 
-    rc = trexio_write_ao_1e_int_overlap(f(1),ao_overlap)
-    call trexio_assert(rc, TREXIO_SUCCESS)
+    double precision, allocatable :: tmp_ao(:,:)
+    if (export_cartesian) then
+      print *, 'AO one-e integrals (cartesian)'
 
-    rc = trexio_write_ao_1e_int_kinetic(f(1),ao_kinetic_integrals)
-    call trexio_assert(rc, TREXIO_SUCCESS)
-
-    rc = trexio_write_ao_1e_int_potential_n_e(f(1),ao_integrals_n_e)
-    call trexio_assert(rc, TREXIO_SUCCESS)
-
-    if (do_pseudo) then
-      rc = trexio_write_ao_1e_int_ecp(f(1), ao_pseudo_integrals_local + ao_pseudo_integrals_non_local)
+      rc = trexio_write_ao_1e_int_overlap(f(1),ao_overlap)
       call trexio_assert(rc, TREXIO_SUCCESS)
-    endif
 
-    rc = trexio_write_ao_1e_int_core_hamiltonian(f(1),ao_one_e_integrals)
+      rc = trexio_write_ao_1e_int_kinetic(f(1),ao_kinetic_integrals)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+
+      rc = trexio_write_ao_1e_int_potential_n_e(f(1),ao_integrals_n_e)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+
+      if (do_pseudo) then
+        rc = trexio_write_ao_1e_int_ecp(f(1), ao_pseudo_integrals)
+        call trexio_assert(rc, TREXIO_SUCCESS)
+      endif
+
+      rc = trexio_write_ao_1e_int_core_hamiltonian(f(1),ao_one_e_integrals)
+    else
+      print *, 'AO one-e integrals (spherical)'
+
+      rc = trexio_write_ao_1e_int_overlap(f(1),ao_sphe_overlap)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+
+      rc = trexio_write_ao_1e_int_kinetic(f(1),ao_sphe_kinetic_integrals)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+
+      rc = trexio_write_ao_1e_int_potential_n_e(f(1),ao_sphe_integrals_n_e)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+
+      if (do_pseudo) then
+        rc = trexio_write_ao_1e_int_ecp(f(1), ao_sphe_pseudo_integrals)
+        call trexio_assert(rc, TREXIO_SUCCESS)
+      endif
+
+      rc = trexio_write_ao_1e_int_core_hamiltonian(f(1),ao_sphe_one_e_integrals)
+    endif
     call trexio_assert(rc, TREXIO_SUCCESS)
   end if
 
@@ -465,8 +500,13 @@ subroutine export_trexio(update,full_path)
       call trexio_assert(rc, TREXIO_SUCCESS)
     enddo
 
-    rc = trexio_write_mo_coefficient(f(1), mo_coef)
-    call trexio_assert(rc, TREXIO_SUCCESS)
+    if (export_cartesian) then
+      rc = trexio_write_mo_coefficient(f(1), mo_coef)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+    else
+      rc = trexio_write_mo_coefficient(f(1), mo_sphe_coef)
+      call trexio_assert(rc, TREXIO_SUCCESS)
+    endif
 
     if ( (trim(mo_label) == 'Canonical').and. &
          (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then
diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index 4e7ca87d..629a998b 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1392,15 +1392,6 @@ subroutine get_pseudo_inverse(A, LDA, m, n, C, LDC, cutoff)
 
   call dgemm('T', 'T', n, m, n_svd, 1.d0, Vt, size(Vt,1), U, size(U,1), 0.d0, C, size(C,1))
 
-!  C = 0.d0
-!  do i=1,m
-!    do j=1,n
-!      do k=1,n_svd
-!        C(j,i) = C(j,i) + U(i,k) * D(k) * Vt(k,j)
-!      enddo
-!    enddo
-!  enddo
-
   deallocate(U,D,Vt,work,A_tmp)
 
 end

From b5543cb37a8ab47716d1946f59322c0a6f2e7a1d Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 14 Mar 2025 22:34:49 +0100
Subject: [PATCH 59/59] Introduced ao_sphe_num

---
 external/irpf90                            |  2 +-
 src/ao_basis/aos.irp.f                     | 13 +++++++++++++
 src/ao_one_e_ints/ao_ortho_canonical.irp.f | 17 +++++++++--------
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/external/irpf90 b/external/irpf90
index 4ab1b175..43160c60 160000
--- a/external/irpf90
+++ b/external/irpf90
@@ -1 +1 @@
-Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6
+Subproject commit 43160c60d88d9f61fb97cc0b35477c8eb0df862b
diff --git a/src/ao_basis/aos.irp.f b/src/ao_basis/aos.irp.f
index d9381015..02eedf53 100644
--- a/src/ao_basis/aos.irp.f
+++ b/src/ao_basis/aos.irp.f
@@ -22,6 +22,19 @@ BEGIN_PROVIDER [ integer, ao_shell, (ao_num) ]
  enddo
 END_PROVIDER
 
+BEGIN_PROVIDER [ integer, ao_sphe_num ]
+ implicit none
+ BEGIN_DOC
+ ! Number of spherical AOs
+ END_DOC
+ integer :: n, i
+ ao_sphe_num=0
+ do i=1,shell_num
+   n = shell_ang_mom(i)
+   ao_sphe_num += 2*n+1
+ enddo
+END_PROVIDER
+
 BEGIN_PROVIDER [ integer, ao_sphe_shell, (ao_sphe_num) ]
  implicit none
  BEGIN_DOC
diff --git a/src/ao_one_e_ints/ao_ortho_canonical.irp.f b/src/ao_one_e_ints/ao_ortho_canonical.irp.f
index 4e4d33f0..b19be1e2 100644
--- a/src/ao_one_e_ints/ao_ortho_canonical.irp.f
+++ b/src/ao_one_e_ints/ao_ortho_canonical.irp.f
@@ -1,6 +1,5 @@
  BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num,ao_num)]
 &BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_normalization, (ao_num)]
-&BEGIN_PROVIDER [ integer, ao_sphe_num ]
   implicit none
   BEGIN_DOC
 ! Coefficients to go from cartesian to spherical coordinates in the current
@@ -11,18 +10,18 @@
   integer :: i
   integer, external              :: ao_power_index
   integer                        :: ibegin,j,k
-  integer                        :: prev
+  integer                        :: prev, ao_sphe_count
   prev = 0
   ao_cart_to_sphe_coef(:,:) = 0.d0
   ao_cart_to_sphe_normalization(:) = 1.d0
   ! Assume order provided by ao_power_index
   i = 1
-  ao_sphe_num = 0
+  ao_sphe_count = 0
   do while (i <= ao_num)
     select case ( ao_l(i) )
       case (0)
-        ao_sphe_num += 1
-        ao_cart_to_sphe_coef(i,ao_sphe_num) = 1.d0
+        ao_sphe_count += 1
+        ao_cart_to_sphe_coef(i,ao_sphe_count) = 1.d0
         ao_cart_to_sphe_normalization(i) = 1.d0
         i += 1
       BEGIN_TEMPLATE
@@ -30,14 +29,14 @@
         if (ao_power(i,1) == $SHELL) then
           do k=1,size(cart_to_sphe_$SHELL,2)
             do j=1,size(cart_to_sphe_$SHELL,1)
-              ao_cart_to_sphe_coef(i+j-1,ao_sphe_num+k) = cart_to_sphe_$SHELL(j,k)
+              ao_cart_to_sphe_coef(i+j-1,ao_sphe_count+k) = cart_to_sphe_$SHELL(j,k)
             enddo
           enddo
           do j=1,size(cart_to_sphe_$SHELL,1)
             ao_cart_to_sphe_normalization(i+j-1) = cart_to_sphe_norm_$SHELL(j)
           enddo
           i += size(cart_to_sphe_$SHELL,1)
-          ao_sphe_num += size(cart_to_sphe_$SHELL,2)
+          ao_sphe_count += size(cart_to_sphe_$SHELL,2)
         endif
       SUBST [ SHELL ]
         1;;
@@ -55,7 +54,9 @@
     end select
   enddo
 
-print *, ao_cart_to_sphe_normalization(:)
+  if (ao_sphe_count /= ao_sphe_num) then
+    call qp_bug(irp_here, ao_sphe_count, "ao_sphe_count /= ao_sphe_num")
+  endif
 END_PROVIDER
 
 BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_inv, (ao_sphe_num,ao_num) ]