From 347f7162943a5479ec297417b51d905ad62c0c2e Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Thu, 3 Nov 2022 15:29:57 +0100
Subject: [PATCH 01/35] Working on fixing n_int.

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 398 ++++++++++++---
 src/csf/configurations.irp.f                 |   5 +
 src/csf/obtain_I_foralpha.irp.f              | 507 ++++++++++++++++---
 src/csf/sigma_vector.irp.f                   | 115 +++--
 src/determinants/slater_rules.irp.f          |  94 +++-
 5 files changed, 917 insertions(+), 202 deletions(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index cea7640c..167b41fc 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -12,6 +12,7 @@ use bitmasks
 
   integer                        :: idxI ! The id of the Ith CFG
   integer(bit_kind)              :: Icfg(N_int,2)
+  integer(bit_kind)              :: Jcfg(N_int,2)
   integer                        :: NalphaIcfg
   logical,dimension(:,:),allocatable :: tableUniqueAlphas
   integer                        :: listholes(mo_num)
@@ -20,10 +21,10 @@ use bitmasks
   integer                        :: nvmos
   integer                        :: listvmos(mo_num)
   integer                        :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
-  integer*8                      :: Idomo
-  integer*8                      :: Isomo
-  integer*8                      :: Jdomo
-  integer*8                      :: Jsomo
+  integer*8                      :: Idomo, Idomop, Idomoq 
+  integer*8                      :: Isomo, Isomop, Isomoq
+  integer*8                      :: Jdomo, Jdomop, Jdomoq
+  integer*8                      :: Jsomo, Jsomop, Jsomoq
   integer*8                      :: diffSOMO
   integer*8                      :: diffDOMO
   integer*8                      :: xordiffSOMODOMO
@@ -31,20 +32,21 @@ use bitmasks
   integer                        :: ndiffDOMO
   integer                        :: nxordiffSOMODOMO
   integer                        :: ndiffAll
-  integer                        :: i,ii
-  integer                        :: j,jj
+  integer                        :: i,ii,iii
+  integer                        :: j,jj, i_s, i_d
   integer                        :: k,kk
   integer                        :: kstart
   integer                        :: kend
-  integer                        :: Nsomo_I
-  integer                        :: hole
-  integer                        :: p
-  integer                        :: q
+  integer                        :: Nsomo_I, Nsomo_J
+  integer                        :: hole, n_core_orb_64
+  integer                        :: p, pp, p_s
+  integer                        :: q, qq, q_s
   integer                        :: countalphas
   logical                        :: pqAlreadyGenQ
   logical                        :: pqExistsQ
   logical                        :: ppExistsQ
   integer*8                      :: MS
+  integer :: listall(N_int*bit_kind_size), nelall
 
   double precision               :: t0, t1
   call wall_time(t0)
@@ -57,6 +59,9 @@ use bitmasks
   do idxI = 1, N_configuration
 
     Icfg  = psi_configuration(:,:,idxI)
+    Jcfg  = psi_configuration(:,:,idxI)
+    !print *," Jcfg somo=",Jcfg(1,1), " ", Jcfg(2,1)
+    !print *," Jcfg domo=",Jcfg(1,2), " ", Jcfg(2,2)
 
     Isomo = iand(act_bitmask(1,1),Icfg(1,1))
     Idomo = iand(act_bitmask(1,1),Icfg(1,2))
@@ -64,38 +69,89 @@ use bitmasks
     ! find out all pq holes possible
     nholes = 0
     ! holes in SOMO
-    do ii = 1,n_act_orb
-      i = list_act(ii)
-      if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
-        nholes += 1
-        listholes(nholes) = i
-        holetype(nholes) = 1
-      endif
-    end do
+    !do ii = 1,n_act_orb
+    !  i = list_act(ii)
+    !  if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
+    !    nholes += 1
+    !    listholes(nholes) = i
+    !    holetype(nholes) = 1
+    !  endif
+    !end do
+        call bitstring_to_list(psi_configuration(1,1,idxI),listall,nelall,N_int)
+
+        !print *,'list somo'
+        do iii=1,nelall
+          nholes += 1
+          listholes(nholes) = listall(iii)
+            !print *,listall(iii)
+          holetype(nholes) = 1
+        end do
+
+        Nsomo_I = nelall
+
     ! holes in DOMO
-    do ii = 1,n_act_orb
-      i = list_act(ii)
-      if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
-        nholes += 1
-        listholes(nholes) = i
-        holetype(nholes) = 2
-      endif
-    end do
+    !do ii = 1,n_act_orb
+    !  i = list_act(ii)
+    !  if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
+    !    nholes += 1
+    !    listholes(nholes) = i
+    !    holetype(nholes) = 2
+    !  endif
+    !end do
+
+    !do iii=1,N_int
+    !  print *,' iii=',iii, psi_configuration(iii,2,idxI), ' idxI=',idxI
+    !end do
+        call bitstring_to_list(psi_configuration(1,2,idxI),listall,nelall,N_int)
+
+        !print *,'list domo ncore=',n_core_orb, ' nelall=',nelall
+        do iii=1,nelall
+          if(listall(iii) .gt. n_core_orb)then
+            nholes += 1
+            listholes(nholes) = listall(iii)
+            !print *,listall(iii)
+            holetype(nholes) = 2
+          endif
+        end do
 
     ! find vmos
     listvmos = -1
     vmotype = -1
     nvmos = 0
-    do ii = 1,n_act_orb
-      i = list_act(ii)
+    !do ii = 1,n_act_orb
+    !  i = list_act(ii)
+    !  if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then
+    !    if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then
+    !      nvmos += 1
+    !      listvmos(nvmos) = i
+    !      print *,'1 i=',i
+    !      vmotype(nvmos) = 1
+    !    else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then
+    !      nvmos += 1
+    !      listvmos(nvmos) = i
+    !      print *,'2 i=',i
+    !      vmotype(nvmos) = 2
+    !    end if
+    !  end if
+    !end do
+    !print *,'-----------'
+
+    ! Take into account N_int
+    do ii = 1, n_act_orb
+      iii = list_act(ii)
+      i_s = (1+((iii-1)/63))
+      i = iii - ( i_s -1 )*63 
+      Isomo = iand(act_bitmask(i_s,1),Icfg(i_s,1))
+      Idomo = iand(act_bitmask(i_s,1),Icfg(i_s,2))
+
       if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then
         if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then
           nvmos += 1
-          listvmos(nvmos) = i
+          listvmos(nvmos) = iii
           vmotype(nvmos) = 1
         else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then
           nvmos += 1
-          listvmos(nvmos) = i
+          listvmos(nvmos) = iii
           vmotype(nvmos) = 2
         end if
       end if
@@ -106,7 +162,7 @@ use bitmasks
     ! Now find the allowed (p,q) excitations
     Isomo = iand(act_bitmask(1,1),Icfg(1,1))
     Idomo = iand(act_bitmask(1,1),Icfg(1,2))
-    Nsomo_I = POPCNT(Isomo)
+    !Nsomo_I = POPCNT(Isomo)
     if(Nsomo_I .EQ. 0) then
       kstart = 1
     else
@@ -115,24 +171,54 @@ use bitmasks
     kend = idxI-1
 
     do i = 1,nholes
-      p = listholes(i)
+      pp  = listholes(i)
+      p_s = (1+((pp-1)/63))
+      p   = pp - (p_s - 1)*63
+      !print *,' pp=',pp, ' p_s=',p_s, ' p=',p
       do j = 1,nvmos
-        q = listvmos(j)
+        qq  = listvmos(j)
+        q_s = (1+((qq-1)/63))
+        q   = qq - (q_s - 1)*63
+        !print *,' qq=',qq, ' q_s=',q_s, ' q=',q
+        Isomop = iand(act_bitmask(i_s,1),Icfg(p_s,1))
+        Idomop = iand(act_bitmask(i_s,1),Icfg(p_s,2))
+        Isomop = iand(act_bitmask(i_s,1),Icfg(q_s,1))
+        Idomop = iand(act_bitmask(i_s,1),Icfg(q_s,2))
         if(p .EQ. q) cycle
         if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
           ! SOMO -> VMO
-          Jsomo = IBCLR(Isomo,p-1)
-          Jsomo = IBSET(Jsomo,q-1)
-          Jdomo = Idomo
+          !print *,'SOMO -> VMO'
+          if (p_s .eq. q_s) then
+            Jsomop = IBCLR(Isomop,p-1)
+            Jsomop = IBSET(Jsomop,q-1)
+            Jsomoq = Jsomop
+          else
+            Jsomop = IBCLR(Isomop,p-1)
+            Jsomoq = IBSET(Isomoq,q-1)
+          endif
+
+          ! Domo remains the same
+          Jdomop = Idomop
+          Jdomoq = Idomoq
+
           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
           kend = idxI-1
         else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
           ! SOMO -> SOMO
-          Jsomo = IBCLR(Isomo,p-1)
-          Jsomo = IBCLR(Jsomo,q-1)
-          Jdomo = IBSET(Idomo,q-1)
+          !print *,'SOMO -> SOMO'
+          if(p_s .eq. q_s) then
+            Jsomop = IBCLR(Isomop,p-1)
+            Jsomop = IBCLR(Jsomop,q-1)
+            Jsomoq = Jsomop
+          else
+            Jsomop = IBCLR(Isomop,p-1)
+            Jsomoq = IBCLR(Isomoq,q-1)
+          endif
+
+          Jdomoq = IBSET(Idomoq,q-1)
+
           ! Check for Minimal alpha electrons (MS)
-          if(POPCNT(Jsomo).ge.MS)then
+          if(POPCNT(Jsomoq).ge.MS)then
             kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
             kend = idxI-1
           else
@@ -140,24 +226,60 @@ use bitmasks
           endif
         else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
           ! DOMO -> VMO
-          Jsomo = IBSET(Isomo,p-1)
-          Jsomo = IBSET(Jsomo,q-1)
-          Jdomo = IBCLR(Idomo,p-1)
+          !print *,'DOMO -> VMO', Isomop, p, q, Jsomop
+          if(p_s .eq. q_s) then
+            Jsomop = IBSET(Isomop,p-1)
+            Jsomop = IBSET(Jsomop,q-1)
+            Jsomoq = Jsomop
+          else
+            Jsomop = IBSET(Isomop,p-1)
+            Jsomoq = IBSET(Jsomoq,q-1)
+          endif
+          !print *, 'Jsomop=', Jsomop
+
+          Jdomop = IBCLR(Idomop,p-1)
+
           kstart = cfg_seniority_index(Nsomo_I)
           kend = idxI-1
         else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
           ! DOMO -> SOMO
-          Jsomo = IBSET(Isomo,p-1)
-          Jsomo = IBCLR(Jsomo,q-1)
-          Jdomo = IBCLR(Idomo,p-1)
-          Jdomo = IBSET(Jdomo,q-1)
+          !print *,'DOMO -> SOMO'
+          if(p_s .eq. q_s) then
+            Jsomop = IBSET(Isomop,p-1)
+            Jsomop = IBCLR(Jsomop,q-1)
+            Jsomoq = Jsomop
+
+            Jdomop = IBCLR(Idomop,p-1)
+            Jdomop = IBSET(Jdomop,q-1)
+            Jdomoq = Jdomop
+          else
+            Jsomop = IBSET(Isomop,p-1)
+            Jsomoq = IBCLR(Jsomoq,q-1)
+
+            Jdomop = IBCLR(Idomop,p-1)
+            Jdomoq = IBSET(Jdomoq,q-1)
+          endif
+
           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
           kend = idxI-1
         else
           print*,"Something went wrong in obtain_associated_alphaI"
         endif
+
+        ! Save it to Jcfg
+        !print *,i,j,"0| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1)
+        Jcfg(p_s,1) = Jsomop
+        Jcfg(q_s,1) = Jsomoq
+        Jcfg(p_s,2) = Jdomop
+        Jcfg(q_s,2) = Jdomoq
+        !print *,'p_s=',p_s,' q_s=', q_s
+        !print *,'Jsomop=',Jsomop, ' Jsomoq=', Jsomoq, ' Jdomop=', Jdomop, ' Jdomoq=', Jdomo
+        !print *,i,j,"1| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1)
+        call bitstring_to_list(Jcfg(1,1),listall,nelall,N_int)
+        Nsomo_J = nelall
+
         ! Check for Minimal alpha electrons (MS)
-        if(POPCNT(Jsomo).lt.MS)then
+        if(Nsomo_J.lt.MS)then
           cycle
         endif
 
@@ -169,15 +291,32 @@ use bitmasks
         pqAlreadyGenQ = .FALSE.
         ! First check if it can be generated before
         do k = kstart, kend
-          diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)))
-          ndiffSOMO = POPCNT(diffSOMO)
-          if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
-          diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)))
-          xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-          ndiffDOMO = POPCNT(diffDOMO)
-          nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
-          nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
-          !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then
+          !diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)))
+          !ndiffSOMO = POPCNT(diffSOMO)
+          !if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
+          !diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)))
+          !xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+          !ndiffDOMO = POPCNT(diffDOMO)
+          !nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+          !nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+
+          ndiffSOMO = 0
+          ndiffDOMO = 0
+          nxordiffSOMODOMO = 0
+          do ii = 1, N_int
+            Jsomo = Jcfg(ii,1)
+            Jdomo = Jcfg(ii,2)
+            diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k)))
+            ndiffSOMO += POPCNT(diffSOMO)
+            diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k)))
+            xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+            ndiffDOMO += POPCNT(diffDOMO)
+            nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
+            nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+          end do
+
+          if((ndiffSOMO .ne. 0) .and. (ndiffSOMO .ne. 2)) cycle
+
           if((ndiffSOMO+ndiffDOMO) .EQ. 0) then
             pqAlreadyGenQ = .TRUE.
             ppExistsQ = .TRUE.
@@ -208,22 +347,57 @@ use bitmasks
     Jdomo = Icfg(1,2)
     NalphaIcfg = 0
     do i = 1, nholes
-      p = listholes(i)
+      !p = listholes(i)
+      pp  = listholes(i)
+      p_s = (1+((pp-1)/63))
+      p   = pp - (p_s - 1)*63
       do j = 1, nvmos
-        q = listvmos(j)
+        !q = listvmos(j)
+        qq  = listvmos(j)
+        q_s = (1+((qq-1)/63))
+        q   = qq - (q_s - 1)*63
+        Isomop = iand(act_bitmask(i_s,1),Icfg(p_s,1))
+        Idomop = iand(act_bitmask(i_s,1),Icfg(p_s,2))
+        Isomoq = iand(act_bitmask(i_s,1),Icfg(q_s,1))
+        Idomoq = iand(act_bitmask(i_s,1),Icfg(q_s,2))
         if(p .EQ. q) cycle
         if(tableUniqueAlphas(p,q)) then
           if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
             ! SOMO -> VMO
-            Jsomo = IBCLR(Isomo,p-1)
-            Jsomo = IBSET(Jsomo,q-1)
-            Jdomo = Idomo
+            !Jsomo = IBCLR(Isomo,p-1)
+            !Jsomo = IBSET(Jsomo,q-1)
+            !Jdomo = Idomo
+            if (p_s .eq. q_s) then
+              Jsomop = IBCLR(Isomop,p-1)
+              Jsomop = IBSET(Jsomop,q-1)
+              Jsomoq = Jsomop
+            else
+              Jsomop = IBCLR(Isomop,p-1)
+              Jsomoq = IBSET(Isomoq,q-1)
+            endif
+
+            ! Domo remains the same
+            Jdomop = Idomop
+            Jdomoq = Idomoq
+
           else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
             ! SOMO -> SOMO
-            Jsomo = IBCLR(Isomo,p-1)
-            Jsomo = IBCLR(Jsomo,q-1)
-            Jdomo = IBSET(Idomo,q-1)
-            if(POPCNT(Jsomo).ge.MS)then
+            !Jsomo = IBCLR(Isomo,p-1)
+            !Jsomo = IBCLR(Jsomo,q-1)
+            !Jdomo = IBSET(Idomo,q-1)
+
+            if(p_s .eq. q_s) then
+              Jsomop = IBCLR(Isomop,p-1)
+              Jsomop = IBCLR(Jsomop,q-1)
+              Jsomoq = Jsomop
+            else
+              Jsomop = IBCLR(Isomop,p-1)
+              Jsomoq = IBCLR(Isomoq,q-1)
+            endif
+
+            Jdomoq = IBSET(Idomoq,q-1)
+
+            if(POPCNT(Jsomoq).ge.MS)then
               kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
               kend = idxI-1
             else
@@ -231,26 +405,74 @@ use bitmasks
             endif
           else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
             ! DOMO -> VMO
-            Jsomo = IBSET(Isomo,p-1)
-            Jsomo = IBSET(Jsomo,q-1)
-            Jdomo = IBCLR(Idomo,p-1)
+            !Jsomo = IBSET(Isomo,p-1)
+            !Jsomo = IBSET(Jsomo,q-1)
+            !Jdomo = IBCLR(Idomo,p-1)
+
+            if(p_s .eq. q_s) then
+              Jsomop = IBSET(Isomop,p-1)
+              Jsomop = IBSET(Jsomop,q-1)
+              Jsomoq = Jsomop
+            else
+              Jsomop = IBSET(Isomop,p-1)
+              Jsomoq = IBSET(Jsomoq,q-1)
+            endif
+
+            Jdomop = IBCLR(Idomop,p-1)
+
           else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
             ! DOMO -> SOMO
-            Jsomo = IBSET(Isomo,p-1)
-            Jsomo = IBCLR(Jsomo,q-1)
-            Jdomo = IBCLR(Idomo,p-1)
-            Jdomo = IBSET(Jdomo,q-1)
+            !Jsomo = IBSET(Isomo,p-1)
+            !Jsomo = IBCLR(Jsomo,q-1)
+            !Jdomo = IBCLR(Idomo,p-1)
+            !Jdomo = IBSET(Jdomo,q-1)
+            if(p_s .eq. q_s) then
+              Jsomop = IBSET(Isomop,p-1)
+              Jsomop = IBCLR(Jsomop,q-1)
+              Jsomoq = Jsomop
+
+              Jdomop = IBCLR(Idomop,p-1)
+              Jdomop = IBSET(Jdomop,q-1)
+              Jdomoq = Jdomop
+            else
+              Jsomop = IBSET(Isomop,p-1)
+              Jsomoq = IBCLR(Jsomoq,q-1)
+
+              Jdomop = IBCLR(Idomop,p-1)
+              Jdomoq = IBSET(Jdomoq,q-1)
+            endif
+
           else
             print*,"Something went wrong in obtain_associated_alphaI"
           endif
 
+          ! Save it to Jcfg
+          Jcfg(p_s,1) = Jsomop
+          Jcfg(q_s,1) = Jsomoq
+          Jcfg(p_s,2) = Jdomop
+          Jcfg(q_s,2) = Jdomoq
+
           ! SOMO
           !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
           if(POPCNT(Jsomo) .ge. NSOMOMin) then
             NalphaIcfg += 1
-            alphasIcfg_list(1,1,idxI,NalphaIcfg) = Jsomo
-            alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+            alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
+            !alphasIcfg_list(:,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+            if(n_core_orb .le. 63)then
+              alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1)
+            else
+              n_core_orb_64 = n_core_orb
+              do ii=1,N_int
+                if(n_core_orb_64 .gt. 0)then
+                  alphasIcfg_list(ii,2,idxI,NalphaIcfg) = IOR(Jcfg(ii,2),ISHFT(1_8,n_core_orb_64)-1)
+                else
+                  alphasIcfg_list(ii,2,idxI,NalphaIcfg) = Jcfg(ii,2)
+                endif
+                n_core_orb_64 = ISHFT(n_core_orb_64,-6)
+              end do
+            endif
             NalphaIcfg_list(idxI) = NalphaIcfg
+            !print *,i,j,"2| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1)
           endif
         endif
       end do
@@ -261,14 +483,24 @@ use bitmasks
     Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1))
     Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2))
     kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+    ndiffDOMO = 0
     do k = kstart, idxI-1
-      diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
-      ndiffSOMO = POPCNT(diffSOMO)
+      do ii=1,N_int
+        diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k)))
+        ndiffSOMO += POPCNT(diffSOMO)
+      end do
+      ! ndiffSOMO cannot be 0 (I /= k)
+      ! if ndiffSOMO /= 2 then it has to be greater than 2 and hense
+      ! this Icfg could not have been generated before.
       if (ndiffSOMO /= 2) cycle
-      diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
-      xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-      ndiffDOMO = POPCNT(diffDOMO)
-      nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+      ndiffDOMO = 0
+      nxordiffSOMODOMO = 0
+      do ii=1,N_int
+        diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,1),psi_configuration(ii,2,k)))
+        xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+        ndiffDOMO += POPCNT(diffDOMO)
+        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
+      end do
       if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then
         ppExistsQ = .TRUE.
         EXIT
@@ -279,8 +511,8 @@ use bitmasks
       ! SOMO
       if(POPCNT(Jsomo) .ge. NSOMOMin) then
         NalphaIcfg += 1
-        alphasIcfg_list(1,1,idxI,NalphaIcfg) = Icfg(1,1)
-        alphasIcfg_list(1,2,idxI,NalphaIcfg) = Icfg(1,2)
+        alphasIcfg_list(:,1,idxI,NalphaIcfg) = Icfg(:,1)
+        alphasIcfg_list(:,2,idxI,NalphaIcfg) = Icfg(:,2)
         NalphaIcfg_list(idxI) = NalphaIcfg
       endif
     endif
diff --git a/src/csf/configurations.irp.f b/src/csf/configurations.irp.f
index aebf53d9..a84cb4ab 100644
--- a/src/csf/configurations.irp.f
+++ b/src/csf/configurations.irp.f
@@ -352,6 +352,11 @@ end
    psi_configuration(k,1,i) = ieor(psi_det(k,1,i),psi_det(k,2,i))
    psi_configuration(k,2,i) = iand(psi_det(k,1,i),psi_det(k,2,i))
   enddo
+ if(i.eq.1)then
+   print *,'Preparing PSI_CONFIGURATION i=',i
+            print *," Icfg somo=",psi_configuration(1,1,1), " ", psi_configuration(2,1,1)
+            print *," Icfg domo=",psi_configuration(1,2,1), " ", psi_configuration(2,2,1)
+ endif
  enddo
 
  ! Sort
diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f
index 7d7ae09b..5fd630fc 100644
--- a/src/csf/obtain_I_foralpha.irp.f
+++ b/src/csf/obtain_I_foralpha.irp.f
@@ -38,6 +38,7 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI,
   integer :: holetype(mo_num)
   integer :: end_index
   integer :: Nsomo_I
+  integer :: listall(N_int*bit_kind_size), nelall
 
   ! 
   ! 2 2 1 1 0 0 : 1 1 0 0 0 0
@@ -65,9 +66,12 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI,
 
   ! Since CFGs are sorted wrt to seniority
   ! we don't have to search the full CFG list
-  Isomo = givenI(1,1)
-  Idomo = givenI(1,2)
-  Nsomo_I = POPCNT(Isomo)
+  Nsomo_I = 0
+  do i=1,N_int
+    Isomo = givenI(i,1)
+    Idomo = givenI(i,2)
+    Nsomo_I += POPCNT(Isomo)
+  end do
   end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_I+6,elec_num))-1)
   if(end_index .LT. 0) end_index= N_configuration
   !end_index = N_configuration
@@ -83,17 +87,24 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI,
      !  idxs_connectedI(nconnectedI)=i
      !  cycle
      !endif
-     Isomo = givenI(1,1)
-     Idomo = givenI(1,2)
-     Jsomo = psi_configuration(1,1,i)
-     Jdomo = psi_configuration(1,2,i)
-     diffSOMO = IEOR(Isomo,Jsomo)
-     ndiffSOMO = POPCNT(diffSOMO)
-     diffDOMO = IEOR(Idomo,Jdomo)
-     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-     ndiffDOMO = POPCNT(diffDOMO)
-     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
-     nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+
+     ndiffSOMO = 0
+     ndiffDOMO = 0
+     nxordiffSOMODOMO = 0
+     do ii=1,N_int
+       Isomo = givenI(ii,1)
+       Idomo = givenI(ii,2)
+       Jsomo = psi_configuration(ii,1,i)
+       Jdomo = psi_configuration(ii,2,i)
+       diffSOMO = IEOR(Isomo,Jsomo)
+       ndiffSOMO += POPCNT(diffSOMO)
+       diffDOMO = IEOR(Idomo,Jdomo)
+       xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+       ndiffDOMO += POPCNT(diffDOMO)
+       nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
+       nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     end do
+
      if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
        !-------
        ! MONO |
@@ -144,25 +155,45 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI,
         ! find out all pq holes possible
         nholes = 0
         ! holes in SOMO
-        Isomo = psi_configuration(1,1,i)
-        Idomo = psi_configuration(1,2,i)
-        do iii = 1,n_act_orb
-          ii = list_act(iii)
-           if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
-              nholes += 1
-              listholes(nholes) = ii
-              holetype(nholes) = 1
-           endif
+        !Isomo = psi_configuration(1,1,i)
+        !Idomo = psi_configuration(1,2,i)
+        !do iii = 1,n_act_orb
+        !  ii = list_act(iii)
+        !   if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
+        !      nholes += 1
+        !      listholes(nholes) = ii
+        !      holetype(nholes) = 1
+        !   endif
+        !end do
+       
+        call bitstring_to_list(psi_configuration(1,1,i),listall,nelall,N_int)
+
+        do iii=1,nelall
+          nholes += 1
+          listholes(nholes) = listall(iii)
+          holetype(nholes) = 1
         end do
+
         ! holes in DOMO
-        do iii = 1,n_act_orb
-          ii = list_act(iii)
-           if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
-              nholes += 1
-              listholes(nholes) = ii
-              holetype(nholes) = 2
-           endif
+        !do iii = 1,n_act_orb
+        !  ii = list_act(iii)
+        !   if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
+        !      nholes += 1
+        !      listholes(nholes) = ii
+        !      holetype(nholes) = 2
+        !   endif
+        !end do
+        
+        call bitstring_to_list(psi_configuration(1,2,i),listall,nelall,N_int)
+
+        do iii=1,nelall
+          if(listall(iii) .gt. n_core_orb)then
+            nholes += 1
+            listholes(nholes) = listall(iii)
+            holetype(nholes) = 2
+          endif
         end do
+
         ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)*nholes)
      endif
   end do
@@ -199,6 +230,8 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
   integer*8                                :: Isomo
   integer*8                                :: Jdomo
   integer*8                                :: Jsomo
+  integer(bit_kind)                        :: Jcfg(N_int,2)
+  integer(bit_kind)                        :: Icfg(N_int,2)
   integer*8                                :: IJsomo
   integer*8                                :: diffSOMO
   integer*8                                :: diffDOMO
@@ -209,9 +242,12 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
   integer                        :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes
   integer                        :: listholes(mo_num)
   integer                        :: holetype(mo_num)
-  integer                        :: end_index
-  integer                        :: Nsomo_alpha
+  integer                        :: end_index, ishift
+  integer                        :: Nsomo_alpha, pp,qq, nperm
   integer*8                      :: MS
+  integer                        :: exc(0:2,2,2), tz, m, n, high, low
+  integer                        :: listall(N_int*bit_kind_size), nelall
+  integer(bit_kind)              :: hole, particle, tmp
   MS = elec_alpha_num-elec_beta_num
 
   nconnectedI = 0
@@ -219,42 +255,66 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
 
   ! Since CFGs are sorted wrt to seniority
   ! we don't have to search the full CFG list
-  Isomo = Ialpha(1,1)
-  Idomo = Ialpha(1,2)
-  Nsomo_alpha = POPCNT(Isomo)
+  !Isomo = Ialpha(1,1)
+  !Idomo = Ialpha(1,2)
+  !Nsomo_alpha = POPCNT(Isomo)
+  Icfg = Ialpha
+  Nsomo_alpha = 0
+  do i=1,N_int
+    Isomo = Ialpha(i,1)
+    Idomo = Ialpha(i,2)
+    Nsomo_alpha += POPCNT(Isomo)
+  end do
   end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1)
   if(end_index .LT. 0) end_index= N_configuration
-  end_index = N_configuration
+  !end_index = N_configuration
 
 
   p = 0
   q = 0
-  if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1'
+  !if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1'
   do i=idxI,end_index
-     Isomo = Ialpha(1,1)
-     Idomo = Ialpha(1,2)
-     Jsomo = psi_configuration(1,1,i)
-     Jdomo = psi_configuration(1,2,i)
      ! Check for Minimal alpha electrons (MS)
-     if(POPCNT(Isomo).lt.MS)then
+     if(Nsomo_alpha .lt. MS)then
        cycle
      endif
-     diffSOMO = IEOR(Isomo,Jsomo)
-     ndiffSOMO = POPCNT(diffSOMO)
-     !if(idxI.eq.1)then
-     !  print *," \t idxI=",i," diffS=",ndiffSOMO," popJs=", POPCNT(Jsomo)," popIs=",POPCNT(Isomo)
-     !endif
-     diffDOMO = IEOR(Idomo,Jdomo)
-     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-     ndiffDOMO = POPCNT(diffDOMO)
-     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
-     nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     !Isomo = Ialpha(1,1)
+     !Idomo = Ialpha(1,2)
+     !Jsomo = psi_configuration(1,1,i)
+     !Jdomo = psi_configuration(1,2,i)
+     !diffSOMO = IEOR(Isomo,Jsomo)
+     !ndiffSOMO = POPCNT(diffSOMO)
+     !diffDOMO = IEOR(Idomo,Jdomo)
+     !xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     !ndiffDOMO = POPCNT(diffDOMO)
+     !nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     !nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+
+     ndiffSOMO = 0
+     ndiffDOMO = 0
+     nxordiffSOMODOMO = 0
+     do ii=1,N_int
+       Isomo = Ialpha(ii,1)
+       Idomo = Ialpha(ii,2)
+       Jsomo = psi_configuration(ii,1,i)
+       Jdomo = psi_configuration(ii,2,i)
+       diffSOMO = IEOR(Isomo,Jsomo)
+       ndiffSOMO += POPCNT(diffSOMO)
+       diffDOMO = IEOR(Idomo,Jdomo)
+       xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+       ndiffDOMO += POPCNT(diffDOMO)
+       nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
+       nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     end do
+     Jcfg = psi_configuration(:,:,i)
+
      if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
         select case(ndiffDOMO)
         case (0)
            ! SOMO -> VMO
            !print *,"obt SOMO -> VMO"
            extyp = 3
+           if(N_int .eq. 1) then
            IJsomo = IEOR(Isomo, Jsomo)
 !IRP_IF WITHOUT_TRAILZ
 !           p = (popcnt(ieor( IAND(Isomo,IJsomo) , IAND(Isomo,IJsomo) -1))-1) + 1
@@ -267,6 +327,77 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
 !IRP_ELSE
            q = TRAILZ(IJsomo) + 1
 !IRP_ENDIF
+           !print *," p=",p," q=",q
+           !call get_single_excitation_cfg(Jcfg, Icfg, p, q, N_int)
+         else
+  exc = 0
+  do ii = 1,2
+    ishift = 1-bit_kind_size
+    do l=1,N_int
+      ishift = ishift + bit_kind_size
+      if (Jcfg(l,ii) == Icfg(l,ii)) then
+        cycle
+      endif
+      tmp = xor( Jcfg(l,ii), Icfg(l,ii) )
+      particle = iand(tmp, Icfg(l,ii))
+      hole     = iand(tmp, Jcfg(l,ii))
+      if (particle /= 0_bit_kind) then
+        tz = trailz(particle)
+        exc(0,2,ii) = 1
+        exc(1,2,ii) = tz+ishift
+        !print *,"part ",tz+ishift, " ii=",ii, exc(1,2,2)
+      endif
+      if (hole /= 0_bit_kind) then
+        tz = trailz(hole)
+        exc(0,1,ii) = 1
+        exc(1,1,ii) = tz+ishift
+        !print *,"hole ",tz+ishift, " ii=",ii, exc(1,1,2)
+      endif
+
+      if ( iand(exc(0,1,ii),exc(0,2,ii)) /= 1) then  ! exc(0,1,ii)/=1 and exc(0,2,ii) /= 1
+        cycle
+      endif
+
+      high = max(exc(1,1,ii), exc(1,2,ii))-1
+      low  = min(exc(1,1,ii), exc(1,2,ii))
+
+      ASSERT (low >= 0)
+      ASSERT (high > 0)
+
+      k = shiftr(high,bit_kind_shift)+1
+      j = shiftr(low,bit_kind_shift)+1
+      m = iand(high,bit_kind_size-1)
+      n = iand(low,bit_kind_size-1)
+
+      if (j==k) then
+        nperm = nperm + popcnt(iand(Jcfg(j,ii),           &
+            iand( shiftl(1_bit_kind,m)-1_bit_kind,            &
+                  not(shiftl(1_bit_kind,n))+1_bit_kind)) )
+      else
+        nperm = nperm + popcnt(                                    &
+             iand(Jcfg(j,ii),                                   &
+                  iand(not(0_bit_kind),                            &
+                       (not(shiftl(1_bit_kind,n)) + 1_bit_kind) ))) &
+             + popcnt(iand(Jcfg(k,ii),                          &
+                           (shiftl(1_bit_kind,m) - 1_bit_kind ) ))
+
+        do iii=j+1,k-1
+          nperm = nperm + popcnt(Jcfg(iii,ii))
+        end do
+
+      endif
+
+      ! Set p and q
+      q = max(exc(1,1,1),exc(1,1,2))
+      p = max(exc(1,2,1),exc(1,2,2))
+      exit
+
+    enddo
+  enddo
+endif
+           !assert ( p == pp)
+           !assert ( q == qq)
+           !print *," --- p=",p," q=",q
         case (1)
            ! DOMO -> VMO
            ! or
@@ -277,6 +408,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
               ! DOMO -> VMO
               !print *,"obt DOMO -> VMO"
               extyp = 2
+              if(N_int.eq.1)then
 !IRP_IF WITHOUT_TRAILZ
 !              p = (popcnt(ieor( IEOR(Idomo,Jdomo),IEOR(Idomo,Jdomo) -1))-1) + 1
 !IRP_ELSE
@@ -289,10 +421,83 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
 !IRP_ELSE
               q = TRAILZ(Isomo) + 1
 !IRP_ENDIF
+  else
+    exc=0
+  exc(0,1,1) = 0
+  exc(0,2,1) = 0
+  exc(0,1,2) = 0
+  exc(0,2,2) = 0
+  do ii = 1,2
+    ishift = 1-bit_kind_size
+    do l=1,N_int
+      ishift = ishift + bit_kind_size
+      if (Jcfg(l,ii) == Icfg(l,ii)) then
+        cycle
+      endif
+      tmp = xor( Jcfg(l,ii), Icfg(l,ii) )
+      particle = iand(tmp, Icfg(l,ii))
+      hole     = iand(tmp, Jcfg(l,ii))
+      if (particle /= 0_bit_kind) then
+        tz = trailz(particle)
+        exc(0,2,ii) = 1
+        exc(1,2,ii) = tz+ishift
+        !print *,"part ",tz+ishift, " ii=",ii
+      endif
+      if (hole /= 0_bit_kind) then
+        tz = trailz(hole)
+        exc(0,1,ii) = 1
+        exc(1,1,ii) = tz+ishift
+        !print *,"hole ",tz+ishift, " ii=",ii
+      endif
+
+      if ( iand(exc(0,1,ii),exc(0,2,ii)) /= 1) then  ! exc(0,1,ii)/=1 and exc(0,2,ii) /= 1
+        cycle
+      endif
+
+      high = max(exc(1,1,ii), exc(1,2,ii))-1
+      low  = min(exc(1,1,ii), exc(1,2,ii))
+
+      ASSERT (low >= 0)
+      ASSERT (high > 0)
+
+      k = shiftr(high,bit_kind_shift)+1
+      j = shiftr(low,bit_kind_shift)+1
+      m = iand(high,bit_kind_size-1)
+      n = iand(low,bit_kind_size-1)
+
+      if (j==k) then
+        nperm = nperm + popcnt(iand(Jcfg(j,ii),           &
+            iand( shiftl(1_bit_kind,m)-1_bit_kind,            &
+                  not(shiftl(1_bit_kind,n))+1_bit_kind)) )
+      else
+        nperm = nperm + popcnt(                                    &
+             iand(Jcfg(j,ii),                                   &
+                  iand(not(0_bit_kind),                            &
+                       (not(shiftl(1_bit_kind,n)) + 1_bit_kind) ))) &
+             + popcnt(iand(Jcfg(k,ii),                          &
+                           (shiftl(1_bit_kind,m) - 1_bit_kind ) ))
+
+        do iii=j+1,k-1
+          nperm = nperm + popcnt(Jcfg(iii,ii))
+        end do
+
+      endif
+
+      ! Set p and q
+      q = max(exc(1,1,1),exc(1,1,2))
+      p = max(exc(1,2,1),exc(1,2,2))
+      exit
+
+    enddo
+  enddo
+endif
+           !assert ( p == pp)
+           !assert ( q == qq)
            else
               ! SOMO -> SOMO
               !print *,"obt SOMO -> SOMO"
               extyp = 1
+              if(N_int.eq.1)then
 !IRP_IF WITHOUT_TRAILZ
 !              q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1
 !IRP_ELSE
@@ -309,11 +514,84 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
               !if(POPCNT(Isomo).lt.MS)then
               !  cycle
               !endif
+  else
+  exc=0
+  exc(0,1,1) = 0
+  exc(0,2,1) = 0
+  exc(0,1,2) = 0
+  exc(0,2,2) = 0
+  do ii = 1,2
+    ishift = 1-bit_kind_size
+    do l=1,N_int
+      ishift = ishift + bit_kind_size
+      if (Jcfg(l,ii) == Icfg(l,ii)) then
+        cycle
+      endif
+      tmp = xor( Jcfg(l,ii), Icfg(l,ii) )
+      particle = iand(tmp, Icfg(l,ii))
+      hole     = iand(tmp, Jcfg(l,ii))
+      if (particle /= 0_bit_kind) then
+        tz = trailz(particle)
+        exc(0,2,ii) = 1
+        exc(1,2,ii) = tz+ishift
+        !print *,"part ",tz+ishift, " ii=",ii
+      endif
+      if (hole /= 0_bit_kind) then
+        tz = trailz(hole)
+        exc(0,1,ii) = 1
+        exc(1,1,ii) = tz+ishift
+        !print *,"hole ",tz+ishift, " ii=",ii
+      endif
+
+      if ( iand(exc(0,1,ii),exc(0,2,ii)) /= 1) then  ! exc(0,1,ii)/=1 and exc(0,2,ii) /= 1
+        cycle
+      endif
+
+      high = max(exc(1,1,ii), exc(1,2,ii))-1
+      low  = min(exc(1,1,ii), exc(1,2,ii))
+
+      ASSERT (low >= 0)
+      ASSERT (high > 0)
+
+      k = shiftr(high,bit_kind_shift)+1
+      j = shiftr(low,bit_kind_shift)+1
+      m = iand(high,bit_kind_size-1)
+      n = iand(low,bit_kind_size-1)
+
+      if (j==k) then
+        nperm = nperm + popcnt(iand(Jcfg(j,ii),           &
+            iand( shiftl(1_bit_kind,m)-1_bit_kind,            &
+                  not(shiftl(1_bit_kind,n))+1_bit_kind)) )
+      else
+        nperm = nperm + popcnt(                                    &
+             iand(Jcfg(j,ii),                                   &
+                  iand(not(0_bit_kind),                            &
+                       (not(shiftl(1_bit_kind,n)) + 1_bit_kind) ))) &
+             + popcnt(iand(Jcfg(k,ii),                          &
+                           (shiftl(1_bit_kind,m) - 1_bit_kind ) ))
+
+        do iii=j+1,k-1
+          nperm = nperm + popcnt(Jcfg(iii,ii))
+        end do
+
+      endif
+
+      ! Set p and q
+      q = max(exc(1,1,1),exc(1,1,2))
+      p = max(exc(1,2,1),exc(1,2,2))
+      exit
+
+    enddo
+  enddo
+endif
+           !assert ( p == pp)
+           !assert ( q == qq)
            end if
         case (2)
            ! DOMO -> SOMO
            !print *,"obt DOMO -> SOMO"
            extyp = 4
+           if(N_int.eq.1)then
            IJsomo = IEOR(Isomo, Jsomo)
 !IRP_IF WITHOUT_TRAILZ
 !           p = (popcnt(ieor( IAND(Jsomo,IJsomo), IAND(Jsomo,IJsomo)-1))-1) + 1
@@ -326,6 +604,79 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
 !IRP_ELSE
            q = TRAILZ(IJsomo) + 1
 !IRP_ENDIF
+
+  else
+  exc=0
+  exc(0,1,1) = 0
+  exc(0,2,1) = 0
+  exc(0,1,2) = 0
+  exc(0,2,2) = 0
+  do ii = 1,2
+    ishift = 1-bit_kind_size
+    do l=1,N_int
+      ishift = ishift + bit_kind_size
+      if (Jcfg(l,ii) == Icfg(l,ii)) then
+        cycle
+      endif
+      tmp = xor( Jcfg(l,ii), Icfg(l,ii) )
+      particle = iand(tmp, Icfg(l,ii))
+      hole     = iand(tmp, Jcfg(l,ii))
+      if (particle /= 0_bit_kind) then
+        tz = trailz(particle)
+        exc(0,2,ii) = 1
+        exc(1,2,ii) = tz+ishift
+        !print *,"part ",tz+ishift, " ii=",ii
+      endif
+      if (hole /= 0_bit_kind) then
+        tz = trailz(hole)
+        exc(0,1,ii) = 1
+        exc(1,1,ii) = tz+ishift
+        !print *,"hole ",tz+ishift, " ii=",ii
+      endif
+
+      if ( iand(exc(0,1,ii),exc(0,2,ii)) /= 1) then  ! exc(0,1,ii)/=1 and exc(0,2,ii) /= 1
+        cycle
+      endif
+
+      high = max(exc(1,1,ii), exc(1,2,ii))-1
+      low  = min(exc(1,1,ii), exc(1,2,ii))
+
+      ASSERT (low >= 0)
+      ASSERT (high > 0)
+
+      k = shiftr(high,bit_kind_shift)+1
+      j = shiftr(low,bit_kind_shift)+1
+      m = iand(high,bit_kind_size-1)
+      n = iand(low,bit_kind_size-1)
+
+      if (j==k) then
+        nperm = nperm + popcnt(iand(Jcfg(j,ii),           &
+            iand( shiftl(1_bit_kind,m)-1_bit_kind,            &
+                  not(shiftl(1_bit_kind,n))+1_bit_kind)) )
+      else
+        nperm = nperm + popcnt(                                    &
+             iand(Jcfg(j,ii),                                   &
+                  iand(not(0_bit_kind),                            &
+                       (not(shiftl(1_bit_kind,n)) + 1_bit_kind) ))) &
+             + popcnt(iand(Jcfg(k,ii),                          &
+                           (shiftl(1_bit_kind,m) - 1_bit_kind ) ))
+
+        do iii=j+1,k-1
+          nperm = nperm + popcnt(Jcfg(iii,ii))
+        end do
+
+      endif
+
+      ! Set p and q
+      q = max(exc(1,1,1),exc(1,1,2))
+      p = max(exc(1,2,1),exc(1,2,2))
+      exit
+
+    enddo
+  enddo
+endif
+           !assert ( p == pp)
+           !assert ( q == qq)
         case default
            print *,"something went wront in get connectedI"
         end select
@@ -345,26 +696,46 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
         ! find out all pq holes possible
         nholes = 0
         ! holes in SOMO
-        Isomo = psi_configuration(1,1,i)
-        Idomo = psi_configuration(1,2,i)
-        do iii = 1,n_act_orb
-          ii = list_act(iii)
-           if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
-              nholes += 1
-              listholes(nholes) = ii
-              holetype(nholes) = 1
-           endif
+        !Isomo = psi_configuration(1,1,i)
+        !Idomo = psi_configuration(1,2,i)
+        !do iii = 1,n_act_orb
+        !  ii = list_act(iii)
+        !   if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
+        !      nholes += 1
+        !      listholes(nholes) = ii
+        !      holetype(nholes) = 1
+        !   endif
+        !end do
+        call bitstring_to_list(psi_configuration(1,1,i),listall,nelall,N_int)
+
+        do iii=1,nelall
+          nholes += 1
+          listholes(nholes) = listall(iii)
+          holetype(nholes) = 1
         end do
+
         ! holes in DOMO
-        do iii = 1,n_act_orb
-          ii = list_act(iii)
-           if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
-              nholes += 1
-              listholes(nholes) = ii
-              holetype(nholes) = 2
-           endif
+        !do iii = 1,n_act_orb
+        !  ii = list_act(iii)
+        !   if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
+        !      nholes += 1
+        !      listholes(nholes) = ii
+        !      holetype(nholes) = 2
+        !   endif
+        !end do
+        nelall=0
+        listall=0
+        call bitstring_to_list(psi_configuration(1,2,i),listall,nelall,N_int)
+
+        do iii=1,nelall
+          if(listall(iii) .gt. n_core_orb)then
+            nholes += 1
+            listholes(nholes) = listall(iii)
+            holetype(nholes) = 2
+          endif
         end do
 
+
         do k=1,nholes
            p = listholes(k)
            q = p
diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f
index 21c19aaa..99def47e 100644
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@@ -835,7 +835,7 @@ subroutine calculate_preconditioner_cfg(diag_energies)
   ! the configurations in psi_configuration
   ! returns : diag_energies :
   END_DOC
-  integer :: i,j,k,kk,l,p,q,noccp,noccq, ii, jj
+  integer :: i,j,k,kk,l,p,q,noccp,noccq, ii, jj, iii
   real*8,intent(out) :: diag_energies(n_CSF)
   integer                            :: nholes
   integer                            :: nvmos
@@ -863,6 +863,7 @@ subroutine calculate_preconditioner_cfg(diag_energies)
   real*8            :: meCC
   real*8            :: ecore
   real*8            :: core_act_contrib
+  integer                        :: listall(N_int*bit_kind_size), nelall
 
   !PROVIDE h_core_ri
   PROVIDE core_fock_operator
@@ -894,47 +895,61 @@ subroutine calculate_preconditioner_cfg(diag_energies)
      ! find out all pq holes possible
      nholes = 0
      ! holes in SOMO
-     !do k = 1,mo_num
-     do kk = 1,n_act_orb
-       k = list_act(kk)
-        if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then
-           nholes += 1
-           listholes(nholes) = k
-           holetype(nholes) = 1
-        endif
-     enddo
-     ! holes in DOMO
-     !do k = n_core_orb+1,n_core_orb + n_act_orb
-     !do k = 1+n_core_inact_orb,n_core_orb+n_core_inact_act_orb
-     !do k = 1,mo_num
-     do kk = 1,n_act_orb
-       k = list_act(kk)
-        if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then
-           nholes += 1
-           listholes(nholes) = k
-           holetype(nholes) = 2
-        endif
-     enddo
+     !do kk = 1,n_act_orb
+     !  k = list_act(kk)
+     !   if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then
+     !      nholes += 1
+     !      listholes(nholes) = k
+     !      holetype(nholes) = 1
+     !   endif
+     !enddo
+     call bitstring_to_list(psi_configuration(1,1,i),listall,nelall,N_int)
 
-     ! find vmos
-     listvmos = -1
-     vmotype = -1
-     nvmos = 0
-     !do k = n_core_orb+1,n_core_orb + n_act_orb
-     !do k = 1,mo_num
-     do kk = 1,n_act_orb
-       k = list_act(kk)
-        !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
-        if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then
-           nvmos += 1
-           listvmos(nvmos) = k
-           vmotype(nvmos) = 0
-        else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then
-           nvmos += 1
-           listvmos(nvmos) = k
-           vmotype(nvmos) = 1
-        end if
-     enddo
+     do iii=1,nelall
+       nholes += 1
+       listholes(nholes) = listall(iii)
+       holetype(nholes) = 1
+     end do
+
+     ! holes in DOMO
+     !do kk = 1,n_act_orb
+     !  k = list_act(kk)
+     !   if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then
+     !      nholes += 1
+     !      listholes(nholes) = k
+     !      holetype(nholes) = 2
+     !   endif
+     !enddo
+     call bitstring_to_list(psi_configuration(1,2,i),listall,nelall,N_int)
+
+     do iii=1,nelall
+       if(listall(iii) .gt. n_core_orb)then
+         nholes += 1
+         listholes(nholes) = listall(iii)
+         holetype(nholes) = 2
+       endif
+     end do
+
+
+     !!! find vmos
+     !!listvmos = -1
+     !!vmotype = -1
+     !!nvmos = 0
+     !!!do k = n_core_orb+1,n_core_orb + n_act_orb
+     !!!do k = 1,mo_num
+     !!do kk = 1,n_act_orb
+     !!  k = list_act(kk)
+     !!   !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
+     !!   if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then
+     !!      nvmos += 1
+     !!      listvmos(nvmos) = k
+     !!      vmotype(nvmos) = 0
+     !!   else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then
+     !!      nvmos += 1
+     !!      listvmos(nvmos) = k
+     !!      vmotype(nvmos) = 1
+     !!   end if
+     !!enddo
      !print *,"I=",i
      !call debug_spindet(psi_configuration(1,1,i),N_int)
      !call debug_spindet(psi_configuration(1,2,i),N_int)
@@ -1413,8 +1428,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
   !nconnectedtotalmax = 1000
   !nconnectedmaxJ = 1000
   maxnalphas = elec_num*mo_num
-  Icfg(1,1) = psi_configuration(1,1,1)
-  Icfg(1,2) = psi_configuration(1,2,1)
+  Icfg(:,1) = psi_configuration(:,1,1)
+  Icfg(:,2) = psi_configuration(:,2,1)
   allocate(listconnectedJ(N_INT,2,max(sze,10000)))
   allocate(idslistconnectedJ(max(sze,10000)))
   call obtain_connected_J_givenI(1, Icfg, listconnectedJ, idslistconnectedJ, nconnectedmaxJ, nconnectedtotalmax)
@@ -1632,9 +1647,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
 
      Nalphas_Icfg = NalphaIcfg_list(i)
      alphas_Icfg(1:n_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg)
-     if(Nalphas_Icfg .GT. maxnalphas) then
-       print *,"Nalpha > maxnalpha"
-     endif
+     !if(Nalphas_Icfg .GT. maxnalphas) then
+     !  print *,"Nalpha > maxnalpha"
+     !endif
 
      call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ)
 
@@ -1650,15 +1665,15 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
         call obtain_connected_I_foralpha(i, alphas_Icfg(1,1,k), connectedI_alpha, idxs_connectedI_alpha, &
                                          nconnectedI, excitationIds, excitationTypes, diagfactors)
 
+        !if(i .EQ. 1) then
+        !   print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k))
+        !endif
+
         
         if(nconnectedI .EQ. 0) then
            cycle
         endif
 
-        !if(i .EQ. 1) then
-        !   print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k))
-        !endif
-
         ! Here we do 2x the loop. One to count for the size of the matrix, then we compute.
         totcolsTKI = 0
         rowsTKI = -1
diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f
index 897607a9..08718023 100644
--- a/src/determinants/slater_rules.irp.f
+++ b/src/determinants/slater_rules.irp.f
@@ -83,7 +83,7 @@ subroutine get_excitation(det1,det2,exc,degree,phase,Nint)
   !               exc(1,1,1) = q
   !               exc(1,2,1) = p
 
-  ! T^alpha_pq  : exc(0,1,2) = 1
+  ! T^beta_pq   : exc(0,1,2) = 1
   !               exc(0,2,2) = 1
   !               exc(1,1,2) = q
   !               exc(1,2,2) = p
@@ -434,6 +434,98 @@ subroutine get_single_excitation(det1,det2,exc,phase,Nint)
 
 end
 
+subroutine get_single_excitation_cfg(cfg1,cfg2,p,q,Nint)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns the excitation operator between two singly excited configurations.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: cfg1(Nint,2)
+  integer(bit_kind), intent(in)  :: cfg2(Nint,2)
+  integer, intent(out)           :: p, q
+  integer                        :: tz
+  integer                        :: l, ispin, idx_hole, idx_particle, ishift
+  integer                        :: nperm
+  integer                        :: i,j,k,m,n
+  integer                        :: high, low
+  integer                        :: a,b,c,d
+  integer(bit_kind)              :: hole, particle, tmp
+  integer                        :: exc(0:2,2,2)
+
+  ASSERT (Nint > 0)
+  nperm = 0
+  p = 0
+  q = 0
+  exc(0,1,1) = 0
+  exc(0,2,1) = 0
+  exc(0,1,2) = 0
+  exc(0,2,2) = 0
+  do ispin = 1,2
+    ishift = 1-bit_kind_size
+    do l=1,Nint
+      ishift = ishift + bit_kind_size
+      if (cfg1(l,ispin) == cfg2(l,ispin)) then
+        cycle
+      endif
+      tmp = xor( cfg1(l,ispin), cfg2(l,ispin) )
+      particle = iand(tmp, cfg2(l,ispin))
+      hole     = iand(tmp, cfg1(l,ispin))
+      if (particle /= 0_bit_kind) then
+        tz = trailz(particle)
+        exc(0,2,ispin) = 1
+        exc(1,2,ispin) = tz+ishift
+        !print *,"part ",tz+ishift, " ispin=",ispin
+      endif
+      if (hole /= 0_bit_kind) then
+        tz = trailz(hole)
+        exc(0,1,ispin) = 1
+        exc(1,1,ispin) = tz+ishift
+        !print *,"hole ",tz+ishift, " ispin=",ispin
+      endif
+
+      if ( iand(exc(0,1,ispin),exc(0,2,ispin)) /= 1) then  ! exc(0,1,ispin)/=1 and exc(0,2,ispin) /= 1
+        cycle
+      endif
+
+      high = max(exc(1,1,ispin), exc(1,2,ispin))-1
+      low  = min(exc(1,1,ispin), exc(1,2,ispin))
+
+      ASSERT (low >= 0)
+      ASSERT (high > 0)
+
+      k = shiftr(high,bit_kind_shift)+1
+      j = shiftr(low,bit_kind_shift)+1
+      m = iand(high,bit_kind_size-1)
+      n = iand(low,bit_kind_size-1)
+
+      if (j==k) then
+        nperm = nperm + popcnt(iand(cfg1(j,ispin),           &
+            iand( shiftl(1_bit_kind,m)-1_bit_kind,            &
+                  not(shiftl(1_bit_kind,n))+1_bit_kind)) )
+      else
+        nperm = nperm + popcnt(                                    &
+             iand(cfg1(j,ispin),                                   &
+                  iand(not(0_bit_kind),                            &
+                       (not(shiftl(1_bit_kind,n)) + 1_bit_kind) ))) &
+             + popcnt(iand(cfg1(k,ispin),                          &
+                           (shiftl(1_bit_kind,m) - 1_bit_kind ) ))
+
+        do i=j+1,k-1
+          nperm = nperm + popcnt(cfg1(i,ispin))
+        end do
+
+      endif
+
+      ! Set p and q
+      q = max(exc(1,1,1),exc(1,1,2))
+      p = max(exc(1,2,1),exc(1,2,2))
+      return
+
+    enddo
+  enddo
+end
+
 subroutine bitstring_to_list_ab( string, list, n_elements, Nint)
   use bitmasks
   implicit none

From 1fbbaf58a54e5486e0a7019cd6bb424a189efa6e Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Mon, 21 Nov 2022 17:04:03 +0100
Subject: [PATCH 02/35] Fix iand for csf.

---
 src/csf/sigma_vector.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f
index 99def47e..cfe6279f 100644
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@@ -240,7 +240,7 @@ end subroutine get_phase_qp_to_cfg
   ! initialization
   psi_coef_config = 0.d0
   DetToCSFTransformationMatrix(0,:,:) = 1.d0
-  do i = 2-iand(MS,1), NSOMOMax,2
+  do i = 2-iand(MS,1_8), NSOMOMax,2
     Isomo = IBSET(0_8, i) - 1_8
     ! rows = Ncsfs
     ! cols = Ndets

From f2f9b9ffd0246f46ba3e8181b4272b686562d971 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Tue, 22 Nov 2022 17:56:29 +0100
Subject: [PATCH 03/35] Working on csf nint.

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 908 +++++++++++++------
 1 file changed, 618 insertions(+), 290 deletions(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 167b41fc..581498c5 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -1,5 +1,529 @@
 use bitmasks
 
+!!! BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*(mo_num))]
+!!!&BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ]
+!!!  implicit none
+!!!  !use bitmasks
+!!!  BEGIN_DOC
+!!!  ! Documentation for alphasI
+!!!  ! Returns the associated alpha's for
+!!!  ! the input configuration Icfg.
+!!!  END_DOC
+!!!
+!!!  integer                        :: idxI ! The id of the Ith CFG
+!!!  integer(bit_kind)              :: Icfg(N_int,2)
+!!!  integer(bit_kind)              :: Jcfg(N_int,2)
+!!!  integer                        :: NalphaIcfg
+!!!  logical,dimension(:,:),allocatable :: tableUniqueAlphas
+!!!  integer                        :: listholes(mo_num)
+!!!  integer                        :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+!!!  integer                        :: nholes
+!!!  integer                        :: nvmos
+!!!  integer                        :: listvmos(mo_num)
+!!!  integer                        :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+!!!  integer*8                      :: Idomo, Idomop, Idomoq 
+!!!  integer*8                      :: Isomo, Isomop, Isomoq
+!!!  integer*8                      :: Jdomo, Jdomop, Jdomoq
+!!!  integer*8                      :: Jsomo, Jsomop, Jsomoq
+!!!  integer*8                      :: diffSOMO
+!!!  integer*8                      :: diffDOMO
+!!!  integer*8                      :: xordiffSOMODOMO
+!!!  integer                        :: ndiffSOMO
+!!!  integer                        :: ndiffDOMO
+!!!  integer                        :: nxordiffSOMODOMO
+!!!  integer                        :: ndiffAll
+!!!  integer                        :: i,ii,iii
+!!!  integer                        :: j,jj, i_s, i_d
+!!!  integer                        :: k,kk
+!!!  integer                        :: kstart
+!!!  integer                        :: kend
+!!!  integer                        :: Nsomo_I, Nsomo_J
+!!!  integer                        :: hole, n_core_orb_64
+!!!  integer                        :: p, pp, p_s
+!!!  integer                        :: q, qq, q_s
+!!!  integer                        :: countalphas
+!!!  logical                        :: pqAlreadyGenQ
+!!!  logical                        :: pqExistsQ
+!!!  logical                        :: ppExistsQ
+!!!  integer*8                      :: MS
+!!!  integer :: listall(N_int*bit_kind_size), nelall
+!!!
+!!!  double precision               :: t0, t1
+!!!  call wall_time(t0)
+!!!
+!!!  MS = elec_alpha_num-elec_beta_num
+!!!
+!!!  allocate(tableUniqueAlphas(mo_num,mo_num))
+!!!  NalphaIcfg_list = 0
+!!!
+!!!  do idxI = 1, N_configuration
+!!!
+!!!    Icfg  = psi_configuration(:,:,idxI)
+!!!    Jcfg  = psi_configuration(:,:,idxI)
+!!!    !print *," Jcfg somo=",Jcfg(1,1), " ", Jcfg(2,1)
+!!!    !print *," Jcfg domo=",Jcfg(1,2), " ", Jcfg(2,2)
+!!!
+!!!    Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+!!!    Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+!!!
+!!!    ! find out all pq holes possible
+!!!    nholes = 0
+!!!    ! holes in SOMO
+!!!    !do ii = 1,n_act_orb
+!!!    !  i = list_act(ii)
+!!!    !  if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
+!!!    !    nholes += 1
+!!!    !    listholes(nholes) = i
+!!!    !    holetype(nholes) = 1
+!!!    !  endif
+!!!    !end do
+!!!        call bitstring_to_list(psi_configuration(1,1,idxI),listall,nelall,N_int)
+!!!
+!!!        !print *,'list somo'
+!!!        do iii=1,nelall
+!!!          nholes += 1
+!!!          listholes(nholes) = listall(iii)
+!!!            !print *,listall(iii)
+!!!          holetype(nholes) = 1
+!!!        end do
+!!!
+!!!        Nsomo_I = nelall
+!!!
+!!!    ! holes in DOMO
+!!!    !do ii = 1,n_act_orb
+!!!    !  i = list_act(ii)
+!!!    !  if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
+!!!    !    nholes += 1
+!!!    !    listholes(nholes) = i
+!!!    !    holetype(nholes) = 2
+!!!    !  endif
+!!!    !end do
+!!!
+!!!    !do iii=1,N_int
+!!!    !  print *,' iii=',iii, psi_configuration(iii,2,idxI), ' idxI=',idxI
+!!!    !end do
+!!!        call bitstring_to_list(psi_configuration(1,2,idxI),listall,nelall,N_int)
+!!!
+!!!        !print *,'list domo ncore=',n_core_orb, ' nelall=',nelall
+!!!        do iii=1,nelall
+!!!          if(listall(iii) .gt. n_core_orb)then
+!!!            nholes += 1
+!!!            listholes(nholes) = listall(iii)
+!!!            !print *,listall(iii)
+!!!            holetype(nholes) = 2
+!!!          endif
+!!!        end do
+!!!
+!!!    ! find vmos
+!!!    listvmos = -1
+!!!    vmotype = -1
+!!!    nvmos = 0
+!!!    !do ii = 1,n_act_orb
+!!!    !  i = list_act(ii)
+!!!    !  if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then
+!!!    !    if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then
+!!!    !      nvmos += 1
+!!!    !      listvmos(nvmos) = i
+!!!    !      print *,'1 i=',i
+!!!    !      vmotype(nvmos) = 1
+!!!    !    else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then
+!!!    !      nvmos += 1
+!!!    !      listvmos(nvmos) = i
+!!!    !      print *,'2 i=',i
+!!!    !      vmotype(nvmos) = 2
+!!!    !    end if
+!!!    !  end if
+!!!    !end do
+!!!    !print *,'-----------'
+!!!
+!!!    ! Take into account N_int
+!!!    do ii = 1, n_act_orb
+!!!      iii = list_act(ii)
+!!!      i_s = (1+((iii-1)/63))
+!!!      i = iii - ( i_s -1 )*63 
+!!!      Isomo = iand(act_bitmask(i_s,1),Icfg(i_s,1))
+!!!      Idomo = iand(act_bitmask(i_s,1),Icfg(i_s,2))
+!!!
+!!!      if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then
+!!!        if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then
+!!!          nvmos += 1
+!!!          listvmos(nvmos) = iii
+!!!          vmotype(nvmos) = 1
+!!!        else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then
+!!!          nvmos += 1
+!!!          listvmos(nvmos) = iii
+!!!          vmotype(nvmos) = 2
+!!!        end if
+!!!      end if
+!!!    end do
+!!!
+!!!    tableUniqueAlphas = .FALSE.
+!!!
+!!!    ! Now find the allowed (p,q) excitations
+!!!    Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+!!!    Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+!!!    !Nsomo_I = POPCNT(Isomo)
+!!!    if(Nsomo_I .EQ. 0) then
+!!!      kstart = 1
+!!!    else
+!!!      kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))
+!!!    endif
+!!!    kend = idxI-1
+!!!
+!!!    do i = 1,nholes
+!!!      pp  = listholes(i)
+!!!      p_s = (1+((pp-1)/63))
+!!!      p   = pp - (p_s - 1)*63
+!!!      !print *,' pp=',pp, ' p_s=',p_s, ' p=',p
+!!!      do j = 1,nvmos
+!!!        qq  = listvmos(j)
+!!!        q_s = (1+((qq-1)/63))
+!!!        q   = qq - (q_s - 1)*63
+!!!        !print *,' qq=',qq, ' q_s=',q_s, ' q=',q
+!!!        Isomop = iand(act_bitmask(i_s,1),Icfg(p_s,1))
+!!!        Idomop = iand(act_bitmask(i_s,1),Icfg(p_s,2))
+!!!        Isomop = iand(act_bitmask(i_s,1),Icfg(q_s,1))
+!!!        Idomop = iand(act_bitmask(i_s,1),Icfg(q_s,2))
+!!!        if(p .EQ. q) cycle
+!!!        if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+!!!          ! SOMO -> VMO
+!!!          !print *,'SOMO -> VMO'
+!!!          if (p_s .eq. q_s) then
+!!!            Jsomop = IBCLR(Isomop,p-1)
+!!!            Jsomop = IBSET(Jsomop,q-1)
+!!!            Jsomoq = Jsomop
+!!!          else
+!!!            Jsomop = IBCLR(Isomop,p-1)
+!!!            Jsomoq = IBSET(Isomoq,q-1)
+!!!          endif
+!!!
+!!!          ! Domo remains the same
+!!!          Jdomop = Idomop
+!!!          Jdomoq = Idomoq
+!!!
+!!!          kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+!!!          kend = idxI-1
+!!!        else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+!!!          ! SOMO -> SOMO
+!!!          !print *,'SOMO -> SOMO'
+!!!          if(p_s .eq. q_s) then
+!!!            Jsomop = IBCLR(Isomop,p-1)
+!!!            Jsomop = IBCLR(Jsomop,q-1)
+!!!            Jsomoq = Jsomop
+!!!          else
+!!!            Jsomop = IBCLR(Isomop,p-1)
+!!!            Jsomoq = IBCLR(Isomoq,q-1)
+!!!          endif
+!!!
+!!!          Jdomoq = IBSET(Idomoq,q-1)
+!!!
+!!!          ! Check for Minimal alpha electrons (MS)
+!!!          if(POPCNT(Jsomoq).ge.MS)then
+!!!            kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+!!!            kend = idxI-1
+!!!          else
+!!!            cycle
+!!!          endif
+!!!        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+!!!          ! DOMO -> VMO
+!!!          !print *,'DOMO -> VMO', Isomop, p, q, Jsomop
+!!!          if(p_s .eq. q_s) then
+!!!            Jsomop = IBSET(Isomop,p-1)
+!!!            Jsomop = IBSET(Jsomop,q-1)
+!!!            Jsomoq = Jsomop
+!!!          else
+!!!            Jsomop = IBSET(Isomop,p-1)
+!!!            Jsomoq = IBSET(Jsomoq,q-1)
+!!!          endif
+!!!          !print *, 'Jsomop=', Jsomop
+!!!
+!!!          Jdomop = IBCLR(Idomop,p-1)
+!!!
+!!!          kstart = cfg_seniority_index(Nsomo_I)
+!!!          kend = idxI-1
+!!!        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+!!!          ! DOMO -> SOMO
+!!!          !print *,'DOMO -> SOMO'
+!!!          if(p_s .eq. q_s) then
+!!!            Jsomop = IBSET(Isomop,p-1)
+!!!            Jsomop = IBCLR(Jsomop,q-1)
+!!!            Jsomoq = Jsomop
+!!!
+!!!            Jdomop = IBCLR(Idomop,p-1)
+!!!            Jdomop = IBSET(Jdomop,q-1)
+!!!            Jdomoq = Jdomop
+!!!          else
+!!!            Jsomop = IBSET(Isomop,p-1)
+!!!            Jsomoq = IBCLR(Jsomoq,q-1)
+!!!
+!!!            Jdomop = IBCLR(Idomop,p-1)
+!!!            Jdomoq = IBSET(Jdomoq,q-1)
+!!!          endif
+!!!
+!!!          kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+!!!          kend = idxI-1
+!!!        else
+!!!          print*,"Something went wrong in obtain_associated_alphaI"
+!!!        endif
+!!!
+!!!        ! Save it to Jcfg
+!!!        !print *,i,j,"0| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1)
+!!!        Jcfg(p_s,1) = Jsomop
+!!!        Jcfg(q_s,1) = Jsomoq
+!!!        Jcfg(p_s,2) = Jdomop
+!!!        Jcfg(q_s,2) = Jdomoq
+!!!        !print *,'p_s=',p_s,' q_s=', q_s
+!!!        !print *,'Jsomop=',Jsomop, ' Jsomoq=', Jsomoq, ' Jdomop=', Jdomop, ' Jdomoq=', Jdomo
+!!!        !print *,i,j,"1| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1)
+!!!        call bitstring_to_list(Jcfg(1,1),listall,nelall,N_int)
+!!!        Nsomo_J = nelall
+!!!
+!!!        ! Check for Minimal alpha electrons (MS)
+!!!        if(Nsomo_J.lt.MS)then
+!!!          cycle
+!!!        endif
+!!!
+!!!        ! Again, we don't have to search from 1
+!!!        ! we just use seniority to find the
+!!!        ! first index with NSOMO - 2 to NSOMO + 2
+!!!        ! this is what is done in kstart, kend
+!!!
+!!!        pqAlreadyGenQ = .FALSE.
+!!!        ! First check if it can be generated before
+!!!        do k = kstart, kend
+!!!          !diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)))
+!!!          !ndiffSOMO = POPCNT(diffSOMO)
+!!!          !if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
+!!!          !diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)))
+!!!          !xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+!!!          !ndiffDOMO = POPCNT(diffDOMO)
+!!!          !nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+!!!          !nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+!!!
+!!!          ndiffSOMO = 0
+!!!          ndiffDOMO = 0
+!!!          nxordiffSOMODOMO = 0
+!!!          do ii = 1, N_int
+!!!            Jsomo = Jcfg(ii,1)
+!!!            Jdomo = Jcfg(ii,2)
+!!!            diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k)))
+!!!            ndiffSOMO += POPCNT(diffSOMO)
+!!!            diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k)))
+!!!            xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+!!!            ndiffDOMO += POPCNT(diffDOMO)
+!!!            nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
+!!!            nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+!!!          end do
+!!!
+!!!          if((ndiffSOMO .ne. 0) .and. (ndiffSOMO .ne. 2)) cycle
+!!!
+!!!          if((ndiffSOMO+ndiffDOMO) .EQ. 0) then
+!!!            pqAlreadyGenQ = .TRUE.
+!!!            ppExistsQ = .TRUE.
+!!!            EXIT
+!!!          endif
+!!!          if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+!!!            pqAlreadyGenQ = .TRUE.
+!!!            EXIT
+!!!          endif
+!!!        end do
+!!!
+!!!        if(pqAlreadyGenQ) cycle
+!!!
+!!!        pqExistsQ = .FALSE.
+!!!
+!!!        if(.NOT. pqExistsQ) then
+!!!          tableUniqueAlphas(p,q) = .TRUE.
+!!!        endif
+!!!      end do
+!!!    end do
+!!!
+!!!    !print *,tableUniqueAlphas(:,:)
+!!!
+!!!    ! prune list of alphas
+!!!    Isomo = Icfg(1,1)
+!!!    Idomo = Icfg(1,2)
+!!!    Jsomo = Icfg(1,1)
+!!!    Jdomo = Icfg(1,2)
+!!!    NalphaIcfg = 0
+!!!    do i = 1, nholes
+!!!      !p = listholes(i)
+!!!      pp  = listholes(i)
+!!!      p_s = (1+((pp-1)/63))
+!!!      p   = pp - (p_s - 1)*63
+!!!      do j = 1, nvmos
+!!!        !q = listvmos(j)
+!!!        qq  = listvmos(j)
+!!!        q_s = (1+((qq-1)/63))
+!!!        q   = qq - (q_s - 1)*63
+!!!        Isomop = iand(act_bitmask(i_s,1),Icfg(p_s,1))
+!!!        Idomop = iand(act_bitmask(i_s,1),Icfg(p_s,2))
+!!!        Isomoq = iand(act_bitmask(i_s,1),Icfg(q_s,1))
+!!!        Idomoq = iand(act_bitmask(i_s,1),Icfg(q_s,2))
+!!!        if(p .EQ. q) cycle
+!!!        if(tableUniqueAlphas(p,q)) then
+!!!          if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+!!!            ! SOMO -> VMO
+!!!            !Jsomo = IBCLR(Isomo,p-1)
+!!!            !Jsomo = IBSET(Jsomo,q-1)
+!!!            !Jdomo = Idomo
+!!!            if (p_s .eq. q_s) then
+!!!              Jsomop = IBCLR(Isomop,p-1)
+!!!              Jsomop = IBSET(Jsomop,q-1)
+!!!              Jsomoq = Jsomop
+!!!            else
+!!!              Jsomop = IBCLR(Isomop,p-1)
+!!!              Jsomoq = IBSET(Isomoq,q-1)
+!!!            endif
+!!!
+!!!            ! Domo remains the same
+!!!            Jdomop = Idomop
+!!!            Jdomoq = Idomoq
+!!!
+!!!          else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+!!!            ! SOMO -> SOMO
+!!!            !Jsomo = IBCLR(Isomo,p-1)
+!!!            !Jsomo = IBCLR(Jsomo,q-1)
+!!!            !Jdomo = IBSET(Idomo,q-1)
+!!!
+!!!            if(p_s .eq. q_s) then
+!!!              Jsomop = IBCLR(Isomop,p-1)
+!!!              Jsomop = IBCLR(Jsomop,q-1)
+!!!              Jsomoq = Jsomop
+!!!            else
+!!!              Jsomop = IBCLR(Isomop,p-1)
+!!!              Jsomoq = IBCLR(Isomoq,q-1)
+!!!            endif
+!!!
+!!!            Jdomoq = IBSET(Idomoq,q-1)
+!!!
+!!!            if(POPCNT(Jsomoq).ge.MS)then
+!!!              kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+!!!              kend = idxI-1
+!!!            else
+!!!              cycle
+!!!            endif
+!!!          else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+!!!            ! DOMO -> VMO
+!!!            !Jsomo = IBSET(Isomo,p-1)
+!!!            !Jsomo = IBSET(Jsomo,q-1)
+!!!            !Jdomo = IBCLR(Idomo,p-1)
+!!!
+!!!            if(p_s .eq. q_s) then
+!!!              Jsomop = IBSET(Isomop,p-1)
+!!!              Jsomop = IBSET(Jsomop,q-1)
+!!!              Jsomoq = Jsomop
+!!!            else
+!!!              Jsomop = IBSET(Isomop,p-1)
+!!!              Jsomoq = IBSET(Jsomoq,q-1)
+!!!            endif
+!!!
+!!!            Jdomop = IBCLR(Idomop,p-1)
+!!!
+!!!          else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+!!!            ! DOMO -> SOMO
+!!!            !Jsomo = IBSET(Isomo,p-1)
+!!!            !Jsomo = IBCLR(Jsomo,q-1)
+!!!            !Jdomo = IBCLR(Idomo,p-1)
+!!!            !Jdomo = IBSET(Jdomo,q-1)
+!!!            if(p_s .eq. q_s) then
+!!!              Jsomop = IBSET(Isomop,p-1)
+!!!              Jsomop = IBCLR(Jsomop,q-1)
+!!!              Jsomoq = Jsomop
+!!!
+!!!              Jdomop = IBCLR(Idomop,p-1)
+!!!              Jdomop = IBSET(Jdomop,q-1)
+!!!              Jdomoq = Jdomop
+!!!            else
+!!!              Jsomop = IBSET(Isomop,p-1)
+!!!              Jsomoq = IBCLR(Jsomoq,q-1)
+!!!
+!!!              Jdomop = IBCLR(Idomop,p-1)
+!!!              Jdomoq = IBSET(Jdomoq,q-1)
+!!!            endif
+!!!
+!!!          else
+!!!            print*,"Something went wrong in obtain_associated_alphaI"
+!!!          endif
+!!!
+!!!          ! Save it to Jcfg
+!!!          Jcfg(p_s,1) = Jsomop
+!!!          Jcfg(q_s,1) = Jsomoq
+!!!          Jcfg(p_s,2) = Jdomop
+!!!          Jcfg(q_s,2) = Jdomoq
+!!!
+!!!          ! SOMO
+!!!          !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+!!!          if(POPCNT(Jsomo) .ge. NSOMOMin) then
+!!!            NalphaIcfg += 1
+!!!            alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
+!!!            !alphasIcfg_list(:,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+!!!            if(n_core_orb .le. 63)then
+!!!              alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1)
+!!!            else
+!!!              n_core_orb_64 = n_core_orb
+!!!              do ii=1,N_int
+!!!                if(n_core_orb_64 .gt. 0)then
+!!!                  alphasIcfg_list(ii,2,idxI,NalphaIcfg) = IOR(Jcfg(ii,2),ISHFT(1_8,n_core_orb_64)-1)
+!!!                else
+!!!                  alphasIcfg_list(ii,2,idxI,NalphaIcfg) = Jcfg(ii,2)
+!!!                endif
+!!!                n_core_orb_64 = ISHFT(n_core_orb_64,-6)
+!!!              end do
+!!!            endif
+!!!            NalphaIcfg_list(idxI) = NalphaIcfg
+!!!            !print *,i,j,"2| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1)
+!!!          endif
+!!!        endif
+!!!      end do
+!!!    end do
+!!!
+!!!    ! Check if this Icfg has been previously generated as a mono
+!!!    ppExistsQ = .False.
+!!!    Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1))
+!!!    Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2))
+!!!    kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+!!!    ndiffDOMO = 0
+!!!    do k = kstart, idxI-1
+!!!      do ii=1,N_int
+!!!        diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k)))
+!!!        ndiffSOMO += POPCNT(diffSOMO)
+!!!      end do
+!!!      ! ndiffSOMO cannot be 0 (I /= k)
+!!!      ! if ndiffSOMO /= 2 then it has to be greater than 2 and hense
+!!!      ! this Icfg could not have been generated before.
+!!!      if (ndiffSOMO /= 2) cycle
+!!!      ndiffDOMO = 0
+!!!      nxordiffSOMODOMO = 0
+!!!      do ii=1,N_int
+!!!        diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,1),psi_configuration(ii,2,k)))
+!!!        xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+!!!        ndiffDOMO += POPCNT(diffDOMO)
+!!!        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
+!!!      end do
+!!!      if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then
+!!!        ppExistsQ = .TRUE.
+!!!        EXIT
+!!!      endif
+!!!    end do
+!!!    ! Diagonal part (pp,qq)
+!!!    if(nholes > 0 .AND. (.NOT. ppExistsQ))then
+!!!      ! SOMO
+!!!      if(POPCNT(Jsomo) .ge. NSOMOMin) then
+!!!        NalphaIcfg += 1
+!!!        alphasIcfg_list(:,1,idxI,NalphaIcfg) = Icfg(:,1)
+!!!        alphasIcfg_list(:,2,idxI,NalphaIcfg) = Icfg(:,2)
+!!!        NalphaIcfg_list(idxI) = NalphaIcfg
+!!!      endif
+!!!    endif
+!!!
+!!!    NalphaIcfg = 0
+!!!  enddo ! end loop idxI
+!!!  call wall_time(t1)
+!!!  print *, 'Preparation : ', t1 - t0
+!!!
+!!!END_PROVIDER
+
  BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*(mo_num))]
 &BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ]
   implicit none
@@ -21,18 +545,16 @@ use bitmasks
   integer                        :: nvmos
   integer                        :: listvmos(mo_num)
   integer                        :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
-  integer*8                      :: Idomo, Idomop, Idomoq 
-  integer*8                      :: Isomo, Isomop, Isomoq
-  integer*8                      :: Jdomo, Jdomop, Jdomoq
-  integer*8                      :: Jsomo, Jsomop, Jsomoq
-  integer*8                      :: diffSOMO
-  integer*8                      :: diffDOMO
-  integer*8                      :: xordiffSOMODOMO
+  integer(bit_kind)              :: Idomo(N_int), Idomop(N_int), Idomoq(N_int)
+  integer(bit_kind)              :: Isomo(N_int), Isomop(N_int), Isomoq(N_int) 
+  integer(bit_kind)              :: Jdomo(N_int), Jdomop(N_int), Jdomoq(N_int)
+  integer(bit_kind)              :: Jsomo(N_int), Jsomop(N_int), Jsomoq(N_int)
+  integer(bit_kind)              :: diffDOMO(N_int), xordiffSOMODOMO(N_int), diffSOMO(N_int)
   integer                        :: ndiffSOMO
   integer                        :: ndiffDOMO
   integer                        :: nxordiffSOMODOMO
   integer                        :: ndiffAll
-  integer                        :: i,ii,iii
+  integer                        :: i,ii,iii, iint, jint, ipos, jpos
   integer                        :: j,jj, i_s, i_d
   integer                        :: k,kk
   integer                        :: kstart
@@ -60,56 +582,30 @@ use bitmasks
 
     Icfg  = psi_configuration(:,:,idxI)
     Jcfg  = psi_configuration(:,:,idxI)
-    !print *," Jcfg somo=",Jcfg(1,1), " ", Jcfg(2,1)
-    !print *," Jcfg domo=",Jcfg(1,2), " ", Jcfg(2,2)
 
-    Isomo = iand(act_bitmask(1,1),Icfg(1,1))
-    Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+    do i=1, N_int
+      Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
+      Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+    enddo
 
     ! find out all pq holes possible
     nholes = 0
-    ! holes in SOMO
-    !do ii = 1,n_act_orb
-    !  i = list_act(ii)
-    !  if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
-    !    nholes += 1
-    !    listholes(nholes) = i
-    !    holetype(nholes) = 1
-    !  endif
-    !end do
-        call bitstring_to_list(psi_configuration(1,1,idxI),listall,nelall,N_int)
+        call bitstring_to_list(Isomo,listall,nelall,N_int)
 
-        !print *,'list somo'
         do iii=1,nelall
           nholes += 1
           listholes(nholes) = listall(iii)
-            !print *,listall(iii)
           holetype(nholes) = 1
         end do
 
         Nsomo_I = nelall
 
-    ! holes in DOMO
-    !do ii = 1,n_act_orb
-    !  i = list_act(ii)
-    !  if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
-    !    nholes += 1
-    !    listholes(nholes) = i
-    !    holetype(nholes) = 2
-    !  endif
-    !end do
+        call bitstring_to_list(Idomo,listall,nelall,N_int)
 
-    !do iii=1,N_int
-    !  print *,' iii=',iii, psi_configuration(iii,2,idxI), ' idxI=',idxI
-    !end do
-        call bitstring_to_list(psi_configuration(1,2,idxI),listall,nelall,N_int)
-
-        !print *,'list domo ncore=',n_core_orb, ' nelall=',nelall
         do iii=1,nelall
           if(listall(iii) .gt. n_core_orb)then
             nholes += 1
             listholes(nholes) = listall(iii)
-            !print *,listall(iii)
             holetype(nholes) = 2
           endif
         end do
@@ -118,38 +614,19 @@ use bitmasks
     listvmos = -1
     vmotype = -1
     nvmos = 0
-    !do ii = 1,n_act_orb
-    !  i = list_act(ii)
-    !  if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then
-    !    if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then
-    !      nvmos += 1
-    !      listvmos(nvmos) = i
-    !      print *,'1 i=',i
-    !      vmotype(nvmos) = 1
-    !    else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then
-    !      nvmos += 1
-    !      listvmos(nvmos) = i
-    !      print *,'2 i=',i
-    !      vmotype(nvmos) = 2
-    !    end if
-    !  end if
-    !end do
-    !print *,'-----------'
 
     ! Take into account N_int
     do ii = 1, n_act_orb
       iii = list_act(ii)
-      i_s = (1+((iii-1)/63))
-      i = iii - ( i_s -1 )*63 
-      Isomo = iand(act_bitmask(i_s,1),Icfg(i_s,1))
-      Idomo = iand(act_bitmask(i_s,1),Icfg(i_s,2))
+      iint = shiftr(iii-1,bit_kind_shift) + 1
+      ipos = iii-shiftl((iint-1),bit_kind_shift)-1
 
-      if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then
-        if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then
+      if(IAND(Idomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then
+        if(IAND(Isomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then
           nvmos += 1
           listvmos(nvmos) = iii
           vmotype(nvmos) = 1
-        else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then
+        else if(POPCNT(IAND(Isomo(iint),(IBSET(0_8,ipos)))) .EQ. 1) then
           nvmos += 1
           listvmos(nvmos) = iii
           vmotype(nvmos) = 2
@@ -160,9 +637,13 @@ use bitmasks
     tableUniqueAlphas = .FALSE.
 
     ! Now find the allowed (p,q) excitations
-    Isomo = iand(act_bitmask(1,1),Icfg(1,1))
-    Idomo = iand(act_bitmask(1,1),Icfg(1,2))
-    !Nsomo_I = POPCNT(Isomo)
+    do i=1, N_int
+      Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
+      Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+      Jsomo(i) = Isomo(i)
+      Jdomo(i) = Idomo(i)
+    enddo
+
     if(Nsomo_I .EQ. 0) then
       kstart = 1
     else
@@ -172,110 +653,25 @@ use bitmasks
 
     do i = 1,nholes
       pp  = listholes(i)
-      p_s = (1+((pp-1)/63))
-      p   = pp - (p_s - 1)*63
-      !print *,' pp=',pp, ' p_s=',p_s, ' p=',p
+      iint = shiftr(pp-1,bit_kind_shift) + 1
+      ipos = pp-shiftl((iint-1),bit_kind_shift)-1
+      if(holetype(i) == 1)then
+        Jsomo(iint) = IBCLR(Jsomo(iint),ipos)
+      else if(holetype(i) == 2)then
+        Jdomo(iint) = IBCLR(Jdomo(iint),ipos)
+      endif
+
       do j = 1,nvmos
         qq  = listvmos(j)
-        q_s = (1+((qq-1)/63))
-        q   = qq - (q_s - 1)*63
-        !print *,' qq=',qq, ' q_s=',q_s, ' q=',q
-        Isomop = iand(act_bitmask(i_s,1),Icfg(p_s,1))
-        Idomop = iand(act_bitmask(i_s,1),Icfg(p_s,2))
-        Isomop = iand(act_bitmask(i_s,1),Icfg(q_s,1))
-        Idomop = iand(act_bitmask(i_s,1),Icfg(q_s,2))
-        if(p .EQ. q) cycle
-        if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
-          ! SOMO -> VMO
-          !print *,'SOMO -> VMO'
-          if (p_s .eq. q_s) then
-            Jsomop = IBCLR(Isomop,p-1)
-            Jsomop = IBSET(Jsomop,q-1)
-            Jsomoq = Jsomop
-          else
-            Jsomop = IBCLR(Isomop,p-1)
-            Jsomoq = IBSET(Isomoq,q-1)
-          endif
-
-          ! Domo remains the same
-          Jdomop = Idomop
-          Jdomoq = Idomoq
-
-          kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
-          kend = idxI-1
-        else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
-          ! SOMO -> SOMO
-          !print *,'SOMO -> SOMO'
-          if(p_s .eq. q_s) then
-            Jsomop = IBCLR(Isomop,p-1)
-            Jsomop = IBCLR(Jsomop,q-1)
-            Jsomoq = Jsomop
-          else
-            Jsomop = IBCLR(Isomop,p-1)
-            Jsomoq = IBCLR(Isomoq,q-1)
-          endif
-
-          Jdomoq = IBSET(Idomoq,q-1)
-
-          ! Check for Minimal alpha electrons (MS)
-          if(POPCNT(Jsomoq).ge.MS)then
-            kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
-            kend = idxI-1
-          else
-            cycle
-          endif
-        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
-          ! DOMO -> VMO
-          !print *,'DOMO -> VMO', Isomop, p, q, Jsomop
-          if(p_s .eq. q_s) then
-            Jsomop = IBSET(Isomop,p-1)
-            Jsomop = IBSET(Jsomop,q-1)
-            Jsomoq = Jsomop
-          else
-            Jsomop = IBSET(Isomop,p-1)
-            Jsomoq = IBSET(Jsomoq,q-1)
-          endif
-          !print *, 'Jsomop=', Jsomop
-
-          Jdomop = IBCLR(Idomop,p-1)
-
-          kstart = cfg_seniority_index(Nsomo_I)
-          kend = idxI-1
-        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
-          ! DOMO -> SOMO
-          !print *,'DOMO -> SOMO'
-          if(p_s .eq. q_s) then
-            Jsomop = IBSET(Isomop,p-1)
-            Jsomop = IBCLR(Jsomop,q-1)
-            Jsomoq = Jsomop
-
-            Jdomop = IBCLR(Idomop,p-1)
-            Jdomop = IBSET(Jdomop,q-1)
-            Jdomoq = Jdomop
-          else
-            Jsomop = IBSET(Isomop,p-1)
-            Jsomoq = IBCLR(Jsomoq,q-1)
-
-            Jdomop = IBCLR(Idomop,p-1)
-            Jdomoq = IBSET(Jdomoq,q-1)
-          endif
-
-          kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
-          kend = idxI-1
-        else
-          print*,"Something went wrong in obtain_associated_alphaI"
+        jint = shiftr(qq-1,bit_kind_shift) + 1
+        jpos = qq-shiftl((iint-1),bit_kind_shift)-1
+        if(vmotype(j) == 1)then
+          Jsomo(jint) = IBSET(Jsomo(jint),jpos)
+        else if(vmotype(j) == 2)then
+          Jdomo(jint) = IBSET(Jdomo(jint),jpos)
         endif
 
-        ! Save it to Jcfg
-        !print *,i,j,"0| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1)
-        Jcfg(p_s,1) = Jsomop
-        Jcfg(q_s,1) = Jsomoq
-        Jcfg(p_s,2) = Jdomop
-        Jcfg(q_s,2) = Jdomoq
-        !print *,'p_s=',p_s,' q_s=', q_s
-        !print *,'Jsomop=',Jsomop, ' Jsomoq=', Jsomoq, ' Jdomop=', Jdomop, ' Jdomoq=', Jdomo
-        !print *,i,j,"1| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1)
-        call bitstring_to_list(Jcfg(1,1),listall,nelall,N_int)
+        call bitstring_to_list(Jcfg,listall,nelall,N_int)
         Nsomo_J = nelall
 
         ! Check for Minimal alpha electrons (MS)
@@ -291,15 +687,6 @@ use bitmasks
         pqAlreadyGenQ = .FALSE.
         ! First check if it can be generated before
         do k = kstart, kend
-          !diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)))
-          !ndiffSOMO = POPCNT(diffSOMO)
-          !if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
-          !diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)))
-          !xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-          !ndiffDOMO = POPCNT(diffDOMO)
-          !nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
-          !nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
-
           ndiffSOMO = 0
           ndiffDOMO = 0
           nxordiffSOMODOMO = 0
@@ -307,11 +694,11 @@ use bitmasks
             Jsomo = Jcfg(ii,1)
             Jdomo = Jcfg(ii,2)
             diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k)))
-            ndiffSOMO += POPCNT(diffSOMO)
+            ndiffSOMO += POPCNT(diffSOMO(ii))
             diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k)))
             xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-            ndiffDOMO += POPCNT(diffDOMO)
-            nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
+            ndiffDOMO += POPCNT(diffDOMO(ii))
+            nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO(ii))
             nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
           end do
 
@@ -335,7 +722,19 @@ use bitmasks
         if(.NOT. pqExistsQ) then
           tableUniqueAlphas(p,q) = .TRUE.
         endif
+        
+
+        if(vmotype(j) == 1)then
+          Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
+        else if(vmotype(j) == 2)then
+          Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+        endif
       end do
+      if(holetype(i) == 1)then
+        Jsomo(iint) = IBSET(Jsomo(iint),ipos)
+      else if(holetype(i) == 2)then
+        Jdomo(iint) = IBSET(Jdomo(iint),ipos)
+      endif
     end do
 
     !print *,tableUniqueAlphas(:,:)
@@ -347,117 +746,33 @@ use bitmasks
     Jdomo = Icfg(1,2)
     NalphaIcfg = 0
     do i = 1, nholes
-      !p = listholes(i)
       pp  = listholes(i)
-      p_s = (1+((pp-1)/63))
-      p   = pp - (p_s - 1)*63
+      iint = shiftr(pp-1,bit_kind_shift) + 1
+      ipos = pp-shiftl((iint-1),bit_kind_shift)-1
+      if(holetype(i) == 1)then
+        Jsomo(iint) = IBCLR(Jsomo(iint),ipos)
+      else if(holetype(i) == 2)then
+        Jdomo(iint) = IBCLR(Jdomo(iint),ipos)
+      endif
+
       do j = 1, nvmos
-        !q = listvmos(j)
         qq  = listvmos(j)
-        q_s = (1+((qq-1)/63))
-        q   = qq - (q_s - 1)*63
-        Isomop = iand(act_bitmask(i_s,1),Icfg(p_s,1))
-        Idomop = iand(act_bitmask(i_s,1),Icfg(p_s,2))
-        Isomoq = iand(act_bitmask(i_s,1),Icfg(q_s,1))
-        Idomoq = iand(act_bitmask(i_s,1),Icfg(q_s,2))
-        if(p .EQ. q) cycle
-        if(tableUniqueAlphas(p,q)) then
-          if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
-            ! SOMO -> VMO
-            !Jsomo = IBCLR(Isomo,p-1)
-            !Jsomo = IBSET(Jsomo,q-1)
-            !Jdomo = Idomo
-            if (p_s .eq. q_s) then
-              Jsomop = IBCLR(Isomop,p-1)
-              Jsomop = IBSET(Jsomop,q-1)
-              Jsomoq = Jsomop
-            else
-              Jsomop = IBCLR(Isomop,p-1)
-              Jsomoq = IBSET(Isomoq,q-1)
-            endif
+        jint = shiftr(qq-1,bit_kind_shift) + 1
+        jpos = qq-shiftl((iint-1),bit_kind_shift)-1
+        if(vmotype(j) == 1)then
+          Jsomo(jint) = IBSET(Jsomo(jint),jpos)
+        else if(vmotype(j) == 2)then
+          Jdomo(jint) = IBSET(Jdomo(jint),jpos)
+        endif
+        if(pp .EQ. qq) cycle
+        if(tableUniqueAlphas(pp,qq)) then
 
-            ! Domo remains the same
-            Jdomop = Idomop
-            Jdomoq = Idomoq
+          call bitstring_to_list(Jcfg,listall,nelall,N_int)
+          Nsomo_J = nelall
 
-          else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
-            ! SOMO -> SOMO
-            !Jsomo = IBCLR(Isomo,p-1)
-            !Jsomo = IBCLR(Jsomo,q-1)
-            !Jdomo = IBSET(Idomo,q-1)
-
-            if(p_s .eq. q_s) then
-              Jsomop = IBCLR(Isomop,p-1)
-              Jsomop = IBCLR(Jsomop,q-1)
-              Jsomoq = Jsomop
-            else
-              Jsomop = IBCLR(Isomop,p-1)
-              Jsomoq = IBCLR(Isomoq,q-1)
-            endif
-
-            Jdomoq = IBSET(Idomoq,q-1)
-
-            if(POPCNT(Jsomoq).ge.MS)then
-              kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
-              kend = idxI-1
-            else
-              cycle
-            endif
-          else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
-            ! DOMO -> VMO
-            !Jsomo = IBSET(Isomo,p-1)
-            !Jsomo = IBSET(Jsomo,q-1)
-            !Jdomo = IBCLR(Idomo,p-1)
-
-            if(p_s .eq. q_s) then
-              Jsomop = IBSET(Isomop,p-1)
-              Jsomop = IBSET(Jsomop,q-1)
-              Jsomoq = Jsomop
-            else
-              Jsomop = IBSET(Isomop,p-1)
-              Jsomoq = IBSET(Jsomoq,q-1)
-            endif
-
-            Jdomop = IBCLR(Idomop,p-1)
-
-          else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
-            ! DOMO -> SOMO
-            !Jsomo = IBSET(Isomo,p-1)
-            !Jsomo = IBCLR(Jsomo,q-1)
-            !Jdomo = IBCLR(Idomo,p-1)
-            !Jdomo = IBSET(Jdomo,q-1)
-            if(p_s .eq. q_s) then
-              Jsomop = IBSET(Isomop,p-1)
-              Jsomop = IBCLR(Jsomop,q-1)
-              Jsomoq = Jsomop
-
-              Jdomop = IBCLR(Idomop,p-1)
-              Jdomop = IBSET(Jdomop,q-1)
-              Jdomoq = Jdomop
-            else
-              Jsomop = IBSET(Isomop,p-1)
-              Jsomoq = IBCLR(Jsomoq,q-1)
-
-              Jdomop = IBCLR(Idomop,p-1)
-              Jdomoq = IBSET(Jdomoq,q-1)
-            endif
-
-          else
-            print*,"Something went wrong in obtain_associated_alphaI"
-          endif
-
-          ! Save it to Jcfg
-          Jcfg(p_s,1) = Jsomop
-          Jcfg(q_s,1) = Jsomoq
-          Jcfg(p_s,2) = Jdomop
-          Jcfg(q_s,2) = Jdomoq
-
-          ! SOMO
-          !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
-          if(POPCNT(Jsomo) .ge. NSOMOMin) then
+          if(Nsomo_J .ge. NSOMOMin) then
             NalphaIcfg += 1
             alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
-            !alphasIcfg_list(:,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
             if(n_core_orb .le. 63)then
               alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1)
             else
@@ -472,22 +787,35 @@ use bitmasks
               end do
             endif
             NalphaIcfg_list(idxI) = NalphaIcfg
-            !print *,i,j,"2| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1)
           endif
         endif
+
+        if(vmotype(j) == 1)then
+          Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
+        else if(vmotype(j) == 2)then
+          Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+        endif
       end do
+      if(holetype(i) == 1)then
+        Jsomo(iint) = IBSET(Jsomo(iint),ipos)
+      else if(holetype(i) == 2)then
+        Jdomo(iint) = IBSET(Jdomo(iint),ipos)
+      endif
     end do
 
     ! Check if this Icfg has been previously generated as a mono
     ppExistsQ = .False.
-    Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1))
-    Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2))
+    do i=1, N_int
+      Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
+      Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+    enddo
+
     kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
     ndiffDOMO = 0
     do k = kstart, idxI-1
       do ii=1,N_int
         diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k)))
-        ndiffSOMO += POPCNT(diffSOMO)
+        ndiffSOMO += POPCNT(diffSOMO(ii))
       end do
       ! ndiffSOMO cannot be 0 (I /= k)
       ! if ndiffSOMO /= 2 then it has to be greater than 2 and hense
@@ -498,8 +826,8 @@ use bitmasks
       do ii=1,N_int
         diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,1),psi_configuration(ii,2,k)))
         xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-        ndiffDOMO += POPCNT(diffDOMO)
-        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
+        ndiffDOMO += POPCNT(diffDOMO(ii))
+        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO(ii))
       end do
       if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then
         ppExistsQ = .TRUE.
@@ -509,7 +837,7 @@ use bitmasks
     ! Diagonal part (pp,qq)
     if(nholes > 0 .AND. (.NOT. ppExistsQ))then
       ! SOMO
-      if(POPCNT(Jsomo) .ge. NSOMOMin) then
+      if(Nsomo_I .ge. NSOMOMin) then
         NalphaIcfg += 1
         alphasIcfg_list(:,1,idxI,NalphaIcfg) = Icfg(:,1)
         alphasIcfg_list(:,2,idxI,NalphaIcfg) = Icfg(:,2)

From 57bb5ed4dd6812e030e02e6208d96711650fa63e Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Wed, 23 Nov 2022 12:10:22 +0100
Subject: [PATCH 04/35] Remove orthoqr csf.

---
 src/csf/cfgCI_utils.c          |  36 ++---
 src/utils/linear_algebra.irp.f | 256 ++++++++++++++++-----------------
 2 files changed, 146 insertions(+), 146 deletions(-)

diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c
index 76b64dd0..5807375a 100644
--- a/src/csf/cfgCI_utils.c
+++ b/src/csf/cfgCI_utils.c
@@ -253,25 +253,25 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM
     buildTreeDriver(bftree, *NSOMO, MS, NBF);
 }
 
-void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols);
+//void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols);
 
-void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix){
-  int i,j;
-  //for(j=0;j<cols;++j){
-  //  for(i=0;i<rows;++i){
-  //    printf(" %3.2f ",overlapMatrix[j*rows + i]);
-  //  }
-  //  printf("\n");
-  //}
-  // Call the function ortho_qr from qp
-  ortho_qr_csf(overlapMatrix, rows, orthoMatrix, rows, cols);
-  //for(j=0;j<cols;++j){
-  //  for(i=0;i<rows;++i){
-  //    printf(" %3.2f ",orthoMatrix[j*rows + i]);
-  //  }
-  //  printf("\n");
-  //}
-}
+//void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix){
+//  int i,j;
+//  //for(j=0;j<cols;++j){
+//  //  for(i=0;i<rows;++i){
+//  //    printf(" %3.2f ",overlapMatrix[j*rows + i]);
+//  //  }
+//  //  printf("\n");
+//  //}
+//  // Call the function ortho_qr from qp
+//  ortho_qr_csf(overlapMatrix, rows, orthoMatrix, rows, cols);
+//  //for(j=0;j<cols;++j){
+//  //  for(i=0;i<rows;++i){
+//  //    printf(" %3.2f ",orthoMatrix[j*rows + i]);
+//  //  }
+//  //  printf("\n");
+//  //}
+//}
 
 void gramSchmidt(double *overlapMatrix, int rows, int cols, double *orthoMatrix){
 
diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index 3463370f..d1abf87b 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -458,37 +458,37 @@ subroutine ortho_canonical_complex(overlap,LDA,N,C,LDC,m,cutoff)
 end
 
 
-subroutine ortho_qr_complex(A,LDA,m,n)
-  implicit none
-  BEGIN_DOC
-  ! Orthogonalization using Q.R factorization
-  !
-  ! A : matrix to orthogonalize
-  !
-  ! LDA : leftmost dimension of A
-  !
-  ! n : Number of rows of A
-  !
-  ! m : Number of columns of A
-  !
-  END_DOC
-  integer, intent(in)            :: m,n, LDA
-  complex*16, intent(inout) :: A(LDA,n)
-
-  integer                        :: lwork, info
-  integer, allocatable           :: jpvt(:)
-  complex*16, allocatable  :: tau(:), work(:)
-
-  allocate (jpvt(n), tau(n), work(1))
-  LWORK=-1
-  call  zgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
-  LWORK=2*int(WORK(1))
-  deallocate(WORK)
-  allocate(WORK(LWORK))
-  call  zgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
-  call  zungqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO)
-  deallocate(WORK,jpvt,tau)
-end
+!!subroutine ortho_qr_complex(A,LDA,m,n)
+!!  implicit none
+!!  BEGIN_DOC
+!!  ! Orthogonalization using Q.R factorization
+!!  !
+!!  ! A : matrix to orthogonalize
+!!  !
+!!  ! LDA : leftmost dimension of A
+!!  !
+!!  ! n : Number of rows of A
+!!  !
+!!  ! m : Number of columns of A
+!!  !
+!!  END_DOC
+!!  integer, intent(in)            :: m,n, LDA
+!!  complex*16, intent(inout) :: A(LDA,n)
+!!
+!!  integer                        :: lwork, info
+!!  integer, allocatable           :: jpvt(:)
+!!  complex*16, allocatable  :: tau(:), work(:)
+!!
+!!  allocate (jpvt(n), tau(n), work(1))
+!!  LWORK=-1
+!!  call  zgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
+!!  LWORK=2*int(WORK(1))
+!!  deallocate(WORK)
+!!  allocate(WORK(LWORK))
+!!  call  zgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
+!!  call  zungqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO)
+!!  deallocate(WORK,jpvt,tau)
+!!end
 
 subroutine ortho_qr_unblocked_complex(A,LDA,m,n)
   implicit none
@@ -1132,103 +1132,103 @@ subroutine ortho_svd(A,LDA,m,n)
   deallocate(U,D, Vt)
 
 end
-
-subroutine ortho_qr_withB(A,LDA,B,m,n)
-  implicit none
-  BEGIN_DOC
-  ! Orthogonalization using Q.R factorization
-  !
-  ! A : Overlap Matrix
-  !
-  ! LDA : leftmost dimension of A
-  !
-  ! m : Number of rows of A
-  !
-  ! n : Number of columns of A
-  !
-  ! B : Output orthogonal basis
-  !
-  END_DOC
-  integer, intent(in)            :: m,n, LDA
-  double precision, intent(inout) :: A(LDA,n)
-  double precision, intent(inout) :: B(LDA,n)
-
-  integer                        :: LWORK, INFO
-  integer, allocatable           :: jpvt(:)
-  double precision, allocatable  :: TAU(:), WORK(:)
-  double precision, allocatable  :: C(:,:)
-  double precision :: norm
-  integer :: i,j
-
-  allocate (TAU(min(m,n)), WORK(1))
-  allocate (jpvt(n))
-  !print *," In function ortho"
-  B = A
-
-  jpvt(1:n)=1
-
-  LWORK=-1
-  call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
-
-  ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
-  LWORK=max(n,int(WORK(1)))
-
-  deallocate(WORK)
-  allocate(WORK(LWORK))
-  call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
-  print *,A
-  print *,jpvt
-  deallocate(WORK,TAU)
-  !stop
-
-  !LWORK=-1
-  !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
-  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
-  !LWORK=max(n,int(WORK(1)))
-
-  !deallocate(WORK)
-  !allocate(WORK(LWORK))
-  !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
-
-  !LWORK=-1
-  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
-  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
-  !LWORK=max(n,int(WORK(1)))
-
-  !deallocate(WORK)
-  !allocate(WORK(LWORK))
-  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
-  !
-  !allocate(C(LDA,n))
-  !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA)
-  !norm = 0.0d0
-  !B = 0.0d0
-  !!print *,C
-  !do i=1,m
-  !  norm = 0.0d0
-  !  do j=1,n
-  !    norm = norm + C(j,i)*C(j,i)
-  !  end do
-  !  norm = 1.0d0/dsqrt(norm)
-  !  do j=1,n
-  !    B(j,i) = C(j,i)
-  !  end do
-  !end do
-  !print *,B
-
-
-  !deallocate(WORK,TAU)
-end
-
-subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf")
-  use iso_c_binding
-  integer(c_int32_t), value      :: LDA
-  integer(c_int32_t), value      :: m
-  integer(c_int32_t), value      :: n
-  integer(c_int16_t)             :: A(LDA,n)
-  integer(c_int16_t)             :: B(LDA,n)
-  call ortho_qr_withB(A,LDA,B,m,n)
-end subroutine ortho_qr_csf
+!!
+!!subroutine ortho_qr_withB(A,LDA,B,m,n)
+!!  implicit none
+!!  BEGIN_DOC
+!!  ! Orthogonalization using Q.R factorization
+!!  !
+!!  ! A : Overlap Matrix
+!!  !
+!!  ! LDA : leftmost dimension of A
+!!  !
+!!  ! m : Number of rows of A
+!!  !
+!!  ! n : Number of columns of A
+!!  !
+!!  ! B : Output orthogonal basis
+!!  !
+!!  END_DOC
+!!  integer, intent(in)            :: m,n, LDA
+!!  double precision, intent(inout) :: A(LDA,n)
+!!  double precision, intent(inout) :: B(LDA,n)
+!!
+!!  integer                        :: LWORK, INFO
+!!  integer, allocatable           :: jpvt(:)
+!!  double precision, allocatable  :: TAU(:), WORK(:)
+!!  double precision, allocatable  :: C(:,:)
+!!  double precision :: norm
+!!  integer :: i,j
+!!
+!!  allocate (TAU(min(m,n)), WORK(1))
+!!  allocate (jpvt(n))
+!!  !print *," In function ortho"
+!!  B = A
+!!
+!!  jpvt(1:n)=1
+!!
+!!  LWORK=-1
+!!  call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
+!!
+!!  ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!!  LWORK=max(n,int(WORK(1)))
+!!
+!!  deallocate(WORK)
+!!  allocate(WORK(LWORK))
+!!  call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
+!!  print *,A
+!!  print *,jpvt
+!!  deallocate(WORK,TAU)
+!!  !stop
+!!
+!!  !LWORK=-1
+!!  !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
+!!  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!!  !LWORK=max(n,int(WORK(1)))
+!!
+!!  !deallocate(WORK)
+!!  !allocate(WORK(LWORK))
+!!  !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
+!!
+!!  !LWORK=-1
+!!  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
+!!  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!!  !LWORK=max(n,int(WORK(1)))
+!!
+!!  !deallocate(WORK)
+!!  !allocate(WORK(LWORK))
+!!  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
+!!  !
+!!  !allocate(C(LDA,n))
+!!  !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA)
+!!  !norm = 0.0d0
+!!  !B = 0.0d0
+!!  !!print *,C
+!!  !do i=1,m
+!!  !  norm = 0.0d0
+!!  !  do j=1,n
+!!  !    norm = norm + C(j,i)*C(j,i)
+!!  !  end do
+!!  !  norm = 1.0d0/dsqrt(norm)
+!!  !  do j=1,n
+!!  !    B(j,i) = C(j,i)
+!!  !  end do
+!!  !end do
+!!  !print *,B
+!!
+!!
+!!  !deallocate(WORK,TAU)
+!!end
+!!
+!!subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf")
+!!  use iso_c_binding
+!!  integer(c_int32_t), value      :: LDA
+!!  integer(c_int32_t), value      :: m
+!!  integer(c_int32_t), value      :: n
+!!  integer(c_int16_t)             :: A(LDA,n)
+!!  integer(c_int16_t)             :: B(LDA,n)
+!!  call ortho_qr_withB(A,LDA,B,m,n)
+!!end subroutine ortho_qr_csf
 
 subroutine ortho_qr(A,LDA,m,n)
   implicit none

From 677c6a6324f5366bb621ed68c88dedff4a23f39b Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Wed, 23 Nov 2022 13:58:32 +0100
Subject: [PATCH 05/35] Remove double comment.

---
 src/utils/linear_algebra.irp.f | 256 ++++++++++++++++-----------------
 1 file changed, 128 insertions(+), 128 deletions(-)

diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index d1abf87b..aa1bde97 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -458,37 +458,37 @@ subroutine ortho_canonical_complex(overlap,LDA,N,C,LDC,m,cutoff)
 end
 
 
-!!subroutine ortho_qr_complex(A,LDA,m,n)
-!!  implicit none
-!!  BEGIN_DOC
-!!  ! Orthogonalization using Q.R factorization
-!!  !
-!!  ! A : matrix to orthogonalize
-!!  !
-!!  ! LDA : leftmost dimension of A
-!!  !
-!!  ! n : Number of rows of A
-!!  !
-!!  ! m : Number of columns of A
-!!  !
-!!  END_DOC
-!!  integer, intent(in)            :: m,n, LDA
-!!  complex*16, intent(inout) :: A(LDA,n)
-!!
-!!  integer                        :: lwork, info
-!!  integer, allocatable           :: jpvt(:)
-!!  complex*16, allocatable  :: tau(:), work(:)
-!!
-!!  allocate (jpvt(n), tau(n), work(1))
-!!  LWORK=-1
-!!  call  zgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
-!!  LWORK=2*int(WORK(1))
-!!  deallocate(WORK)
-!!  allocate(WORK(LWORK))
-!!  call  zgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
-!!  call  zungqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO)
-!!  deallocate(WORK,jpvt,tau)
-!!end
+subroutine ortho_qr_complex(A,LDA,m,n)
+  implicit none
+  BEGIN_DOC
+  ! Orthogonalization using Q.R factorization
+  !
+  ! A : matrix to orthogonalize
+  !
+  ! LDA : leftmost dimension of A
+  !
+  ! n : Number of rows of A
+  !
+  ! m : Number of columns of A
+  !
+  END_DOC
+  integer, intent(in)            :: m,n, LDA
+  complex*16, intent(inout) :: A(LDA,n)
+
+  integer                        :: lwork, info
+  integer, allocatable           :: jpvt(:)
+  complex*16, allocatable  :: tau(:), work(:)
+
+  allocate (jpvt(n), tau(n), work(1))
+  LWORK=-1
+  call  zgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
+  LWORK=2*int(WORK(1))
+  deallocate(WORK)
+  allocate(WORK(LWORK))
+  call  zgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
+  call  zungqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO)
+  deallocate(WORK,jpvt,tau)
+end
 
 subroutine ortho_qr_unblocked_complex(A,LDA,m,n)
   implicit none
@@ -1132,103 +1132,103 @@ subroutine ortho_svd(A,LDA,m,n)
   deallocate(U,D, Vt)
 
 end
-!!
-!!subroutine ortho_qr_withB(A,LDA,B,m,n)
-!!  implicit none
-!!  BEGIN_DOC
-!!  ! Orthogonalization using Q.R factorization
-!!  !
-!!  ! A : Overlap Matrix
-!!  !
-!!  ! LDA : leftmost dimension of A
-!!  !
-!!  ! m : Number of rows of A
-!!  !
-!!  ! n : Number of columns of A
-!!  !
-!!  ! B : Output orthogonal basis
-!!  !
-!!  END_DOC
-!!  integer, intent(in)            :: m,n, LDA
-!!  double precision, intent(inout) :: A(LDA,n)
-!!  double precision, intent(inout) :: B(LDA,n)
-!!
-!!  integer                        :: LWORK, INFO
-!!  integer, allocatable           :: jpvt(:)
-!!  double precision, allocatable  :: TAU(:), WORK(:)
-!!  double precision, allocatable  :: C(:,:)
-!!  double precision :: norm
-!!  integer :: i,j
-!!
-!!  allocate (TAU(min(m,n)), WORK(1))
-!!  allocate (jpvt(n))
-!!  !print *," In function ortho"
-!!  B = A
-!!
-!!  jpvt(1:n)=1
-!!
-!!  LWORK=-1
-!!  call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
-!!
-!!  ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
-!!  LWORK=max(n,int(WORK(1)))
-!!
-!!  deallocate(WORK)
-!!  allocate(WORK(LWORK))
-!!  call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
-!!  print *,A
-!!  print *,jpvt
-!!  deallocate(WORK,TAU)
-!!  !stop
-!!
-!!  !LWORK=-1
-!!  !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
-!!  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
-!!  !LWORK=max(n,int(WORK(1)))
-!!
-!!  !deallocate(WORK)
-!!  !allocate(WORK(LWORK))
-!!  !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
-!!
-!!  !LWORK=-1
-!!  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
-!!  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
-!!  !LWORK=max(n,int(WORK(1)))
-!!
-!!  !deallocate(WORK)
-!!  !allocate(WORK(LWORK))
-!!  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
-!!  !
-!!  !allocate(C(LDA,n))
-!!  !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA)
-!!  !norm = 0.0d0
-!!  !B = 0.0d0
-!!  !!print *,C
-!!  !do i=1,m
-!!  !  norm = 0.0d0
-!!  !  do j=1,n
-!!  !    norm = norm + C(j,i)*C(j,i)
-!!  !  end do
-!!  !  norm = 1.0d0/dsqrt(norm)
-!!  !  do j=1,n
-!!  !    B(j,i) = C(j,i)
-!!  !  end do
-!!  !end do
-!!  !print *,B
-!!
-!!
-!!  !deallocate(WORK,TAU)
-!!end
-!!
-!!subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf")
-!!  use iso_c_binding
-!!  integer(c_int32_t), value      :: LDA
-!!  integer(c_int32_t), value      :: m
-!!  integer(c_int32_t), value      :: n
-!!  integer(c_int16_t)             :: A(LDA,n)
-!!  integer(c_int16_t)             :: B(LDA,n)
-!!  call ortho_qr_withB(A,LDA,B,m,n)
-!!end subroutine ortho_qr_csf
+
+!subroutine ortho_qr_withB(A,LDA,B,m,n)
+!  implicit none
+!  BEGIN_DOC
+!  ! Orthogonalization using Q.R factorization
+!  !
+!  ! A : Overlap Matrix
+!  !
+!  ! LDA : leftmost dimension of A
+!  !
+!  ! m : Number of rows of A
+!  !
+!  ! n : Number of columns of A
+!  !
+!  ! B : Output orthogonal basis
+!  !
+!  END_DOC
+!  integer, intent(in)            :: m,n, LDA
+!  double precision, intent(inout) :: A(LDA,n)
+!  double precision, intent(inout) :: B(LDA,n)
+!
+!  integer                        :: LWORK, INFO
+!  integer, allocatable           :: jpvt(:)
+!  double precision, allocatable  :: TAU(:), WORK(:)
+!  double precision, allocatable  :: C(:,:)
+!  double precision :: norm
+!  integer :: i,j
+!
+!  allocate (TAU(min(m,n)), WORK(1))
+!  allocate (jpvt(n))
+!  !print *," In function ortho"
+!  B = A
+!
+!  jpvt(1:n)=1
+!
+!  LWORK=-1
+!  call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
+!
+!  ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!  LWORK=max(n,int(WORK(1)))
+!
+!  deallocate(WORK)
+!  allocate(WORK(LWORK))
+!  call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
+!  print *,A
+!  print *,jpvt
+!  deallocate(WORK,TAU)
+!  !stop
+!
+!  !LWORK=-1
+!  !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
+!  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!  !LWORK=max(n,int(WORK(1)))
+!
+!  !deallocate(WORK)
+!  !allocate(WORK(LWORK))
+!  !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
+!
+!  !LWORK=-1
+!  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
+!  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!  !LWORK=max(n,int(WORK(1)))
+!
+!  !deallocate(WORK)
+!  !allocate(WORK(LWORK))
+!  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
+!  !
+!  !allocate(C(LDA,n))
+!  !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA)
+!  !norm = 0.0d0
+!  !B = 0.0d0
+!  !!print *,C
+!  !do i=1,m
+!  !  norm = 0.0d0
+!  !  do j=1,n
+!  !    norm = norm + C(j,i)*C(j,i)
+!  !  end do
+!  !  norm = 1.0d0/dsqrt(norm)
+!  !  do j=1,n
+!  !    B(j,i) = C(j,i)
+!  !  end do
+!  !end do
+!  !print *,B
+!
+!
+!  !deallocate(WORK,TAU)
+!end
+!
+!subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf")
+!  use iso_c_binding
+!  integer(c_int32_t), value      :: LDA
+!  integer(c_int32_t), value      :: m
+!  integer(c_int32_t), value      :: n
+!  integer(c_int16_t)             :: A(LDA,n)
+!  integer(c_int16_t)             :: B(LDA,n)
+!  call ortho_qr_withB(A,LDA,B,m,n)
+!end subroutine ortho_qr_csf
 
 subroutine ortho_qr(A,LDA,m,n)
   implicit none

From e9166599ad3f3d98052f978262a4620af7a8f2d5 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 2 Dec 2022 11:26:36 +0100
Subject: [PATCH 06/35] added sleep 1 at the end of qp_run

---
 ocaml/qp_run.ml                |  7 ++++---
 src/utils/linear_algebra.irp.f | 22 ++++++++++++----------
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/ocaml/qp_run.ml b/ocaml/qp_run.ml
index dfbab167..b9d14efe 100644
--- a/ocaml/qp_run.ml
+++ b/ocaml/qp_run.ml
@@ -6,7 +6,7 @@ open Qputils
 
 *)
 
-  
+
 let print_list () =
   Lazy.force Qpackage.executables
   |> List.iter (fun (x,_) -> Printf.printf " * %s\n" x)
@@ -151,10 +151,11 @@ let run slave ?prefix exe ezfio_file =
   let duration = Unix.time () -. time_start |> Unix.gmtime in
   let open Unix in
   let d, h, m, s =
-    duration.tm_yday, duration.tm_hour, duration.tm_min, duration.tm_sec 
+    duration.tm_yday, duration.tm_hour, duration.tm_min, duration.tm_sec
   in
   Printf.printf "Wall time: %d:%2.2d:%2.2d" (d*24+h) m s ;
   Printf.printf "\n\n";
+  Unix.sleep 1;
   if (exit_code <> 0) then
     exit exit_code
 
@@ -187,7 +188,7 @@ let () =
   end;
 
   (*  Handle options *)
-  let slave  = Command_line.get_bool "slave" 
+  let slave  = Command_line.get_bool "slave"
   and prefix = Command_line.get "prefix"
   in
 
diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index 6cb03ca7..61506a87 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1697,7 +1697,7 @@ subroutine restore_symmetry(m,n,A,LDA,thresh)
   ! TODO:  Costs O(n^4), but can be improved to (2 n^2 * log(n)):
   ! - copy all values in a 1D array
   ! - sort 1D array
-  ! - average nearby elements 
+  ! - average nearby elements
   ! - for all elements, find matching value in the sorted 1D array
 
   allocate(done(m,n))
@@ -1800,7 +1800,7 @@ end
 !       A_tmp(i,k) = A(i,k)
 !     enddo
 !   enddo
-! 
+!
 !   ! Find optimal size for temp arrays
 !   allocate(work(1))
 !   lwork = -1
@@ -1836,7 +1836,7 @@ end
 !   endif
 !
 !   deallocate(A_tmp,work)
-! 
+!
 !   !do j=1, m
 !   !  do i=1, LDU
 !   !    if (dabs(U(i,j)) < 1.d-14)  U(i,j) = 0.d0
@@ -1847,7 +1847,7 @@ end
 !   !    if (dabs(Vt(i,j)) < 1.d-14) Vt(i,j) = 0.d0
 !   !  enddo
 !   !enddo
-! 
+!
 !end
 !
 
@@ -1877,8 +1877,8 @@ subroutine diag_nonsym_right(n, A, A_ldim, V, V_ldim, energy, E_ldim)
     enddo
   enddo
 
-  JOBVL  = "N" ! computes the left  eigenvectors 
-  JOBVR  = "V" ! computes the right eigenvectors 
+  JOBVL  = "N" ! computes the left  eigenvectors
+  JOBVR  = "V" ! computes the right eigenvectors
   BALANC = "B" ! Diagonal scaling and Permutation for optimization
   SENSE  = "V" ! Determines which reciprocal condition numbers are computed
   lda  = n
@@ -1888,10 +1888,10 @@ subroutine diag_nonsym_right(n, A, A_ldim, V, V_ldim, energy, E_ldim)
   allocate( WORK(1), SCALE_array(n), RCONDE(n), RCONDV(n), IWORK(2*n-2) )
 
   LWORK = -1 ! to ask for the optimal size of WORK
-  call dgeevx( BALANC, JOBVL, JOBVR, SENSE                  & ! CHARACTERS 
+  call dgeevx( BALANC, JOBVL, JOBVR, SENSE                  & ! CHARACTERS
              , n, Atmp, lda                                 & ! MATRIX TO DIAGONALIZE
-             , WR, WI                                       & ! REAL AND IMAGINARY PART OF EIGENVALUES 
-             , VL, ldvl, VR, ldvr                           & ! LEFT AND RIGHT EIGENVECTORS 
+             , WR, WI                                       & ! REAL AND IMAGINARY PART OF EIGENVALUES
+             , VL, ldvl, VR, ldvr                           & ! LEFT AND RIGHT EIGENVECTORS
              , ILO, IHI, SCALE_array, ABNRM, RCONDE, RCONDV & ! OUTPUTS OF OPTIMIZATION
              , WORK, LWORK, IWORK, INFO )
 
@@ -1900,7 +1900,7 @@ subroutine diag_nonsym_right(n, A, A_ldim, V, V_ldim, energy, E_ldim)
     stop
   endif
 
-  LWORK = max(int(work(1)), 1) ! this is the optimal size of WORK 
+  LWORK = max(int(work(1)), 1) ! this is the optimal size of WORK
   deallocate(WORK)
   allocate(WORK(LWORK))
   call dgeevx( BALANC, JOBVL, JOBVR, SENSE                  &
@@ -1982,4 +1982,6 @@ end subroutine diag_nonsym_right
 
 ! ---
 
+! Taken from GammCor thanks to Michal Hapka :-)
+
 

From f79ee5faa88ec27ec03236a4ce11fca84c259873 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Fri, 2 Dec 2022 11:30:05 +0100
Subject: [PATCH 07/35] Fixed bugs in p q excitations.

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 384 +++++++++++--------
 1 file changed, 234 insertions(+), 150 deletions(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 581498c5..3794e8bb 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -671,11 +671,21 @@ use bitmasks
           Jdomo(jint) = IBSET(Jdomo(jint),jpos)
         endif
 
+        do ii=1, N_int
+          Jcfg(i,1) = Jsomo(i)
+          Jcfg(i,2) = Jdomo(i)
+        enddo
+
         call bitstring_to_list(Jcfg,listall,nelall,N_int)
         Nsomo_J = nelall
 
         ! Check for Minimal alpha electrons (MS)
         if(Nsomo_J.lt.MS)then
+          if(vmotype(j) == 1)then
+            Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
+          else if(vmotype(j) == 2)then
+            Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+          endif
           cycle
         endif
 
@@ -715,12 +725,19 @@ use bitmasks
           endif
         end do
 
-        if(pqAlreadyGenQ) cycle
+        if(pqAlreadyGenQ) then
+          if(vmotype(j) == 1)then
+            Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
+          else if(vmotype(j) == 2)then
+            Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+          endif
+          cycle
+        endif
 
         pqExistsQ = .FALSE.
 
         if(.NOT. pqExistsQ) then
-          tableUniqueAlphas(p,q) = .TRUE.
+          tableUniqueAlphas(pp,qq) = .TRUE.
         endif
         
 
@@ -740,10 +757,13 @@ use bitmasks
     !print *,tableUniqueAlphas(:,:)
 
     ! prune list of alphas
-    Isomo = Icfg(1,1)
-    Idomo = Icfg(1,2)
-    Jsomo = Icfg(1,1)
-    Jdomo = Icfg(1,2)
+    do i=1, N_int
+      Isomo(i) = iand(act_bitmask(i,1),psi_configuration(i,1,idxI))
+      Idomo(i) = iand(act_bitmask(i,1),psi_configuration(i,2,idxI))
+      Jsomo(i) = Isomo(i)
+      Jdomo(i) = Idomo(i)
+    enddo
+
     NalphaIcfg = 0
     do i = 1, nholes
       pp  = listholes(i)
@@ -753,6 +773,7 @@ use bitmasks
         Jsomo(iint) = IBCLR(Jsomo(iint),ipos)
       else if(holetype(i) == 2)then
         Jdomo(iint) = IBCLR(Jdomo(iint),ipos)
+        Jsomo(iint) = IBSET(Jsomo(iint),ipos)
       endif
 
       do j = 1, nvmos
@@ -763,14 +784,28 @@ use bitmasks
           Jsomo(jint) = IBSET(Jsomo(jint),jpos)
         else if(vmotype(j) == 2)then
           Jdomo(jint) = IBSET(Jdomo(jint),jpos)
+          Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
+        endif
+        if(pp .EQ. qq) then
+          if(vmotype(j) == 1)then
+            Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
+          else if(vmotype(j) == 2)then
+            Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+          endif
+          cycle
         endif
-        if(pp .EQ. qq) cycle
         if(tableUniqueAlphas(pp,qq)) then
 
+        do ii=1, N_int
+          Jcfg(i,1) = Jsomo(i)
+          Jcfg(i,2) = Jdomo(i)
+        enddo
+
           call bitstring_to_list(Jcfg,listall,nelall,N_int)
           Nsomo_J = nelall
 
           if(Nsomo_J .ge. NSOMOMin) then
+            !print *," Idx = ",idxI, "p = ",pp, " q = ",qq," Jsomo=",Jsomo(1), " Jdomo=",IOR(Jdomo(1),ISHFT(1_8,n_core_orb)-1)
             NalphaIcfg += 1
             alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
             if(n_core_orb .le. 63)then
@@ -794,12 +829,14 @@ use bitmasks
           Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
         else if(vmotype(j) == 2)then
           Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+          Jsomo(jint) = IBSET(Jsomo(jint),jpos)
         endif
       end do
       if(holetype(i) == 1)then
         Jsomo(iint) = IBSET(Jsomo(iint),ipos)
       else if(holetype(i) == 2)then
         Jdomo(iint) = IBSET(Jdomo(iint),ipos)
+        Jsomo(iint) = IBCLR(Jsomo(iint),ipos)
       endif
     end do
 
@@ -863,6 +900,7 @@ END_PROVIDER
 
   integer,intent(in)                 :: idxI ! The id of the Ith CFG
   integer(bit_kind),intent(in)       :: Icfg(N_int,2)
+  integer(bit_kind)                  :: Jcfg(N_int,2)
   integer,intent(out)                :: NalphaIcfg
   integer(bit_kind),intent(out)      :: alphasIcfg(N_int,2,*)
   logical,dimension(:,:),allocatable :: tableUniqueAlphas
@@ -872,74 +910,84 @@ END_PROVIDER
   integer                            :: nvmos
   integer                            :: listvmos(mo_num)
   integer                            :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
-  integer*8                          :: Idomo
-  integer*8                          :: Isomo
-  integer*8                          :: Jdomo
-  integer*8                          :: Jsomo
-  integer*8                          :: diffSOMO
-  integer*8                          :: diffDOMO
-  integer*8                          :: xordiffSOMODOMO
+  integer(bit_kind)                  :: Idomo(N_int), Idomop(N_int), Idomoq(N_int)
+  integer(bit_kind)                  :: Isomo(N_int), Isomop(N_int), Isomoq(N_int) 
+  integer(bit_kind)                  :: Jdomo(N_int), Jdomop(N_int), Jdomoq(N_int)
+  integer(bit_kind)                  :: Jsomo(N_int), Jsomop(N_int), Jsomoq(N_int)
+  integer(bit_kind)                  :: diffDOMO(N_int), xordiffSOMODOMO(N_int), diffSOMO(N_int)
   integer                            :: ndiffSOMO
   integer                            :: ndiffDOMO
   integer                            :: nxordiffSOMODOMO
   integer                            :: ndiffAll
   integer                            :: i, ii
   integer                            :: j, jj
+  integer                            :: iii, iint, jint, ipos, jpos
+  integer                            :: i_s, i_d
   integer                            :: k, kk
   integer                            :: kstart
   integer                            :: kend
-  integer                            :: Nsomo_I
-  integer                            :: hole
-  integer                            :: p
-  integer                            :: q
+  integer                            :: Nsomo_I, Nsomo_J
+  integer                            :: hole, n_core_orb_64
+  integer                            :: p, pp, p_s
+  integer                            :: q, qq, q_s
   integer                            :: countalphas
   logical                            :: pqAlreadyGenQ
   logical                            :: pqExistsQ
   logical                            :: ppExistsQ
-  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
-  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  integer :: listall(N_int*bit_kind_size), nelall
+
+  do i=1, N_int
+    Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
+    Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+  enddo
+
   !print*,"Input cfg"
   !call debug_spindet(Isomo,1)
   !call debug_spindet(Idomo,1)
 
   ! find out all pq holes possible
-  nholes = 0
-  ! holes in SOMO
-  do ii = 1,n_act_orb
-    i = list_act(ii)
-     if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
-        nholes += 1
-        listholes(nholes) = i
-        holetype(nholes) = 1
-     endif
-  end do
-  ! holes in DOMO
-  do ii = 1,n_act_orb
-    i = list_act(ii)
-     if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
-        nholes += 1
-        listholes(nholes) = i
-        holetype(nholes) = 2
-     endif
-  end do
+    nholes = 0
+        call bitstring_to_list(Isomo,listall,nelall,N_int)
+
+        do iii=1,nelall
+          nholes += 1
+          listholes(nholes) = listall(iii)
+          holetype(nholes) = 1
+        end do
+
+        Nsomo_I = nelall
+
+        call bitstring_to_list(Idomo,listall,nelall,N_int)
+
+        do iii=1,nelall
+          if(listall(iii) .gt. n_core_orb)then
+            nholes += 1
+            listholes(nholes) = listall(iii)
+            holetype(nholes) = 2
+          endif
+        end do
+
 
   ! find vmos
-  listvmos = -1
-  vmotype = -1
-  nvmos = 0
-  do ii = 1,n_act_orb
-    i = list_act(ii)
-     !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
-     if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0) then
-        nvmos += 1
-        listvmos(nvmos) = i
-        vmotype(nvmos) = 1
-     else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0 ) then
-        nvmos += 1
-        listvmos(nvmos) = i
-        vmotype(nvmos) = 2
-     end if
-  end do
+    ! Take into account N_int
+    do ii = 1, n_act_orb
+      iii = list_act(ii)
+      iint = shiftr(iii-1,bit_kind_shift) + 1
+      ipos = iii-shiftl((iint-1),bit_kind_shift)-1
+
+      if(IAND(Idomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then
+        if(IAND(Isomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then
+          nvmos += 1
+          listvmos(nvmos) = iii
+          vmotype(nvmos) = 1
+        else if(POPCNT(IAND(Isomo(iint),(IBSET(0_8,ipos)))) .EQ. 1) then
+          nvmos += 1
+          listvmos(nvmos) = iii
+          vmotype(nvmos) = 2
+        end if
+      end if
+    end do
+
 
   !print *,"Nvmo=",nvmos
   !print *,listvmos
@@ -948,10 +996,15 @@ END_PROVIDER
   allocate(tableUniqueAlphas(mo_num,mo_num))
   tableUniqueAlphas = .FALSE.
 
+    ! Now find the allowed (p,q) excitations
+    do i=1, N_int
+      Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
+      Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+      Jsomo(i) = Isomo(i)
+      Jdomo(i) = Idomo(i)
+    enddo
+
   ! Now find the allowed (p,q) excitations
-  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
-  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
-  Nsomo_I = POPCNT(Isomo)
   if(Nsomo_I .EQ. 0) then
     kstart = 1
   else
@@ -971,41 +1024,40 @@ END_PROVIDER
   !enddo
 
   do i = 1,nholes
-     p = listholes(i)
+     pp  = listholes(i)
+     iint = shiftr(pp-1,bit_kind_shift) + 1
+     ipos = pp-shiftl((iint-1),bit_kind_shift)-1
+     if(holetype(i) == 1)then
+       Jsomo(iint) = IBCLR(Jsomo(iint),ipos)
+     else if(holetype(i) == 2)then
+       Jdomo(iint) = IBCLR(Jdomo(iint),ipos)
+     endif
+
      do j = 1,nvmos
-        q = listvmos(j)
-        if(p .EQ. q) cycle
-        if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
-           ! SOMO -> VMO
-           Jsomo = IBCLR(Isomo,p-1)
-           Jsomo = IBSET(Jsomo,q-1)
-           Jdomo = Idomo
-           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
-           kend = idxI-1
-        else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
-           ! SOMO -> SOMO
-           Jsomo = IBCLR(Isomo,p-1)
-           Jsomo = IBCLR(Jsomo,q-1)
-           Jdomo = IBSET(Idomo,q-1)
-           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
-           kend = idxI-1
-        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
-           ! DOMO -> VMO
-           Jsomo = IBSET(Isomo,p-1)
-           Jsomo = IBSET(Jsomo,q-1)
-           Jdomo = IBCLR(Idomo,p-1)
-           kstart = cfg_seniority_index(Nsomo_I)
-           kend = idxI-1
-        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
-           ! DOMO -> SOMO
-           Jsomo = IBSET(Isomo,p-1)
-           Jsomo = IBCLR(Jsomo,q-1)
-           Jdomo = IBCLR(Idomo,p-1)
-           Jdomo = IBSET(Jdomo,q-1)
-           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
-           kend = idxI-1
-        else
-           print*,"Something went wrong in obtain_associated_alphaI"
+        qq  = listvmos(j)
+        jint = shiftr(qq-1,bit_kind_shift) + 1
+        jpos = qq-shiftl((iint-1),bit_kind_shift)-1
+        if(vmotype(j) == 1)then
+          Jsomo(jint) = IBSET(Jsomo(jint),jpos)
+        else if(vmotype(j) == 2)then
+          Jdomo(jint) = IBSET(Jdomo(jint),jpos)
+        endif
+
+        do ii=1, N_int
+          Jcfg(i,1) = Jsomo(i)
+          Jcfg(i,2) = Jdomo(i)
+        enddo
+
+        call bitstring_to_list(Jcfg,listall,nelall,N_int)
+        Nsomo_J = nelall
+
+        if(pp .EQ. qq) then
+          if(vmotype(j) == 1)then
+            Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
+          else if(vmotype(j) == 2)then
+            Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+          endif
+          cycle
         endif
 
         ! Again, we don't have to search from 1
@@ -1016,14 +1068,21 @@ END_PROVIDER
         pqAlreadyGenQ = .FALSE.
         ! First check if it can be generated before
         do k = kstart, kend
-           diffSOMO = IEOR(Jsomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
-           ndiffSOMO = POPCNT(diffSOMO)
+          ndiffSOMO = 0
+          ndiffDOMO = 0
+          nxordiffSOMODOMO = 0
+          do ii = 1, N_int
+            Jsomo = Jcfg(ii,1)
+            Jdomo = Jcfg(ii,2)
+            diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k)))
+            ndiffSOMO += POPCNT(diffSOMO(ii))
+            diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k)))
+            xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+            ndiffDOMO += POPCNT(diffDOMO(ii))
+            nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO(ii))
+            nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+          end do
            if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
-           diffDOMO = IEOR(Jdomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
-           xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-           ndiffDOMO = POPCNT(diffDOMO)
-           nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
-           nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
            !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then
            if((ndiffSOMO+ndiffDOMO) .EQ. 0) then
               pqAlreadyGenQ = .TRUE.
@@ -1033,19 +1092,20 @@ END_PROVIDER
            if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
               pqAlreadyGenQ = .TRUE.
               !EXIT
-              !ppExistsQ = .TRUE.
-              !print *,i,k,ndiffSOMO,ndiffDOMO
-              !call debug_spindet(Jsomo,1)
-              !call debug_spindet(Jdomo,1)
-              !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)),1)
-              !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)),1)
               EXIT
            endif
         end do
 
         !print *,"(,",p,",",q,")",pqAlreadyGenQ
 
-        if(pqAlreadyGenQ) cycle
+        if(pqAlreadyGenQ) then
+          if(vmotype(j) == 1)then
+            Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
+          else if(vmotype(j) == 2)then
+            Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+          endif
+          cycle
+        endif
 
         pqExistsQ = .FALSE.
         ! now check if this exists in the selected list
@@ -1066,53 +1126,67 @@ END_PROVIDER
            !call debug_spindet(Jsomo,1)
            !call debug_spindet(Jdomo,1)
         endif
+        if(vmotype(j) == 1)then
+          Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
+        else if(vmotype(j) == 2)then
+          Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+        endif
      end do
+      if(holetype(i) == 1)then
+        Jsomo(iint) = IBSET(Jsomo(iint),ipos)
+      else if(holetype(i) == 2)then
+        Jdomo(iint) = IBSET(Jdomo(iint),ipos)
+      endif
   end do
 
   !print *,tableUniqueAlphas(:,:)
 
   ! prune list of alphas
-  Isomo = Icfg(1,1)
-  Idomo = Icfg(1,2)
-  Jsomo = Icfg(1,1)
-  Jdomo = Icfg(1,2)
+    do i=1, N_int
+      Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
+      Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+      Jsomo(i) = Isomo(i)
+      Jdomo(i) = Idomo(i)
+    enddo
+
   NalphaIcfg = 0
   do i = 1, nholes
-     p = listholes(i)
-     do j = 1, nvmos
-        q = listvmos(j)
-        if(p .EQ. q) cycle
-        if(tableUniqueAlphas(p,q)) then
-           if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
-              ! SOMO -> VMO
-              Jsomo = IBCLR(Isomo,p-1)
-              Jsomo = IBSET(Jsomo,q-1)
-              Jdomo = Idomo
-           else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
-              ! SOMO -> SOMO
-              Jsomo = IBCLR(Isomo,p-1)
-              Jsomo = IBCLR(Jsomo,q-1)
-              Jdomo = IBSET(Idomo,q-1)
-           else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
-              ! DOMO -> VMO
-              Jsomo = IBSET(Isomo,p-1)
-              Jsomo = IBSET(Jsomo,q-1)
-              Jdomo = IBCLR(Idomo,p-1)
-           else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
-              ! DOMO -> SOMO
-              Jsomo = IBSET(Isomo,p-1)
-              Jsomo = IBCLR(Jsomo,q-1)
-              Jdomo = IBCLR(Idomo,p-1)
-              Jdomo = IBSET(Jdomo,q-1)
-           else
-              print*,"Something went wrong in obtain_associated_alphaI"
-           endif
+     pp  = listholes(i)
+     iint = shiftr(pp-1,bit_kind_shift) + 1
+     ipos = pp-shiftl((iint-1),bit_kind_shift)-1
+     if(holetype(i) == 1)then
+       Jsomo(iint) = IBCLR(Jsomo(iint),ipos)
+     else if(holetype(i) == 2)then
+       Jdomo(iint) = IBCLR(Jdomo(iint),ipos)
+     endif
 
+     do j = 1, nvmos
+        qq  = listvmos(j)
+        jint = shiftr(qq-1,bit_kind_shift) + 1
+        jpos = qq-shiftl((iint-1),bit_kind_shift)-1
+        if(vmotype(j) == 1)then
+          Jsomo(jint) = IBSET(Jsomo(jint),jpos)
+        else if(vmotype(j) == 2)then
+          Jdomo(jint) = IBSET(Jdomo(jint),jpos)
+        endif
+        if(pp .EQ. qq) cycle
+        if(tableUniqueAlphas(pp,qq)) then
            ! SOMO
            NalphaIcfg += 1
-           !print *,i,j,"|",NalphaIcfg
-           alphasIcfg(1,1,NalphaIcfg) = Jsomo
-           alphasIcfg(1,2,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+           alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
+           if(n_core_orb .le. 63)then
+             alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1)
+           else
+             n_core_orb_64 = n_core_orb
+             do ii=1,N_int
+               if(n_core_orb_64 .gt. 0)then
+                 alphasIcfg_list(ii,2,idxI,NalphaIcfg) = IOR(Jcfg(ii,2),ISHFT(1_8,n_core_orb_64)-1)
+               else
+                 alphasIcfg_list(ii,2,idxI,NalphaIcfg) = Jcfg(ii,2)
+               endif
+               n_core_orb_64 = ISHFT(n_core_orb_64,-6)
+             end do
+           endif
            !print *,"I = ",idxI, " Na=",NalphaIcfg," - ",Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
         endif
      end do
@@ -1123,12 +1197,22 @@ END_PROVIDER
   Isomo = iand(act_bitmask(1,1),Icfg(1,1))
   Idomo = iand(act_bitmask(1,1),Icfg(1,2))
   do k = 1, idxI-1
-     diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
-     diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
-     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-     ndiffSOMO = POPCNT(diffSOMO)
-     ndiffDOMO = POPCNT(diffDOMO)
-     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+      do ii=1,N_int
+        diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k)))
+        ndiffSOMO += POPCNT(diffSOMO(ii))
+      end do
+      ! ndiffSOMO cannot be 0 (I /= k)
+      ! if ndiffSOMO /= 2 then it has to be greater than 2 and hense
+      ! this Icfg could not have been generated before.
+      if (ndiffSOMO /= 2) cycle
+      ndiffDOMO = 0
+      nxordiffSOMODOMO = 0
+      do ii=1,N_int
+        diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,1),psi_configuration(ii,2,k)))
+        xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+        ndiffDOMO += POPCNT(diffDOMO(ii))
+        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO(ii))
+      end do
      if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
         ppExistsQ = .TRUE.
         EXIT
@@ -1141,8 +1225,8 @@ END_PROVIDER
      !print *,p,q,"|",holetype(i),vmotype(j),NalphaIcfg
      !call debug_spindet(Idomo,1)
      !call debug_spindet(Jdomo,1)
-     alphasIcfg(1,1,NalphaIcfg) = Icfg(1,1)
-     alphasIcfg(1,2,NalphaIcfg) = Icfg(1,2)
+     alphasIcfg_list(:,1,idxI,NalphaIcfg) = Icfg(:,1)
+     alphasIcfg_list(:,2,idxI,NalphaIcfg) = Icfg(:,2)
   endif
 
   end subroutine

From 0234e46e1b5efb97e45b74f3dc80f79f0892d855 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Sun, 4 Dec 2022 00:25:15 +0100
Subject: [PATCH 08/35] Fixed alphalist.

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 39 +++++++++++++-------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 3794e8bb..2bad88c0 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -584,8 +584,8 @@ use bitmasks
     Jcfg  = psi_configuration(:,:,idxI)
 
     do i=1, N_int
-      Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
-      Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+      Isomo(i) = iand(act_bitmask(i,1),psi_configuration(i,1,idxI))
+      Idomo(i) = iand(act_bitmask(i,1),psi_configuration(i,2,idxI))
     enddo
 
     ! find out all pq holes possible
@@ -638,8 +638,8 @@ use bitmasks
 
     ! Now find the allowed (p,q) excitations
     do i=1, N_int
-      Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
-      Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+      Isomo(i) = iand(reunion_of_act_virt_bitmask(i,1),psi_configuration(i,1,idxI))
+      Idomo(i) = iand(reunion_of_act_virt_bitmask(i,2),psi_configuration(i,2,idxI))
       Jsomo(i) = Isomo(i)
       Jdomo(i) = Idomo(i)
     enddo
@@ -659,21 +659,24 @@ use bitmasks
         Jsomo(iint) = IBCLR(Jsomo(iint),ipos)
       else if(holetype(i) == 2)then
         Jdomo(iint) = IBCLR(Jdomo(iint),ipos)
+        Jsomo(iint) = IBSET(Jsomo(iint),ipos)
       endif
 
       do j = 1,nvmos
         qq  = listvmos(j)
+        if(pp.eq.qq) cycle
         jint = shiftr(qq-1,bit_kind_shift) + 1
         jpos = qq-shiftl((iint-1),bit_kind_shift)-1
         if(vmotype(j) == 1)then
           Jsomo(jint) = IBSET(Jsomo(jint),jpos)
         else if(vmotype(j) == 2)then
           Jdomo(jint) = IBSET(Jdomo(jint),jpos)
+          Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
         endif
 
         do ii=1, N_int
-          Jcfg(i,1) = Jsomo(i)
-          Jcfg(i,2) = Jdomo(i)
+          Jcfg(ii,1) = Jsomo(ii)
+          Jcfg(ii,2) = Jdomo(ii)
         enddo
 
         call bitstring_to_list(Jcfg,listall,nelall,N_int)
@@ -685,6 +688,7 @@ use bitmasks
             Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
           else if(vmotype(j) == 2)then
             Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+            Jsomo(jint) = IBSET(Jsomo(jint),jpos)
           endif
           cycle
         endif
@@ -730,12 +734,15 @@ use bitmasks
             Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
           else if(vmotype(j) == 2)then
             Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+            Jsomo(jint) = IBSET(Jsomo(jint),jpos)
           endif
           cycle
         endif
 
         pqExistsQ = .FALSE.
 
+        !print *, " ndiffSOMO=",ndiffSOMO, " ndiffDOMO=", ndiffDOMO, " nxordiffSOMODOMO=",nxordiffSOMODOMO, " p=",pp," q=",qq
+
         if(.NOT. pqExistsQ) then
           tableUniqueAlphas(pp,qq) = .TRUE.
         endif
@@ -745,16 +752,18 @@ use bitmasks
           Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
         else if(vmotype(j) == 2)then
           Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+          Jsomo(jint) = IBSET(Jsomo(jint),jpos)
         endif
       end do
       if(holetype(i) == 1)then
         Jsomo(iint) = IBSET(Jsomo(iint),ipos)
       else if(holetype(i) == 2)then
         Jdomo(iint) = IBSET(Jdomo(iint),ipos)
+        Jsomo(iint) = IBCLR(Jsomo(iint),ipos)
       endif
     end do
 
-    !print *,tableUniqueAlphas(:,:)
+    print *,tableUniqueAlphas(:,:)
 
     ! prune list of alphas
     do i=1, N_int
@@ -763,6 +772,7 @@ use bitmasks
       Jsomo(i) = Isomo(i)
       Jdomo(i) = Idomo(i)
     enddo
+    print *, " Isomo=",Isomo(1), " Idomo=", Idomo(1)
 
     NalphaIcfg = 0
     do i = 1, nholes
@@ -791,21 +801,22 @@ use bitmasks
             Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
           else if(vmotype(j) == 2)then
             Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
+            Jsomo(jint) = IBSET(Jsomo(jint),jpos)
           endif
           cycle
         endif
         if(tableUniqueAlphas(pp,qq)) then
 
         do ii=1, N_int
-          Jcfg(i,1) = Jsomo(i)
-          Jcfg(i,2) = Jdomo(i)
+          Jcfg(ii,1) = Jsomo(ii)
+          Jcfg(ii,2) = Jdomo(ii)
         enddo
 
           call bitstring_to_list(Jcfg,listall,nelall,N_int)
           Nsomo_J = nelall
 
           if(Nsomo_J .ge. NSOMOMin) then
-            !print *," Idx = ",idxI, "p = ",pp, " q = ",qq," Jsomo=",Jsomo(1), " Jdomo=",IOR(Jdomo(1),ISHFT(1_8,n_core_orb)-1)
+            print *," Idx = ",idxI, "p = ",pp, " q = ",qq," Jsomo=",Jsomo(1), " Jdomo=",IOR(Jdomo(1),ISHFT(1_8,n_core_orb)-1)
             NalphaIcfg += 1
             alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
             if(n_core_orb .le. 63)then
@@ -843,8 +854,8 @@ use bitmasks
     ! Check if this Icfg has been previously generated as a mono
     ppExistsQ = .False.
     do i=1, N_int
-      Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
-      Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+      Isomo(i) = iand(act_bitmask(i,1),psi_configuration(i,1,idxI))
+      Idomo(i) = iand(act_bitmask(i,1),psi_configuration(i,2,idxI))
     enddo
 
     kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
@@ -1044,8 +1055,8 @@ END_PROVIDER
         endif
 
         do ii=1, N_int
-          Jcfg(i,1) = Jsomo(i)
-          Jcfg(i,2) = Jdomo(i)
+          Jcfg(ii,1) = Jsomo(ii)
+          Jcfg(ii,2) = Jdomo(ii)
         enddo
 
         call bitstring_to_list(Jcfg,listall,nelall,N_int)

From 3839f05ba02ae02b4c29fde2bd5f1bc2d4531ff2 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Wed, 7 Dec 2022 09:53:48 +0100
Subject: [PATCH 09/35] Unused variable

---
 external/qp2-dependencies  | 2 +-
 src/csf/sigma_vector.irp.f | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/external/qp2-dependencies b/external/qp2-dependencies
index 90ee61f5..242151e0 160000
--- a/external/qp2-dependencies
+++ b/external/qp2-dependencies
@@ -1 +1 @@
-Subproject commit 90ee61f5041c7c94a0c605625a264860292813a0
+Subproject commit 242151e03d1d6bf042387226431d82d35845686a
diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f
index 833fa7b0..2d6f2ee2 100644
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@@ -858,7 +858,6 @@ subroutine calculate_preconditioner_cfg(diag_energies)
   real*8, external  :: mo_two_e_integral
   real*8            :: hpp
   real*8            :: meCC
-  real*8            :: ecore
   real*8            :: core_act_contrib
 
   !PROVIDE h_core_ri
@@ -869,7 +868,6 @@ subroutine calculate_preconditioner_cfg(diag_energies)
   !print *,"Core energy=",core_energy," nucler rep=",nuclear_repulsion, " n_core_orb=",n_core_orb," n_act_orb=",n_act_orb," mo_num=",mo_num
 
   ! calculate core energy
-  !call get_core_energy(ecore)
   diag_energies = core_energy - nuclear_repulsion
 
   ! calculate the core energy

From 050ee3af5d1c7a6444e4336844b81ec44009b06e Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Wed, 7 Dec 2022 18:19:38 +0100
Subject: [PATCH 10/35] Revert sort from C quicksort

---
 src/scf_utils/roothaan_hall_scf.irp.f |   2 +-
 src/utils/qsort.c                     | 373 ++++++++++++++
 src/utils/qsort.org                   | 169 +++++++
 src/utils/qsort_module.f90            | 347 +++++++++++++
 src/utils/sort.irp.f                  | 695 --------------------------
 5 files changed, 890 insertions(+), 696 deletions(-)
 create mode 100644 src/utils/qsort.c
 create mode 100644 src/utils/qsort.org
 create mode 100644 src/utils/qsort_module.f90

diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f
index 3b9eaeb4..2c35fe0d 100644
--- a/src/scf_utils/roothaan_hall_scf.irp.f
+++ b/src/scf_utils/roothaan_hall_scf.irp.f
@@ -69,9 +69,9 @@ END_DOC
     if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-6) )  then
 
       ! Store Fock and error matrices at each iteration
+      index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
       do j=1,ao_num
         do i=1,ao_num
-          index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
           Fock_matrix_DIIS (i,j,index_dim_DIIS) = Fock_matrix_AO(i,j)
           error_matrix_DIIS(i,j,index_dim_DIIS) = FPS_SPF_matrix_AO(i,j)
         enddo
diff --git a/src/utils/qsort.c b/src/utils/qsort.c
new file mode 100644
index 00000000..c011b35a
--- /dev/null
+++ b/src/utils/qsort.c
@@ -0,0 +1,373 @@
+/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */
+#include <stdlib.h>
+#include <stdint.h>
+
+struct int16_t_comp {
+  int16_t    x;
+  int32_t i;
+};
+
+int compare_int16_t( const void * l, const void * r )
+{
+  const int16_t * restrict _l= l;
+  const int16_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int16_t_comp), compare_int16_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int16_t_noidx(int16_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int16_t), compare_int16_t);
+}
+
+
+struct int16_t_comp_big {
+  int16_t    x;
+  int64_t i;
+};
+
+int compare_int16_t_big( const void * l, const void * r )
+{
+  const int16_t * restrict _l= l;
+  const int16_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int16_t_comp_big), compare_int16_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int16_t_noidx_big(int16_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int16_t), compare_int16_t_big);
+}
+
+
+struct int32_t_comp {
+  int32_t    x;
+  int32_t i;
+};
+
+int compare_int32_t( const void * l, const void * r )
+{
+  const int32_t * restrict _l= l;
+  const int32_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int32_t_comp), compare_int32_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int32_t_noidx(int32_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int32_t), compare_int32_t);
+}
+
+
+struct int32_t_comp_big {
+  int32_t    x;
+  int64_t i;
+};
+
+int compare_int32_t_big( const void * l, const void * r )
+{
+  const int32_t * restrict _l= l;
+  const int32_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int32_t_comp_big), compare_int32_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int32_t_noidx_big(int32_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int32_t), compare_int32_t_big);
+}
+
+
+struct int64_t_comp {
+  int64_t    x;
+  int32_t i;
+};
+
+int compare_int64_t( const void * l, const void * r )
+{
+  const int64_t * restrict _l= l;
+  const int64_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int64_t_comp), compare_int64_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int64_t_noidx(int64_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int64_t), compare_int64_t);
+}
+
+
+struct int64_t_comp_big {
+  int64_t    x;
+  int64_t i;
+};
+
+int compare_int64_t_big( const void * l, const void * r )
+{
+  const int64_t * restrict _l= l;
+  const int64_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int64_t_comp_big), compare_int64_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int64_t_noidx_big(int64_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int64_t), compare_int64_t_big);
+}
+
+
+struct double_comp {
+  double    x;
+  int32_t i;
+};
+
+int compare_double( const void * l, const void * r )
+{
+  const double * restrict _l= l;
+  const double * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct double_comp* A = malloc(isize * sizeof(struct double_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct double_comp), compare_double);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_double_noidx(double* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(double), compare_double);
+}
+
+
+struct double_comp_big {
+  double    x;
+  int64_t i;
+};
+
+int compare_double_big( const void * l, const void * r )
+{
+  const double * restrict _l= l;
+  const double * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct double_comp_big), compare_double_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_double_noidx_big(double* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(double), compare_double_big);
+}
+
+
+struct float_comp {
+  float    x;
+  int32_t i;
+};
+
+int compare_float( const void * l, const void * r )
+{
+  const float * restrict _l= l;
+  const float * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct float_comp* A = malloc(isize * sizeof(struct float_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct float_comp), compare_float);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_float_noidx(float* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(float), compare_float);
+}
+
+
+struct float_comp_big {
+  float    x;
+  int64_t i;
+};
+
+int compare_float_big( const void * l, const void * r )
+{
+  const float * restrict _l= l;
+  const float * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct float_comp_big), compare_float_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_float_noidx_big(float* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(float), compare_float_big);
+}
+/* Generated C file:1 ends here */
diff --git a/src/utils/qsort.org b/src/utils/qsort.org
new file mode 100644
index 00000000..abf0d54a
--- /dev/null
+++ b/src/utils/qsort.org
@@ -0,0 +1,169 @@
+#+TITLE: Quick sort binding for Fortran
+
+* C template
+
+#+NAME: c_template
+#+BEGIN_SRC c
+struct TYPE_comp_big {
+  TYPE    x;
+  int32_t i;
+};
+
+int compare_TYPE_big( const void * l, const void * r )
+{
+  const TYPE * restrict _l= l;
+  const TYPE * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct TYPE_comp_big), compare_TYPE_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_TYPE_noidx_big(TYPE* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(TYPE), compare_TYPE_big);
+}
+#+END_SRC
+
+* Fortran template
+
+#+NAME:f_template
+#+BEGIN_SRC f90
+subroutine Lsort_big_c(A, iorder, isize) bind(C, name="qsort_TYPE_big")
+  use iso_c_binding
+  integer(c_int32_t), value :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_TYPE)         :: A(isize)
+end subroutine Lsort_big_c
+
+subroutine Lsort_noidx_big_c(A, isize) bind(C, name="qsort_TYPE_noidx_big")
+  use iso_c_binding
+  integer(c_int32_t), value :: isize
+  real   (c_TYPE)         :: A(isize)
+end subroutine Lsort_noidx_big_c
+
+#+END_SRC
+
+#+NAME:f_template2
+#+BEGIN_SRC f90
+subroutine Lsort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_TYPE)         :: A(isize)
+  call Lsort_big_c(A, iorder, isize)
+end subroutine Lsort_big
+
+subroutine Lsort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_TYPE)    :: A(isize)
+  call Lsort_noidx_big_c(A, isize)
+end subroutine Lsort_noidx_big
+
+#+END_SRC
+
+* Python scripts for type replacements
+
+#+NAME: replaced
+#+begin_src python :results output :noweb yes
+data = """
+<<c_template>>
+"""
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+#+NAME: replaced_f
+#+begin_src python :results output :noweb yes
+data = """
+<<f_template>>
+"""
+c1 = {
+    "int16_t": "i2",
+    "int32_t": "i",
+    "int64_t": "i8",
+    "double": "d",
+    "float": ""
+}
+c2 = {
+    "int16_t": "integer",
+    "int32_t": "integer",
+    "int64_t": "integer",
+    "double": "real",
+    "float": "real"
+}
+
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+#+NAME: replaced_f2
+#+begin_src python :results output :noweb yes
+data = """
+<<f_template2>>
+"""
+c1 = {
+    "int16_t": "i2",
+    "int32_t": "i",
+    "int64_t": "i8",
+    "double": "d",
+    "float": ""
+}
+c2 = {
+    "int16_t": "integer",
+    "int32_t": "integer",
+    "int64_t": "integer",
+    "double": "real",
+    "float": "real"
+}
+
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+* Generated C file
+  
+#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes
+#include <stdlib.h>
+#include <stdint.h>
+<<replaced()>>
+#+END_SRC
+
+* Generated Fortran file
+
+#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes
+module qsort_module
+  use iso_c_binding
+  
+  interface
+     <<replaced_f()>>
+  end interface
+
+end module qsort_module
+
+<<replaced_f2()>>
+
+#+END_SRC
+
diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90
new file mode 100644
index 00000000..a72a4f9e
--- /dev/null
+++ b/src/utils/qsort_module.f90
@@ -0,0 +1,347 @@
+module qsort_module
+  use iso_c_binding
+  
+  interface
+     
+     subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_c
+     
+     subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_noidx_c
+     
+     
+     
+     subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_big_c
+     
+     subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_noidx_big_c
+     
+     
+     
+     subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_c
+     
+     subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_noidx_c
+     
+     
+     
+     subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_big_c
+     
+     subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_noidx_big_c
+     
+     
+     
+     subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_c
+     
+     subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_noidx_c
+     
+     
+     
+     subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_big_c
+     
+     subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_noidx_big_c
+     
+     
+     
+     subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_c
+     
+     subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_noidx_c
+     
+     
+     
+     subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_big_c
+     
+     subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_noidx_big_c
+     
+     
+     
+     subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       real   (c_float)         :: A(isize)
+     end subroutine sort_c
+     
+     subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       real   (c_float)         :: A(isize)
+     end subroutine sort_noidx_c
+     
+     
+     
+     subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       real   (c_float)         :: A(isize)
+     end subroutine sort_big_c
+     
+     subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       real   (c_float)         :: A(isize)
+     end subroutine sort_noidx_big_c
+     
+     
+     
+  end interface
+
+end module qsort_module
+
+
+subroutine i2sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int16_t)         :: A(isize)
+  call i2sort_c(A, iorder, isize)
+end subroutine i2sort
+
+subroutine i2sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int16_t)    :: A(isize)
+  call i2sort_noidx_c(A, isize)
+end subroutine i2sort_noidx
+
+
+
+subroutine i2sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int16_t)         :: A(isize)
+  call i2sort_big_c(A, iorder, isize)
+end subroutine i2sort_big
+
+subroutine i2sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int16_t)    :: A(isize)
+  call i2sort_noidx_big_c(A, isize)
+end subroutine i2sort_noidx_big
+
+
+
+subroutine isort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int32_t)         :: A(isize)
+  call isort_c(A, iorder, isize)
+end subroutine isort
+
+subroutine isort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int32_t)    :: A(isize)
+  call isort_noidx_c(A, isize)
+end subroutine isort_noidx
+
+
+
+subroutine isort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int32_t)         :: A(isize)
+  call isort_big_c(A, iorder, isize)
+end subroutine isort_big
+
+subroutine isort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int32_t)    :: A(isize)
+  call isort_noidx_big_c(A, isize)
+end subroutine isort_noidx_big
+
+
+
+subroutine i8sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int64_t)         :: A(isize)
+  call i8sort_c(A, iorder, isize)
+end subroutine i8sort
+
+subroutine i8sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int64_t)    :: A(isize)
+  call i8sort_noidx_c(A, isize)
+end subroutine i8sort_noidx
+
+
+
+subroutine i8sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int64_t)         :: A(isize)
+  call i8sort_big_c(A, iorder, isize)
+end subroutine i8sort_big
+
+subroutine i8sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int64_t)    :: A(isize)
+  call i8sort_noidx_big_c(A, isize)
+end subroutine i8sort_noidx_big
+
+
+
+subroutine dsort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_double)         :: A(isize)
+  call dsort_c(A, iorder, isize)
+end subroutine dsort
+
+subroutine dsort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_double)    :: A(isize)
+  call dsort_noidx_c(A, isize)
+end subroutine dsort_noidx
+
+
+
+subroutine dsort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  real   (c_double)         :: A(isize)
+  call dsort_big_c(A, iorder, isize)
+end subroutine dsort_big
+
+subroutine dsort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  real   (c_double)    :: A(isize)
+  call dsort_noidx_big_c(A, isize)
+end subroutine dsort_noidx_big
+
+
+
+subroutine sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_float)         :: A(isize)
+  call sort_c(A, iorder, isize)
+end subroutine sort
+
+subroutine sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_float)    :: A(isize)
+  call sort_noidx_c(A, isize)
+end subroutine sort_noidx
+
+
+
+subroutine sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  real   (c_float)         :: A(isize)
+  call sort_big_c(A, iorder, isize)
+end subroutine sort_big
+
+subroutine sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  real   (c_float)    :: A(isize)
+  call sort_noidx_big_c(A, isize)
+end subroutine sort_noidx_big
diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f
index ff40263c..089c3871 100644
--- a/src/utils/sort.irp.f
+++ b/src/utils/sort.irp.f
@@ -1,222 +1,4 @@
 BEGIN_TEMPLATE
- subroutine insertion_$Xsort (x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the insertion sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  $type                          :: xtmp
-  integer                        :: i, i0, j, jmax
-
-  do i=2,isize
-    xtmp = x(i)
-    i0 = iorder(i)
-    j=i-1
-    do while (j>0)
-      if ((x(j) <= xtmp)) exit
-      x(j+1) = x(j)
-      iorder(j+1) = iorder(j)
-      j=j-1
-    enddo
-    x(j+1) = xtmp
-    iorder(j+1) = i0
-  enddo
- end subroutine insertion_$Xsort
-
- subroutine quick_$Xsort(x, iorder, isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the quicksort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer, external              :: omp_get_num_threads
-  call rec_$X_quicksort(x,iorder,isize,1,isize,nproc)
- end
-
- recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level)
-  implicit none
-  integer, intent(in)            :: isize, first, last, level
-  integer,intent(inout)          :: iorder(isize)
-  $type, intent(inout)           :: x(isize)
-  $type                          :: c, tmp
-  integer                        :: itmp
-  integer                        :: i, j
-
-  if(isize<2)return
-
-  c = x( shiftr(first+last,1) )
-  i = first
-  j = last
-  do
-    do while (x(i) < c)
-      i=i+1
-    end do
-    do while (c < x(j))
-      j=j-1
-    end do
-    if (i >= j) exit
-    tmp  = x(i)
-    x(i) = x(j)
-    x(j) = tmp
-    itmp      = iorder(i)
-    iorder(i) = iorder(j)
-    iorder(j) = itmp
-    i=i+1
-    j=j-1
-  enddo
-  if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then
-    if (first < i-1) then
-      call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2)
-    endif
-    if (j+1 < last) then
-      call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2)
-    endif
-  else
-    if (first < i-1) then
-      call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2)
-    endif
-    if (j+1 < last) then
-      call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2)
-    endif
-  endif
- end
-
- subroutine heap_$Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the heap sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-
-  integer                        :: i, k, j, l, i0
-  $type                          :: xtemp
-
-  l = isize/2+1
-  k = isize
-  do while (.True.)
-    if (l>1) then
-      l=l-1
-      xtemp = x(l)
-      i0 = iorder(l)
-    else
-      xtemp = x(k)
-      i0 = iorder(k)
-      x(k) = x(1)
-      iorder(k) = iorder(1)
-      k = k-1
-      if (k == 1) then
-        x(1) = xtemp
-        iorder(1) = i0
-        exit
-      endif
-    endif
-    i=l
-    j = shiftl(l,1)
-    do while (j<k)
-      if ( x(j) < x(j+1) ) then
-        j=j+1
-      endif
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    enddo
-    if (j==k) then
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    endif
-    x(i) = xtemp
-    iorder(i) = i0
-  enddo
- end subroutine heap_$Xsort
-
- subroutine heap_$Xsort_big(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the heap sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! This is a version for very large arrays where the indices need
-  ! to be in integer*8 format
-  END_DOC
-  integer*8,intent(in)           :: isize
-  $type,intent(inout)            :: x(isize)
-  integer*8,intent(inout)        :: iorder(isize)
-
-  integer*8                      :: i, k, j, l, i0
-  $type                          :: xtemp
-
-  l = isize/2+1
-  k = isize
-  do while (.True.)
-    if (l>1) then
-      l=l-1
-      xtemp = x(l)
-      i0 = iorder(l)
-    else
-      xtemp = x(k)
-      i0 = iorder(k)
-      x(k) = x(1)
-      iorder(k) = iorder(1)
-      k = k-1
-      if (k == 1) then
-        x(1) = xtemp
-        iorder(1) = i0
-        exit
-      endif
-    endif
-    i=l
-    j = shiftl(l,1)
-    do while (j<k)
-      if ( x(j) < x(j+1) ) then
-        j=j+1
-      endif
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    enddo
-    if (j==k) then
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    endif
-    x(i) = xtemp
-    iorder(i) = i0
-  enddo
-
- end subroutine heap_$Xsort_big
 
  subroutine sorted_$Xnumber(x,isize,n)
   implicit none
@@ -250,222 +32,6 @@ SUBST [ X, type ]
 END_TEMPLATE
 
 
-!---------------------- INTEL
-IRP_IF INTEL
-
-BEGIN_TEMPLATE
- subroutine $Xsort(x,iorder,isize)
-  use intel
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  character, allocatable         :: tmp(:)
-  if (isize < 2) return
-  call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
-  allocate(tmp(n))
-  call ippsSortRadixIndexAscend_$ityp(x, $n, iorder, isize, tmp)
-  deallocate(tmp)
-  iorder(1:isize) = iorder(1:isize)+1
-  call $Xset_order(x,iorder,isize)
- end
-
- subroutine $Xsort_noidx(x,isize)
-  use intel
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer                        :: n
-  character, allocatable         :: tmp(:)
-  if (isize < 2) return
-  call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
-  allocate(tmp(n))
-  call ippsSortRadixAscend_$ityp_I(x, isize, tmp)
-  deallocate(tmp)
- end
-
-SUBST [ X, type, ityp, n, ippsz ]
-   ; real ; 32f ; 4 ; 13 ;;
- i ; integer ; 32s ; 4 ; 11 ;;
- i2 ; integer*2 ; 16s ; 2 ; 7 ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-!  call sorted_$Xnumber(x,isize,n)
-!  if (isize == n) then
-!    return
-!  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call heap_$Xsort(x,iorder,isize)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- d ; double precision ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-  call sorted_$Xnumber(x,isize,n)
-  if (isize == n) then
-    return
-  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call $Xradix_sort(x,iorder,isize,-1)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- i8 ; integer*8 ;;
-END_TEMPLATE
-
-!---------------------- END INTEL
-IRP_ELSE
-!---------------------- NON-INTEL
-BEGIN_TEMPLATE
-
- subroutine $Xsort_noidx(x,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer, allocatable           :: iorder(:)
-  integer                        :: i
-  allocate(iorder(isize))
-  do i=1,isize
-   iorder(i)=i
-  enddo
-  call $Xsort(x,iorder,isize)
-  deallocate(iorder)
- end subroutine $Xsort_noidx
-
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
- i ; integer ;;
- i8 ; integer*8 ;;
- i2 ; integer*2 ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-!  call sorted_$Xnumber(x,isize,n)
-!  if (isize == n) then
-!    return
-!  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call heap_$Xsort(x,iorder,isize)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-  call sorted_$Xnumber(x,isize,n)
-  if (isize == n) then
-    return
-  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call $Xradix_sort(x,iorder,isize,-1)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- i ; integer ;;
- i8 ; integer*8 ;;
- i2 ; integer*2 ;;
-END_TEMPLATE
-
-IRP_ENDIF
-!---------------------- END NON-INTEL
-
-
 
 BEGIN_TEMPLATE
  subroutine $Xset_order(x,iorder,isize)
@@ -491,47 +57,6 @@ BEGIN_TEMPLATE
   deallocate(xtmp)
  end
 
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
- i ; integer ;;
- i8; integer*8 ;;
- i2; integer*2 ;;
-END_TEMPLATE
-
-
-BEGIN_TEMPLATE
- subroutine insertion_$Xsort_big (x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the insertion sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! This is a version for very large arrays where the indices need
-  ! to be in integer*8 format
-  END_DOC
-  integer*8,intent(in)           :: isize
-  $type,intent(inout)            :: x(isize)
-  integer*8,intent(inout)        :: iorder(isize)
-  $type                          :: xtmp
-  integer*8                      :: i, i0, j, jmax
-
-  do i=2_8,isize
-    xtmp = x(i)
-    i0 = iorder(i)
-    j = i-1_8
-    do while (j>0_8)
-      if (x(j)<=xtmp) exit
-      x(j+1_8) = x(j)
-      iorder(j+1_8) = iorder(j)
-      j = j-1_8
-    enddo
-    x(j+1_8) = xtmp
-    iorder(j+1_8) = i0
-  enddo
-
- end subroutine insertion_$Xsort_big
-
  subroutine $Xset_order_big(x,iorder,isize)
   implicit none
   BEGIN_DOC
@@ -565,223 +90,3 @@ SUBST [ X, type ]
 END_TEMPLATE
 
 
-BEGIN_TEMPLATE
-
-recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix)
-  implicit none
-
-  BEGIN_DOC
-  ! Sort integer array x(isize) using the radix sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! iradix should be -1 in input.
-  END_DOC
-  integer*$int_type, intent(in)  :: isize
-  integer*$int_type, intent(inout) :: iorder(isize)
-  integer*$type, intent(inout)   :: x(isize)
-  integer, intent(in)            :: iradix
-  integer                        :: iradix_new
-  integer*$type, allocatable     :: x2(:), x1(:)
-  integer*$type                  :: i4               ! data type
-  integer*$int_type, allocatable :: iorder1(:),iorder2(:)
-  integer*$int_type              :: i0, i1, i2, i3, i ! index type
-  integer*$type                  :: mask
-  integer                        :: err
-  !DIR$ ATTRIBUTES ALIGN : 128   :: iorder1,iorder2, x2, x1
-
-  if (isize < 2) then
-    return
-  endif
-
-  if (iradix == -1) then ! Sort Positive and negative
-
-    allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to allocate arrays'
-      stop
-    endif
-
-    i1=1_$int_type
-    i2=1_$int_type
-    do i=1_$int_type,isize
-      if (x(i) < 0_$type) then
-        iorder1(i1) = iorder(i)
-        x1(i1) = -x(i)
-        i1 = i1+1_$int_type
-      else
-        iorder2(i2) = iorder(i)
-        x2(i2) = x(i)
-        i2 = i2+1_$int_type
-      endif
-    enddo
-    i1=i1-1_$int_type
-    i2=i2-1_$int_type
-
-    do i=1_$int_type,i2
-      iorder(i1+i) = iorder2(i)
-      x(i1+i) = x2(i)
-    enddo
-    deallocate(x2,iorder2,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x2, iorder2'
-      stop
-    endif
-
-
-    if (i1 > 1_$int_type) then
-      call $Xradix_sort$big(x1,iorder1,i1,-2)
-      do i=1_$int_type,i1
-        x(i) = -x1(1_$int_type+i1-i)
-        iorder(i) = iorder1(1_$int_type+i1-i)
-      enddo
-    endif
-
-    if (i2>1_$int_type) then
-      call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2)
-    endif
-
-    deallocate(x1,iorder1,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x1, iorder1'
-      stop
-    endif
-    return
-
-  else if (iradix == -2) then ! Positive
-
-    ! Find most significant bit
-
-    i0 = 0_$int_type
-    i4 = maxval(x)
-
-    iradix_new = max($integer_size-1-leadz(i4),1)
-    mask = ibset(0_$type,iradix_new)
-
-    allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to allocate arrays'
-      stop
-    endif
-
-    i1=1_$int_type
-    i2=1_$int_type
-
-    do i=1_$int_type,isize
-      if (iand(mask,x(i)) == 0_$type) then
-        iorder1(i1) = iorder(i)
-        x1(i1) = x(i)
-        i1 = i1+1_$int_type
-      else
-        iorder2(i2) = iorder(i)
-        x2(i2) = x(i)
-        i2 = i2+1_$int_type
-      endif
-    enddo
-    i1=i1-1_$int_type
-    i2=i2-1_$int_type
-
-    do i=1_$int_type,i1
-      iorder(i0+i) = iorder1(i)
-      x(i0+i) = x1(i)
-    enddo
-    i0 = i0+i1
-    i3 = i0
-    deallocate(x1,iorder1,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x1, iorder1'
-      stop
-    endif
-
-
-    do i=1_$int_type,i2
-      iorder(i0+i) = iorder2(i)
-      x(i0+i) = x2(i)
-    enddo
-    i0 = i0+i2
-    deallocate(x2,iorder2,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x2, iorder2'
-      stop
-    endif
-
-
-    if (i3>1_$int_type) then
-      call $Xradix_sort$big(x,iorder,i3,iradix_new-1)
-    endif
-
-    if (isize-i3>1_$int_type) then
-      call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1)
-    endif
-
-    return
-  endif
-
-  ASSERT (iradix >= 0)
-
-  if (isize < 48) then
-    call insertion_$Xsort$big(x,iorder,isize)
-    return
-  endif
-
-
-  allocate(x2(isize),iorder2(isize),stat=err)
-  if (err /= 0) then
-    print *,  irp_here, ': Unable to allocate arrays x1, iorder1'
-    stop
-  endif
-
-
-  mask = ibset(0_$type,iradix)
-  i0=1_$int_type
-  i1=1_$int_type
-
-  do i=1_$int_type,isize
-    if (iand(mask,x(i)) == 0_$type) then
-      iorder(i0) = iorder(i)
-      x(i0) = x(i)
-      i0 = i0+1_$int_type
-    else
-      iorder2(i1) = iorder(i)
-      x2(i1) = x(i)
-      i1 = i1+1_$int_type
-    endif
-  enddo
-  i0=i0-1_$int_type
-  i1=i1-1_$int_type
-
-  do i=1_$int_type,i1
-    iorder(i0+i) = iorder2(i)
-    x(i0+i) = x2(i)
-  enddo
-
-  deallocate(x2,iorder2,stat=err)
-  if (err /= 0) then
-    print *,  irp_here, ': Unable to allocate arrays x2, iorder2'
-    stop
-  endif
-
-
-  if (iradix == 0) then
-    return
-  endif
-
-
-  if (i1>1_$int_type) then
-    call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1)
-  endif
-  if (i0>1) then
-    call $Xradix_sort$big(x,iorder,i0,iradix-1)
-  endif
-
- end
-
-SUBST [ X, type, integer_size, is_big, big, int_type ]
- i  ; 4 ; 32 ; .False. ;      ; 4 ;;
- i8 ; 8 ; 64 ; .False. ;      ; 4 ;;
- i2 ; 2 ; 16 ; .False. ;      ; 4 ;;
- i  ; 4 ; 32 ; .True.  ; _big ; 8 ;;
- i8 ; 8 ; 64 ; .True.  ; _big ; 8 ;;
-END_TEMPLATE
-
-
-

From e9e829daa3b90301effb084159ef5f8703b2f8e0 Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Thu, 8 Dec 2022 15:51:44 +0100
Subject: [PATCH 11/35] typo doc rohf

---
 src/scf_utils/fock_matrix.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scf_utils/fock_matrix.irp.f b/src/scf_utils/fock_matrix.irp.f
index 9a95caa1..539f1eb3 100644
--- a/src/scf_utils/fock_matrix.irp.f
+++ b/src/scf_utils/fock_matrix.irp.f
@@ -15,7 +15,7 @@
    ! 
    ! Rcc = Acc Fcc^a + Bcc Fcc^b
    ! Roo = Aoo Foo^a + Boo Foo^b
-   ! Rcc = Avv Fvv^a + Bvv Fvv^b
+   ! Rvv = Avv Fvv^a + Bvv Fvv^b
    ! Fcv = (F^a + F^b)/2
    ! 
    ! F^a: Fock matrix alpha (MO), F^b: Fock matrix beta (MO)

From 5dcf28bb3a377f16667bdd0aa8d48203fac25db3 Mon Sep 17 00:00:00 2001
From: Peter Reinhardt <peter.reinhardt@upmc.fr>
Date: Fri, 9 Dec 2022 20:06:27 +0100
Subject: [PATCH 12/35] Added ordering of psi_det_sorted_bit

---
 src/determinants/determinants.irp.f | 50 +++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/src/determinants/determinants.irp.f b/src/determinants/determinants.irp.f
index eceab58c..cb6c14a2 100644
--- a/src/determinants/determinants.irp.f
+++ b/src/determinants/determinants.irp.f
@@ -329,6 +329,7 @@ END_PROVIDER
 
  BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_bit, (N_int,2,psi_det_size) ]
 &BEGIN_PROVIDER [ double precision, psi_coef_sorted_bit, (psi_det_size,N_states) ]
+&BEGIN_PROVIDER [ integer, psi_det_sorted_bit_order, (psi_det_size) ]
    implicit none
    BEGIN_DOC
    ! Determinants on which we apply $\langle i|H|psi \rangle$ for perturbation.
@@ -337,8 +338,8 @@ END_PROVIDER
    ! function.
    END_DOC
 
-   call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, size(psi_coef,1),       &
-       psi_det_sorted_bit, psi_coef_sorted_bit, N_states)
+   call sort_dets_by_det_search_key_ordered(N_det, psi_det, psi_coef, size(psi_coef,1),       &
+       psi_det_sorted_bit, psi_coef_sorted_bit, N_states, psi_det_sorted_bit_order)
 
 END_PROVIDER
 
@@ -1005,3 +1006,48 @@ BEGIN_PROVIDER [ double precision, psi_det_Hii, (N_det) ]
 END_PROVIDER
 
 
+subroutine sort_dets_by_det_search_key_ordered(Ndet, det_in, coef_in, sze, det_out, coef_out, N_st, iorder)
+   use bitmasks
+   implicit none
+   integer, intent(in)            :: Ndet, N_st, sze
+   integer(bit_kind), intent(in)  :: det_in  (N_int,2,sze)
+   double precision , intent(in)  :: coef_in(sze,N_st)
+   integer(bit_kind), intent(out) :: det_out (N_int,2,sze)
+   double precision , intent(out) :: coef_out(sze,N_st)
+   integer, intent(out)           :: iorder(sze)
+   BEGIN_DOC
+   ! Determinants are sorted according to their :c:func:`det_search_key`.
+   ! Useful to accelerate the search of a random determinant in the wave
+   ! function.
+   !
+   ! /!\ The first dimension of coef_out and coef_in need to be psi_det_size
+   !
+   END_DOC
+   integer                        :: i,j,k
+   integer*8, allocatable         :: bit_tmp(:)
+   integer*8, external            :: det_search_key
+
+   allocate ( bit_tmp(Ndet) )
+
+   do i=1,Ndet
+     iorder(i) = i
+     !$DIR FORCEINLINE
+     bit_tmp(i) = det_search_key(det_in(1,1,i),N_int)
+   enddo
+   call i8sort(bit_tmp,iorder,Ndet)
+   !DIR$ IVDEP
+   do i=1,Ndet
+     do j=1,N_int
+       det_out(j,1,i) = det_in(j,1,iorder(i))
+       det_out(j,2,i) = det_in(j,2,iorder(i))
+     enddo
+     do k=1,N_st
+       coef_out(i,k) = coef_in(iorder(i),k)
+     enddo
+   enddo
+
+   deallocate(bit_tmp)
+
+end
+
+

From 4b3b6300ef8f76e82c4ddb83669665aa945d1ea2 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Sat, 10 Dec 2022 11:36:12 +0100
Subject: [PATCH 13/35] Working on obtain I for alpha.

---
 src/csf/obtain_I_foralpha.irp.f | 450 +++++++++-----------------------
 1 file changed, 119 insertions(+), 331 deletions(-)

diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f
index 5fd630fc..a5a4164d 100644
--- a/src/csf/obtain_I_foralpha.irp.f
+++ b/src/csf/obtain_I_foralpha.irp.f
@@ -315,86 +315,35 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
            !print *,"obt SOMO -> VMO"
            extyp = 3
            if(N_int .eq. 1) then
-           IJsomo = IEOR(Isomo, Jsomo)
-!IRP_IF WITHOUT_TRAILZ
-!           p = (popcnt(ieor( IAND(Isomo,IJsomo) , IAND(Isomo,IJsomo) -1))-1) + 1
-!IRP_ELSE
-           p = TRAILZ(IAND(Isomo,IJsomo)) + 1
-!IRP_ENDIF
-           IJsomo = IBCLR(IJsomo,p-1)
-!IRP_IF WITHOUT_TRAILZ
-!           q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1
-!IRP_ELSE
-           q = TRAILZ(IJsomo) + 1
-!IRP_ENDIF
-           !print *," p=",p," q=",q
-           !call get_single_excitation_cfg(Jcfg, Icfg, p, q, N_int)
-         else
-  exc = 0
-  do ii = 1,2
-    ishift = 1-bit_kind_size
-    do l=1,N_int
-      ishift = ishift + bit_kind_size
-      if (Jcfg(l,ii) == Icfg(l,ii)) then
-        cycle
-      endif
-      tmp = xor( Jcfg(l,ii), Icfg(l,ii) )
-      particle = iand(tmp, Icfg(l,ii))
-      hole     = iand(tmp, Jcfg(l,ii))
-      if (particle /= 0_bit_kind) then
-        tz = trailz(particle)
-        exc(0,2,ii) = 1
-        exc(1,2,ii) = tz+ishift
-        !print *,"part ",tz+ishift, " ii=",ii, exc(1,2,2)
-      endif
-      if (hole /= 0_bit_kind) then
-        tz = trailz(hole)
-        exc(0,1,ii) = 1
-        exc(1,1,ii) = tz+ishift
-        !print *,"hole ",tz+ishift, " ii=",ii, exc(1,1,2)
-      endif
-
-      if ( iand(exc(0,1,ii),exc(0,2,ii)) /= 1) then  ! exc(0,1,ii)/=1 and exc(0,2,ii) /= 1
-        cycle
-      endif
-
-      high = max(exc(1,1,ii), exc(1,2,ii))-1
-      low  = min(exc(1,1,ii), exc(1,2,ii))
-
-      ASSERT (low >= 0)
-      ASSERT (high > 0)
-
-      k = shiftr(high,bit_kind_shift)+1
-      j = shiftr(low,bit_kind_shift)+1
-      m = iand(high,bit_kind_size-1)
-      n = iand(low,bit_kind_size-1)
-
-      if (j==k) then
-        nperm = nperm + popcnt(iand(Jcfg(j,ii),           &
-            iand( shiftl(1_bit_kind,m)-1_bit_kind,            &
-                  not(shiftl(1_bit_kind,n))+1_bit_kind)) )
-      else
-        nperm = nperm + popcnt(                                    &
-             iand(Jcfg(j,ii),                                   &
-                  iand(not(0_bit_kind),                            &
-                       (not(shiftl(1_bit_kind,n)) + 1_bit_kind) ))) &
-             + popcnt(iand(Jcfg(k,ii),                          &
-                           (shiftl(1_bit_kind,m) - 1_bit_kind ) ))
-
-        do iii=j+1,k-1
-          nperm = nperm + popcnt(Jcfg(iii,ii))
-        end do
-
-      endif
-
-      ! Set p and q
-      q = max(exc(1,1,1),exc(1,1,2))
-      p = max(exc(1,2,1),exc(1,2,2))
-      exit
-
-    enddo
-  enddo
-endif
+             IJsomo = IEOR(Isomo, Jsomo)
+             p = TRAILZ(IAND(Isomo,IJsomo)) + 1
+             IJsomo = IBCLR(IJsomo,p-1)
+             q = TRAILZ(IJsomo) + 1
+             !print *," p=",p," q=",q
+             !call get_single_excitation_cfg(Jcfg, Icfg, p, q, N_int)
+           else
+             ! Find p
+             do ii=1,N_int
+               Isomo = Ialpha(ii,1)
+               Jsomo = psi_configuration(ii,1,i)
+               IJsomo = IEOR(Isomo, Jsomo)
+               if(popcnt(IAND(Isomo,IJsomo)) > 0)then
+                 p = TRAILZ(IAND(Isomo,IJsomo)) + 1 + ii * bit_kind_size
+                 EXIT
+               endif
+             end do
+             ! Find q
+             do ii=1,N_int
+               Isomo = Ialpha(ii,1)
+               Jsomo = psi_configuration(ii,1,i)
+               IJsomo = IEOR(Isomo, Jsomo)
+               IJsomo = IBCLR(IJsomo,p-1)
+               if(popcnt(IJsomo) > 0)then
+                 q = TRAILZ(IJsomo) + 1 + ii * bit_kind_size
+                 EXIT
+               endif
+             enddo
+           endif
            !assert ( p == pp)
            !assert ( q == qq)
            !print *," --- p=",p," q=",q
@@ -409,88 +358,35 @@ endif
               !print *,"obt DOMO -> VMO"
               extyp = 2
               if(N_int.eq.1)then
-!IRP_IF WITHOUT_TRAILZ
-!              p = (popcnt(ieor( IEOR(Idomo,Jdomo),IEOR(Idomo,Jdomo) -1))-1) + 1
-!IRP_ELSE
-              p = TRAILZ(IEOR(Idomo,Jdomo)) + 1
-!IRP_ENDIF
-              Isomo = IEOR(Isomo, Jsomo)
-              Isomo = IBCLR(Isomo,p-1)
-!IRP_IF WITHOUT_TRAILZ
-!              q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
-!IRP_ELSE
-              q = TRAILZ(Isomo) + 1
-!IRP_ENDIF
-  else
-    exc=0
-  exc(0,1,1) = 0
-  exc(0,2,1) = 0
-  exc(0,1,2) = 0
-  exc(0,2,2) = 0
-  do ii = 1,2
-    ishift = 1-bit_kind_size
-    do l=1,N_int
-      ishift = ishift + bit_kind_size
-      if (Jcfg(l,ii) == Icfg(l,ii)) then
-        cycle
-      endif
-      tmp = xor( Jcfg(l,ii), Icfg(l,ii) )
-      particle = iand(tmp, Icfg(l,ii))
-      hole     = iand(tmp, Jcfg(l,ii))
-      if (particle /= 0_bit_kind) then
-        tz = trailz(particle)
-        exc(0,2,ii) = 1
-        exc(1,2,ii) = tz+ishift
-        !print *,"part ",tz+ishift, " ii=",ii
-      endif
-      if (hole /= 0_bit_kind) then
-        tz = trailz(hole)
-        exc(0,1,ii) = 1
-        exc(1,1,ii) = tz+ishift
-        !print *,"hole ",tz+ishift, " ii=",ii
-      endif
+                p = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+                Isomo = IEOR(Isomo, Jsomo)
+                Isomo = IBCLR(Isomo,p-1)
+                q = TRAILZ(Isomo) + 1
+              else
 
-      if ( iand(exc(0,1,ii),exc(0,2,ii)) /= 1) then  ! exc(0,1,ii)/=1 and exc(0,2,ii) /= 1
-        cycle
-      endif
-
-      high = max(exc(1,1,ii), exc(1,2,ii))-1
-      low  = min(exc(1,1,ii), exc(1,2,ii))
-
-      ASSERT (low >= 0)
-      ASSERT (high > 0)
-
-      k = shiftr(high,bit_kind_shift)+1
-      j = shiftr(low,bit_kind_shift)+1
-      m = iand(high,bit_kind_size-1)
-      n = iand(low,bit_kind_size-1)
-
-      if (j==k) then
-        nperm = nperm + popcnt(iand(Jcfg(j,ii),           &
-            iand( shiftl(1_bit_kind,m)-1_bit_kind,            &
-                  not(shiftl(1_bit_kind,n))+1_bit_kind)) )
-      else
-        nperm = nperm + popcnt(                                    &
-             iand(Jcfg(j,ii),                                   &
-                  iand(not(0_bit_kind),                            &
-                       (not(shiftl(1_bit_kind,n)) + 1_bit_kind) ))) &
-             + popcnt(iand(Jcfg(k,ii),                          &
-                           (shiftl(1_bit_kind,m) - 1_bit_kind ) ))
-
-        do iii=j+1,k-1
-          nperm = nperm + popcnt(Jcfg(iii,ii))
-        end do
-
-      endif
-
-      ! Set p and q
-      q = max(exc(1,1,1),exc(1,1,2))
-      p = max(exc(1,2,1),exc(1,2,2))
-      exit
-
-    enddo
-  enddo
-endif
+                ! Find p
+                do ii=1,N_int
+                  Isomo = Ialpha(ii,1)
+                  Jsomo = psi_configuration(ii,1,i)
+                  Idomo = Ialpha(ii,2)
+                  Jdomo = psi_configuration(ii,2,i)
+                  if(popcnt(IEOR(Idomo,Jdomo)) > 0)then
+                    p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 + ii * bit_kind_size
+                    EXIT
+                  endif
+                end do
+                ! Find q
+                do ii=1,N_int
+                  Isomo = Ialpha(ii,1)
+                  Jsomo = psi_configuration(ii,1,i)
+                  Isomo = IEOR(Isomo, Jsomo)
+                  Isomo = IBCLR(Isomo,p-1)
+                  if(popcnt(Isomo) > 0)then
+                    q = TRAILZ(Isomo) + 1 + ii * bit_kind_size
+                    EXIT
+                  endif
+                end do
+              endif
            !assert ( p == pp)
            !assert ( q == qq)
            else
@@ -498,183 +394,75 @@ endif
               !print *,"obt SOMO -> SOMO"
               extyp = 1
               if(N_int.eq.1)then
-!IRP_IF WITHOUT_TRAILZ
-!              q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1
-!IRP_ELSE
-              q = TRAILZ(IEOR(Idomo,Jdomo)) + 1
-!IRP_ENDIF
-              Isomo = IEOR(Isomo, Jsomo)
-              Isomo = IBCLR(Isomo,q-1)
-!IRP_IF WITHOUT_TRAILZ
-!              p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
-!IRP_ELSE
-              p = TRAILZ(Isomo) + 1
-!IRP_ENDIF
-              ! Check for Minimal alpha electrons (MS)
-              !if(POPCNT(Isomo).lt.MS)then
-              !  cycle
-              !endif
-  else
-  exc=0
-  exc(0,1,1) = 0
-  exc(0,2,1) = 0
-  exc(0,1,2) = 0
-  exc(0,2,2) = 0
-  do ii = 1,2
-    ishift = 1-bit_kind_size
-    do l=1,N_int
-      ishift = ishift + bit_kind_size
-      if (Jcfg(l,ii) == Icfg(l,ii)) then
-        cycle
-      endif
-      tmp = xor( Jcfg(l,ii), Icfg(l,ii) )
-      particle = iand(tmp, Icfg(l,ii))
-      hole     = iand(tmp, Jcfg(l,ii))
-      if (particle /= 0_bit_kind) then
-        tz = trailz(particle)
-        exc(0,2,ii) = 1
-        exc(1,2,ii) = tz+ishift
-        !print *,"part ",tz+ishift, " ii=",ii
-      endif
-      if (hole /= 0_bit_kind) then
-        tz = trailz(hole)
-        exc(0,1,ii) = 1
-        exc(1,1,ii) = tz+ishift
-        !print *,"hole ",tz+ishift, " ii=",ii
-      endif
-
-      if ( iand(exc(0,1,ii),exc(0,2,ii)) /= 1) then  ! exc(0,1,ii)/=1 and exc(0,2,ii) /= 1
-        cycle
-      endif
-
-      high = max(exc(1,1,ii), exc(1,2,ii))-1
-      low  = min(exc(1,1,ii), exc(1,2,ii))
-
-      ASSERT (low >= 0)
-      ASSERT (high > 0)
-
-      k = shiftr(high,bit_kind_shift)+1
-      j = shiftr(low,bit_kind_shift)+1
-      m = iand(high,bit_kind_size-1)
-      n = iand(low,bit_kind_size-1)
-
-      if (j==k) then
-        nperm = nperm + popcnt(iand(Jcfg(j,ii),           &
-            iand( shiftl(1_bit_kind,m)-1_bit_kind,            &
-                  not(shiftl(1_bit_kind,n))+1_bit_kind)) )
-      else
-        nperm = nperm + popcnt(                                    &
-             iand(Jcfg(j,ii),                                   &
-                  iand(not(0_bit_kind),                            &
-                       (not(shiftl(1_bit_kind,n)) + 1_bit_kind) ))) &
-             + popcnt(iand(Jcfg(k,ii),                          &
-                           (shiftl(1_bit_kind,m) - 1_bit_kind ) ))
-
-        do iii=j+1,k-1
-          nperm = nperm + popcnt(Jcfg(iii,ii))
-        end do
-
-      endif
-
-      ! Set p and q
-      q = max(exc(1,1,1),exc(1,1,2))
-      p = max(exc(1,2,1),exc(1,2,2))
-      exit
-
-    enddo
-  enddo
-endif
+                q = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+                Isomo = IEOR(Isomo, Jsomo)
+                Isomo = IBCLR(Isomo,q-1)
+                p = TRAILZ(Isomo) + 1
+                ! Check for Minimal alpha electrons (MS)
+                !if(POPCNT(Isomo).lt.MS)then
+                !  cycle
+                !endif
+              else
+                ! Find p
+                do ii=1,N_int
+                  Isomo = Ialpha(ii,1)
+                  Jsomo = psi_configuration(ii,1,i)
+                  Idomo = Ialpha(ii,2)
+                  Jdomo = psi_configuration(ii,2,i)
+                  if(popcnt(IEOR(Idomo,Jdomo)) > 0)then
+                    q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 + ii * bit_kind_size
+                    EXIT
+                  endif
+                enddo
+                ! Find q
+                do ii=1,N_int
+                  Isomo = Ialpha(ii,1)
+                  Jsomo = psi_configuration(ii,1,i)
+                  Isomo = IEOR(Isomo, Jsomo)
+                  Isomo = IBCLR(Isomo,q-1)
+                  if(popcnt(Isomo) > 0)then
+                    p = TRAILZ(Isomo) + 1 + ii * bit_kind_size
+                    EXIT
+                  endif
+                enddo
+              endif
            !assert ( p == pp)
            !assert ( q == qq)
-           end if
+           endif
         case (2)
            ! DOMO -> SOMO
            !print *,"obt DOMO -> SOMO"
            extyp = 4
            if(N_int.eq.1)then
-           IJsomo = IEOR(Isomo, Jsomo)
-!IRP_IF WITHOUT_TRAILZ
-!           p = (popcnt(ieor( IAND(Jsomo,IJsomo), IAND(Jsomo,IJsomo)-1))-1) + 1
-!IRP_ELSE
-           p = TRAILZ(IAND(Jsomo,IJsomo)) + 1
-!IRP_ENDIF
-           IJsomo = IBCLR(IJsomo,p-1)
-!IRP_IF WITHOUT_TRAILZ
-!           q = (popcnt(ieor( IJsomo , IJsomo -1))-1) + 1
-!IRP_ELSE
-           q = TRAILZ(IJsomo) + 1
-!IRP_ENDIF
-
-  else
-  exc=0
-  exc(0,1,1) = 0
-  exc(0,2,1) = 0
-  exc(0,1,2) = 0
-  exc(0,2,2) = 0
-  do ii = 1,2
-    ishift = 1-bit_kind_size
-    do l=1,N_int
-      ishift = ishift + bit_kind_size
-      if (Jcfg(l,ii) == Icfg(l,ii)) then
-        cycle
-      endif
-      tmp = xor( Jcfg(l,ii), Icfg(l,ii) )
-      particle = iand(tmp, Icfg(l,ii))
-      hole     = iand(tmp, Jcfg(l,ii))
-      if (particle /= 0_bit_kind) then
-        tz = trailz(particle)
-        exc(0,2,ii) = 1
-        exc(1,2,ii) = tz+ishift
-        !print *,"part ",tz+ishift, " ii=",ii
-      endif
-      if (hole /= 0_bit_kind) then
-        tz = trailz(hole)
-        exc(0,1,ii) = 1
-        exc(1,1,ii) = tz+ishift
-        !print *,"hole ",tz+ishift, " ii=",ii
-      endif
-
-      if ( iand(exc(0,1,ii),exc(0,2,ii)) /= 1) then  ! exc(0,1,ii)/=1 and exc(0,2,ii) /= 1
-        cycle
-      endif
-
-      high = max(exc(1,1,ii), exc(1,2,ii))-1
-      low  = min(exc(1,1,ii), exc(1,2,ii))
-
-      ASSERT (low >= 0)
-      ASSERT (high > 0)
-
-      k = shiftr(high,bit_kind_shift)+1
-      j = shiftr(low,bit_kind_shift)+1
-      m = iand(high,bit_kind_size-1)
-      n = iand(low,bit_kind_size-1)
-
-      if (j==k) then
-        nperm = nperm + popcnt(iand(Jcfg(j,ii),           &
-            iand( shiftl(1_bit_kind,m)-1_bit_kind,            &
-                  not(shiftl(1_bit_kind,n))+1_bit_kind)) )
-      else
-        nperm = nperm + popcnt(                                    &
-             iand(Jcfg(j,ii),                                   &
-                  iand(not(0_bit_kind),                            &
-                       (not(shiftl(1_bit_kind,n)) + 1_bit_kind) ))) &
-             + popcnt(iand(Jcfg(k,ii),                          &
-                           (shiftl(1_bit_kind,m) - 1_bit_kind ) ))
-
-        do iii=j+1,k-1
-          nperm = nperm + popcnt(Jcfg(iii,ii))
-        end do
-
-      endif
-
-      ! Set p and q
-      q = max(exc(1,1,1),exc(1,1,2))
-      p = max(exc(1,2,1),exc(1,2,2))
-      exit
-
-    enddo
-  enddo
-endif
+             IJsomo = IEOR(Isomo, Jsomo)
+             p = TRAILZ(IAND(Jsomo,IJsomo)) + 1
+             IJsomo = IBCLR(IJsomo,p-1)
+             q = TRAILZ(IJsomo) + 1
+           else
+             ! Find p
+             do ii=1,N_int
+               Isomo = Ialpha(ii,1)
+               Jsomo = psi_configuration(ii,1,i)
+               Idomo = Ialpha(ii,2)
+               Jdomo = psi_configuration(ii,2,i)
+               IJsomo = IEOR(Isomo, Jsomo)
+               if(popcnt(IAND(Jsomo,IJsomo)) > 0)then
+                 p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 + ii * bit_kind_size
+                 EXIT
+               endif
+             enddo
+             ! Find q
+             do ii=1,N_int
+               Isomo = Ialpha(ii,1)
+               Jsomo = psi_configuration(ii,1,i)
+               IJsomo = IEOR(Isomo, Jsomo)
+               IJsomo = IBCLR(IJsomo,p-1)
+               if(popcnt(IJsomo) > 0)then
+                 q = TRAILZ(IJsomo) + 1 + ii * bit_kind_size
+                 EXIT
+               endif
+             enddo
+           endif
            !assert ( p == pp)
            !assert ( q == qq)
         case default

From f291078945a5c849f389fe6eea4bb0dae198dd9f Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Sat, 10 Dec 2022 17:11:19 +0100
Subject: [PATCH 14/35] Fixed bug in config CI.

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 7 ++++---
 src/csf/obtain_I_foralpha.irp.f              | 4 ++++
 src/csf/sigma_vector.irp.f                   | 6 +++++-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 2bad88c0..8fb8383e 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -763,7 +763,7 @@ use bitmasks
       endif
     end do
 
-    print *,tableUniqueAlphas(:,:)
+    !print *,tableUniqueAlphas(:,:)
 
     ! prune list of alphas
     do i=1, N_int
@@ -772,7 +772,7 @@ use bitmasks
       Jsomo(i) = Isomo(i)
       Jdomo(i) = Idomo(i)
     enddo
-    print *, " Isomo=",Isomo(1), " Idomo=", Idomo(1)
+    !print *, " Isomo=",Isomo(1), " Idomo=", Idomo(1)
 
     NalphaIcfg = 0
     do i = 1, nholes
@@ -816,7 +816,7 @@ use bitmasks
           Nsomo_J = nelall
 
           if(Nsomo_J .ge. NSOMOMin) then
-            print *," Idx = ",idxI, "p = ",pp, " q = ",qq," Jsomo=",Jsomo(1), " Jdomo=",IOR(Jdomo(1),ISHFT(1_8,n_core_orb)-1)
+            !print *," Idx = ",idxI, "p = ",pp, " q = ",qq," Jsomo=",Jsomo(1), " Jdomo=",IOR(Jdomo(1),ISHFT(1_8,n_core_orb)-1)
             NalphaIcfg += 1
             alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
             if(n_core_orb .le. 63)then
@@ -861,6 +861,7 @@ use bitmasks
     kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
     ndiffDOMO = 0
     do k = kstart, idxI-1
+      ndiffSOMO = 0
       do ii=1,N_int
         diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k)))
         ndiffSOMO += POPCNT(diffSOMO(ii))
diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f
index a5a4164d..8606f556 100644
--- a/src/csf/obtain_I_foralpha.irp.f
+++ b/src/csf/obtain_I_foralpha.irp.f
@@ -482,6 +482,9 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
         diagfactors(nconnectedI) = 1.0d0
      else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
         ! find out all pq holes possible
+        !print *,"I = ",i
+        !print *,"I somo= ",psi_configuration(1,1,i), " domo=", psi_configuration(1,2,i)
+        !print *,"alp somo= ",Ialpha(1,1), " domo=", Ialpha(1,2)
         nholes = 0
         ! holes in SOMO
         !Isomo = psi_configuration(1,1,i)
@@ -549,6 +552,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
               excitationTypes(nconnectedI) = extyp
               diagfactors(nconnectedI) = 2.0d0
            endif
+           !print *,excitationIds(1,nconnectedI), excitationIds(2,nconnectedI)
         enddo
      endif
   end do
diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f
index 207de336..76f9bfc3 100644
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@@ -1654,7 +1654,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
      call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ)
 
      ! TODO : remove doubly excited for return
-     !print *,"I=",i," isomo=",psi_configuration(1,1,i)," idomo=",psi_configuration(1,2,i), " psiout=",psi_out(1,5)
+     !print *,"I=",i," isomo=",psi_configuration(1,1,i)," idomo=",psi_configuration(1,2,i), " psiout=",psi_out(1,5), "Nalphas_Icfg=",Nalphas_Icfg
      do k = 1,Nalphas_Icfg
         ! Now generate all singly excited with respect to a given alpha CFG
 
@@ -1807,6 +1807,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
   !$OMP END DO
 
   !$OMP END PARALLEL
+  !print *," ----- "
+  !do i=1,sze
+  !  print *,"i=",i," psi_out(i)=",psi_out(1,i)
+  !end do
   call omp_set_max_active_levels(4)
 
   deallocate(diag_energies)

From 5622b9790da599e15880d7aa0362db13b49e8ed3 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Thu, 15 Dec 2022 17:15:30 +0100
Subject: [PATCH 15/35] Fixed some bugs. Diagonal energy is OK. Some bugs still
 present.

---
 src/csf/configuration_CI_sigma_helpers.irp.f |  55 ++--
 src/csf/conversion.irp.f                     |   1 +
 src/csf/obtain_I_foralpha.irp.f              |  72 +++--
 src/csf/sigma_vector.irp.f                   | 282 +++++++++++++++----
 src/davidson/diagonalization_hcfg.irp.f      |  35 ++-
 5 files changed, 329 insertions(+), 116 deletions(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 8fb8383e..19533bd5 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -524,7 +524,7 @@ use bitmasks
 !!!
 !!!END_PROVIDER
 
- BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*(mo_num))]
+ BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*12)]
 &BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ]
   implicit none
   !use bitmasks
@@ -549,7 +549,8 @@ use bitmasks
   integer(bit_kind)              :: Isomo(N_int), Isomop(N_int), Isomoq(N_int) 
   integer(bit_kind)              :: Jdomo(N_int), Jdomop(N_int), Jdomoq(N_int)
   integer(bit_kind)              :: Jsomo(N_int), Jsomop(N_int), Jsomoq(N_int)
-  integer(bit_kind)              :: diffDOMO(N_int), xordiffSOMODOMO(N_int), diffSOMO(N_int)
+  !integer(bit_kind)              :: diffDOMO(N_int), xordiffSOMODOMO(N_int), diffSOMO(N_int)
+  integer(bit_kind)              :: diffDOMO, xordiffSOMODOMO, diffSOMO
   integer                        :: ndiffSOMO
   integer                        :: ndiffDOMO
   integer                        :: nxordiffSOMODOMO
@@ -674,13 +675,15 @@ use bitmasks
           Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
         endif
 
+        Nsomo_J=0
         do ii=1, N_int
           Jcfg(ii,1) = Jsomo(ii)
           Jcfg(ii,2) = Jdomo(ii)
+          Nsomo_J += POPCNT(Jsomo(ii))
         enddo
 
-        call bitstring_to_list(Jcfg,listall,nelall,N_int)
-        Nsomo_J = nelall
+        !call bitstring_to_list(Jcfg(1,1),listall,nelall,N_int)
+        !Nsomo_J = nelall
 
         ! Check for Minimal alpha electrons (MS)
         if(Nsomo_J.lt.MS)then
@@ -705,15 +708,13 @@ use bitmasks
           ndiffDOMO = 0
           nxordiffSOMODOMO = 0
           do ii = 1, N_int
-            Jsomo = Jcfg(ii,1)
-            Jdomo = Jcfg(ii,2)
-            diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k)))
-            ndiffSOMO += POPCNT(diffSOMO(ii))
-            diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k)))
+            diffSOMO = IEOR(Jcfg(ii,1),iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k)))
+            ndiffSOMO += POPCNT(diffSOMO)
+            diffDOMO = IEOR(Jcfg(ii,2),iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k)))
             xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-            ndiffDOMO += POPCNT(diffDOMO(ii))
-            nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO(ii))
-            nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+            ndiffDOMO += POPCNT(diffDOMO)
+            nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
+            nxordiffSOMODOMO += POPCNT(diffSOMO) + POPCNT(diffDOMO)
           end do
 
           if((ndiffSOMO .ne. 0) .and. (ndiffSOMO .ne. 2)) cycle
@@ -818,6 +819,7 @@ use bitmasks
           if(Nsomo_J .ge. NSOMOMin) then
             !print *," Idx = ",idxI, "p = ",pp, " q = ",qq," Jsomo=",Jsomo(1), " Jdomo=",IOR(Jdomo(1),ISHFT(1_8,n_core_orb)-1)
             NalphaIcfg += 1
+            !print *," Idx = ",idxI, " Nalpha=",NalphaIcfg
             alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
             if(n_core_orb .le. 63)then
               alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1)
@@ -864,7 +866,7 @@ use bitmasks
       ndiffSOMO = 0
       do ii=1,N_int
         diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k)))
-        ndiffSOMO += POPCNT(diffSOMO(ii))
+        ndiffSOMO += POPCNT(diffSOMO)
       end do
       ! ndiffSOMO cannot be 0 (I /= k)
       ! if ndiffSOMO /= 2 then it has to be greater than 2 and hense
@@ -875,8 +877,8 @@ use bitmasks
       do ii=1,N_int
         diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,1),psi_configuration(ii,2,k)))
         xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-        ndiffDOMO += POPCNT(diffDOMO(ii))
-        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO(ii))
+        ndiffDOMO += POPCNT(diffDOMO)
+        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
       end do
       if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then
         ppExistsQ = .TRUE.
@@ -926,7 +928,8 @@ END_PROVIDER
   integer(bit_kind)                  :: Isomo(N_int), Isomop(N_int), Isomoq(N_int) 
   integer(bit_kind)                  :: Jdomo(N_int), Jdomop(N_int), Jdomoq(N_int)
   integer(bit_kind)                  :: Jsomo(N_int), Jsomop(N_int), Jsomoq(N_int)
-  integer(bit_kind)                  :: diffDOMO(N_int), xordiffSOMODOMO(N_int), diffSOMO(N_int)
+  !integer(bit_kind)                  :: diffDOMO(N_int), xordiffSOMODOMO(N_int), diffSOMO(N_int)
+  integer(bit_kind)                  :: diffDOMO, xordiffSOMODOMO, diffSOMO
   integer                            :: ndiffSOMO
   integer                            :: ndiffDOMO
   integer                            :: nxordiffSOMODOMO
@@ -1084,15 +1087,13 @@ END_PROVIDER
           ndiffDOMO = 0
           nxordiffSOMODOMO = 0
           do ii = 1, N_int
-            Jsomo = Jcfg(ii,1)
-            Jdomo = Jcfg(ii,2)
-            diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k)))
-            ndiffSOMO += POPCNT(diffSOMO(ii))
-            diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k)))
+            diffSOMO = IEOR(Jcfg(ii,1),iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k)))
+            ndiffSOMO += POPCNT(diffSOMO)
+            diffDOMO = IEOR(Jcfg(ii,2),iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k)))
             xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-            ndiffDOMO += POPCNT(diffDOMO(ii))
-            nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO(ii))
-            nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+            ndiffDOMO += POPCNT(diffDOMO)
+            nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
+            nxordiffSOMODOMO += POPCNT(diffSOMO) + POPCNT(diffDOMO)
           end do
            if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
            !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then
@@ -1211,7 +1212,7 @@ END_PROVIDER
   do k = 1, idxI-1
       do ii=1,N_int
         diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k)))
-        ndiffSOMO += POPCNT(diffSOMO(ii))
+        ndiffSOMO += POPCNT(diffSOMO)
       end do
       ! ndiffSOMO cannot be 0 (I /= k)
       ! if ndiffSOMO /= 2 then it has to be greater than 2 and hense
@@ -1222,8 +1223,8 @@ END_PROVIDER
       do ii=1,N_int
         diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,1),psi_configuration(ii,2,k)))
         xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-        ndiffDOMO += POPCNT(diffDOMO(ii))
-        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO(ii))
+        ndiffDOMO += POPCNT(diffDOMO)
+        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
       end do
      if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
         ppExistsQ = .TRUE.
diff --git a/src/csf/conversion.irp.f b/src/csf/conversion.irp.f
index 494c3bfa..7c6c8363 100644
--- a/src/csf/conversion.irp.f
+++ b/src/csf/conversion.irp.f
@@ -114,6 +114,7 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
   integer                        :: idx
   integer MS
   MS = elec_alpha_num-elec_beta_num
+  print *,"size=",size(tmp_psi_coef_det,1)," ",size(tmp_psi_coef_det,2)
 
   countcsf = 0
 
diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f
index 8606f556..1d4e81fc 100644
--- a/src/csf/obtain_I_foralpha.irp.f
+++ b/src/csf/obtain_I_foralpha.irp.f
@@ -102,7 +102,7 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI,
        xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
        ndiffDOMO += POPCNT(diffDOMO)
        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
-       nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+       nxordiffSOMODOMO += POPCNT(diffSOMO) + POPCNT(diffDOMO)
      end do
 
      if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
@@ -243,13 +243,16 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
   integer                        :: listholes(mo_num)
   integer                        :: holetype(mo_num)
   integer                        :: end_index, ishift
-  integer                        :: Nsomo_alpha, pp,qq, nperm
+  integer                        :: Nsomo_alpha, pp,qq, nperm, iint, ipos
   integer*8                      :: MS
   integer                        :: exc(0:2,2,2), tz, m, n, high, low
   integer                        :: listall(N_int*bit_kind_size), nelall
+  integer                        :: nconnectedExtradiag, nconnectedDiag
   integer(bit_kind)              :: hole, particle, tmp
   MS = elec_alpha_num-elec_beta_num
 
+  nconnectedExtradiag=0
+  nconnectedDiag=0
   nconnectedI = 0
   end_index = N_configuration
 
@@ -260,10 +263,12 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
   !Nsomo_alpha = POPCNT(Isomo)
   Icfg = Ialpha
   Nsomo_alpha = 0
+  !print *," Ialpha="
   do i=1,N_int
     Isomo = Ialpha(i,1)
     Idomo = Ialpha(i,2)
     Nsomo_alpha += POPCNT(Isomo)
+    !print *,Isomo, Idomo, "Nsomo=",Nsomo_alpha
   end do
   end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1)
   if(end_index .LT. 0) end_index= N_configuration
@@ -293,20 +298,25 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
      ndiffSOMO = 0
      ndiffDOMO = 0
      nxordiffSOMODOMO = 0
+     nsomoJ=0
+     nsomoalpha=0
      do ii=1,N_int
        Isomo = Ialpha(ii,1)
        Idomo = Ialpha(ii,2)
        Jsomo = psi_configuration(ii,1,i)
        Jdomo = psi_configuration(ii,2,i)
+       nsomoJ += POPCNT(Jsomo)
+       nsomoalpha += POPCNT(Isomo)
        diffSOMO = IEOR(Isomo,Jsomo)
        ndiffSOMO += POPCNT(diffSOMO)
        diffDOMO = IEOR(Idomo,Jdomo)
        xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
        ndiffDOMO += POPCNT(diffDOMO)
        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
-       nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+       nxordiffSOMODOMO += POPCNT(diffSOMO) + POPCNT(diffDOMO)
      end do
-     Jcfg = psi_configuration(:,:,i)
+     !Jcfg = psi_configuration(:,:,i)
+     !print *,"nxordiffSOMODOMO(4)=",nxordiffSOMODOMO, " ndiffSOMO(2)=",ndiffSOMO
 
      if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
         select case(ndiffDOMO)
@@ -328,7 +338,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                Jsomo = psi_configuration(ii,1,i)
                IJsomo = IEOR(Isomo, Jsomo)
                if(popcnt(IAND(Isomo,IJsomo)) > 0)then
-                 p = TRAILZ(IAND(Isomo,IJsomo)) + 1 + ii * bit_kind_size
+                 p = TRAILZ(IAND(Isomo,IJsomo)) + 1 + (ii-1) * bit_kind_size
                  EXIT
                endif
              end do
@@ -337,22 +347,24 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                Isomo = Ialpha(ii,1)
                Jsomo = psi_configuration(ii,1,i)
                IJsomo = IEOR(Isomo, Jsomo)
-               IJsomo = IBCLR(IJsomo,p-1)
+               iint = shiftr(p-1,bit_kind_shift) + 1
+               ipos = p-shiftl((iint-1),bit_kind_shift)
+               if(iint .eq. ii)then
+                 IJsomo = IBCLR(IJsomo,ipos-1)
+               endif
                if(popcnt(IJsomo) > 0)then
-                 q = TRAILZ(IJsomo) + 1 + ii * bit_kind_size
+                 q = TRAILZ(IJsomo) + 1 + (ii-1) * bit_kind_size
                  EXIT
                endif
              enddo
            endif
            !assert ( p == pp)
            !assert ( q == qq)
-           !print *," --- p=",p," q=",q
+           !print *," 1--- p=",p," q=",q
         case (1)
            ! DOMO -> VMO
            ! or
            ! SOMO -> SOMO
-           nsomoJ = POPCNT(Jsomo)
-           nsomoalpha = POPCNT(Isomo)
            if(nsomoJ .GT. nsomoalpha) then
               ! DOMO -> VMO
               !print *,"obt DOMO -> VMO"
@@ -371,7 +383,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                   Idomo = Ialpha(ii,2)
                   Jdomo = psi_configuration(ii,2,i)
                   if(popcnt(IEOR(Idomo,Jdomo)) > 0)then
-                    p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 + ii * bit_kind_size
+                    p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 + (ii-1) * bit_kind_size
                     EXIT
                   endif
                 end do
@@ -380,9 +392,13 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                   Isomo = Ialpha(ii,1)
                   Jsomo = psi_configuration(ii,1,i)
                   Isomo = IEOR(Isomo, Jsomo)
-                  Isomo = IBCLR(Isomo,p-1)
+                  iint = shiftr(p-1,bit_kind_shift) + 1
+                  ipos = p-shiftl((iint-1),bit_kind_shift)
+                  if(iint .eq. ii)then
+                    Isomo = IBCLR(Isomo,ipos-1)
+                  endif
                   if(popcnt(Isomo) > 0)then
-                    q = TRAILZ(Isomo) + 1 + ii * bit_kind_size
+                    q = TRAILZ(Isomo) + 1 + (ii-1) * bit_kind_size
                     EXIT
                   endif
                 end do
@@ -404,13 +420,16 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                 !endif
               else
                 ! Find p
+                !print *,"Ialpha somo=",Ialpha(1,1), Ialpha(2,1)," Ialpha domo=",Ialpha(1,2), Ialpha(2,2)
+                !print *,"J somo=",psi_configuration(1,1,i), psi_configuration(2,1,i)," J domo=",psi_configuration(1,2,i),&
+                !psi_configuration(2,2,i)
                 do ii=1,N_int
                   Isomo = Ialpha(ii,1)
                   Jsomo = psi_configuration(ii,1,i)
                   Idomo = Ialpha(ii,2)
                   Jdomo = psi_configuration(ii,2,i)
                   if(popcnt(IEOR(Idomo,Jdomo)) > 0)then
-                    q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 + ii * bit_kind_size
+                    q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 + (ii-1) * bit_kind_size
                     EXIT
                   endif
                 enddo
@@ -419,9 +438,14 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                   Isomo = Ialpha(ii,1)
                   Jsomo = psi_configuration(ii,1,i)
                   Isomo = IEOR(Isomo, Jsomo)
-                  Isomo = IBCLR(Isomo,q-1)
+                  iint = shiftr(q-1,bit_kind_shift) + 1
+                  ipos = q-shiftl((iint-1),bit_kind_shift)
+                  if(iint .eq. ii)then
+                    Isomo = IBCLR(Isomo,ipos-1)
+                  endif
+                  !print *,"ii=",ii," Isomo=",Isomo
                   if(popcnt(Isomo) > 0)then
-                    p = TRAILZ(Isomo) + 1 + ii * bit_kind_size
+                    p = TRAILZ(Isomo) + 1 + (ii-1) * bit_kind_size
                     EXIT
                   endif
                 enddo
@@ -429,6 +453,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
            !assert ( p == pp)
            !assert ( q == qq)
            endif
+           !print *," 2--- p=",p," q=",q
         case (2)
            ! DOMO -> SOMO
            !print *,"obt DOMO -> SOMO"
@@ -447,7 +472,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                Jdomo = psi_configuration(ii,2,i)
                IJsomo = IEOR(Isomo, Jsomo)
                if(popcnt(IAND(Jsomo,IJsomo)) > 0)then
-                 p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 + ii * bit_kind_size
+                 p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 + (ii-1) * bit_kind_size
                  EXIT
                endif
              enddo
@@ -456,20 +481,26 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                Isomo = Ialpha(ii,1)
                Jsomo = psi_configuration(ii,1,i)
                IJsomo = IEOR(Isomo, Jsomo)
-               IJsomo = IBCLR(IJsomo,p-1)
+               iint = shiftr(p-1,bit_kind_shift) + 1
+               ipos = p-shiftl((iint-1),bit_kind_shift)
+               if(iint .eq. ii)then
+                 IJsomo = IBCLR(IJsomo,ipos-1)
+               endif
                if(popcnt(IJsomo) > 0)then
-                 q = TRAILZ(IJsomo) + 1 + ii * bit_kind_size
+                 q = TRAILZ(IJsomo) + 1 + (ii-1) * bit_kind_size
                  EXIT
                endif
              enddo
            endif
            !assert ( p == pp)
            !assert ( q == qq)
+           !print *," 3--- p=",p," q=",q
         case default
            print *,"something went wront in get connectedI"
         end select
         starti = psi_config_data(i,1)
         endi   = psi_config_data(i,2)
+        nconnectedExtradiag+=1
         nconnectedI += 1
         do k=1,N_int
           connectedI(k,1,nconnectedI) = psi_configuration(k,1,i)
@@ -534,6 +565,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
            if(holetype(k) .EQ. 1) then
               starti = psi_config_data(i,1)
               endi   = psi_config_data(i,2)
+              nconnectedDiag+=1
               nconnectedI += 1
               connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
               idxs_connectedI(nconnectedI)=starti
@@ -544,6 +576,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
            else
               starti = psi_config_data(i,1)
               endi   = psi_config_data(i,2)
+              nconnectedDiag+=1
               nconnectedI += 1
               connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
               idxs_connectedI(nconnectedI)=starti
@@ -556,5 +589,6 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
         enddo
      endif
   end do
+  !print *,"nconnectedExtradiag=",nconnectedExtradiag," nconnectedDiad=",nconnectedDiag
 
 end subroutine obtain_connected_I_foralpha
diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f
index 76f9bfc3..9fe81fe9 100644
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@@ -885,7 +885,7 @@ subroutine calculate_preconditioner_cfg(diag_energies)
      Idomo = psi_configuration(1,2,i)
      Icfg(1,1) = psi_configuration(1,1,i)
      Icfg(1,2) = psi_configuration(1,2,i)
-     NSOMOI = getNSOMO(psi_configuration(:,:,i))
+     !NSOMOI = getNSOMO(psi_configuration(:,:,i))
 
      starti = psi_config_data(i,1)
      endi   = psi_config_data(i,2)
@@ -1239,27 +1239,34 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod
   integer,intent(in)             :: p,q
   integer,intent(in)             :: extype
   integer,intent(out)            :: pmodel,qmodel
-  !integer(bit_kind)              :: Isomo(N_int)
-  !integer(bit_kind)              :: Idomo(N_int)
-  !integer(bit_kind)              :: Jsomo(N_int)
-  !integer(bit_kind)              :: Jdomo(N_int)
-  integer*8                       :: Isomo
-  integer*8                       :: Idomo
-  integer*8                       :: Jsomo
-  integer*8                       :: Jdomo
+  integer(bit_kind)              :: Isomo(N_int)
+  integer(bit_kind)              :: Idomo(N_int)
+  integer(bit_kind)              :: Jsomo(N_int)
+  integer(bit_kind)              :: Jdomo(N_int)
+  !integer*8                       :: Isomo
+  !integer*8                       :: Idomo
+  !integer*8                       :: Jsomo
+  !integer*8                       :: Jdomo
   integer*8                      :: mask
-  integer                        :: iint, ipos
+  integer                        :: iint, ipos, ii
   !integer(bit_kind)              :: Isomotmp(N_int)
   !integer(bit_kind)              :: Jsomotmp(N_int)
   integer*8             :: Isomotmp
   integer*8             :: Jsomotmp
   integer                        :: pos0,pos0prev
+  integer                        :: tmpp, tmpq
 
   ! TODO Flag (print) when model space indices is > 64
-  Isomo = Ialpha(1,1)
-  Idomo = Ialpha(1,2)
-  Jsomo = Jcfg(1,1)
-  Jdomo = Jcfg(1,2)
+  do ii=1,N_int
+    !Isomo = Ialpha(ii,1)
+    !Idomo = Ialpha(ii,2)
+    !Jsomo = Jcfg(ii,1)
+    !Jdomo = Jcfg(ii,2)
+    Isomo(ii) = Ialpha(ii,1)
+    Idomo(ii) = Ialpha(ii,2)
+    Jsomo(ii) = Jcfg(ii,1)
+    Jdomo(ii) = Jcfg(ii,2)
+  end do
   pos0prev = 0
   pmodel = p
   qmodel = q
@@ -1273,40 +1280,139 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod
           ! SOMO -> SOMO
           ! remove all domos
           !print *,"type -> SOMO -> SOMO"
-          mask = ISHFT(1_8,p) - 1
-          Isomotmp = IAND(Isomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
-          mask = ISHFT(1_8,q) - 1
-          Isomotmp = IAND(Isomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          !mask = ISHFT(1_8,p) - 1
+          !Isomotmp = IAND(Isomo,mask)
+          !pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          !mask = ISHFT(1_8,q) - 1
+          !Isomotmp = IAND(Isomo,mask)
+          !qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+
+          iint = shiftr(p-1,bit_kind_shift) + 1
+          ipos = p-shiftl((iint-1),bit_kind_shift)-1
+          tmpp = 0
+          !print *,"iint=",iint, " p=",p
+          do ii=1,iint-1
+            mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+            Isomotmp = IAND(Isomo(ii),mask)
+            tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          end do
+          mask = ISHFT(1_bit_kind,ipos+1) - 1
+          Isomotmp = IAND(Isomo(iint),mask)
+          pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          !print *,"iint=",iint, " ipos=",ipos,"pmodel=",pmodel, XOR(Isomotmp,mask),Isomo(iint)
+
+          iint = shiftr(q-1,bit_kind_shift) + 1
+          ipos = q-shiftl((iint-1),bit_kind_shift)-1
+          tmpq = 0
+          do ii=1,iint-1
+            mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+            Isomotmp = IAND(Isomo(ii),mask)
+            tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          end do
+          mask = ISHFT(1_bit_kind,ipos+1) - 1
+          Isomotmp = IAND(Isomo(iint),mask)
+          qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          !print *,"iint=",iint, " ipos=",ipos,"qmodel=",qmodel
        case (2)
           ! DOMO -> VMO
           ! remove all domos except one at p
           !print *,"type -> DOMO -> VMO"
-          mask = ISHFT(1_8,p) - 1
-          Jsomotmp = IAND(Jsomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
-          mask = ISHFT(1_8,q) - 1
-          Jsomotmp = IAND(Jsomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+          !mask = ISHFT(1_8,p) - 1
+          !Jsomotmp = IAND(Jsomo,mask)
+          !pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+          !mask = ISHFT(1_8,q) - 1
+          !Jsomotmp = IAND(Jsomo,mask)
+          !qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+
+          iint = shiftr(p-1,bit_kind_shift) + 1
+          ipos = p-shiftl((iint-1),bit_kind_shift)-1
+          tmpp = 0
+          do ii=1,iint-1
+            mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+            Jsomotmp = IAND(Jsomo(ii),mask)
+            tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+          end do
+          mask = ISHFT(1_bit_kind,ipos+1) - 1
+          Jsomotmp = IAND(Jsomo(iint),mask)
+          pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+
+          iint = shiftr(q-1,bit_kind_shift) + 1
+          ipos = q-shiftl((iint-1),bit_kind_shift)-1
+          tmpq = 0
+          do ii=1,iint-1
+            mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+            Jsomotmp = IAND(Jsomo(ii),mask)
+            tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+          end do
+          mask = ISHFT(1_bit_kind,ipos+1) - 1
+          Jsomotmp = IAND(Jsomo(iint),mask)
+          qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
        case (3)
           ! SOMO -> VMO
           !print *,"type -> SOMO -> VMO"
           !Isomo = IEOR(Isomo,Jsomo)
           if(p.LT.q) then
-             mask = ISHFT(1_8,p) - 1
-             Isomo = IAND(Isomo,mask)
-             pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
-             mask = ISHFT(1_8,q) - 1
-             Jsomo = IAND(Jsomo,mask)
-             qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1
+             !mask = ISHFT(1_8,p) - 1
+             !Isomo = IAND(Isomo,mask)
+             !pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
+             !mask = ISHFT(1_8,q) - 1
+             !Jsomo = IAND(Jsomo,mask)
+             !qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1
+
+             iint = shiftr(p-1,bit_kind_shift) + 1
+             ipos = p-shiftl((iint-1),bit_kind_shift)-1
+             tmpp = 0
+             do ii=1,iint-1
+               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               Isomotmp = IAND(Isomo(ii),mask)
+               tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+             end do
+             mask = ISHFT(1_8,ipos+1) - 1
+             Isomotmp = IAND(Isomo(iint),mask)
+             pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+
+             iint = shiftr(q-1,bit_kind_shift) + 1
+             ipos = q-shiftl((iint-1),bit_kind_shift)-1
+             tmpq = 0
+             do ii=1,iint-1
+               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               Jsomotmp = IAND(Jsomo(ii),mask)
+               tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+             end do
+             mask = ISHFT(1_bit_kind,ipos+1) - 1
+             Jsomotmp = IAND(Jsomo(iint),mask)
+             qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + 1
           else
-             mask = ISHFT(1_8,p) - 1
-             Isomo = IAND(Isomo,mask)
-             pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1
-             mask = ISHFT(1_8,q) - 1
-             Jsomo = IAND(Jsomo,mask)
-             qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
+             !mask = ISHFT(1_8,p) - 1
+             !Isomo = IAND(Isomo,mask)
+             !pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1
+             !mask = ISHFT(1_8,q) - 1
+             !Jsomo = IAND(Jsomo,mask)
+             !qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
+
+             iint = shiftr(p-1,bit_kind_shift) + 1
+             ipos = p-shiftl((iint-1),bit_kind_shift)-1
+             tmpp = 0
+             do ii=1,iint-1
+               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               Isomotmp = IAND(Isomo(ii),mask)
+               tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+             end do
+             mask = ISHFT(1_bit_kind,ipos+1) - 1
+             Isomotmp = IAND(Isomo(iint),mask)
+             pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + 1
+
+             iint = shiftr(q-1,bit_kind_shift) + 1
+             ipos = q-shiftl((iint-1),bit_kind_shift)-1
+             tmpq = 0
+             do ii=1,iint-1
+               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               Jsomotmp = IAND(Jsomo(ii),mask)
+               tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+             end do
+             mask = ISHFT(1_bit_kind,ipos+1) - 1
+             Jsomotmp = IAND(Jsomo(iint),mask)
+             qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
           endif
        case (4)
           ! DOMO -> SOMO
@@ -1314,19 +1420,67 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod
           !print *,"type -> DOMO -> SOMO"
           !Isomo = IEOR(Isomo,Jsomo)
           if(p.LT.q) then
-             mask = ISHFT(1_8,p) - 1
-             Jsomo = IAND(Jsomo,mask)
-             pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
-             mask = ISHFT(1_8,q) - 1
-             Isomo = IAND(Isomo,mask)
-             qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1
+             !mask = ISHFT(1_8,p) - 1
+             !Jsomo = IAND(Jsomo,mask)
+             !pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
+             !mask = ISHFT(1_8,q) - 1
+             !Isomo = IAND(Isomo,mask)
+             !qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1
+
+             iint = shiftr(p-1,bit_kind_shift) + 1
+             ipos = p-shiftl((iint-1),bit_kind_shift)-1
+             tmpp = 0
+             do ii=1,iint-1
+               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               Jsomotmp = IAND(Jsomo(ii),mask)
+               tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+             end do
+             mask = ISHFT(1_bit_kind,ipos+1) - 1
+             Jsomotmp = IAND(Jsomo(iint),mask)
+             pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+
+             iint = shiftr(q-1,bit_kind_shift) + 1
+             ipos = q-shiftl((iint-1),bit_kind_shift)-1
+             tmpq = 0
+             do ii=1,iint-1
+               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               Isomotmp = IAND(Isomo(ii),mask)
+               tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+             end do
+             mask = ISHFT(1_bit_kind,ipos+1) - 1
+             Isomotmp = IAND(Isomo(iint),mask)
+             qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + 1
           else
-             mask = ISHFT(1_8,p) - 1
-             Jsomo = IAND(Jsomo,mask)
-             pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1
-             mask = ISHFT(1_8,q) - 1
-             Isomo = IAND(Isomo,mask)
-             qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
+             !mask = ISHFT(1_8,p) - 1
+             !Jsomo = IAND(Jsomo,mask)
+             !pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1
+             !mask = ISHFT(1_8,q) - 1
+             !Isomo = IAND(Isomo,mask)
+             !qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
+
+             iint = shiftr(p-1,bit_kind_shift) + 1
+             ipos = p-shiftl((iint-1),bit_kind_shift)-1
+             tmpp = 0
+             do ii=1,iint-1
+               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               Jsomotmp = IAND(Jsomo(ii),mask)
+               tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+             end do
+             mask = ISHFT(1_bit_kind,ipos+1) - 1
+             Jsomotmp = IAND(Jsomo(iint),mask)
+             pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + 1
+
+             iint = shiftr(q-1,bit_kind_shift) + 1
+             ipos = q-shiftl((iint-1),bit_kind_shift)-1
+             tmpq = 0
+             do ii=1,iint-1
+               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               Isomotmp = IAND(Isomo(ii),mask)
+               tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+             end do
+             mask = ISHFT(1_bit_kind,ipos+1) - 1
+             Isomotmp = IAND(Isomo(iint),mask)
+             qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
           endif
        case default
           print *,"something is wrong in convertOrbIdsToModelSpaceIds"
@@ -1415,7 +1569,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
 
   allocate(diag_energies(n_CSF))
   call calculate_preconditioner_cfg(diag_energies)
-  !print *," diag energy =",diag_energies(1)
+  print *," diag energy =",diag_energies(1)
 
   MS = 0
   norm_coef_cfg=0.d0
@@ -1590,6 +1744,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
           call omp_set_lock(lock(jj))
           do kk = 1,n_st
             psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii)
+            print *,"jj=",jj,'psi_out(kk)=',psi_out(kk,jj)
           enddo
           call omp_unset_lock(lock(jj))
         enddo
@@ -1666,7 +1821,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
                                          nconnectedI, excitationIds, excitationTypes, diagfactors)
 
         !if(i .EQ. 1) then
-        !   print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k))
+        !   print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),alphas_Icfg(2,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' &
+        !   kcfgDOMO=',alphas_Icfg(1,2,k),alphas_Icfg(2,2,k),' ',POPCNT(alphas_Icfg(1,2,k)), " NconnectedI=",nconnectedI
         !endif
 
         
@@ -1682,9 +1838,13 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
            NSOMOI = getNSOMO(connectedI_alpha(:,:,j))
            p = excitationIds(1,j)
            q = excitationIds(2,j)
+           !print *,"j=",j, " p=",p," q=",q
            extype = excitationTypes(j)
            call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
            ! for E_pp E_rs and E_ppE_rr case
+           !if(k.eq.722)then
+           !  print *,"j=",j," k=",k,"p=",p,"q=",q,"NSOMOalpha=",NSOMOalpha, "pmodel=",pmodel,"qmodel=",qmodel, "extype=",extype
+           !endif
            rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
            colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
            !print *,"j=",j," Nsomo=",NSOMOalpha," rowsikpq=",rowsikpq," colsikpq=",colsikpq, " p=",pmodel," q=",qmodel, " extyp=",extype
@@ -1692,6 +1852,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
            rowsTKI = rowsikpq
         enddo
 
+        !if(i.eq.1)then
+        !  print *,"n_st=",n_st,"rowsTKI=",rowsTKI, " nconnectedI=",nconnectedI, &
+        !  "totcolsTKI=",totcolsTKI
+        !endif
         allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF
         ! Initialize the integral container
         ! dims : (totcolsTKI, nconnectedI)
@@ -1721,10 +1885,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
                  TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) &
                     * psi_in(kk,idxs_connectedI_alpha(j)+m-1)
               enddo
-           !if(i.eq.1) then
-           !      print *,AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha)
-           !endif
            enddo
+           !if(i.eq.1) then
+           !      print *,"j=",j,"psi_in=",psi_in(1,idxs_connectedI_alpha(j)+m-1)
+           !endif
            enddo
 
            diagfactors_0 = diagfactors(j)*0.5d0
@@ -1763,16 +1927,24 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
            rowsTKI = rowsikpq
            CCmattmp = 0.d0
 
+        !if(i.eq.1)then
+        !  print *,"\t n_st=",n_st," colsikpq=",colsikpq," rowsTKI=",rowsTKI,&
+        !    " | ",size(TKIGIJ,1),size(AIJpqContainer,1),size(CCmattmp,1)
+        !endif
            call dgemm('N','N', n_st, colsikpq, rowsTKI, 1.d0,        &
                TKIGIJ(1,1,j), size(TKIGIJ,1),                        &
                AIJpqContainer(1,1,pmodel,qmodel,extype,NSOMOalpha),  &
                size(AIJpqContainer,1), 0.d0,                         &
                CCmattmp, size(CCmattmp,1) )
 
+           !print *,"j=",j,"colsikpq=",colsikpq, "sizeTIG=",size(TKIGIJ,1),"sizeaijpq=",size(AIJpqContainer,1)
            do m = 1,colsikpq
               call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1))
               do kk = 1,n_st
                  psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m)
+                 !if(dabs(CCmattmp(kk,m)).gt.1e-10)then
+                 !  print *, CCmattmp(kk,m), " | ",idxs_connectedI_alpha(j)+m-1
+                 !end if
               enddo
               call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1))
            enddo
diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f
index 659602a1..b88c188d 100644
--- a/src/davidson/diagonalization_hcfg.irp.f
+++ b/src/davidson/diagonalization_hcfg.irp.f
@@ -112,6 +112,8 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
   double precision, allocatable  :: U(:,:), U_csf(:,:), overlap(:,:)
   double precision, allocatable  :: tmpU(:,:), tmpW(:,:)
   double precision, pointer      :: W(:,:), W_csf(:,:)
+  double precision, pointer      :: W2(:,:), W_csf2(:,:)
+  double precision, allocatable  :: U2(:,:), U_csf2(:,:)
   logical                        :: disk_based
   double precision               :: energy_shift(N_st_diag_in*davidson_sze_max)
 
@@ -234,12 +236,15 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
     call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/))
   else
     allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax))
+    allocate(W2(sze,N_st_diag),W_csf2(sze_csf,N_st_diag*itermax))
   endif
 
   allocate(                                                          &
       ! Large
       U(sze,N_st_diag),                                              &
+      U2(sze,N_st_diag),                                              &
       U_csf(sze_csf,N_st_diag*itermax),                              &
+      U_csf2(sze_csf,N_st_diag*itermax),                              &
 
       ! Small
       h(N_st_diag*itermax,N_st_diag*itermax),                        &
@@ -324,8 +329,8 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
                 tmpU(kk,ii) = U_csf(ii,shift+kk)
               enddo
             enddo
-            !tmpU     =0.0d0
-            !tmpU(1,2)=1.0d0
+            tmpU     =0.0d0
+            tmpU(1,1)=1.0d0
             double precision               :: irp_rdtsc
             double precision               :: ticks_0, ticks_1
             integer*8                      :: irp_imax
@@ -340,19 +345,19 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
               enddo
             enddo
 
-            !U_csf = 0.0d0
-            !U_csf(1,1) = 1.0d0
-            !u_in = 0.0d0
-            !call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
-            !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
-            !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
-            !do i=1,sze_csf
-            !  print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
-            !  if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
-            !    print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
-            !  endif
-            !end do
-            !stop
+            U_csf = 0.0d0
+            U_csf(1,1) = 1.0d0
+            u_in = 0.0d0
+            call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
+            call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
+            call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
+            do i=1,sze_csf
+              print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+              !if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
+              !  print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+              !endif
+            end do
+            stop
             deallocate(tmpW)
             deallocate(tmpU)
         endif

From bb0c3e391ca041884e528c5c5c3b39cbb75c3909 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Thu, 15 Dec 2022 18:54:47 +0100
Subject: [PATCH 16/35] Fixed act_bitmask.

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 19533bd5..578a9153 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -875,7 +875,7 @@ use bitmasks
       ndiffDOMO = 0
       nxordiffSOMODOMO = 0
       do ii=1,N_int
-        diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,1),psi_configuration(ii,2,k)))
+        diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,2),psi_configuration(ii,2,k)))
         xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
         ndiffDOMO += POPCNT(diffDOMO)
         nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
@@ -1221,7 +1221,7 @@ END_PROVIDER
       ndiffDOMO = 0
       nxordiffSOMODOMO = 0
       do ii=1,N_int
-        diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,1),psi_configuration(ii,2,k)))
+        diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,2),psi_configuration(ii,2,k)))
         xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
         ndiffDOMO += POPCNT(diffDOMO)
         nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)

From a6e844ad615579334567a28bd0e3ab02c0b81b15 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Fri, 16 Dec 2022 13:29:59 +0100
Subject: [PATCH 17/35] Looks like CIS is working.

---
 src/csf/configuration_CI_sigma_helpers.irp.f |  25 +--
 src/csf/conversion.irp.f                     |   2 +-
 src/csf/obtain_I_foralpha.irp.f              |  68 ++++----
 src/csf/sigma_vector.irp.f                   | 167 +++++++++++++------
 src/davidson/diagonalization_hcfg.irp.f      |  30 ++--
 5 files changed, 182 insertions(+), 110 deletions(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 578a9153..4473b1fa 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -586,7 +586,7 @@ use bitmasks
 
     do i=1, N_int
       Isomo(i) = iand(act_bitmask(i,1),psi_configuration(i,1,idxI))
-      Idomo(i) = iand(act_bitmask(i,1),psi_configuration(i,2,idxI))
+      Idomo(i) = iand(act_bitmask(i,2),psi_configuration(i,2,idxI))
     enddo
 
     ! find out all pq holes possible
@@ -667,7 +667,7 @@ use bitmasks
         qq  = listvmos(j)
         if(pp.eq.qq) cycle
         jint = shiftr(qq-1,bit_kind_shift) + 1
-        jpos = qq-shiftl((iint-1),bit_kind_shift)-1
+        jpos = qq-shiftl((jint-1),bit_kind_shift)-1
         if(vmotype(j) == 1)then
           Jsomo(jint) = IBSET(Jsomo(jint),jpos)
         else if(vmotype(j) == 2)then
@@ -769,7 +769,7 @@ use bitmasks
     ! prune list of alphas
     do i=1, N_int
       Isomo(i) = iand(act_bitmask(i,1),psi_configuration(i,1,idxI))
-      Idomo(i) = iand(act_bitmask(i,1),psi_configuration(i,2,idxI))
+      Idomo(i) = iand(act_bitmask(i,2),psi_configuration(i,2,idxI))
       Jsomo(i) = Isomo(i)
       Jdomo(i) = Idomo(i)
     enddo
@@ -790,7 +790,7 @@ use bitmasks
       do j = 1, nvmos
         qq  = listvmos(j)
         jint = shiftr(qq-1,bit_kind_shift) + 1
-        jpos = qq-shiftl((iint-1),bit_kind_shift)-1
+        jpos = qq-shiftl((jint-1),bit_kind_shift)-1
         if(vmotype(j) == 1)then
           Jsomo(jint) = IBSET(Jsomo(jint),jpos)
         else if(vmotype(j) == 2)then
@@ -857,7 +857,7 @@ use bitmasks
     ppExistsQ = .False.
     do i=1, N_int
       Isomo(i) = iand(act_bitmask(i,1),psi_configuration(i,1,idxI))
-      Idomo(i) = iand(act_bitmask(i,1),psi_configuration(i,2,idxI))
+      Idomo(i) = iand(act_bitmask(i,2),psi_configuration(i,2,idxI))
     enddo
 
     kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
@@ -953,7 +953,7 @@ END_PROVIDER
 
   do i=1, N_int
     Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
-    Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+    Idomo(i) = iand(act_bitmask(i,2),Icfg(i,2))
   enddo
 
   !print*,"Input cfg"
@@ -985,6 +985,7 @@ END_PROVIDER
 
   ! find vmos
     ! Take into account N_int
+    nvmos=0
     do ii = 1, n_act_orb
       iii = list_act(ii)
       iint = shiftr(iii-1,bit_kind_shift) + 1
@@ -1014,7 +1015,7 @@ END_PROVIDER
     ! Now find the allowed (p,q) excitations
     do i=1, N_int
       Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
-      Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+      Idomo(i) = iand(act_bitmask(i,2),Icfg(i,2))
       Jsomo(i) = Isomo(i)
       Jdomo(i) = Idomo(i)
     enddo
@@ -1051,7 +1052,7 @@ END_PROVIDER
      do j = 1,nvmos
         qq  = listvmos(j)
         jint = shiftr(qq-1,bit_kind_shift) + 1
-        jpos = qq-shiftl((iint-1),bit_kind_shift)-1
+        jpos = qq-shiftl((jint-1),bit_kind_shift)-1
         if(vmotype(j) == 1)then
           Jsomo(jint) = IBSET(Jsomo(jint),jpos)
         else if(vmotype(j) == 2)then
@@ -1157,7 +1158,7 @@ END_PROVIDER
   ! prune list of alphas
     do i=1, N_int
       Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
-      Idomo(i) = iand(act_bitmask(i,1),Icfg(i,2))
+      Idomo(i) = iand(act_bitmask(i,2),Icfg(i,2))
       Jsomo(i) = Isomo(i)
       Jdomo(i) = Idomo(i)
     enddo
@@ -1176,7 +1177,7 @@ END_PROVIDER
      do j = 1, nvmos
         qq  = listvmos(j)
         jint = shiftr(qq-1,bit_kind_shift) + 1
-        jpos = qq-shiftl((iint-1),bit_kind_shift)-1
+        jpos = qq-shiftl((jint-1),bit_kind_shift)-1
         if(vmotype(j) == 1)then
           Jsomo(jint) = IBSET(Jsomo(jint),jpos)
         else if(vmotype(j) == 2)then
@@ -1207,8 +1208,8 @@ END_PROVIDER
 
   ! Check if this Icfg has been previously generated as a mono
   ppExistsQ = .False.
-  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
-  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  !Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+  !Idomo = iand(act_bitmask(1,2),Icfg(1,2))
   do k = 1, idxI-1
       do ii=1,N_int
         diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k)))
diff --git a/src/csf/conversion.irp.f b/src/csf/conversion.irp.f
index 7c6c8363..92c8e669 100644
--- a/src/csf/conversion.irp.f
+++ b/src/csf/conversion.irp.f
@@ -114,7 +114,7 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
   integer                        :: idx
   integer MS
   MS = elec_alpha_num-elec_beta_num
-  print *,"size=",size(tmp_psi_coef_det,1)," ",size(tmp_psi_coef_det,2)
+  !print *,"size=",size(tmp_psi_coef_det,1)," ",size(tmp_psi_coef_det,2)
 
   countcsf = 0
 
diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f
index 1d4e81fc..8b588f4e 100644
--- a/src/csf/obtain_I_foralpha.irp.f
+++ b/src/csf/obtain_I_foralpha.irp.f
@@ -324,14 +324,14 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
            ! SOMO -> VMO
            !print *,"obt SOMO -> VMO"
            extyp = 3
-           if(N_int .eq. 1) then
-             IJsomo = IEOR(Isomo, Jsomo)
-             p = TRAILZ(IAND(Isomo,IJsomo)) + 1
-             IJsomo = IBCLR(IJsomo,p-1)
-             q = TRAILZ(IJsomo) + 1
-             !print *," p=",p," q=",q
-             !call get_single_excitation_cfg(Jcfg, Icfg, p, q, N_int)
-           else
+           !if(N_int .eq. 1) then
+           !  IJsomo = IEOR(Isomo, Jsomo)
+           !  p = TRAILZ(IAND(Isomo,IJsomo)) + 1
+           !  IJsomo = IBCLR(IJsomo,p-1)
+           !  q = TRAILZ(IJsomo) + 1
+           !  !print *," p=",p," q=",q
+           !  !call get_single_excitation_cfg(Jcfg, Icfg, p, q, N_int)
+           !else
              ! Find p
              do ii=1,N_int
                Isomo = Ialpha(ii,1)
@@ -357,7 +357,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                  EXIT
                endif
              enddo
-           endif
+           !endif
            !assert ( p == pp)
            !assert ( q == qq)
            !print *," 1--- p=",p," q=",q
@@ -369,12 +369,12 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
               ! DOMO -> VMO
               !print *,"obt DOMO -> VMO"
               extyp = 2
-              if(N_int.eq.1)then
-                p = TRAILZ(IEOR(Idomo,Jdomo)) + 1
-                Isomo = IEOR(Isomo, Jsomo)
-                Isomo = IBCLR(Isomo,p-1)
-                q = TRAILZ(Isomo) + 1
-              else
+              !if(N_int.eq.1)then
+              !  p = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+              !  Isomo = IEOR(Isomo, Jsomo)
+              !  Isomo = IBCLR(Isomo,p-1)
+              !  q = TRAILZ(Isomo) + 1
+              !else
 
                 ! Find p
                 do ii=1,N_int
@@ -402,23 +402,23 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                     EXIT
                   endif
                 end do
-              endif
+              !endif
            !assert ( p == pp)
            !assert ( q == qq)
            else
               ! SOMO -> SOMO
               !print *,"obt SOMO -> SOMO"
               extyp = 1
-              if(N_int.eq.1)then
-                q = TRAILZ(IEOR(Idomo,Jdomo)) + 1
-                Isomo = IEOR(Isomo, Jsomo)
-                Isomo = IBCLR(Isomo,q-1)
-                p = TRAILZ(Isomo) + 1
-                ! Check for Minimal alpha electrons (MS)
-                !if(POPCNT(Isomo).lt.MS)then
-                !  cycle
-                !endif
-              else
+              !if(N_int.eq.1)then
+              !  q = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+              !  Isomo = IEOR(Isomo, Jsomo)
+              !  Isomo = IBCLR(Isomo,q-1)
+              !  p = TRAILZ(Isomo) + 1
+              !  ! Check for Minimal alpha electrons (MS)
+              !  !if(POPCNT(Isomo).lt.MS)then
+              !  !  cycle
+              !  !endif
+              !else
                 ! Find p
                 !print *,"Ialpha somo=",Ialpha(1,1), Ialpha(2,1)," Ialpha domo=",Ialpha(1,2), Ialpha(2,2)
                 !print *,"J somo=",psi_configuration(1,1,i), psi_configuration(2,1,i)," J domo=",psi_configuration(1,2,i),&
@@ -449,7 +449,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                     EXIT
                   endif
                 enddo
-              endif
+              !endif
            !assert ( p == pp)
            !assert ( q == qq)
            endif
@@ -458,12 +458,12 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
            ! DOMO -> SOMO
            !print *,"obt DOMO -> SOMO"
            extyp = 4
-           if(N_int.eq.1)then
-             IJsomo = IEOR(Isomo, Jsomo)
-             p = TRAILZ(IAND(Jsomo,IJsomo)) + 1
-             IJsomo = IBCLR(IJsomo,p-1)
-             q = TRAILZ(IJsomo) + 1
-           else
+           !if(N_int.eq.1)then
+           !  IJsomo = IEOR(Isomo, Jsomo)
+           !  p = TRAILZ(IAND(Jsomo,IJsomo)) + 1
+           !  IJsomo = IBCLR(IJsomo,p-1)
+           !  q = TRAILZ(IJsomo) + 1
+           !else
              ! Find p
              do ii=1,N_int
                Isomo = Ialpha(ii,1)
@@ -491,7 +491,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                  EXIT
                endif
              enddo
-           endif
+           !endif
            !assert ( p == pp)
            !assert ( q == qq)
            !print *," 3--- p=",p," q=",q
diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f
index 9fe81fe9..193fcf0e 100644
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@@ -1538,8 +1538,13 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
   integer                        :: rowsTKI
   integer                        :: noccpp
   integer                        :: istart_cfg, iend_cfg, num_threads_max
+  integer                        :: iint, jint, ipos, jpos, Nsomo_I, iii
   integer                        :: nconnectedJ,nconnectedtotalmax,nconnectedmaxJ,maxnalphas,ntotJ
-  integer*8                      :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta
+  integer*8                      :: MS,Ialpha, Ibeta
+  integer(bit_kind)              :: Isomo(N_INT)
+  integer(bit_kind)              :: Idomo(N_INT)
+  integer(bit_kind)              :: Jsomo(N_INT)
+  integer(bit_kind)              :: Jdomo(N_INT)
   integer                        :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk
   real*8                         :: norm_coef_cfg, fac2eints
   real*8                         :: norm_coef_det
@@ -1554,6 +1559,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
   real*8,dimension(:),allocatable:: diag_energies
   real*8                         :: tmpvar, tmptot
   real*8                         :: core_act_contrib
+  integer :: listall(N_int*bit_kind_size), nelall
 
   integer(omp_lock_kind), allocatable :: lock(:)
   call omp_set_max_active_levels(1)
@@ -1569,7 +1575,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
 
   allocate(diag_energies(n_CSF))
   call calculate_preconditioner_cfg(diag_energies)
-  print *," diag energy =",diag_energies(1)
+  !print *," diag energy =",diag_energies(1)
 
   MS = 0
   norm_coef_cfg=0.d0
@@ -1615,6 +1621,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
       !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,&
       !$OMP    N_int, N_st, psi_out, psi_in, h_core_ri, core_energy, h_act_ri, AIJpqContainer,&
       !$OMP     pp, sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp,       &
+      !$OMP     qq, iint, jint, ipos, jpos, nelall, listall, Nsomo_I, &
       !$OMP     AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,&
       !$OMP     n_core_orb, n_act_orb, list_act, n, list_core,  list_core_is_built,core_act_contrib, num_threads_max,&
       !$OMP     n_core_orb_is_built, mo_integrals_map, mo_integrals_map_is_built)
@@ -1637,10 +1644,12 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
     ! else
     ! cycle
 
-     Icfg(1,1) = psi_configuration(1,1,i)
-     Icfg(1,2) = psi_configuration(1,2,i)
-     Isomo = Icfg(1,1)
-     Idomo = Icfg(1,2)
+    do ii=1,N_INT
+     Icfg(ii,1) = psi_configuration(ii,1,i)
+     Icfg(ii,2) = psi_configuration(ii,2,i)
+     Isomo(ii) = Icfg(ii,1)
+     Idomo(ii) = Icfg(ii,2)
+    enddo
      NSOMOI = getNSOMO(Icfg)
 
      ! find out all pq holes possible
@@ -1651,42 +1660,86 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
      ! list_core_inact
      ! bitmasks
      !do k = 1,mo_num
-     do kk = 1,n_act_orb
-       k = list_act(kk)
-       if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then
-         nholes += 1
-         listholes(nholes) = k
-         holetype(nholes) = 1
-       endif
-     enddo
-     ! holes in DOMO
-     !do k = 1,mo_num
-     do kk = 1,n_act_orb
-       k = list_act(kk)
-       if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then
-         nholes += 1
-         listholes(nholes) = k
-         holetype(nholes) = 2
-       endif
-     enddo
+    ! do kk = 1,n_act_orb
+    !   k = list_act(kk)
+    !   if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then
+    !     nholes += 1
+    !     listholes(nholes) = k
+    !     holetype(nholes) = 1
+    !   endif
+    ! enddo
+    ! ! holes in DOMO
+    ! !do k = 1,mo_num
+    ! do kk = 1,n_act_orb
+    !   k = list_act(kk)
+    !   if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then
+    !     nholes += 1
+    !     listholes(nholes) = k
+    !     holetype(nholes) = 2
+    !   endif
+    ! enddo
 
-     ! find vmos
+    ! ! find vmos
      listvmos = -1
      vmotype = -1
      nvmos = 0
-     do kk = 1,n_act_orb
-       k = list_act(kk)
-       !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
-       if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then
-         nvmos += 1
-         listvmos(nvmos) = k
-         vmotype(nvmos) = 0
-       else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then
-         nvmos += 1
-         listvmos(nvmos) = k
-         vmotype(nvmos) = 1
-       end if
-     enddo
+    ! do kk = 1,n_act_orb
+    !   k = list_act(kk)
+    !   !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
+    !   if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then
+    !     nvmos += 1
+    !     listvmos(nvmos) = k
+    !     vmotype(nvmos) = 0
+    !   else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then
+    !     nvmos += 1
+    !     listvmos(nvmos) = k
+    !     vmotype(nvmos) = 1
+    !   end if
+    ! enddo
+
+  ! find out all pq holes possible
+    nholes = 0
+        call bitstring_to_list(Isomo,listall,nelall,N_int)
+
+        do iii=1,nelall
+          nholes += 1
+          listholes(nholes) = listall(iii)
+          holetype(nholes) = 1
+        end do
+
+        Nsomo_I = nelall
+
+        call bitstring_to_list(Idomo,listall,nelall,N_int)
+
+        do iii=1,nelall
+          if(listall(iii) .gt. n_core_orb)then
+            nholes += 1
+            listholes(nholes) = listall(iii)
+            holetype(nholes) = 2
+          endif
+        end do
+
+
+  ! find vmos
+    ! Take into account N_int
+    do ii = 1, n_act_orb
+      iii = list_act(ii)
+      iint = shiftr(iii-1,bit_kind_shift) + 1
+      ipos = iii-shiftl((iint-1),bit_kind_shift)-1
+
+      if(IAND(Idomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then
+        if(IAND(Isomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then
+          nvmos += 1
+          listvmos(nvmos) = iii
+          vmotype(nvmos) = 1
+        else if(POPCNT(IAND(Isomo(iint),(IBSET(0_8,ipos)))) .EQ. 1) then
+          nvmos += 1
+          listvmos(nvmos) = iii
+          vmotype(nvmos) = 2
+        end if
+      end if
+    end do
+
 
 
      ! Icsf ids
@@ -1705,16 +1758,31 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
       extype = excitationTypes_single(j)
       ! Off diagonal terms
       call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel)
-      Jsomo = singlesI(1,1,j)
-      Jdomo = singlesI(1,2,j)
+      do ii=1,N_INT
+        Jsomo(ii) = singlesI(1,1,j)
+        Jdomo(ii) = singlesI(1,2,j)
+      enddo
+
+      ! Get actual p pos
+      pp  = p
+      iint = shiftr(pp-1,bit_kind_shift) + 1
+      ipos = pp-shiftl((iint-1),bit_kind_shift)-1
+
+      ! Get actual q pos
+      qq  = q
+      jint = shiftr(qq-1,bit_kind_shift) + 1
+      jpos = qq-shiftl((jint-1),bit_kind_shift)-1
 
       ! Add the hole on J
-      if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1  .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+      !if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1  .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+      if(POPCNT(IAND(Jsomo(jint),IBSET(0_8,jpos))) .EQ. 1  .AND. POPCNT(IAND(Isomo(jint),IBSET(0_8,jpos))) .EQ. 0) then
         nholes += 1
         listholes(nholes) = q
         holetype(nholes) = 1
       endif
-      if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+      !if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+      if((POPCNT(IAND(Jdomo(jint),IBSET(0_8,jpos))) .EQ. 1 .AND. POPCNT(IAND(Idomo(jint),IBSET(0_8,jpos))) .EQ. 0) .AND.&
+      POPCNT(IAND(Isomo(jint),IBSET(0_8,jpos))) .EQ. 0) then
         nholes += 1
         listholes(nholes) = q
         holetype(nholes) = 2
@@ -1744,17 +1812,18 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
           call omp_set_lock(lock(jj))
           do kk = 1,n_st
             psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii)
-            print *,"jj=",jj,'psi_out(kk)=',psi_out(kk,jj)
           enddo
           call omp_unset_lock(lock(jj))
         enddo
       enddo
 
       ! Undo setting in listholes
-      if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1  .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+      !if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1  .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+      if(POPCNT(IAND(Jsomo(jint),IBSET(0_8,jpos))) .EQ. 1  .AND. POPCNT(IAND(Isomo(jint),IBSET(0_8,jpos))) .EQ. 0) then
         nholes -= 1
       endif
-      if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+      if((POPCNT(IAND(Jdomo(jint),IBSET(0_8,jpos))) .EQ. 1 .AND. POPCNT(IAND(Idomo(jint),IBSET(0_8,jpos))) .EQ. 0) .AND.&
+      POPCNT(IAND(Isomo(jint),IBSET(0_8,jpos))) .EQ. 0) then
         nholes -= 1
       endif
     enddo
@@ -1790,8 +1859,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
      ! else
      ! cycle
 
-     Icfg(1,1) = psi_configuration(1,1,i)
-     Icfg(1,2) = psi_configuration(1,2,i)
+     do ii=1,N_INT
+       Icfg(ii,1) = psi_configuration(ii,1,i)
+       Icfg(ii,2) = psi_configuration(ii,2,i)
+     enddo
      starti = psi_config_data(i,1)
      endi   = psi_config_data(i,2)
 
@@ -1806,7 +1877,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
      !  print *,"Nalpha > maxnalpha"
      !endif
 
-     call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ)
+     !call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ)
 
      ! TODO : remove doubly excited for return
      !print *,"I=",i," isomo=",psi_configuration(1,1,i)," idomo=",psi_configuration(1,2,i), " psiout=",psi_out(1,5), "Nalphas_Icfg=",Nalphas_Icfg
diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f
index b88c188d..00bbf543 100644
--- a/src/davidson/diagonalization_hcfg.irp.f
+++ b/src/davidson/diagonalization_hcfg.irp.f
@@ -329,8 +329,8 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
                 tmpU(kk,ii) = U_csf(ii,shift+kk)
               enddo
             enddo
-            tmpU     =0.0d0
-            tmpU(1,1)=1.0d0
+            !tmpU     =0.0d0
+            !tmpU(1,1)=1.0d0
             double precision               :: irp_rdtsc
             double precision               :: ticks_0, ticks_1
             integer*8                      :: irp_imax
@@ -345,19 +345,19 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
               enddo
             enddo
 
-            U_csf = 0.0d0
-            U_csf(1,1) = 1.0d0
-            u_in = 0.0d0
-            call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
-            call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
-            call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
-            do i=1,sze_csf
-              print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
-              !if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
-              !  print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
-              !endif
-            end do
-            stop
+            !U_csf = 0.0d0
+            !U_csf(1,1) = 1.0d0
+            !u_in = 0.0d0
+            !call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
+            !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
+            !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
+            !do i=1,sze_csf
+            !  print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+            !  !if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
+            !  !  print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+            !  !endif
+            !end do
+            !stop
             deallocate(tmpW)
             deallocate(tmpU)
         endif

From bc69ac42b13816b3988142d78a216f97d4014e40 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Fri, 16 Dec 2022 15:01:13 +0100
Subject: [PATCH 18/35] restore dimension of alphsIcfg.

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 4473b1fa..3440c48e 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -524,7 +524,7 @@ use bitmasks
 !!!
 !!!END_PROVIDER
 
- BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*12)]
+ BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*mo_num)]
 &BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ]
   implicit none
   !use bitmasks

From 2e0c96fd6b320f75eb2cf03376eb5d4f233c9b23 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Fri, 16 Dec 2022 16:02:00 +0100
Subject: [PATCH 19/35] Fixed bug in calculation of Nsomo_J.

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 37 +++++++++++---------
 src/csf/sigma_vector.irp.f                   | 12 ++++---
 src/davidson/diagonalization_hcfg.irp.f      | 30 ++++++++--------
 3 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 3440c48e..744ba205 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -591,6 +591,7 @@ use bitmasks
 
     ! find out all pq holes possible
     nholes = 0
+    listholes=-1
         call bitstring_to_list(Isomo,listall,nelall,N_int)
 
         do iii=1,nelall
@@ -638,12 +639,19 @@ use bitmasks
     tableUniqueAlphas = .FALSE.
 
     ! Now find the allowed (p,q) excitations
-    do i=1, N_int
-      Isomo(i) = iand(reunion_of_act_virt_bitmask(i,1),psi_configuration(i,1,idxI))
-      Idomo(i) = iand(reunion_of_act_virt_bitmask(i,2),psi_configuration(i,2,idxI))
-      Jsomo(i) = Isomo(i)
-      Jdomo(i) = Idomo(i)
+    do ii=1, N_int
+      !Isomo(ii) = iand(reunion_of_act_virt_bitmask(i,1),psi_configuration(ii,1,idxI))
+      !Idomo(ii) = iand(reunion_of_act_virt_bitmask(i,2),psi_configuration(ii,2,idxI))
+      Isomo(ii) = psi_configuration(ii,1,idxI)
+      Idomo(ii) = psi_configuration(ii,2,idxI)
+      Jsomo(ii) = Isomo(ii)
+      Jdomo(ii) = Idomo(ii)
     enddo
+    !print *,"I=",idxI
+    !print *,"Isomo=",Isomo(1)!, Isomo(2)
+    !print *,"Idomo=",Idomo(1)!, Idomo(2)
+    !print *,listholes
+    !print *,listvmos
 
     if(Nsomo_I .EQ. 0) then
       kstart = 1
@@ -789,6 +797,7 @@ use bitmasks
 
       do j = 1, nvmos
         qq  = listvmos(j)
+        if(pp.eq.qq) cycle
         jint = shiftr(qq-1,bit_kind_shift) + 1
         jpos = qq-shiftl((jint-1),bit_kind_shift)-1
         if(vmotype(j) == 1)then
@@ -797,31 +806,25 @@ use bitmasks
           Jdomo(jint) = IBSET(Jdomo(jint),jpos)
           Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
         endif
-        if(pp .EQ. qq) then
-          if(vmotype(j) == 1)then
-            Jsomo(jint) = IBCLR(Jsomo(jint),jpos)
-          else if(vmotype(j) == 2)then
-            Jdomo(jint) = IBCLR(Jdomo(jint),jpos)
-            Jsomo(jint) = IBSET(Jsomo(jint),jpos)
-          endif
-          cycle
-        endif
+        
         if(tableUniqueAlphas(pp,qq)) then
 
+        Nsomo_J = 0
         do ii=1, N_int
           Jcfg(ii,1) = Jsomo(ii)
           Jcfg(ii,2) = Jdomo(ii)
+          Nsomo_J += POPCNT(Jsomo(ii))
         enddo
 
-          call bitstring_to_list(Jcfg,listall,nelall,N_int)
-          Nsomo_J = nelall
+          !call bitstring_to_list(Jcfg,listall,nelall,N_int)
+          !Nsomo_J = nelall
 
           if(Nsomo_J .ge. NSOMOMin) then
             !print *," Idx = ",idxI, "p = ",pp, " q = ",qq," Jsomo=",Jsomo(1), " Jdomo=",IOR(Jdomo(1),ISHFT(1_8,n_core_orb)-1)
             NalphaIcfg += 1
             !print *," Idx = ",idxI, " Nalpha=",NalphaIcfg
             alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
-            if(n_core_orb .le. 63)then
+            if(n_core_orb .le. 64)then
               alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1)
             else
               n_core_orb_64 = n_core_orb
diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f
index 193fcf0e..2ff3912b 100644
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@@ -1892,8 +1892,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
                                          nconnectedI, excitationIds, excitationTypes, diagfactors)
 
         !if(i .EQ. 1) then
-        !   print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),alphas_Icfg(2,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' &
-        !   kcfgDOMO=',alphas_Icfg(1,2,k),alphas_Icfg(2,2,k),' ',POPCNT(alphas_Icfg(1,2,k)), " NconnectedI=",nconnectedI
+        !   !print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),alphas_Icfg(2,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' &
+        !   !kcfgDOMO=',alphas_Icfg(1,2,k),alphas_Icfg(2,2,k),' ',POPCNT(alphas_Icfg(1,2,k)), " NconnectedI=",nconnectedI
+        !   print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' &
+        !   kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k)), " NconnectedI=",nconnectedI
         !endif
 
         
@@ -1913,9 +1915,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
            extype = excitationTypes(j)
            call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
            ! for E_pp E_rs and E_ppE_rr case
-           !if(k.eq.722)then
-           !  print *,"j=",j," k=",k,"p=",p,"q=",q,"NSOMOalpha=",NSOMOalpha, "pmodel=",pmodel,"qmodel=",qmodel, "extype=",extype
-           !endif
+           if(i.eq.1)then
+             print *,"j=",j," k=",k,"p=",p,"q=",q,"NSOMOalpha=",NSOMOalpha, "pmodel=",pmodel,"qmodel=",qmodel, "extype=",extype
+           endif
            rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
            colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
            !print *,"j=",j," Nsomo=",NSOMOalpha," rowsikpq=",rowsikpq," colsikpq=",colsikpq, " p=",pmodel," q=",qmodel, " extyp=",extype
diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f
index 00bbf543..b88c188d 100644
--- a/src/davidson/diagonalization_hcfg.irp.f
+++ b/src/davidson/diagonalization_hcfg.irp.f
@@ -329,8 +329,8 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
                 tmpU(kk,ii) = U_csf(ii,shift+kk)
               enddo
             enddo
-            !tmpU     =0.0d0
-            !tmpU(1,1)=1.0d0
+            tmpU     =0.0d0
+            tmpU(1,1)=1.0d0
             double precision               :: irp_rdtsc
             double precision               :: ticks_0, ticks_1
             integer*8                      :: irp_imax
@@ -345,19 +345,19 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
               enddo
             enddo
 
-            !U_csf = 0.0d0
-            !U_csf(1,1) = 1.0d0
-            !u_in = 0.0d0
-            !call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
-            !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
-            !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
-            !do i=1,sze_csf
-            !  print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
-            !  !if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
-            !  !  print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
-            !  !endif
-            !end do
-            !stop
+            U_csf = 0.0d0
+            U_csf(1,1) = 1.0d0
+            u_in = 0.0d0
+            call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
+            call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
+            call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
+            do i=1,sze_csf
+              print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+              !if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
+              !  print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+              !endif
+            end do
+            stop
             deallocate(tmpW)
             deallocate(tmpU)
         endif

From feb55d1e6656642d99b2fbb944453fccc9cac793 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Sat, 17 Dec 2022 03:18:58 +0100
Subject: [PATCH 20/35] Fixed LIB

---
 scripts/compilation/qp_create_ninja | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja
index 7df3c62d..aad85778 100755
--- a/scripts/compilation/qp_create_ninja
+++ b/scripts/compilation/qp_create_ninja
@@ -126,7 +126,7 @@ def ninja_create_env_variable(pwd_config_file):
         try:
             content = ""
             with open(libfile,'r') as f:
-                content = f.read()
+                content = f.read().replace('\n','')
                 str_lib += " "+content
         except IOError:
             pass

From c3c61d4ba11dab538f0bf7cb106a4ade389d66b1 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Mon, 19 Dec 2022 13:51:55 +0100
Subject: [PATCH 21/35] Removed debug print.

---
 src/csf/configurations.irp.f | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/csf/configurations.irp.f b/src/csf/configurations.irp.f
index a84cb4ab..aebf53d9 100644
--- a/src/csf/configurations.irp.f
+++ b/src/csf/configurations.irp.f
@@ -352,11 +352,6 @@ end
    psi_configuration(k,1,i) = ieor(psi_det(k,1,i),psi_det(k,2,i))
    psi_configuration(k,2,i) = iand(psi_det(k,1,i),psi_det(k,2,i))
   enddo
- if(i.eq.1)then
-   print *,'Preparing PSI_CONFIGURATION i=',i
-            print *," Icfg somo=",psi_configuration(1,1,1), " ", psi_configuration(2,1,1)
-            print *," Icfg domo=",psi_configuration(1,2,1), " ", psi_configuration(2,2,1)
- endif
  enddo
 
  ! Sort

From 6f86f05e7fa027cf2ade3d160d679a9755e45225 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Mon, 19 Dec 2022 13:53:55 +0100
Subject: [PATCH 22/35] Removed test code.

---
 src/davidson/diagonalization_hcfg.irp.f | 40 ++++++++++++-------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f
index b88c188d..8e12b9c8 100644
--- a/src/davidson/diagonalization_hcfg.irp.f
+++ b/src/davidson/diagonalization_hcfg.irp.f
@@ -112,8 +112,8 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
   double precision, allocatable  :: U(:,:), U_csf(:,:), overlap(:,:)
   double precision, allocatable  :: tmpU(:,:), tmpW(:,:)
   double precision, pointer      :: W(:,:), W_csf(:,:)
-  double precision, pointer      :: W2(:,:), W_csf2(:,:)
-  double precision, allocatable  :: U2(:,:), U_csf2(:,:)
+  !double precision, pointer      :: W2(:,:), W_csf2(:,:)
+  !double precision, allocatable  :: U2(:,:), U_csf2(:,:)
   logical                        :: disk_based
   double precision               :: energy_shift(N_st_diag_in*davidson_sze_max)
 
@@ -236,15 +236,15 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
     call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/))
   else
     allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax))
-    allocate(W2(sze,N_st_diag),W_csf2(sze_csf,N_st_diag*itermax))
+    !allocate(W2(sze,N_st_diag),W_csf2(sze_csf,N_st_diag*itermax))
   endif
 
   allocate(                                                          &
       ! Large
       U(sze,N_st_diag),                                              &
-      U2(sze,N_st_diag),                                              &
+      !U2(sze,N_st_diag),                                              &
       U_csf(sze_csf,N_st_diag*itermax),                              &
-      U_csf2(sze_csf,N_st_diag*itermax),                              &
+      !U_csf2(sze_csf,N_st_diag*itermax),                              &
 
       ! Small
       h(N_st_diag*itermax,N_st_diag*itermax),                        &
@@ -329,8 +329,8 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
                 tmpU(kk,ii) = U_csf(ii,shift+kk)
               enddo
             enddo
-            tmpU     =0.0d0
-            tmpU(1,1)=1.0d0
+            !tmpU     =0.0d0
+            !tmpU(1,1)=1.0d0
             double precision               :: irp_rdtsc
             double precision               :: ticks_0, ticks_1
             integer*8                      :: irp_imax
@@ -345,19 +345,19 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
               enddo
             enddo
 
-            U_csf = 0.0d0
-            U_csf(1,1) = 1.0d0
-            u_in = 0.0d0
-            call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
-            call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
-            call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
-            do i=1,sze_csf
-              print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
-              !if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
-              !  print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
-              !endif
-            end do
-            stop
+            !U_csf = 0.0d0
+            !U_csf(1,1) = 1.0d0
+            !u_in = 0.0d0
+            !call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
+            !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
+            !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
+            !do i=1,sze_csf
+            !  print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+            !  !if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
+            !  !  print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+            !  !endif
+            !end do
+            !stop
             deallocate(tmpW)
             deallocate(tmpU)
         endif

From 0f600519cbc02fa9ad22f7aa9df03295487c22d3 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Mon, 19 Dec 2022 13:54:34 +0100
Subject: [PATCH 23/35] Fixed bugs in obtain I.

---
 src/csf/obtain_I_foralpha.irp.f | 46 ++++++++++++++-------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f
index 8b588f4e..211d5af6 100644
--- a/src/csf/obtain_I_foralpha.irp.f
+++ b/src/csf/obtain_I_foralpha.irp.f
@@ -264,15 +264,15 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
   Icfg = Ialpha
   Nsomo_alpha = 0
   !print *," Ialpha="
-  do i=1,N_int
-    Isomo = Ialpha(i,1)
-    Idomo = Ialpha(i,2)
+  do ii=1,N_int
+    Isomo = Ialpha(ii,1)
+    Idomo = Ialpha(ii,2)
     Nsomo_alpha += POPCNT(Isomo)
     !print *,Isomo, Idomo, "Nsomo=",Nsomo_alpha
   end do
   end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1)
-  if(end_index .LT. 0) end_index= N_configuration
-  !end_index = N_configuration
+  if(end_index .LT. 0 .OR. end_index .lt. idxI) end_index= N_configuration
+  end_index = N_configuration
 
 
   p = 0
@@ -283,17 +283,6 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
      if(Nsomo_alpha .lt. MS)then
        cycle
      endif
-     !Isomo = Ialpha(1,1)
-     !Idomo = Ialpha(1,2)
-     !Jsomo = psi_configuration(1,1,i)
-     !Jdomo = psi_configuration(1,2,i)
-     !diffSOMO = IEOR(Isomo,Jsomo)
-     !ndiffSOMO = POPCNT(diffSOMO)
-     !diffDOMO = IEOR(Idomo,Jdomo)
-     !xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
-     !ndiffDOMO = POPCNT(diffDOMO)
-     !nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
-     !nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
 
      ndiffSOMO = 0
      ndiffDOMO = 0
@@ -315,6 +304,9 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
        nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
        nxordiffSOMODOMO += POPCNT(diffSOMO) + POPCNT(diffDOMO)
      end do
+     !if(idxI.eq.218)then
+     !  print *,"I=",idxI,"Nsomo_alpha=",Nsomo_alpha,"nxordiffSOMODOMO(4)=",nxordiffSOMODOMO, " ndiffSOMO(2)=",ndiffSOMO, " ndiffDOMO=",ndiffDOMO
+     !endif
      !Jcfg = psi_configuration(:,:,i)
      !print *,"nxordiffSOMODOMO(4)=",nxordiffSOMODOMO, " ndiffSOMO(2)=",ndiffSOMO
 
@@ -391,14 +383,14 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                 do ii=1,N_int
                   Isomo = Ialpha(ii,1)
                   Jsomo = psi_configuration(ii,1,i)
-                  Isomo = IEOR(Isomo, Jsomo)
+                  IJsomo = IEOR(Isomo, Jsomo)
                   iint = shiftr(p-1,bit_kind_shift) + 1
                   ipos = p-shiftl((iint-1),bit_kind_shift)
                   if(iint .eq. ii)then
-                    Isomo = IBCLR(Isomo,ipos-1)
+                    IJsomo = IBCLR(IJsomo,ipos-1)
                   endif
-                  if(popcnt(Isomo) > 0)then
-                    q = TRAILZ(Isomo) + 1 + (ii-1) * bit_kind_size
+                  if(popcnt(IJsomo) > 0)then
+                    q = TRAILZ(IJsomo) + 1 + (ii-1) * bit_kind_size
                     EXIT
                   endif
                 end do
@@ -437,15 +429,15 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
                 do ii=1,N_int
                   Isomo = Ialpha(ii,1)
                   Jsomo = psi_configuration(ii,1,i)
-                  Isomo = IEOR(Isomo, Jsomo)
+                  IJsomo = IEOR(Isomo, Jsomo)
                   iint = shiftr(q-1,bit_kind_shift) + 1
                   ipos = q-shiftl((iint-1),bit_kind_shift)
                   if(iint .eq. ii)then
-                    Isomo = IBCLR(Isomo,ipos-1)
+                    IJsomo = IBCLR(IJsomo,ipos-1)
                   endif
                   !print *,"ii=",ii," Isomo=",Isomo
-                  if(popcnt(Isomo) > 0)then
-                    p = TRAILZ(Isomo) + 1 + (ii-1) * bit_kind_size
+                  if(popcnt(IJsomo) > 0)then
+                    p = TRAILZ(IJsomo) + 1 + (ii-1) * bit_kind_size
                     EXIT
                   endif
                 enddo
@@ -502,9 +494,9 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
         endi   = psi_config_data(i,2)
         nconnectedExtradiag+=1
         nconnectedI += 1
-        do k=1,N_int
-          connectedI(k,1,nconnectedI) = psi_configuration(k,1,i)
-          connectedI(k,2,nconnectedI) = psi_configuration(k,2,i)
+        do ii=1,N_int
+          connectedI(ii,1,nconnectedI) = psi_configuration(ii,1,i)
+          connectedI(ii,2,nconnectedI) = psi_configuration(ii,2,i)
         enddo
         idxs_connectedI(nconnectedI)=starti
         excitationIds(1,nconnectedI)=p

From 82409885de49037d00ec7053520c8f3fbc959ca5 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Mon, 19 Dec 2022 13:55:20 +0100
Subject: [PATCH 24/35] Fixed some bugs in generating alphs.

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 120 +++++++++++--------
 1 file changed, 68 insertions(+), 52 deletions(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 744ba205..bebf08a3 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -565,6 +565,7 @@ use bitmasks
   integer                        :: p, pp, p_s
   integer                        :: q, qq, q_s
   integer                        :: countalphas
+  integer                        :: countelec
   logical                        :: pqAlreadyGenQ
   logical                        :: pqExistsQ
   logical                        :: ppExistsQ
@@ -584,9 +585,11 @@ use bitmasks
     Icfg  = psi_configuration(:,:,idxI)
     Jcfg  = psi_configuration(:,:,idxI)
 
-    do i=1, N_int
-      Isomo(i) = iand(act_bitmask(i,1),psi_configuration(i,1,idxI))
-      Idomo(i) = iand(act_bitmask(i,2),psi_configuration(i,2,idxI))
+    !print *,"idxI=",idxI
+    do ii=1, N_int
+      Isomo(ii) = iand(act_bitmask(ii,1),psi_configuration(ii,1,idxI))
+      Idomo(ii) = iand(act_bitmask(ii,2),psi_configuration(ii,2,idxI))
+      !print *,Isomo(ii), Idomo(ii)
     enddo
 
     ! find out all pq holes possible
@@ -640,24 +643,22 @@ use bitmasks
 
     ! Now find the allowed (p,q) excitations
     do ii=1, N_int
-      !Isomo(ii) = iand(reunion_of_act_virt_bitmask(i,1),psi_configuration(ii,1,idxI))
-      !Idomo(ii) = iand(reunion_of_act_virt_bitmask(i,2),psi_configuration(ii,2,idxI))
-      Isomo(ii) = psi_configuration(ii,1,idxI)
-      Idomo(ii) = psi_configuration(ii,2,idxI)
+      !Isomo(ii) = iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,idxI))
+      !Idomo(ii) = iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,idxI))
+      Isomo(ii) = iand(act_bitmask(ii,1),psi_configuration(ii,1,idxI))
+      Idomo(ii) = iand(act_bitmask(ii,2),psi_configuration(ii,2,idxI))
+      !Isomo(ii) = psi_configuration(ii,1,idxI)
+      !Idomo(ii) = psi_configuration(ii,2,idxI)
       Jsomo(ii) = Isomo(ii)
       Jdomo(ii) = Idomo(ii)
     enddo
-    !print *,"I=",idxI
-    !print *,"Isomo=",Isomo(1)!, Isomo(2)
-    !print *,"Idomo=",Idomo(1)!, Idomo(2)
-    !print *,listholes
-    !print *,listvmos
 
     if(Nsomo_I .EQ. 0) then
       kstart = 1
     else
       kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))
     endif
+    kstart = 1
     kend = idxI-1
 
     do i = 1,nholes
@@ -690,9 +691,6 @@ use bitmasks
           Nsomo_J += POPCNT(Jsomo(ii))
         enddo
 
-        !call bitstring_to_list(Jcfg(1,1),listall,nelall,N_int)
-        !Nsomo_J = nelall
-
         ! Check for Minimal alpha electrons (MS)
         if(Nsomo_J.lt.MS)then
           if(vmotype(j) == 1)then
@@ -716,9 +714,9 @@ use bitmasks
           ndiffDOMO = 0
           nxordiffSOMODOMO = 0
           do ii = 1, N_int
-            diffSOMO = IEOR(Jcfg(ii,1),iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k)))
+            diffSOMO = IEOR(Jcfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k)))
             ndiffSOMO += POPCNT(diffSOMO)
-            diffDOMO = IEOR(Jcfg(ii,2),iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k)))
+            diffDOMO = IEOR(Jcfg(ii,2),iand(act_bitmask(ii,2),psi_configuration(ii,2,k)))
             xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
             ndiffDOMO += POPCNT(diffDOMO)
             nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO)
@@ -773,16 +771,26 @@ use bitmasks
     end do
 
     !print *,tableUniqueAlphas(:,:)
-
     ! prune list of alphas
-    do i=1, N_int
-      Isomo(i) = iand(act_bitmask(i,1),psi_configuration(i,1,idxI))
-      Idomo(i) = iand(act_bitmask(i,2),psi_configuration(i,2,idxI))
-      Jsomo(i) = Isomo(i)
-      Jdomo(i) = Idomo(i)
+    do ii=1, N_int
+      Isomo(ii) = iand(act_bitmask(ii,1),psi_configuration(ii,1,idxI))
+      Idomo(ii) = iand(act_bitmask(ii,2),psi_configuration(ii,2,idxI))
+      !Isomo(ii) = psi_configuration(ii,1,idxI)
+      !Idomo(ii) = psi_configuration(ii,2,idxI)
+      Jsomo(ii) = Isomo(ii)
+      Jdomo(ii) = Idomo(ii)
     enddo
     !print *, " Isomo=",Isomo(1), " Idomo=", Idomo(1)
 
+    !countelec=0
+    !do ii=1, N_int
+    !  countelec += POPCNT(Icfg(ii,1))*1 + POPCNT(Icfg(ii,2))*2
+    !enddo
+    !if(countelec .ne. 14)then
+    !  print *," idxI=",idxI, "00countelec=",countelec, " bit_kind_size=",bit_kind_size, " nvmo=",nvmos," mo_num=",mo_num
+    !  stop
+    !endif
+
     NalphaIcfg = 0
     do i = 1, nholes
       pp  = listholes(i)
@@ -809,21 +817,23 @@ use bitmasks
         
         if(tableUniqueAlphas(pp,qq)) then
 
-        Nsomo_J = 0
-        do ii=1, N_int
-          Jcfg(ii,1) = Jsomo(ii)
-          Jcfg(ii,2) = Jdomo(ii)
-          Nsomo_J += POPCNT(Jsomo(ii))
-        enddo
-
-          !call bitstring_to_list(Jcfg,listall,nelall,N_int)
-          !Nsomo_J = nelall
+          Nsomo_J = 0
+          countelec = 0
+          do ii=1, N_int
+            Jcfg(ii,1) = Jsomo(ii)
+            Jcfg(ii,2) = Jdomo(ii)
+            Nsomo_J += POPCNT(Jsomo(ii))
+            countelec += POPCNT(Jsomo(ii))*1 + POPCNT(Jdomo(ii))*2
+          enddo
 
           if(Nsomo_J .ge. NSOMOMin) then
             !print *," Idx = ",idxI, "p = ",pp, " q = ",qq," Jsomo=",Jsomo(1), " Jdomo=",IOR(Jdomo(1),ISHFT(1_8,n_core_orb)-1)
             NalphaIcfg += 1
-            !print *," Idx = ",idxI, " Nalpha=",NalphaIcfg
+            !if(idxI.eq.8)then
+            !  print *," 1 Idx = ",idxI, " Nalpha=",NalphaIcfg, " n_core_orb=",n_core_orb
+            !endif
             alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
+            alphasIcfg_list(:,2,idxI,NalphaIcfg) = Jcfg(:,2)
             if(n_core_orb .le. 64)then
               alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1)
             else
@@ -839,6 +849,7 @@ use bitmasks
             endif
             NalphaIcfg_list(idxI) = NalphaIcfg
           endif
+          !print *,"         ", NalphaIcfg, Jsomo(1), Jsomo(2), "|", Jdomo(1), Jdomo(2)
         endif
 
         if(vmotype(j) == 1)then
@@ -858,12 +869,14 @@ use bitmasks
 
     ! Check if this Icfg has been previously generated as a mono
     ppExistsQ = .False.
-    do i=1, N_int
-      Isomo(i) = iand(act_bitmask(i,1),psi_configuration(i,1,idxI))
-      Idomo(i) = iand(act_bitmask(i,2),psi_configuration(i,2,idxI))
+    do ii=1, N_int
+      Isomo(ii) = iand(act_bitmask(ii,1),psi_configuration(ii,1,idxI))
+      Idomo(ii) = iand(act_bitmask(ii,2),psi_configuration(ii,2,idxI))
     enddo
+    !Icfg  = psi_configuration(:,:,idxI)
 
-    kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+    !kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+    kstart = 1
     ndiffDOMO = 0
     do k = kstart, idxI-1
       ndiffSOMO = 0
@@ -871,7 +884,7 @@ use bitmasks
         diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k)))
         ndiffSOMO += POPCNT(diffSOMO)
       end do
-      ! ndiffSOMO cannot be 0 (I /= k)
+      ! ndiffSOMO cannot be 0 (I /= k) if idxI is a single ex
       ! if ndiffSOMO /= 2 then it has to be greater than 2 and hense
       ! this Icfg could not have been generated before.
       if (ndiffSOMO /= 2) cycle
@@ -897,9 +910,11 @@ use bitmasks
         alphasIcfg_list(:,2,idxI,NalphaIcfg) = Icfg(:,2)
         NalphaIcfg_list(idxI) = NalphaIcfg
       endif
+          !print *,"         ---> ", NalphaIcfg, Icfg(1,1), Icfg(2,1), "|", Icfg(1,2), Icfg(2,2)
     endif
 
     NalphaIcfg = 0
+
   enddo ! end loop idxI
   call wall_time(t1)
   print *, 'Preparation : ', t1 - t0
@@ -954,9 +969,9 @@ END_PROVIDER
   logical                            :: ppExistsQ
   integer :: listall(N_int*bit_kind_size), nelall
 
-  do i=1, N_int
-    Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
-    Idomo(i) = iand(act_bitmask(i,2),Icfg(i,2))
+  do ii=1, N_int
+    Isomo(ii) = iand(act_bitmask(ii,1),Icfg(ii,1))
+    Idomo(ii) = iand(act_bitmask(ii,2),Icfg(ii,2))
   enddo
 
   !print*,"Input cfg"
@@ -1016,11 +1031,11 @@ END_PROVIDER
   tableUniqueAlphas = .FALSE.
 
     ! Now find the allowed (p,q) excitations
-    do i=1, N_int
-      Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
-      Idomo(i) = iand(act_bitmask(i,2),Icfg(i,2))
-      Jsomo(i) = Isomo(i)
-      Jdomo(i) = Idomo(i)
+    do ii=1, N_int
+      Isomo(ii) = iand(act_bitmask(ii,1),Icfg(ii,1))
+      Idomo(ii) = iand(act_bitmask(ii,2),Icfg(ii,2))
+      Jsomo(ii) = Isomo(ii)
+      Jdomo(ii) = Idomo(ii)
     enddo
 
   ! Now find the allowed (p,q) excitations
@@ -1159,11 +1174,11 @@ END_PROVIDER
   !print *,tableUniqueAlphas(:,:)
 
   ! prune list of alphas
-    do i=1, N_int
-      Isomo(i) = iand(act_bitmask(i,1),Icfg(i,1))
-      Idomo(i) = iand(act_bitmask(i,2),Icfg(i,2))
-      Jsomo(i) = Isomo(i)
-      Jdomo(i) = Idomo(i)
+    do ii=1, N_int
+      Isomo(ii) = iand(act_bitmask(ii,1),Icfg(ii,1))
+      Idomo(ii) = iand(act_bitmask(ii,2),Icfg(ii,2))
+      Jsomo(ii) = Isomo(ii)
+      Jdomo(ii) = Idomo(ii)
     enddo
 
   NalphaIcfg = 0
@@ -1191,7 +1206,8 @@ END_PROVIDER
            ! SOMO
            NalphaIcfg += 1
            alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1)
-           if(n_core_orb .le. 63)then
+           alphasIcfg_list(:,2,idxI,NalphaIcfg) = Jcfg(:,2)
+           if(n_core_orb .le. 64)then
              alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1)
            else
              n_core_orb_64 = n_core_orb

From 16913557ca9813f59e9d9ae3e83c0143bc8f8261 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Mon, 19 Dec 2022 13:56:27 +0100
Subject: [PATCH 25/35] Fixed bugs. Looks like S=1 Nint>1 is also working.

---
 src/csf/sigma_vector.irp.f | 179 ++++++++++++++++++++++---------------
 1 file changed, 105 insertions(+), 74 deletions(-)

diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f
index 2ff3912b..541c3774 100644
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@@ -149,7 +149,6 @@
     ncfgprev = cfg_seniority_index(i+2)
   end do
   !print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration
-
 END_PROVIDER
 
 
@@ -881,10 +880,10 @@ subroutine calculate_preconditioner_cfg(diag_energies)
 
   do i=1,N_configuration
 
-     Isomo = psi_configuration(1,1,i)
-     Idomo = psi_configuration(1,2,i)
-     Icfg(1,1) = psi_configuration(1,1,i)
-     Icfg(1,2) = psi_configuration(1,2,i)
+     !Isomo = psi_configuration(1,1,i)
+     !Idomo = psi_configuration(1,2,i)
+     !Icfg(1,1) = psi_configuration(1,1,i)
+     !Icfg(1,2) = psi_configuration(1,2,i)
      !NSOMOI = getNSOMO(psi_configuration(:,:,i))
 
      starti = psi_config_data(i,1)
@@ -894,6 +893,7 @@ subroutine calculate_preconditioner_cfg(diag_energies)
 
      ! find out all pq holes possible
      nholes = 0
+     listholes = -1
      ! holes in SOMO
      !do kk = 1,n_act_orb
      !  k = list_act(kk)
@@ -1258,10 +1258,6 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod
 
   ! TODO Flag (print) when model space indices is > 64
   do ii=1,N_int
-    !Isomo = Ialpha(ii,1)
-    !Idomo = Ialpha(ii,2)
-    !Jsomo = Jcfg(ii,1)
-    !Jdomo = Jcfg(ii,2)
     Isomo(ii) = Ialpha(ii,1)
     Idomo(ii) = Ialpha(ii,2)
     Jsomo(ii) = Jcfg(ii,1)
@@ -1292,26 +1288,30 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod
           tmpp = 0
           !print *,"iint=",iint, " p=",p
           do ii=1,iint-1
-            mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-            Isomotmp = IAND(Isomo(ii),mask)
-            tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+            !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+            !Isomotmp = IAND(Isomo(ii),mask)
+            !tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+            tmpp += POPCNT(Isomo(ii))
           end do
           mask = ISHFT(1_bit_kind,ipos+1) - 1
           Isomotmp = IAND(Isomo(iint),mask)
-          pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          pmodel = tmpp + POPCNT(Isomotmp)
           !print *,"iint=",iint, " ipos=",ipos,"pmodel=",pmodel, XOR(Isomotmp,mask),Isomo(iint)
 
           iint = shiftr(q-1,bit_kind_shift) + 1
           ipos = q-shiftl((iint-1),bit_kind_shift)-1
           tmpq = 0
           do ii=1,iint-1
-            mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-            Isomotmp = IAND(Isomo(ii),mask)
-            tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+            !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+            !Isomotmp = IAND(Isomo(ii),mask)
+            !tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+            tmpq += POPCNT(Isomo(ii))
           end do
           mask = ISHFT(1_bit_kind,ipos+1) - 1
           Isomotmp = IAND(Isomo(iint),mask)
-          qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          qmodel = tmpq + POPCNT(Isomotmp)
           !print *,"iint=",iint, " ipos=",ipos,"qmodel=",qmodel
        case (2)
           ! DOMO -> VMO
@@ -1328,25 +1328,29 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod
           ipos = p-shiftl((iint-1),bit_kind_shift)-1
           tmpp = 0
           do ii=1,iint-1
-            mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-            Jsomotmp = IAND(Jsomo(ii),mask)
-            tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+            !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+            !Jsomotmp = IAND(Jsomo(ii),mask)
+            !tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+            tmpp += POPCNT(Jsomo(ii))
           end do
           mask = ISHFT(1_bit_kind,ipos+1) - 1
           Jsomotmp = IAND(Jsomo(iint),mask)
-          pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+          !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+          pmodel = tmpp + POPCNT(Jsomotmp)
 
           iint = shiftr(q-1,bit_kind_shift) + 1
           ipos = q-shiftl((iint-1),bit_kind_shift)-1
           tmpq = 0
           do ii=1,iint-1
-            mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-            Jsomotmp = IAND(Jsomo(ii),mask)
-            tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+            !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+            !Jsomotmp = IAND(Jsomo(ii),mask)
+            !tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+            tmpq += POPCNT(Jsomo(ii))
           end do
           mask = ISHFT(1_bit_kind,ipos+1) - 1
           Jsomotmp = IAND(Jsomo(iint),mask)
-          qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+          !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+          qmodel = tmpq + POPCNT(Jsomotmp)
        case (3)
           ! SOMO -> VMO
           !print *,"type -> SOMO -> VMO"
@@ -1363,25 +1367,29 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod
              ipos = p-shiftl((iint-1),bit_kind_shift)-1
              tmpp = 0
              do ii=1,iint-1
-               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-               Isomotmp = IAND(Isomo(ii),mask)
-               tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+               !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               !Isomotmp = IAND(Isomo(ii),mask)
+               !tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+               tmpp += POPCNT(Isomo(ii))
              end do
-             mask = ISHFT(1_8,ipos+1) - 1
+             mask = ISHFT(1_bit_kind,ipos+1) - 1
              Isomotmp = IAND(Isomo(iint),mask)
-             pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+             !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+             pmodel = tmpp + POPCNT(Isomotmp)
 
              iint = shiftr(q-1,bit_kind_shift) + 1
              ipos = q-shiftl((iint-1),bit_kind_shift)-1
              tmpq = 0
              do ii=1,iint-1
-               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-               Jsomotmp = IAND(Jsomo(ii),mask)
-               tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+               !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               !Jsomotmp = IAND(Jsomo(ii),mask)
+               !tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+               tmpq += POPCNT(Jsomo(ii))
              end do
              mask = ISHFT(1_bit_kind,ipos+1) - 1
              Jsomotmp = IAND(Jsomo(iint),mask)
-             qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + 1
+             !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + 1
+             qmodel = tmpq + POPCNT(Jsomotmp) + 1
           else
              !mask = ISHFT(1_8,p) - 1
              !Isomo = IAND(Isomo,mask)
@@ -1394,25 +1402,29 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod
              ipos = p-shiftl((iint-1),bit_kind_shift)-1
              tmpp = 0
              do ii=1,iint-1
-               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-               Isomotmp = IAND(Isomo(ii),mask)
-               tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+               !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               !Isomotmp = IAND(Isomo(ii),mask)
+               !tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+               tmpp += POPCNT(Isomo(ii))
              end do
              mask = ISHFT(1_bit_kind,ipos+1) - 1
              Isomotmp = IAND(Isomo(iint),mask)
-             pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + 1
+             !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + 1
+             pmodel = tmpp + POPCNT(Isomotmp) + 1
 
              iint = shiftr(q-1,bit_kind_shift) + 1
              ipos = q-shiftl((iint-1),bit_kind_shift)-1
              tmpq = 0
              do ii=1,iint-1
-               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-               Jsomotmp = IAND(Jsomo(ii),mask)
-               tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+               !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               !Jsomotmp = IAND(Jsomo(ii),mask)
+               !tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+               tmpq += POPCNT(Jsomo(ii))
              end do
              mask = ISHFT(1_bit_kind,ipos+1) - 1
              Jsomotmp = IAND(Jsomo(iint),mask)
-             qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+             !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+             qmodel = tmpq + POPCNT(Jsomotmp)
           endif
        case (4)
           ! DOMO -> SOMO
@@ -1431,25 +1443,29 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod
              ipos = p-shiftl((iint-1),bit_kind_shift)-1
              tmpp = 0
              do ii=1,iint-1
-               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-               Jsomotmp = IAND(Jsomo(ii),mask)
-               tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+               !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               !Jsomotmp = IAND(Jsomo(ii),mask)
+               !tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+               tmpp += POPCNT(Jsomo(ii))
              end do
              mask = ISHFT(1_bit_kind,ipos+1) - 1
              Jsomotmp = IAND(Jsomo(iint),mask)
-             pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+             !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+             pmodel = tmpp + POPCNT(Jsomotmp)
 
              iint = shiftr(q-1,bit_kind_shift) + 1
              ipos = q-shiftl((iint-1),bit_kind_shift)-1
              tmpq = 0
              do ii=1,iint-1
-               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-               Isomotmp = IAND(Isomo(ii),mask)
-               tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+               !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               !Isomotmp = IAND(Isomo(ii),mask)
+               !tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+               tmpq += POPCNT(Isomo(ii))
              end do
              mask = ISHFT(1_bit_kind,ipos+1) - 1
              Isomotmp = IAND(Isomo(iint),mask)
-             qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + 1
+             !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + 1
+             qmodel = tmpq + POPCNT(Isomotmp) + 1
           else
              !mask = ISHFT(1_8,p) - 1
              !Jsomo = IAND(Jsomo,mask)
@@ -1462,25 +1478,29 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod
              ipos = p-shiftl((iint-1),bit_kind_shift)-1
              tmpp = 0
              do ii=1,iint-1
-               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-               Jsomotmp = IAND(Jsomo(ii),mask)
-               tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+               !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               !Jsomotmp = IAND(Jsomo(ii),mask)
+               !tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+               tmpp += POPCNT(Jsomo(ii))
              end do
              mask = ISHFT(1_bit_kind,ipos+1) - 1
              Jsomotmp = IAND(Jsomo(iint),mask)
-             pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + 1
+             !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + 1
+             pmodel = tmpp + POPCNT(Jsomotmp) + 1
 
              iint = shiftr(q-1,bit_kind_shift) + 1
              ipos = q-shiftl((iint-1),bit_kind_shift)-1
              tmpq = 0
              do ii=1,iint-1
-               mask = ISHFT(1_bit_kind,-1)-1_bit_kind
-               Isomotmp = IAND(Isomo(ii),mask)
-               tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+               !mask = ISHFT(1_bit_kind,-1)-1_bit_kind
+               !Isomotmp = IAND(Isomo(ii),mask)
+               !tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+               tmpq += POPCNT(Isomo(ii))
              end do
              mask = ISHFT(1_bit_kind,ipos+1) - 1
              Isomotmp = IAND(Isomo(iint),mask)
-             qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+             !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+             qmodel = tmpq + POPCNT(Isomotmp)
           endif
        case default
           print *,"something is wrong in convertOrbIdsToModelSpaceIds"
@@ -1560,6 +1580,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
   real*8                         :: tmpvar, tmptot
   real*8                         :: core_act_contrib
   integer :: listall(N_int*bit_kind_size), nelall
+  integer :: countelec
 
   integer(omp_lock_kind), allocatable :: lock(:)
   call omp_set_max_active_levels(1)
@@ -1621,7 +1642,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
       !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,&
       !$OMP    N_int, N_st, psi_out, psi_in, h_core_ri, core_energy, h_act_ri, AIJpqContainer,&
       !$OMP     pp, sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp,       &
-      !$OMP     qq, iint, jint, ipos, jpos, nelall, listall, Nsomo_I, &
+      !$OMP     qq, iint, jint, ipos, jpos, nelall, listall, Nsomo_I, countelec,&
       !$OMP     AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,&
       !$OMP     n_core_orb, n_act_orb, list_act, n, list_core,  list_core_is_built,core_act_contrib, num_threads_max,&
       !$OMP     n_core_orb_is_built, mo_integrals_map, mo_integrals_map_is_built)
@@ -1650,7 +1671,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
      Isomo(ii) = Icfg(ii,1)
      Idomo(ii) = Icfg(ii,2)
     enddo
-     NSOMOI = getNSOMO(Icfg)
+    NSOMOI = getNSOMO(Icfg)
 
      ! find out all pq holes possible
      nholes = 0
@@ -1680,9 +1701,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
     ! enddo
 
     ! ! find vmos
-     listvmos = -1
-     vmotype = -1
-     nvmos = 0
     ! do kk = 1,n_act_orb
     !   k = list_act(kk)
     !   !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
@@ -1720,6 +1738,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
         end do
 
 
+     listvmos = -1
+     vmotype = -1
+     nvmos = 0
   ! find vmos
     ! Take into account N_int
     do ii = 1, n_act_orb
@@ -1835,6 +1856,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
   deallocate(excitationTypes_single)
 
   !print *," singles part psi(1,1)=",psi_out(1,1)
+  !do i=1,n_CSF
+  !  print *,"i=",i," psi(i)=",psi_out(1,i)
+  !enddo
   
   allocate(listconnectedJ(N_INT,2,max(sze,10000)))
   allocate(alphas_Icfg(N_INT,2,max(sze,10000)))
@@ -1849,7 +1873,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
   !!!====================!!!
   !!! Double Excitations !!!
   !!!====================!!!
-
   ! Loop over all selected configurations
   !$OMP DO SCHEDULE(static)
   do i = istart_cfg,iend_cfg
@@ -1880,7 +1903,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
      !call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ)
 
      ! TODO : remove doubly excited for return
-     !print *,"I=",i," isomo=",psi_configuration(1,1,i)," idomo=",psi_configuration(1,2,i), " psiout=",psi_out(1,5), "Nalphas_Icfg=",Nalphas_Icfg
+     !print *,"I=",i,"isomo=",psi_configuration(1,1,i),psi_configuration(2,1,i),POPCNT(psi_configuration(1,1,i)),POPCNT(psi_configuration(2,1,i)),&
+     !"idomo=",psi_configuration(1,2,i),psi_configuration(2,2,i),POPCNT(psi_configuration(1,2,i)),POPCNT(psi_configuration(2,2,i)), "Nalphas_Icfg=",Nalphas_Icfg
      do k = 1,Nalphas_Icfg
         ! Now generate all singly excited with respect to a given alpha CFG
 
@@ -1891,11 +1915,11 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
         call obtain_connected_I_foralpha(i, alphas_Icfg(1,1,k), connectedI_alpha, idxs_connectedI_alpha, &
                                          nconnectedI, excitationIds, excitationTypes, diagfactors)
 
-        !if(i .EQ. 1) then
-        !   !print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),alphas_Icfg(2,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' &
-        !   !kcfgDOMO=',alphas_Icfg(1,2,k),alphas_Icfg(2,2,k),' ',POPCNT(alphas_Icfg(1,2,k)), " NconnectedI=",nconnectedI
-        !   print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' &
-        !   kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k)), " NconnectedI=",nconnectedI
+        !if(i .EQ. 218) then
+        !   print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),alphas_Icfg(2,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' &
+        !   kcfgDOMO=',alphas_Icfg(1,2,k),alphas_Icfg(2,2,k),' ',POPCNT(alphas_Icfg(1,2,k)), " NconnectedI=",nconnectedI
+        !   !print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' &
+        !   !kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k)), " NconnectedI=",nconnectedI
         !endif
 
         
@@ -1911,15 +1935,22 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
            NSOMOI = getNSOMO(connectedI_alpha(:,:,j))
            p = excitationIds(1,j)
            q = excitationIds(2,j)
-           !print *,"j=",j, " p=",p," q=",q
            extype = excitationTypes(j)
+           !print *,"K=",k,"j=",j, "countelec=",countelec," p=",p," q=",q, " extype=",extype, "NSOMOalpha=",NSOMOalpha," NSOMOI=",NSOMOI, "alphas_Icfg(1,1,k)=",alphas_Icfg(1,1,k), &
+           !alphas_Icfg(2,1,k), " domo=",alphas_Icfg(1,2,k), alphas_Icfg(2,2,k), " connected somo=",connectedI_alpha(1,1,j), &
+           !connectedI_alpha(2,1,j), " domo=",connectedI_alpha(1,2,j), connectedI_alpha(2,2,j)
            call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
            ! for E_pp E_rs and E_ppE_rr case
-           if(i.eq.1)then
-             print *,"j=",j," k=",k,"p=",p,"q=",q,"NSOMOalpha=",NSOMOalpha, "pmodel=",pmodel,"qmodel=",qmodel, "extype=",extype
-           endif
            rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
            colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+           !if(i.eq.218)then
+           !  print *,"j=",j," k=",k,"p=",p,"q=",q,"NSOMOalpha=",NSOMOalpha, "pmodel=",pmodel,"qmodel=",qmodel, "extype=",extype,&
+           !  "conn somo=",connectedI_alpha(1,1,j),connectedI_alpha(2,1,j),&
+           !  "conn domo=",connectedI_alpha(1,2,j),connectedI_alpha(2,2,j)
+           !  do m=1,colsikpq
+           !    print *,idxs_connectedI_alpha(j)+m-1
+           !  enddo
+           !endif
            !print *,"j=",j," Nsomo=",NSOMOalpha," rowsikpq=",rowsikpq," colsikpq=",colsikpq, " p=",pmodel," q=",qmodel, " extyp=",extype
            totcolsTKI += colsikpq
            rowsTKI = rowsikpq

From 4b52bc4a512a67ab4056b69ecfe21cb6a9d42df9 Mon Sep 17 00:00:00 2001
From: v1j4y <vijay.gopal.c@gmail.com>
Date: Mon, 19 Dec 2022 14:05:45 +0100
Subject: [PATCH 26/35] Revert to default diagonalization method.

---
 src/davidson/diagonalize_ci.irp.f | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f
index 8ec6cd7e..76d8b65f 100644
--- a/src/davidson/diagonalize_ci.irp.f
+++ b/src/davidson/diagonalize_ci.irp.f
@@ -5,8 +5,8 @@ BEGIN_PROVIDER [ character*(3), sigma_vector_algorithm ]
  !
  ! If 'cfg', use <Psi_csf|H|Psi_csf> in Davidson
  END_DOC
- !sigma_vector_algorithm = 'det'
- sigma_vector_algorithm = 'cfg'
+ sigma_vector_algorithm = 'det'
+ !sigma_vector_algorithm = 'cfg'
 END_PROVIDER
 
 BEGIN_PROVIDER [ double precision, CI_energy, (N_states_diag) ]

From e7428c50b84e832c0ed8d98b5dc7e3989904b8bb Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 23 Dec 2022 18:31:27 +0100
Subject: [PATCH 27/35] Fixed Davidson

---
 config/bull.cfg                                               | 4 ++--
 .../{test_dav.irp.f => test_dav.irp.f.example}                | 0
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename src/dav_general_mat/{test_dav.irp.f => test_dav.irp.f.example} (100%)

diff --git a/config/bull.cfg b/config/bull.cfg
index 6a93fdca..91471473 100644
--- a/config/bull.cfg
+++ b/config/bull.cfg
@@ -6,7 +6,7 @@
 # --align=32                 : Align all provided arrays on a 32-byte boundary
 #
 [COMMON]
-FC           : mpiifort -shared-libgcc -shared-intel -fpic
+FC           : mpiifort -fpic -xCORE-AVX2 
 LAPACK_LIB   : -mkl=parallel 
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DMPI  
@@ -31,7 +31,7 @@ OPENMP  : 1          ; Append OpenMP flags
 # -ftz                       : Flushes denormal results to zero
 #
 [OPT]
-FCFLAGS  : -xCORE-AVX2 -O2 -ip -ftz -g -traceback -qopt-prefetch=5 -qopt-prefetch-issue-excl-hint -unroll-aggressive 
+FCFLAGS  : -O2 -ip -ftz -g -traceback -qopt-prefetch=5 -qopt-prefetch-issue-excl-hint -unroll-aggressive 
 # Profiling flags
 #################
 #
diff --git a/src/dav_general_mat/test_dav.irp.f b/src/dav_general_mat/test_dav.irp.f.example
similarity index 100%
rename from src/dav_general_mat/test_dav.irp.f
rename to src/dav_general_mat/test_dav.irp.f.example

From aab67a151782fcd9dd511923057110b0ddea9653 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Fri, 23 Dec 2022 18:31:45 +0100
Subject: [PATCH 28/35] Fix warning in CSF:

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index cea7640c..508772b6 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -342,6 +342,8 @@ END_PROVIDER
   !call debug_spindet(Isomo,1)
   !call debug_spindet(Idomo,1)
 
+  alphasIcfg = 0_bit_kind
+
   ! find out all pq holes possible
   nholes = 0
   ! holes in SOMO

From 4c69fa20104bbd4700709d883d87871a8b47e6d0 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Sat, 31 Dec 2022 12:50:08 +0100
Subject: [PATCH 29/35] Fix csf

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index b69cd17e..76761d53 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -978,7 +978,7 @@ END_PROVIDER
   !call debug_spindet(Isomo,1)
   !call debug_spindet(Idomo,1)
 
-  alphasIcfg = 0_bit_kind
+  alphasIcfg(:,:,1) = 0_bit_kind
 
   ! find out all pq holes possible
     nholes = 0

From 547e6e88236f6fc7e1efd96ed99b18eced50ab09 Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Mon, 2 Jan 2023 09:28:42 +0100
Subject: [PATCH 30/35] fix csf

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 76761d53..e6e6a0d7 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -1155,7 +1155,7 @@ END_PROVIDER
         !end do
 
         if(.NOT. pqExistsQ) then
-           tableUniqueAlphas(p,q) = .TRUE.
+           tableUniqueAlphas(pp,qq) = .TRUE.
            !print *,p,q
            !call debug_spindet(Jsomo,1)
            !call debug_spindet(Jdomo,1)

From 1a12e7f30882684cdc8dddcf96df05449ee54613 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 9 Jan 2023 10:51:12 +0100
Subject: [PATCH 31/35] Fixed AO normalization problem

---
 src/ao_basis/EZFIO.cfg              |  6 +++---
 src/ao_basis/aos.irp.f              | 15 ++++++++-------
 src/ao_one_e_ints/pot_ao_ints.irp.f |  4 ++--
 src/ao_one_e_ints/pseudopot.f90     | 28 ++++++++++++++--------------
 4 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg
index dd61b1be..3ac16446 100644
--- a/src/ao_basis/EZFIO.cfg
+++ b/src/ao_basis/EZFIO.cfg
@@ -57,13 +57,13 @@ default: false
 
 [ao_normalized]
 type: logical
-doc: Use normalized basis functions
+doc: Normalize the atomic orbitals
 interface: ezfio, provider
-default: true
+default: false
 
 [primitives_normalized]
 type: logical
-doc: Use normalized primitive functions
+doc: Normalize the primitive basis functions
 interface: ezfio, provider
 default: true
 
diff --git a/src/ao_basis/aos.irp.f b/src/ao_basis/aos.irp.f
index 3a9e9fb7..dafea9c4 100644
--- a/src/ao_basis/aos.irp.f
+++ b/src/ao_basis/aos.irp.f
@@ -63,15 +63,14 @@ END_PROVIDER
   ! Coefficients including the |AO| normalization
   END_DOC
 
-  do i=1,ao_num
-    l = ao_shell(i)
-    ao_coef_normalized(i,:) = shell_coef(l,:) * shell_normalization_factor(l)
-  end do
 
   double precision               :: norm,overlap_x,overlap_y,overlap_z,C_A(3), c
-  integer                        :: l, powA(3), nz
+  integer                        :: l, powA(3)
+  integer, parameter             :: nz=100
   integer                        :: i,j,k
-  nz=100
+
+   ao_coef_normalized(:,:) = ao_coef(:,:)
+
   C_A = 0.d0
 
   do i=1,ao_num
@@ -80,7 +79,7 @@ END_PROVIDER
     powA(2) = ao_power(i,2)
     powA(3) = ao_power(i,3)
 
-    ! Normalization of the primitives
+    ! GAMESS-type normalization of the primitives
     if (primitives_normalized) then
       do j=1,ao_prim_num(i)
         call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,j), &
@@ -91,6 +90,7 @@ END_PROVIDER
     ! Normalization of the contracted basis functions
     if (ao_normalized) then
       norm = 0.d0
+      l = ao_shell(i)
       do j=1,ao_prim_num(i)
         do k=1,ao_prim_num(i)
           call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,k),powA,powA,overlap_x,overlap_y,overlap_z,c,nz)
@@ -98,6 +98,7 @@ END_PROVIDER
         enddo
       enddo
       ao_coef_normalization_factor(i) = 1.d0/dsqrt(norm)
+      ao_coef_normalized(i,:) *= ao_coef_normalization_factor(i)
     else
       ao_coef_normalization_factor(i) = 1.d0
     endif
diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f
index dc19f6c7..928053ad 100644
--- a/src/ao_one_e_ints/pot_ao_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_ints.irp.f
@@ -18,6 +18,8 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)]
   double precision :: A_center(3),B_center(3),C_center(3)
   double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
 
+  ao_integrals_n_e = 0.d0
+
   if (read_ao_integrals_n_e) then
 
     call ezfio_get_ao_one_e_ints_ao_integrals_n_e(ao_integrals_n_e)
@@ -36,8 +38,6 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)]
 
     else
 
-      ao_integrals_n_e = 0.d0
-
       !$OMP PARALLEL                                                   &
           !$OMP DEFAULT (NONE)                                         &
           !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90
index 7321dff7..e02dea3b 100644
--- a/src/ao_one_e_ints/pseudopot.f90
+++ b/src/ao_one_e_ints/pseudopot.f90
@@ -1950,26 +1950,26 @@ xq(17)=-3.34785456738322
 xq(18)=-3.94476404011563
 xq(19)=-4.60368244955074
 xq(20)=-5.38748089001123
-wq(1)=  2.229393645534151E-013
-wq(2)=  4.399340992273176E-010
-wq(3)=  1.086069370769280E-007
-wq(4)=  7.802556478532063E-006
-wq(5)=  2.283386360163528E-004
-wq(6)=  3.243773342237853E-003
-wq(7)=  2.481052088746362E-002
+wq(1)=  2.229393645534151D-013
+wq(2)=  4.399340992273176D-010
+wq(3)=  1.086069370769280D-007
+wq(4)=  7.802556478532063D-006
+wq(5)=  2.283386360163528D-004
+wq(6)=  3.243773342237853D-003
+wq(7)=  2.481052088746362D-002
 wq(8)=  0.109017206020022
 wq(9)=  0.286675505362834
 wq(10)= 0.462243669600610
 wq(11)= 0.462243669600610
 wq(12)= 0.286675505362834
 wq(13)= 0.109017206020022
-wq(14)= 2.481052088746362E-002
-wq(15)= 3.243773342237853E-003
-wq(16)= 2.283386360163528E-004
-wq(17)= 7.802556478532063E-006
-wq(18)= 1.086069370769280E-007
-wq(19)= 4.399340992273176E-010
-wq(20)= 2.229393645534151E-013
+wq(14)= 2.481052088746362D-002
+wq(15)= 3.243773342237853D-003
+wq(16)= 2.283386360163528D-004
+wq(17)= 7.802556478532063D-006
+wq(18)= 1.086069370769280D-007
+wq(19)= 4.399340992273176D-010
+wq(20)= 2.229393645534151D-013
 
       npts=20
 !      call gauher(xq,wq,npts)

From b4f233ae8571f6dde9d3135602126ce5f27e6699 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 9 Jan 2023 11:00:07 +0100
Subject: [PATCH 32/35] Fixed qp_convert

---
 bin/qp_convert_output_to_ezfio | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bin/qp_convert_output_to_ezfio b/bin/qp_convert_output_to_ezfio
index e7c44b37..e53a9392 100755
--- a/bin/qp_convert_output_to_ezfio
+++ b/bin/qp_convert_output_to_ezfio
@@ -162,11 +162,11 @@ def write_ezfio(res, filename):
     # P a r s i n g #
     # ~#~#~#~#~#~#~ #
 
-    prim_num_max = ezfio.get_ao_basis_ao_prim_num_max()
+    prim_num_max = max(ezfio.get_ao_basis_ao_prim_num())
+    ezfio.set_ao_basis_ao_prim_num_max(prim_num_max)
 
     for i in range(len(res.basis)):
-        coefficient[
-            i] += [0. for j in range(len(coefficient[i]), prim_num_max)]
+        coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)]
         exponent[i] += [0. for j in range(len(exponent[i]), prim_num_max)]
 
     coefficient = reduce(lambda x, y: x + y, coefficient, [])

From 5911d134ef563162e8afbda454976a87b2d5437f Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 9 Jan 2023 12:07:41 +0100
Subject: [PATCH 33/35] Cleaning

---
 config/ifort_2019_debug.cfg             | 66 +++++++++++++++++++++++++
 src/mo_two_e_ints/mo_bi_integrals.irp.f | 20 +++-----
 2 files changed, 73 insertions(+), 13 deletions(-)
 create mode 100644 config/ifort_2019_debug.cfg

diff --git a/config/ifort_2019_debug.cfg b/config/ifort_2019_debug.cfg
new file mode 100644
index 00000000..cb14f467
--- /dev/null
+++ b/config/ifort_2019_debug.cfg
@@ -0,0 +1,66 @@
+# Common flags
+##############
+#
+# -mkl=[parallel|sequential] : Use the MKL library
+# --ninja                 : Allow the utilisation of ninja. It is mandatory !
+# --align=32                 : Align all provided arrays on a 32-byte boundary
+#
+[COMMON]
+FC           : ifort -fpic
+LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL -DSET_NESTED
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+# 
+[OPTION]
+MODE    : DEBUG      ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 0          ; Enable cache_compile.py
+OPENMP  : 1          ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -xHost                     : Compile a binary optimized for the current architecture
+# -O2                        : O3 not better than O2.
+# -ip                        : Inter-procedural optimizations
+# -ftz                       : Flushes denormal results to zero
+#
+[OPT]
+FC       : -traceback
+FCFLAGS  : -msse4.2 -O2 -ip -ftz -g 
+
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC       : -p -g
+FCFLAGS  : -msse4.2 -O2 -ip -ftz 
+
+
+# Debugging flags
+#################
+#
+# -traceback   : Activate backtrace on runtime
+# -fpe0        : All floating point exaceptions
+# -C           : Checks uninitialized variables,  array subscripts, etc...
+# -g           : Extra debugging information
+# -msse4.2     : Valgrind needs a very simple x86 executable
+#
+[DEBUG]
+FC      : -g -traceback
+FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0  -implicitnone
+
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -qopenmp
+IRPF90_FLAGS : --openmp
+
diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f
index 56d8cf28..ae728943 100644
--- a/src/mo_two_e_ints/mo_bi_integrals.irp.f
+++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f
@@ -38,7 +38,7 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
     print*, 'MO integrals provided'
     return
   else
-    PROVIDE ao_two_e_integrals_in_map 
+    PROVIDE ao_two_e_integrals_in_map
   endif
 
   print *,  ''
@@ -245,8 +245,6 @@ subroutine add_integrals_to_map(mask_ijkl)
     return
   endif
 
-  double precision               :: accu_bis
-  accu_bis = 0.d0
   call wall_time(wall_1)
 
   size_buffer = min( (qp_max_mem/(nproc*5)),mo_num*mo_num*mo_num)
@@ -256,7 +254,7 @@ subroutine add_integrals_to_map(mask_ijkl)
   !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax,   &
       !$OMP  two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
       !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
-      !$OMP  wall_0,thread_num,accu_bis)                             &
+      !$OMP  wall_0,thread_num)                             &
       !$OMP  DEFAULT(NONE)                                           &
       !$OMP  SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l,   &
       !$OMP  mo_coef_transp,                                         &
@@ -434,10 +432,10 @@ subroutine add_integrals_to_map(mask_ijkl)
   !$OMP END DO NOWAIT
   deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
 
-  integer                        :: index_needed
-
-  call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
-      real(mo_integrals_threshold,integral_kind))
+  if (n_integrals > 0) then
+    call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
+        real(mo_integrals_threshold,integral_kind))
+  endif
   deallocate(buffer_i, buffer_value)
   !$OMP END PARALLEL
   call map_merge(mo_integrals_map)
@@ -527,12 +525,10 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
 
   call wall_time(wall_1)
   call cpu_time(cpu_1)
-  double precision               :: accu_bis
-  accu_bis = 0.d0
   !$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
       !$OMP  two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
       !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
-      !$OMP  wall_0,thread_num,accu_bis)                             &
+      !$OMP  wall_0,thread_num)                             &
       !$OMP  DEFAULT(NONE)                                           &
       !$OMP  SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,       &
       !$OMP  mo_coef_transp,                                         &
@@ -730,8 +726,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
   !$OMP END DO NOWAIT
   deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
 
-  integer                        :: index_needed
-
   call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
       real(mo_integrals_threshold,integral_kind))
   deallocate(buffer_i, buffer_value)

From a3dd01914482eab568b56f61ef0e05c75c8c94b2 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 9 Jan 2023 12:25:03 +0100
Subject: [PATCH 34/35] Fixed problem of buffer size in MO transformation

---
 src/mo_two_e_ints/mo_bi_integrals.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f
index ae728943..411d2d4e 100644
--- a/src/mo_two_e_ints/mo_bi_integrals.irp.f
+++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f
@@ -247,7 +247,7 @@ subroutine add_integrals_to_map(mask_ijkl)
 
   call wall_time(wall_1)
 
-  size_buffer = min( (qp_max_mem/(nproc*5)),mo_num*mo_num*mo_num)
+  size_buffer = min(mo_num*mo_num*mo_num,8000000)
   print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
       ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
 

From 92a4e33f8a21717cab0c0e4f8412ed6903afb04a Mon Sep 17 00:00:00 2001
From: Abdallah Ammar <abd.ammar.phys@gmail.com>
Date: Mon, 16 Jan 2023 23:36:05 +0100
Subject: [PATCH 35/35]  clean fork with + TC stuffs

---
 src/ao_many_one_e_ints/ao_gaus_gauss.irp.f    |   67 +-
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   |  518 +++++++++
 src/ao_many_one_e_ints/grad2_jmu_modif.irp.f  |   49 +-
 .../grad2_jmu_modif_vect.irp.f                |   22 +-
 .../grad_lapl_jmu_manu.irp.f                  |  369 ++++++
 .../grad_lapl_jmu_modif.irp.f                 |   62 +-
 .../grad_related_ints.irp.f                   |   46 +-
 src/ao_many_one_e_ints/list_grid.irp.f        |   59 +
 src/ao_many_one_e_ints/listj1b.irp.f          |   15 +-
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   |  191 ++++
 .../prim_int_gauss_gauss.irp.f                |   99 +-
 src/ao_tc_eff_map/fit_j.irp.f                 |  125 ++
 src/ao_tc_eff_map/potential.irp.f             |  165 ++-
 src/bi_ort_ints/semi_num_ints_mo.irp.f        |  112 +-
 src/bi_ort_ints/three_body_ints_bi_ort.irp.f  |   43 +-
 src/bi_ortho_mos/bi_density.irp.f             |   57 +-
 src/bi_ortho_mos/mos_rl.irp.f                 |   47 +
 src/dft_utils_in_r/ao_in_r.irp.f              |   41 +
 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f      |  155 +++
 src/hartree_fock/fock_matrix_hf.irp.f         |   25 +-
 src/hartree_fock/scf.irp.f                    |   19 +-
 src/non_h_ints_mu/debug_integ_jmu_modif.irp.f |  195 +++-
 src/non_h_ints_mu/grad_squared.irp.f          |  122 +-
 src/non_h_ints_mu/grad_squared_manu.irp.f     |  221 ++++
 src/non_h_ints_mu/j12_nucl_utils.irp.f        |   17 +
 src/non_h_ints_mu/new_grad_tc.irp.f           |  284 ++++-
 src/non_h_ints_mu/new_grad_tc_manu.irp.f      |  174 +++
 src/non_h_ints_mu/total_tc_int.irp.f          |   51 +-
 src/non_hermit_dav/biorthog.irp.f             |   62 +-
 .../lapack_diag_non_hermit.irp.f              |  174 +--
 src/non_hermit_dav/new_routines.irp.f         |   53 +-
 src/scf_utils/diagonalize_fock.irp.f          |   11 +-
 src/scf_utils/diis.irp.f                      |  179 +++
 src/scf_utils/fock_matrix.irp.f               |    2 +
 src/scf_utils/rh_scf_simple.irp.f             |  129 +++
 src/scf_utils/roothaan_hall_scf.irp.f         |   33 +-
 .../save_bitcpsileft_for_qmcchem.irp.f        |   35 +-
 src/tc_bi_ortho/tc_som.irp.f                  |   70 ++
 src/tc_bi_ortho/test_tc_fock.irp.f            |   84 +-
 src/tc_keywords/EZFIO.cfg                     |   52 +-
 src/tc_scf/diago_bi_ort_tcfock.irp.f          |  105 +-
 src/tc_scf/diis_tcscf.irp.f                   |  186 +++
 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f         |  405 +++++++
 src/tc_scf/fock_tc.irp.f                      |  295 +++--
 src/tc_scf/fock_tc_mo_tot.irp.f               |   23 +
 src/tc_scf/fock_three.irp.f                   |  106 +-
 src/tc_scf/fock_three_bi_ortho_new_new.irp.f  |  400 ++++---
 src/tc_scf/rh_tcscf_diis.irp.f                |  362 ++++++
 src/tc_scf/rh_tcscf_simple.irp.f              |  129 +++
 src/tc_scf/rotate_tcscf_orbitals.irp.f        |    8 +-
 src/tc_scf/routines_rotates.irp.f             |   12 +-
 src/tc_scf/tc_scf.irp.f                       |  166 +--
 src/tc_scf/tc_scf_dm.irp.f                    |   42 +-
 src/tc_scf/tc_scf_energy.irp.f                |   14 +-
 src/tc_scf/tc_scf_utils.irp.f                 |    1 +
 src/tc_scf/test_int.irp.f                     | 1008 +++++++++++++++++
 src/tools/print_he_energy.irp.f               |    4 +-
 57 files changed, 6528 insertions(+), 972 deletions(-)
 create mode 100644 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
 create mode 100644 src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
 create mode 100644 src/ao_many_one_e_ints/list_grid.irp.f
 create mode 100644 src/ao_many_one_e_ints/listj1b_sorted.irp.f
 create mode 100644 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
 create mode 100644 src/non_h_ints_mu/grad_squared_manu.irp.f
 create mode 100644 src/non_h_ints_mu/new_grad_tc_manu.irp.f
 create mode 100644 src/scf_utils/rh_scf_simple.irp.f
 create mode 100644 src/tc_bi_ortho/tc_som.irp.f
 create mode 100644 src/tc_scf/diis_tcscf.irp.f
 create mode 100644 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
 create mode 100644 src/tc_scf/rh_tcscf_diis.irp.f
 create mode 100644 src/tc_scf/rh_tcscf_simple.irp.f
 create mode 100644 src/tc_scf/test_int.irp.f

diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
index 213a63e4..d2115d9e 100644
--- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@@ -156,6 +156,53 @@ end function overlap_gauss_r12_ao
 
 ! --
 
+double precision function overlap_abs_gauss_r12_ao(D_center, delta, i, j)
+
+  BEGIN_DOC
+  ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: D_center(3), delta
+
+  integer                      :: power_A(3), power_B(3), l, k
+  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j
+
+  double precision, external   :: overlap_abs_gauss_r12
+
+  overlap_abs_gauss_r12_ao = 0.d0
+
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+
+      if(dabs(coef) .lt. 1d-12) cycle
+
+      analytical_j = overlap_abs_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
+      overlap_abs_gauss_r12_ao += dabs(coef * analytical_j)
+    enddo
+  enddo
+
+end function overlap_gauss_r12_ao
+
+! --
+
 subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
 
   BEGIN_DOC
@@ -177,7 +224,7 @@ subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_
   double precision, allocatable :: analytical_j(:)
 
   resv(:) = 0.d0
-  if(ao_overlap_abs(j,i).lt.1.d-12) then
+  if(ao_overlap_abs(j,i) .lt. 1.d-12) then
     return
   endif
 
@@ -313,9 +360,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
   ASSERT(beta .gt. 0.d0)
 
   if(beta .lt. 1d-10) then
-
     call overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
-
     return
   endif
 
@@ -332,19 +377,20 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
   A1_center(1:3) = nucl_coord(ao_nucl(i),1:3)
   A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
 
-  allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) )
+  allocate(fact_g(n_points), G_center(n_points,3), analytical_j(n_points))
 
   bg  = beta  * gama_inv
   dg  = delta * gama_inv
   bdg = bg * delta 
-  do ipoint=1,n_points
+
+  do ipoint = 1, n_points
+
     G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1)
     G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2)
     G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3)
-    fact_g(ipoint) = bdg * ( &
-          (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1))  &
-        + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2))  &
-        + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
+    fact_g(ipoint) = bdg * ( (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1)) &
+                           + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2)) &
+                           + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
 
     if(fact_g(ipoint) < 10d0) then
       fact_g(ipoint) = dexp(-fact_g(ipoint))
@@ -368,8 +414,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
       do ipoint = 1, n_points
         coef12f = coef12 * fact_g(ipoint)
         resv(ipoint) += coef12f * analytical_j(ipoint)
-      end do
-
+      enddo
     enddo
   enddo
 
diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
new file mode 100644
index 00000000..4dd87a60
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -0,0 +1,518 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+  double precision              :: int_gauss, dsqpi_3_2, int_j1b
+  double precision              :: factor_ij_1s, beta_ij, center_ij_1s(3), sq_pi_3_2 
+  double precision, allocatable :: int_fit_v(:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2_test ...'
+
+  sq_pi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
+  call wall_time(wall0)
+
+  int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                              &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,                                     &
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s) &
+     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_thr_b3_size,                   &
+     !$OMP          final_grid_points_transp, ng_fit_jast,                                                  &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,                                             &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,                                           &
+     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,              &
+     !$OMP          ao_overlap_abs,sq_pi_3_2)
+ !$OMP DO SCHEDULE(dynamic)
+ do ipoint = 1, n_points_final_grid
+   r(1) = final_grid_points(1,ipoint)
+   r(2) = final_grid_points(2,ipoint)
+   r(3) = final_grid_points(3,ipoint)
+   do i = 1, ao_num
+     do j = i, ao_num
+       if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+         cycle
+       endif
+  
+       do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+  
+         do i_fit = 1, ng_fit_jast
+  
+           expo_fit = expo_gauss_1_erf_x_2(i_fit)
+           !DIR$ FORCEINLINE
+           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+           coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+!           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+           if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle
+  
+!           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+!                 expo_fit, i, j, int_fit_v, n_points_final_grid)
+           int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+  
+           int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss 
+  
+         enddo
+        enddo
+       enddo
+     enddo
+   enddo
+
+   !$OMP END DO
+   !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao_num, n_points_final_grid)]
+!
+!  BEGIN_DOC
+!  !
+!  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+!  !
+!  END_DOC
+!
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+
+  double precision, allocatable :: int_fit_v(:),big_array(:,:,:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2_test_v ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  call wall_time(wall0)
+
+ double precision :: int_j1b
+ big_array(:,:,:) = 0.d0
+ allocate(big_array(n_points_final_grid,ao_num, ao_num))
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
+     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size,&
+     !$OMP          final_grid_points_transp, ng_fit_jast,               &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
+     !$OMP          List_comb_thr_b3_cent, big_array,&
+     !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
+!
+ allocate(int_fit_v(n_points_final_grid))
+ !$OMP DO SCHEDULE(dynamic)
+ do i = 1, ao_num
+   do j = i, ao_num
+
+     if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+       cycle
+     endif
+
+      do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+!         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+       do i_fit = 1, ng_fit_jast
+
+         expo_fit = expo_gauss_1_erf_x_2(i_fit)
+         coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+
+         call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, size(final_grid_points_transp,1),&
+               expo_fit, i, j, int_fit_v, size(int_fit_v,1),n_points_final_grid)
+
+         do ipoint = 1, n_points_final_grid
+           big_array(ipoint,j,i) += coef_fit * int_fit_v(ipoint)
+         enddo
+
+       enddo
+
+     enddo
+   enddo
+ enddo
+ !$OMP END DO
+ deallocate(int_fit_v)
+ !$OMP END PARALLEL
+ do i = 1, ao_num
+   do j = i, ao_num
+    do ipoint = 1, n_points_final_grid
+     int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,j,i)
+    enddo
+   enddo
+  enddo
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,i,j)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_v', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3), tmp
+  double precision              :: wall0, wall1,int_j1b
+
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2
+
+  print*, ' providing int2_u2_j1b2_test ...'
+
+  sq_pi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u2_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b,factor_ij_1s,beta_ij,center_ij_1s)          & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+          do i_fit = 1, ng_fit_jast
+          
+            expo_fit = expo_gauss_j_mu_x_2(i_fit)
+            coef_fit = coef_gauss_j_mu_x_2(i_fit)
+            !DIR$ FORCEINLINE
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+!            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle
+          
+            ! ---
+          
+              int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+          
+              tmp += coef * coef_fit * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u2_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u2_j1b2_test(j,i,ipoint) = int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s, i_fit
+  double precision :: r(3), int_fit(3), expo_fit, coef_fit
+  double precision :: coef, beta, B_center(3), dist
+  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
+  double precision :: tmp_x, tmp_y, tmp_z, int_j1b
+  double precision :: wall0, wall1, sq_pi_3_2,sq_alpha
+
+  print*, ' providing int2_u_grad1u_x_j1b2_test ...'
+
+  sq_pi_3_2 = dacos(-1.D0)**(1.d0)
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u_grad1u_x_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
+ !$OMP          tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha)                        & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2)
+ !$OMP DO
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          do i_fit = 1, ng_fit_jast
+    
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+    
+            dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                        + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                        + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s 
+
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv)
+!            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version
+            if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle
+            
+            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+
+            tmp_x += coef_tmp * int_fit(1)
+            tmp_y += coef_tmp * int_fit(2)
+            tmp_z += coef_tmp * int_fit(3)
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = tmp_x
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = tmp_y
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,1)
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,2)
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_x_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+  double precision :: j12_mu_r12,int_j1b
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+  double precision :: beta_ij,center_ij_1s(3),factor_ij_1s
+
+  print*, ' providing int2_u_grad1u_j1b2_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent
+  call wall_time(wall0)
+
+
+  int2_u_grad1u_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
+ !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        r(1) = final_grid_points(1,ipoint)
+        r(2) = final_grid_points(2,ipoint)
+        r(3) = final_grid_points(3,ipoint)
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 20.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-08) cycle
+
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+        enddo
+
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
index b7fe234f..8196614f 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -19,9 +19,11 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
   double precision, external    :: overlap_gauss_r12_ao
   double precision, external    :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_grad1u2_grad2u2_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_grad1u2_grad2u2_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -51,7 +53,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
 
           int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
           tmp += -0.25d0 * coef_fit * int_fit
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
 
           ! ---
 
@@ -88,7 +90,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2 =', wall1 - wall0
 
 END_PROVIDER 
 
@@ -111,9 +113,11 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
   double precision, external    :: overlap_gauss_r12_ao
   double precision, external    :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_u2_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_u2_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -143,7 +147,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
 
           int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
           tmp += coef_fit * int_fit
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
 
           ! ---
 
@@ -186,7 +190,7 @@ END_PROVIDER
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)]
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)]
 
   BEGIN_DOC
   !
@@ -202,9 +206,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
   double precision :: tmp_x, tmp_y, tmp_z
   double precision :: wall0, wall1
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_u_grad1u_x_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_u_grad1u_x_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -241,7 +247,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
           tmp_x += coef_fit * int_fit(1)
           tmp_y += coef_fit * int_fit(2)
           tmp_z += coef_fit * int_fit(3)
-          if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
+!          if( dabs(coef_fit)*(dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
 
           ! ---
 
@@ -265,7 +271,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
+!            if(dabs(coef_tmp) .lt. 1d-12) cycle
             
             call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
 
@@ -278,9 +284,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
 
         enddo
 
-        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x
-        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y
-        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z
+        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = tmp_x
+        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = tmp_y
+        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = tmp_z
       enddo
     enddo
   enddo
@@ -290,15 +296,15 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
       do j = 1, i-1
-        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint)
-        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint)
-        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0
+  print*, ' wall time for int2_u_grad1u_x_j1b2 = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -320,9 +326,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
   double precision              :: wall0, wall1
   double precision, external    :: NAI_pol_mult_erf_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_u_grad1u_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_u_grad1u_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -351,7 +359,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
           ! ---
 
           int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r)
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit)*dabs(int_fit) .lt. 1d-12) cycle
 
           tmp += coef_fit * int_fit
 
@@ -375,9 +383,10 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
             centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 80.d0) cycle
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
-            
+            if(dabs(coef_tmp) .lt. 1d-12) cycle
+
             int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
 
             tmp += coef_tmp * int_fit
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
index 6d3931f5..21927371 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
@@ -241,7 +241,7 @@
 !
 !! ---
 !
-!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)]
+!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)]
 !
 !  BEGIN_DOC
 !  !
@@ -308,7 +308,7 @@
 !
 !          ! ---
 !
-!          int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_fit * int_fit_v(ipoint,1)
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,1) += coef_fit * int_fit_v(ipoint,1)
 !
 !          if(dabs(int_fit_v(ipoint,1)) .gt. 1d-10) then
 !            i_mask_grid1 += 1
@@ -320,7 +320,7 @@
 !
 !          ! ---
 !
-!          int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_fit * int_fit_v(ipoint,2)
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,2) += coef_fit * int_fit_v(ipoint,2)
 !
 !          if(dabs(int_fit_v(ipoint,2)) .gt. 1d-10) then
 !            i_mask_grid2 += 1
@@ -332,7 +332,7 @@
 !
 !          ! ---
 !
-!          int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_fit * int_fit_v(ipoint,3)
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,3) += coef_fit * int_fit_v(ipoint,3)
 !
 !          if(dabs(int_fit_v(ipoint,3)) .gt. 1d-10) then
 !            i_mask_grid3 += 1
@@ -408,15 +408,15 @@
 !          call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s, n_points_final_grid, 1.d+9, r_mask_grid, n_points_final_grid, int_fit_v, n_points_final_grid, i_mask_grid)
 !
 !          do ipoint = 1, i_mask_grid1
-!            int2_u_grad1u_x_j1b2(1,j,i,n_mask_grid(ipoint,1)) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1)
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,1),1) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1)
 !          enddo
 !
 !          do ipoint = 1, i_mask_grid2
-!            int2_u_grad1u_x_j1b2(2,j,i,n_mask_grid(ipoint,2)) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2)
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,2),2) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2)
 !          enddo
 !
 !          do ipoint = 1, i_mask_grid3
-!            int2_u_grad1u_x_j1b2(3,j,i,n_mask_grid(ipoint,3)) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3)
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,3),3) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3)
 !          enddo
 !
 !        enddo
@@ -439,15 +439,15 @@
 !  do ipoint = 1, n_points_final_grid
 !    do i = 2, ao_num
 !      do j = 1, i-1
-!        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint)
-!        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint)
-!        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3)
 !      enddo
 !    enddo
 !  enddo
 !
 !  call wall_time(wall1)
-!  print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0
+!  print*, ' wall time for int2_u_grad1u_x_j1b2 =', wall1 - wall0
 !
 !END_PROVIDER
 !
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
new file mode 100644
index 00000000..a6a55810
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -0,0 +1,369 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R| - 1) / |r - R|
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_mu, int_coulomb
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp,int_j1b
+  double precision           :: wall0, wall1
+  double precision, external :: NAI_pol_mult_erf_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+
+  print*, ' providing v_ij_erf_rk_cst_mu_j1b_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                         &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp, int_j1b)& 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b,  &
+ !$OMP          v_ij_erf_rk_cst_mu_j1b_test, mu_erf,                                   &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+          ! TODO :: cycle on the 1 - erf(mur12)
+          int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
+          int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
+
+          tmp += coef * (int_mu - int_coulomb)
+        enddo
+
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s
+  double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
+  double precision :: tmp_x, tmp_y, tmp_z
+  double precision :: wall0, wall1
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s
+
+  print*, ' providing x_v_ij_erf_rk_cst_mu_j1b_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide expo_erfc_mu_gauss ao_prod_sigma ao_prod_center
+  call wall_time(wall0)
+
+  x_v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
+ !$OMP          int_j1b, tmp_x, tmp_y, tmp_z,factor_ij_1s,beta_ij,center_ij_1s)       & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
+ !$OMP          x_v_ij_erf_rk_cst_mu_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma)
+! !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+!          if(ao_prod_center(1,j,i).ne.10000.d0)then
+!           ! approximate 1 - erf(mu r12) by a gaussian * 10
+!           !DIR$ FORCEINLINE
+!           call gaussian_product(expo_erfc_mu_gauss,r,     &
+!                ao_prod_sigma(j,i),ao_prod_center(1,j,i),  & 
+!                factor_ij_1s,beta_ij,center_ij_1s)
+!           if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-1.5d0)).lt.1.d-10)cycle 
+!          endif
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
+
+          tmp_x += coef * (ints(1) - ints_coulomb(1))
+          tmp_y += coef * (ints(2) - ints_coulomb(2))
+          tmp_z += coef * (ints(3) - ints_coulomb(3))
+        enddo
+
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = tmp_x
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = tmp_y
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+! TODO analytically
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12)
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s, i_fit
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+
+  print*, ' providing v_ij_u_cst_mu_j1b_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_x(i_fit)
+            coef_fit = coef_gauss_j_mu_x(i_fit)
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            double precision :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3),coeftot
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) with u(mu,r12) \approx 1/2 mu e^{-2.5 * mu (r12)^2}
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_ng_1_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s,  r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, expo_good_j_mu_1gauss,coef_good_j_mu_1gauss,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+!          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_good_j_mu_1gauss
+            coef_fit = 1.d0
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            double precision :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3),coeftot
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+!          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_ng_1_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_ng_1_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
index 6a662533..fc30cd83 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
@@ -17,9 +17,11 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po
   double precision           :: wall0, wall1
   double precision, external :: NAI_pol_mult_erf_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print *, ' providing v_ij_erf_rk_cst_mu_j1b ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   v_ij_erf_rk_cst_mu_j1b = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                                         &
@@ -49,7 +51,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po
 
         int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
         int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
-        if(dabs(int_mu - int_coulomb) .lt. 1d-10) cycle
+!        if(dabs(coef)*dabs(int_mu - int_coulomb) .lt. 1d-12) cycle
 
         tmp += coef * (int_mu - int_coulomb)
 
@@ -99,51 +101,23 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_
   ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
   END_DOC
 
-  implicit none
-  integer          :: i, j, ipoint
-  double precision :: wall0, wall1
-
-  call wall_time(wall0)
-
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = 1, ao_num
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
-  END_DOC
-
   implicit none
   integer          :: i, j, ipoint, i_1s
   double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
   double precision :: tmp_x, tmp_y, tmp_z
   double precision :: wall0, wall1
 
+  print*, ' providing x_v_ij_erf_rk_cst_mu_j1b ...'
   call wall_time(wall0)
 
-  x_v_ij_erf_rk_cst_mu_tmp_j1b = 0.d0
+  x_v_ij_erf_rk_cst_mu_j1b = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                                        &
  !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
  !$OMP          tmp_x, tmp_y, tmp_z)                                                  & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,&
  !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent,  &
- !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b, mu_erf)
+ !$OMP          x_v_ij_erf_rk_cst_mu_j1b, mu_erf)
  !$OMP DO
   !do ipoint = 1, 10
   do ipoint = 1, n_points_final_grid
@@ -169,7 +143,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
         call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
         call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
 
-        if( (dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
+!        if( dabs(coef)*(dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
 
         tmp_x += coef * (ints(1) - ints_coulomb(1))
         tmp_y += coef * (ints(2) - ints_coulomb(2))
@@ -195,9 +169,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
 
         ! ---
 
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = tmp_x
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = tmp_y
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = tmp_z
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = tmp_x
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = tmp_y
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = tmp_z
       enddo
     enddo
   enddo
@@ -207,15 +181,15 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
       do j = 1, i-1
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint)
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint)
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp_j1b', wall1 - wall0
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b =', wall1 - wall0
 
 END_PROVIDER 
 
@@ -239,9 +213,11 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_
 
   double precision, external :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing v_ij_u_cst_mu_j1b ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   v_ij_u_cst_mu_j1b = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -277,7 +253,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_
           B_center(3) = List_all_comb_b2_cent(3,1)
 
           int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(int_fit*coef) .lt. 1d-12) cycle
 
           tmp += coef * coef_fit * int_fit
 
diff --git a/src/ao_many_one_e_ints/grad_related_ints.irp.f b/src/ao_many_one_e_ints/grad_related_ints.irp.f
index 67fb0fe7..8624e7b8 100644
--- a/src/ao_many_one_e_ints/grad_related_ints.irp.f
+++ b/src/ao_many_one_e_ints/grad_related_ints.irp.f
@@ -17,6 +17,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points
 
   double precision :: NAI_pol_mult_erf_ao
 
+  print*, ' providing v_ij_erf_rk_cst_mu ...'
+
   provide mu_erf final_grid_points 
   call wall_time(wall0)
 
@@ -54,7 +56,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points
   enddo
  
   call wall_time(wall1)
-  print*, ' wall time for v_ij_erf_rk_cst_mu  ', wall1 - wall0
+  print*, ' wall time for v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -73,6 +75,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_transp, (n_points_final_gr
   double precision :: wall0, wall1
   double precision :: NAI_pol_mult_erf_ao
 
+  print *, ' providing v_ij_erf_rk_cst_mu_transp ...'
+
   provide mu_erf final_grid_points 
   call wall_time(wall0)
 
@@ -107,7 +111,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_transp, (n_points_final_gr
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for v_ij_erf_rk_cst_mu_transp  ', wall1 - wall0
+  print *, ' wall time for v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -124,6 +128,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp, (3, ao_num, ao_num,
   double precision :: r(3), ints(3), ints_coulomb(3)
   double precision :: wall0, wall1
 
+  print*, ' providing x_v_ij_erf_rk_cst_mu_tmp ...'
+
   call wall_time(wall0)
 
  !$OMP PARALLEL                                 &
@@ -162,13 +168,13 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp, (3, ao_num, ao_num,
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
 
 END_PROVIDER 
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_points_final_grid,3)]
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points_final_grid, 3)]
 
   BEGIN_DOC
   ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R|
@@ -178,6 +184,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_point
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_v_ij_erf_rk_cst_mu ...'
+
   call wall_time(wall0)
 
   do ipoint = 1, n_points_final_grid
@@ -191,7 +199,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_point
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for x_v_ij_erf_rk_cst_mu', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -207,6 +215,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp, (ao_num, ao_num,
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_v_ij_erf_rk_cst_mu_transp ...'
+
   call wall_time(wall0)
 
   do ipoint = 1, n_points_final_grid
@@ -220,13 +230,13 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp, (ao_num, ao_num,
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0
 
 END_PROVIDER 
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_final_grid,ao_num, ao_num,3)]
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_final_grid, ao_num, ao_num, 3)]
 
   BEGIN_DOC
   ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R|
@@ -236,6 +246,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_fi
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_v_ij_erf_rk_cst_mu_transp_bis ...'
+
   call wall_time(wall0)
 
   do i = 1, ao_num
@@ -249,7 +261,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_fi
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp_bis', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp_bis = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -268,7 +280,9 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_fin
  double precision :: r(3), ints(3), ints_coulomb(3)
  double precision :: wall0, wall1
 
- call wall_time(wall0)
+  print *, ' providing d_dx_v_ij_erf_rk_cst_mu_tmp ...'
+
+  call wall_time(wall0)
 
  !$OMP PARALLEL                                 &
  !$OMP DEFAULT (NONE)                           &
@@ -295,7 +309,7 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_fin
  !$OMP END PARALLEL
 
   call wall_time(wall1)
-  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0
+  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -315,6 +329,8 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing d_dx_v_ij_erf_rk_cst_mu ...'
+
   call wall_time(wall0)
   do i = 1, ao_num
     do j = 1, ao_num
@@ -327,7 +343,7 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu', wall1 - wall0
+  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -348,6 +364,8 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_f
   double precision :: r(3), ints(3), ints_coulomb(3)
   double precision :: wall0, wall1
 
+  print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu_tmp ...'
+
   call wall_time(wall0)
 
  !$OMP PARALLEL                                 &
@@ -375,7 +393,7 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_f
  !$OMP END PARALLEL
 
   call wall_time(wall1)
-  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0
+  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -395,6 +413,8 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu, (n_points_final_gr
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu ...'
+
   call wall_time(wall0)
 
   do i = 1, ao_num
@@ -408,7 +428,7 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu, (n_points_final_gr
   enddo
  
   call wall_time(wall1)
-  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu', wall1 - wall0
+  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
diff --git a/src/ao_many_one_e_ints/list_grid.irp.f b/src/ao_many_one_e_ints/list_grid.irp.f
new file mode 100644
index 00000000..ccdc33ad
--- /dev/null
+++ b/src/ao_many_one_e_ints/list_grid.irp.f
@@ -0,0 +1,59 @@
+ BEGIN_PROVIDER [ integer, n_pts_grid_ao_prod, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_n_pts_grid_ao_prod]
+ implicit none
+ integer :: i,j,ipoint
+ double precision :: overlap, r(3),thr, overlap_abs_gauss_r12_ao,overlap_gauss_r12_ao
+ double precision :: sigma,dist,center_ij(3),fact_gauss, alpha, center(3)
+ n_pts_grid_ao_prod = 0
+ thr = 1.d-11
+ print*,' expo_good_j_mu_1gauss = ',expo_good_j_mu_1gauss
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, r, overlap, thr,fact_gauss, alpha, center,dist,sigma,center_ij) &
+ !$OMP SHARED  (n_points_final_grid, ao_num, ao_overlap_abs_grid,n_pts_grid_ao_prod,expo_good_j_mu_1gauss,&
+ !$OMP          final_grid_points,ao_prod_center,ao_prod_sigma,ao_nucl)
+ !$OMP DO
+ do i = 1, ao_num
+! do i = 3,3
+  do j = 1, ao_num
+! do i = 22,22
+!  do j = 9,9
+   center_ij(1:3) = ao_prod_center(1:3,j,i)
+   sigma = ao_prod_sigma(j,i)
+   sigma *= sigma
+   sigma = 0.5d0 /sigma
+!   if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+   do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    dist  = (center_ij(1) - r(1))*(center_ij(1) - r(1))
+    dist += (center_ij(2) - r(2))*(center_ij(2) - r(2))
+    dist += (center_ij(3) - r(3))*(center_ij(3) - r(3))
+    dist = dsqrt(dist)
+    call gaussian_product(sigma, center_ij, expo_good_j_mu_1gauss, r, fact_gauss, alpha, center)
+!    print*,''
+!    print*,j,i,ao_overlap_abs_grid(j,i),ao_overlap_abs(j,i)
+!    print*,r
+!    print*,dist,sigma
+!    print*,fact_gauss
+    if( fact_gauss*ao_overlap_abs_grid(j,i).lt.1.d-11)cycle
+    if(ao_nucl(i) == ao_nucl(j))then
+     overlap = overlap_abs_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    else
+     overlap = overlap_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    endif
+!    print*,overlap
+    if(dabs(overlap).lt.thr)cycle
+    n_pts_grid_ao_prod(j,i) += 1
+   enddo
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(n_pts_grid_ao_prod(:,i))
+ enddo
+ max_n_pts_grid_ao_prod = maxval(list) 
+END_PROVIDER 
diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f
index 0b40170c..e27bf723 100644
--- a/src/ao_many_one_e_ints/listj1b.irp.f
+++ b/src/ao_many_one_e_ints/listj1b.irp.f
@@ -102,6 +102,12 @@ END_PROVIDER
     List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i))
   enddo
 
+  print *, ' coeff, expo & cent of list b2'
+  do i = 1, List_all_comb_b2_size
+    print*, i, List_all_comb_b2_coef(i), List_all_comb_b2_expo(i)
+    print*, List_all_comb_b2_cent(1,i), List_all_comb_b2_cent(2,i), List_all_comb_b2_cent(3,i)
+  enddo
+
 END_PROVIDER
 
 ! ---
@@ -168,7 +174,6 @@ END_PROVIDER
 
     do j = 1, nucl_num
       tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j)
-      !print*, List_all_comb_b3(j,i), j1b_pen(j)
       List_all_comb_b3_expo(i)   += tmp_alphaj
       List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1)
       List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2)
@@ -220,9 +225,11 @@ END_PROVIDER
     List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i))
   enddo
 
-  print *, ' 1st coeff & expo of lists'
-  print*, List_all_comb_b2_coef(1), List_all_comb_b2_expo(1)
-  print*, List_all_comb_b3_coef(1), List_all_comb_b3_expo(1)
+  print *, ' coeff, expo & cent of list b3'
+  do i = 1, List_all_comb_b3_size
+    print*, i, List_all_comb_b3_coef(i), List_all_comb_b3_expo(i)
+    print*, List_all_comb_b3_cent(1,i), List_all_comb_b3_cent(2,i), List_all_comb_b3_cent(3,i)
+  enddo
 
 END_PROVIDER
 
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
new file mode 100644
index 00000000..bf493fbb
--- /dev/null
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -0,0 +1,191 @@
+
+ BEGIN_PROVIDER [ integer, List_comb_thr_b2_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size]
+ implicit none
+ integer :: i_1s,i,j,ipoint
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ List_comb_thr_b2_size = 0
+ do i = 1, ao_num
+  do j = i, ao_num
+   do i_1s = 1, List_all_comb_b2_size
+     coef        = List_all_comb_b2_coef  (i_1s)
+     if(dabs(coef).lt.1.d-15)cycle
+     beta        = List_all_comb_b2_expo  (i_1s)
+     beta = max(beta,1.d-12)
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      List_comb_thr_b2_size(j,i) += 1
+     endif
+   enddo
+  enddo 
+ enddo
+ do i = 1, ao_num
+  do j = 1, i-1
+    List_comb_thr_b2_size(j,i) = List_comb_thr_b2_size(i,j)
+  enddo
+ enddo
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(List_comb_thr_b2_size(:,i))
+ enddo
+ max_List_comb_thr_b2_size = maxval(list) 
+ 
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b2_coef, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_expo, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_cent, (3, max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)]
+ implicit none
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ ao_abs_comb_b2_j1b = 10000000.d0
+ do i = 1, ao_num
+  do j = i, ao_num
+   icount = 0
+   do i_1s = 1, List_all_comb_b2_size
+     coef        = List_all_comb_b2_coef  (i_1s)
+     if(dabs(coef).lt.1.d-12)cycle
+     beta        = List_all_comb_b2_expo  (i_1s)
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b2_coef(icount,j,i) = coef
+      List_comb_thr_b2_expo(icount,j,i) = beta
+      List_comb_thr_b2_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b2_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+
+ do i = 1, ao_num
+  do j = 1, i-1
+    do icount = 1, List_comb_thr_b2_size(j,i)
+     List_comb_thr_b2_coef(icount,j,i) = List_comb_thr_b2_coef(icount,i,j)
+     List_comb_thr_b2_expo(icount,j,i) = List_comb_thr_b2_expo(icount,i,j)
+     List_comb_thr_b2_cent(1:3,icount,j,i) = List_comb_thr_b2_cent(1:3,icount,i,j)
+    enddo
+  enddo
+ enddo
+ 
+END_PROVIDER 
+
+
+ BEGIN_PROVIDER [ integer, List_comb_thr_b3_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size]
+ implicit none
+ integer :: i_1s,i,j,ipoint
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ List_comb_thr_b3_size = 0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     beta        = List_all_comb_b3_expo  (i_1s)
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     if(dabs(coef).lt.thr)cycle
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      List_comb_thr_b3_size(j,i) += 1
+     endif
+   enddo
+  enddo 
+ enddo
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    List_comb_thr_b3_size(j,i) = List_comb_thr_b3_size(i,j)
+!  enddo
+! enddo
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(List_comb_thr_b3_size(:,i))
+ enddo
+ max_List_comb_thr_b3_size = maxval(list) 
+ print*,'max_List_comb_thr_b3_size =  ',max_List_comb_thr_b3_size
+ 
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b3_coef, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_expo, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_cent, (3, max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)]
+ implicit none
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ ao_abs_comb_b3_j1b = 10000000.d0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   icount = 0
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     beta        = List_all_comb_b3_expo  (i_1s)
+     beta = max(beta,1.d-12)
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     if(dabs(coef).lt.thr)cycle
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b3_coef(icount,j,i) = coef
+      List_comb_thr_b3_expo(icount,j,i) = beta
+      List_comb_thr_b3_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b3_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    do icount = 1, List_comb_thr_b3_size(j,i)
+!     List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j)
+!     List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j)
+!     List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j)
+!    enddo
+!  enddo
+! enddo
+ 
+END_PROVIDER 
+
diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
index cfdaf95f..54c2d95b 100644
--- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@@ -1,5 +1,9 @@
-double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+! ---
+
+double precision function overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
   BEGIN_DOC
+  !
   ! Computes the following integral :
   !
   ! .. math                      ::
@@ -8,6 +12,72 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
   !
   END_DOC
 
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)          :: power_A(3),power_B(3)
+
+  double precision             :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision             :: A_new(0:max_dim,3)! new polynom
+  double precision             :: A_center_new(3)   ! new center
+  integer                      :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision             :: alpha_new         ! new exponent
+  double precision             :: fact_a_new        ! constant factor
+  double precision             :: accu, coefx, coefy, coefz, coefxy, coefxyz, thr
+  integer                      :: d(3), i, lx, ly, lz, iorder_tmp(3), dim1
+
+  dim1 = 100
+  thr  = 1.d-10
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_gauss_r12 = 0.d0
+
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
+  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)*fact_a_new
+    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+
+    do ly = 0, iorder_a_new(2)
+      coefy  = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy) .lt. thr) cycle
+      iorder_tmp(2) = ly
+
+      do lz = 0, iorder_a_new(3)
+        coefz   = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz) .lt. thr) cycle
+        iorder_tmp(3) = lz
+
+        call overlap_gaussian_xyz( A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B &
+                                 , overlap_x, overlap_y, overlap_z, overlap, dim1)
+
+        accu += coefxyz * overlap
+      enddo
+    enddo
+  enddo
+  overlap_gauss_r12 = accu
+end
+
+!---
+double precision function overlap_abs_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) |(x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )|
+  !
+  END_DOC
+
   implicit none
   include 'constants.include.F'
   double precision, intent(in)   :: D_center(3), delta  ! pure gaussian "D"
@@ -21,20 +91,23 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
   integer                        :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
   double precision               :: alpha_new         ! new exponent
   double precision               :: fact_a_new        ! constant factor
-  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
+  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr,dx,lower_exp_val
   integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
-  dim1=100
-  thr = 1.d-10
+  dim1=50
+  lower_exp_val = 40.d0
+  thr = 1.d-12
   d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_abs_gauss_r12 = 0.d0
 
   ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
   call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
       delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
   ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
   accu = 0.d0
   do lx = 0, iorder_a_new(1)
-    coefx = A_new(lx,1)
-    if(dabs(coefx).lt.thr)cycle
+    coefx = A_new(lx,1)*fact_a_new
+!    if(dabs(coefx).lt.thr)cycle
     iorder_tmp(1) = lx
     do ly = 0, iorder_a_new(2)
       coefy = A_new(ly,2)
@@ -46,12 +119,14 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
         coefxyz = coefxy * coefz
         if(dabs(coefxyz).lt.thr)cycle
         iorder_tmp(3) = lz
-        call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
-        accu += coefxyz * overlap
+        call overlap_x_abs(A_center_new(1),B_center(1),alpha_new,beta,iorder_tmp(1),power_B(1),overlap_x,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(2),B_center(2),alpha_new,beta,iorder_tmp(2),power_B(2),overlap_y,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(3),B_center(3),alpha_new,beta,iorder_tmp(3),power_B(3),overlap_z,lower_exp_val,dx,dim1)
+        accu += dabs(coefxyz * overlap_x * overlap_y * overlap_z)
       enddo
     enddo
   enddo
-  overlap_gauss_r12 = fact_a_new * accu
+  overlap_abs_gauss_r12= accu
 end
 
 !---
@@ -95,11 +170,9 @@ subroutine overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_
 
   maxab = maxval(power_A(1:3))
 
-  allocate(A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points))
+  allocate(A_new(n_points,0:maxab,3), A_center_new(n_points,3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points))
 
-  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new,   &
-        alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, &
-        D_center, LD_D, A_center, n_points)
+  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, D_center, LD_D, A_center, n_points)
 
   rvec(:) = 0.d0
 
diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f
index 8fad9079..4730d003 100644
--- a/src/ao_tc_eff_map/fit_j.irp.f
+++ b/src/ao_tc_eff_map/fit_j.irp.f
@@ -1,5 +1,40 @@
+ BEGIN_PROVIDER [ double precision, expo_j_xmu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_j_xmu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! Upper bound long range fit of F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) 
+ !
+ ! with a single gaussian. 
+ !
+ ! Such a function can be used to screen integrals with F(x). 
+ END_DOC
+ expo_j_xmu_1gauss  = 0.5d0
+ coef_j_xmu_1gauss  = 1.d0
+END_PROVIDER 
 ! ---
 
+BEGIN_PROVIDER [ double precision, expo_erfc_gauss ]
+ implicit none 
+ expo_erfc_gauss = 1.41211d0
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, expo_erfc_mu_gauss ]
+ implicit none 
+ expo_erfc_mu_gauss = expo_erfc_gauss * mu_erf * mu_erf
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! exponent of Gaussian in order to obtain an upper bound of J(r12,mu)
+ !
+ ! Can be used to scree integrals with J(r12,mu)
+ END_DOC
+ expo_good_j_mu_1gauss = 2.D0 * mu_erf * expo_j_xmu_1gauss
+ coef_good_j_mu_1gauss = 0.5d0/mu_erf * coef_j_xmu_1gauss
+ END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, expo_j_xmu, (n_fit_1_erf_x) ]
  implicit none
  BEGIN_DOC
@@ -88,6 +123,36 @@ END_PROVIDER
       expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_x = (/ -0.01756495d0 , -0.01023623d0  , -0.06548959d0  , -0.03539446d0  , -0.17150646d0  , -0.15071096d0  , -0.11326834d0   /)
+    expo_gauss_j_mu_x = (/ 9.88572565d+02,  1.21363371d+04,  3.69794870d+01,  1.67364529d+02,  3.03962934d+00,  1.27854005d+00,  9.76383343d+00 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_x = (/ -0.11489205d0 , -0.16008968d0 , -0.12892456d0 , -0.04250838d0 , -0.0718451d0  , -0.02394051d0 , -0.00913353d0 , -0.01285182d0  /)
+    expo_gauss_j_mu_x = (/ 6.97632442d+00, 2.56010878d+00, 1.22760977d+00, 7.47697124d+01, 2.16104215d+01, 2.96549728d+02, 1.40773328d+04, 1.43335159d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_x = (/ /)
+  !  expo_gauss_j_mu_x = (/ /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
@@ -189,6 +254,36 @@ END_PROVIDER
       expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.05202849d0  , 0.01031081d0  , 0.04699157d0  , 0.01451002d0  , 0.07442576d0  , 0.02692033d0  , 0.09311842d0   /)
+    expo_gauss_j_mu_x_2 = (/ 3.04469415d+00, 1.40682034d+04, 7.45960945d+01, 1.43067466d+03, 2.16815661d+01, 2.95750306d+02, 7.23471236d+00 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.00942115d0  , 0.07332421d0  , 0.0508308d0   , 0.08204949d0  , 0.0404099d0   , 0.03201288d0  , 0.01911313d0  , 0.01114732d0   /)
+    expo_gauss_j_mu_x_2 = (/ 1.56957321d+04, 1.52867810d+01, 4.36016903d+01, 5.96818956d+00, 2.85535269d+00, 1.36064008d+02, 4.71968910d+02, 1.92022350d+03 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_x_2 = (/  /)
+  !  expo_gauss_j_mu_x_2 = (/  /)
+  !  
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
@@ -293,6 +388,36 @@ END_PROVIDER
       expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.11853067d0 , -0.01522824d0  , -0.07419098d0  , -0.022202d0    , -0.12242283d0  , -0.04177571d0  , -0.16983107d0  /)
+    expo_gauss_j_mu_1_erf = (/ 2.74057056d+00,  1.37626591d+04,  6.65578663d+01,  1.34693031d+03,  1.90547699d+01,  2.69445390d+02,  6.31845879d+00/)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.12263328d0 , -0.04965255d0 , -0.15463564d0 , -0.09675781d0 , -0.0807023d0  , -0.02923298d0 , -0.01381381d0 , -0.01675923d0  /)
+    expo_gauss_j_mu_1_erf = (/ 1.36101994d+01, 1.24908367d+02, 5.29061388d+00, 2.60692516d+00, 3.93396935d+01, 4.43071610d+02, 1.54902240d+04, 1.85170446d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_1_erf = (/  /)
+  !  expo_gauss_j_mu_1_erf = (/  /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
diff --git a/src/ao_tc_eff_map/potential.irp.f b/src/ao_tc_eff_map/potential.irp.f
index 67d572e5..5b72b567 100644
--- a/src/ao_tc_eff_map/potential.irp.f
+++ b/src/ao_tc_eff_map/potential.irp.f
@@ -1,59 +1,79 @@
+! ---
+
 BEGIN_PROVIDER [integer, n_gauss_eff_pot]
- implicit none
- BEGIN_DOC
-! number of gaussians to represent the effective potential :
-!
-! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
-!
-! Here (1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
- END_DOC
- n_gauss_eff_pot = n_max_fit_slat + 1
+
+  BEGIN_DOC
+  ! number of gaussians to represent the effective potential :
+  !
+  ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
+  !
+  ! Here (1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+
+  n_gauss_eff_pot = ng_fit_jast + 1
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [integer, n_gauss_eff_pot_deriv]
- implicit none
- BEGIN_DOC
-! V(r12) = -(1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
- END_DOC
- n_gauss_eff_pot_deriv = n_max_fit_slat 
+
+  BEGIN_DOC
+  ! V(r12) = -(1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+  n_gauss_eff_pot_deriv = ng_fit_jast
+
 END_PROVIDER 
 
+! ---
+
  BEGIN_PROVIDER [double precision, expo_gauss_eff_pot, (n_gauss_eff_pot)]
 &BEGIN_PROVIDER [double precision, coef_gauss_eff_pot, (n_gauss_eff_pot)]
- implicit none
- BEGIN_DOC
-! Coefficients and exponents of the Fit on Gaussians of V(X) = -(1 - erf(mu*X))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*X)^2)
-!
-! V(X) = \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * X^2)
-!
-! Relies on the fit proposed in Eqs A11-A20 in JCP 154, 084119 (2021)
- END_DOC
- include 'constants.include.F'
 
- integer :: i
- ! fit of the -0.25 * (1 - erf(mu*x))^2 with n_max_fit_slat gaussians 
- do i = 1, n_max_fit_slat
-  expo_gauss_eff_pot(i) = expo_gauss_1_erf_x_2(i) 
-  coef_gauss_eff_pot(i) = -0.25d0 * coef_gauss_1_erf_x_2(i) ! -1/4 * (1 - erf(mu*x))^2
- enddo
- ! Analytical Gaussian part of the potential: + 1/(\sqrt(pi)mu) * exp(-(mu*x)^2) 
- expo_gauss_eff_pot(n_max_fit_slat+1) = mu_erf * mu_erf
- coef_gauss_eff_pot(n_max_fit_slat+1) =  1.d0 * mu_erf * inv_sq_pi
+  BEGIN_DOC
+  ! Coefficients and exponents of the Fit on Gaussians of V(X) = -(1 - erf(mu*X))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*X)^2)
+  !
+  ! V(X) = \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * X^2)
+  !
+  ! Relies on the fit proposed in Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+  integer :: i
+ 
+  ! fit of the -0.25 * (1 - erf(mu*x))^2 with n_max_fit_slat gaussians 
+  do i = 1, ng_fit_jast
+   expo_gauss_eff_pot(i) = expo_gauss_1_erf_x_2(i) 
+   coef_gauss_eff_pot(i) = -0.25d0 * coef_gauss_1_erf_x_2(i) ! -1/4 * (1 - erf(mu*x))^2
+  enddo
+
+  ! Analytical Gaussian part of the potential: + 1/(\sqrt(pi)mu) * exp(-(mu*x)^2) 
+  expo_gauss_eff_pot(ng_fit_jast+1) = mu_erf * mu_erf
+  coef_gauss_eff_pot(ng_fit_jast+1) =  1.d0 * mu_erf * inv_sq_pi
 
 END_PROVIDER 
 
+! ---
+
+double precision function eff_pot_gauss(x, mu)
+
+  BEGIN_DOC
+  ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
+  END_DOC
+
+  implicit none
+  double precision, intent(in) :: x, mu
+
+  eff_pot_gauss =  mu/dsqrt(dacos(-1.d0)) * dexp(-mu*mu*x*x) - 0.25d0 * (1.d0 - derf(mu*x))**2.d0
 
-double precision function eff_pot_gauss(x,mu)
- implicit none
- BEGIN_DOC
- ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
- END_DOC
- double precision, intent(in) :: x,mu
- eff_pot_gauss =  mu/dsqrt(dacos(-1.d0)) * dexp(-mu*mu*x*x) - 0.25d0 * (1.d0 - derf(mu*x))**2.d0
 end
 
-
-
 ! -------------------------------------------------------------------------------------------------
 ! ---
 
@@ -129,16 +149,19 @@ END_PROVIDER
 ! ---
 
 double precision function fit_1_erf_x(x)
- implicit none
- double precision, intent(in) :: x
- BEGIN_DOC
-! fit_1_erf_x(x) = \sum_i c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x))
- END_DOC
- integer :: i
- fit_1_erf_x = 0.d0
- do i = 1, n_max_fit_slat
-  fit_1_erf_x += dexp(-expo_gauss_1_erf_x(i) *x*x) * coef_gauss_1_erf_x(i)
- enddo
+
+  BEGIN_DOC
+  ! fit_1_erf_x(x) = \sum_i c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x))
+  END_DOC
+
+  implicit none
+  integer :: i
+  double precision, intent(in) :: x
+
+  fit_1_erf_x = 0.d0
+  do i = 1, n_max_fit_slat
+    fit_1_erf_x += dexp(-expo_gauss_1_erf_x(i) *x*x) * coef_gauss_1_erf_x(i)
+  enddo
 
 end
 
@@ -165,7 +188,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 6.23519457d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -175,7 +198,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 55.39184787d0, 3.92151407d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -185,7 +208,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 19.90272209d0, 3.2671671d0 , 336.47320445d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -195,7 +218,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 6467.28126d0, 46.9071990d0, 9.09617721d0, 2.76883328d0, 360.367093d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -205,10 +228,40 @@ end
     expo_gauss_1_erf_x_2 = (/ 2.54293498d+01, 1.40317872d+02, 7.14630801d+00, 2.65517675d+00, 1.45142619d+03, 1.00000000d+04 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.0213619d0   , 0.03221511d0  , 0.29966689d0  , 0.19178934d0  , 0.06154732d0  , 0.28214555d0  , 0.11125985d0   /)
+    expo_gauss_1_erf_x_2 = (/ 1.34727067d+04, 1.27166613d+03, 5.52584567d+00, 1.67753218d+01, 2.46145691d+02, 2.47971820d+00, 5.95141293d+01 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.28189124d0  , 0.19518669d0  , 0.12161735d0  , 0.24257438d0  , 0.07309656d0  , 0.042435d0    , 0.01926109d0  , 0.02393415d0   /)
+    expo_gauss_1_erf_x_2 = (/ 4.69795903d+00, 1.21379451d+01, 3.55527053d+01, 2.39227172d+00, 1.14827721d+02, 4.16320213d+02, 1.52813587d+04, 1.78516557d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_1_erf_x_2 = (/  /)
+  !  expo_gauss_1_erf_x_2 = (/  /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
index 33f512cf..4694a998 100644
--- a/src/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -107,50 +107,69 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3,
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing int2_grad1_u12_ao_transp ...'
   call wall_time(wall0)
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = 1, ao_num
-        int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(1,j,i,ipoint)
-        int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(2,j,i,ipoint)
-        int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(3,j,i,ipoint)
-      enddo
-    enddo
-  enddo
+
+  if(test_cycle_tc)then
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3)
+       enddo
+     enddo
+   enddo
+  else
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3)
+       enddo
+     enddo
+   enddo
+  endif
   call wall_time(wall1)
   print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0
 
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)]
 
   implicit none
   integer :: ipoint
+  double precision :: wall0, wall1
 
- print*,'providing int2_grad1_u12_bimo_transp'
- double precision :: wall0, wall1
- call wall_time(wall0)
- !$OMP PARALLEL         &
- !$OMP DEFAULT (NONE)   &
- !$OMP PRIVATE (ipoint) & 
- !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
- !$OMP DO SCHEDULE (dynamic)
-  do ipoint = 1, n_points_final_grid
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
- call wall_time(wall1)
- print*,'Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
+  !print *, ' providing int2_grad1_u12_bimo_transp'
+
+  call wall_time(wall0)
+  !$OMP PARALLEL         &
+  !$OMP DEFAULT (NONE)   &
+  !$OMP PRIVATE (ipoint) & 
+  !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
+  !$OMP DO SCHEDULE (dynamic)
+   do ipoint = 1, n_points_final_grid
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
 
 END_PROVIDER 
 
 ! ---
+
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3, mo_num, mo_num )]
  implicit none
  integer          :: i, j, ipoint
@@ -165,35 +184,22 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3
  enddo
 END_PROVIDER 
 
-BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo, (3, mo_num, mo_num, n_points_final_grid)]
+! ---
 
-  BEGIN_DOC
-  !
-  ! int2_grad1_u12_bimo(:,k,i,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \chi_k(r2) \phi_i(r2) 
-  !
-  END_DOC
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)]
 
   implicit none
-  integer :: ipoint
-  print*,'Wrong !!'
-  stop
- !$OMP PARALLEL         &
- !$OMP DEFAULT (NONE)   &
- !$OMP PRIVATE (ipoint) & 
- !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao,int2_grad1_u12_bimo)
- !$OMP DO SCHEDULE (dynamic)
+  integer :: i, j, ipoint
+
   do ipoint = 1, n_points_final_grid
-
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao  (1,1,1,ipoint), size(int2_grad1_u12_ao  , 2) &
-                          , int2_grad1_u12_bimo(1,1,1,ipoint), size(int2_grad1_u12_bimo, 2) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao  (2,1,1,ipoint), size(int2_grad1_u12_ao  , 2) &
-                          , int2_grad1_u12_bimo(2,1,1,ipoint), size(int2_grad1_u12_bimo, 2) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao  (3,1,1,ipoint), size(int2_grad1_u12_ao  , 2) &
-                          , int2_grad1_u12_bimo(3,1,1,ipoint), size(int2_grad1_u12_bimo, 2) )
-
+    do i = 1, ao_num
+      do j = 1, ao_num
+        int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(j,i,ipoint,1)
+        int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(j,i,ipoint,2)
+        int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(j,i,ipoint,3)
+      enddo                                  
+    enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
 
 END_PROVIDER 
 
diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
index c1c27f06..48fa84f7 100644
--- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
+++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
@@ -15,7 +15,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n
  character*(128)  :: name_file 
 
   three_body_ints_bi_ort = 0.d0
-  print*,'Providing the three_body_ints_bi_ort ...'
+  print *, ' Providing the three_body_ints_bi_ort ...'
   call wall_time(wall0)
   name_file = 'six_index_tensor'
 
@@ -71,7 +71,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
   !
   END_DOC
 
@@ -104,12 +104,11 @@ end subroutine give_integrals_3_body_bi_ort
 
 ! ---
 
-
 subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
   !
   END_DOC
 
@@ -170,3 +169,39 @@ end subroutine give_integrals_3_body_bi_ort_old
 
 ! ---
 
+subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral)
+
+  BEGIN_DOC
+  !
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n, l, k, m, j, i
+  double precision, intent(out) :: integral
+  integer                       :: ipoint
+  double precision              :: weight
+
+  integral = 0.d0
+  do ipoint = 1, n_points_final_grid
+    weight = final_weight_at_r_vector(ipoint)                                                                          
+
+    integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) )
+    integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+    integral += weight * aos_in_r_array_transp(ipoint,n) * aos_in_r_array_transp(ipoint,m) &
+              * ( int2_grad1_u12_ao_t(ipoint,1,l,j) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,l,j) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,l,j) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+
+  enddo
+
+end subroutine give_integrals_3_body_bi_ort_ao
+
+! ---
diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f
index 947be870..90fe9634 100644
--- a/src/bi_ortho_mos/bi_density.irp.f
+++ b/src/bi_ortho_mos/bi_density.irp.f
@@ -2,47 +2,66 @@
 ! ---
 
 BEGIN_PROVIDER [double precision, TCSCF_bi_ort_dm_ao_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao_alpha(i,j) = <Chi_0| a^dagger_i,alpha a_j,alpha |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the alpha density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
   implicit none
- BEGIN_DOC
-! TCSCF_bi_ort_dm_ao_alpha(i,j) = <Chi_0| a^dagger_i,alpha a_j,alpha |Phi_0> where i,j are AO basis. 
-!
-! This is the equivalent of the alpha density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
- END_DOC
+
+  PROVIDE mo_l_coef mo_r_coef
+
   call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0               &
             , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
             , 0.d0, TCSCF_bi_ort_dm_ao_alpha, size(TCSCF_bi_ort_dm_ao_alpha, 1) )
+
 END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao_beta(i,j) = <Chi_0| a^dagger_i,beta a_j,beta |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
   implicit none
- BEGIN_DOC
-! TCSCF_bi_ort_dm_ao_beta(i,j) = <Chi_0| a^dagger_i,beta a_j,beta |Phi_0> where i,j are AO basis. 
-!
-! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
- END_DOC
-  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0               &
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
             , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
             , 0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) )
+
 END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao(i,j) = <Chi_0| a^dagger_i,beta+alpha a_j,beta+alpha |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the total electronic density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
   implicit none
- BEGIN_DOC
-! TCSCF_bi_ort_dm_ao(i,j) = <Chi_0| a^dagger_i,beta+alpha a_j,beta+alpha |Phi_0> where i,j are AO basis. 
-!
-! This is the equivalent of the total electronic density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
- END_DOC
-  ASSERT ( size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_alpha, 1) )
-  if( elec_alpha_num==elec_beta_num ) then
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_alpha, 1))
+
+  if(elec_alpha_num==elec_beta_num) then
     TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_alpha
   else
-    ASSERT ( size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_beta, 1))
+    ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_beta, 1))
     TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_beta
   endif
+
 END_PROVIDER
 
 ! ---
diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f
index 034a436e..d51999fc 100644
--- a/src/bi_ortho_mos/mos_rl.irp.f
+++ b/src/bi_ortho_mos/mos_rl.irp.f
@@ -37,6 +37,52 @@ end subroutine ao_to_mo_bi_ortho
 
 ! ---
 
+subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao)
+
+  BEGIN_DOC
+  !
+  ! mo_l_coef.T x     A_ao   x mo_r_coef = A_mo
+  ! mo_l_coef.T x ao_overlap x mo_r_coef =  I
+  !
+  ! ==> A_ao = (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: LDA_ao, LDA_mo
+  double precision, intent(in)  :: A_mo(LDA_mo,mo_num)
+  double precision, intent(out) :: A_ao(LDA_ao,ao_num)
+  double precision, allocatable :: tmp_1(:,:), tmp_2(:,:)
+
+  ! ao_overlap x mo_r_coef
+  allocate( tmp_1(ao_num,mo_num) )
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0                         &
+            , ao_overlap, size(ao_overlap, 1), mo_r_coef, size(mo_r_coef, 1) &
+            , 0.d0, tmp_1, size(tmp_1, 1) )
+
+  ! (ao_overlap x mo_r_coef) x A_mo
+  allocate( tmp_2(ao_num,mo_num) )
+  call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0 &
+            , tmp_1, size(tmp_1, 1), A_mo, LDA_mo    &
+            , 0.d0, tmp_2, size(tmp_2, 1) )
+  
+  ! ao_overlap x mo_l_coef
+  tmp_1 = 0.d0
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0                         &
+            , ao_overlap, size(ao_overlap, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, tmp_1, size(tmp_1, 1) )
+
+  ! (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T
+  call dgemm( 'N', 'T', ao_num, ao_num, mo_num, 1.d0       &
+            , tmp_2, size(tmp_2, 1), tmp_1, size(tmp_1, 1) &
+            , 0.d0, A_ao, LDA_ao )
+  
+  deallocate(tmp_1, tmp_2)
+
+end subroutine mo_to_ao_bi_ortho
+
+! ---
+
 BEGIN_PROVIDER [ double precision, mo_r_coef, (ao_num, mo_num) ]
 
   BEGIN_DOC
@@ -175,3 +221,4 @@ END_PROVIDER
 
 ! ---
 
+
diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f
index 6fa6a4c7..72f820ec 100644
--- a/src/dft_utils_in_r/ao_in_r.irp.f
+++ b/src/dft_utils_in_r/ao_in_r.irp.f
@@ -40,6 +40,47 @@
  END_PROVIDER
 
 
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra(i,j)        = value of the ith ao on the jth grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i,r,aos_array,j) & 
+ !$OMP SHARED(aos_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra)
+ do i = 1, n_points_extra_final_grid
+  r(1) = final_grid_points_extra(1,i)
+  r(2) = final_grid_points_extra(2,i)
+  r(3) = final_grid_points_extra(3,i)
+  call give_all_aos_at_r(r,aos_array)
+  do j = 1, ao_num
+   aos_in_r_array_extra(j,i) = aos_array(j)
+  enddo
+ enddo
+ !$OMP END PARALLEL DO
+
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ do i = 1, n_points_extra_final_grid
+  do j = 1, ao_num
+   aos_in_r_array_extra_transp(i,j) = aos_in_r_array_extra(j,i) 
+  enddo
+ enddo
+
+ END_PROVIDER
+
+
 
  BEGIN_PROVIDER[double precision, aos_grad_in_r_array, (ao_num,n_points_final_grid,3)]
  implicit none
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
new file mode 100644
index 00000000..39ea0cdf
--- /dev/null
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -0,0 +1,155 @@
+
+BEGIN_PROVIDER [ double precision, ao_abs_int_grid, (ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_abs_int_grid(i) = \int dr |phi_i(r) |
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_abs_int_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+    contrib = dabs(aos_in_r_array(i,ipoint)) * weight
+    ao_abs_int_grid(i) += contrib 
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_overlap_abs_grid, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_overlap_abs_grid(j,i) = \int dr |phi_i(r) phi_j(r)| 
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_overlap_abs_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    ao_overlap_abs_grid(j,i) += contrib 
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_center(1:3,j,i) = \int dr |phi_i(r) phi_j(r)| x/y/z / \int |phi_i(r) phi_j(r)|
+!
+! if \int |phi_i(r) phi_j(r)| < 1.d-10 then ao_prod_center = 10000.
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_prod_center = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    do m = 1, 3
+     ao_prod_center(m,j,i) += contrib * r(m)
+    enddo
+   enddo
+  enddo
+ enddo
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+    do m = 1, 3
+     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+    enddo
+   else
+    do m = 1, 3
+     ao_prod_center(m,j,i) = 10000.d0
+    enddo
+   endif
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_abs_r, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_abs_r(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)|
+!
+ END_DOC
+ ao_prod_abs_r = 0.d0
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3),contrib_x2
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    contrib_x2 = 0.d0
+    do m = 1, 3
+     contrib_x2 += (r(m) - ao_prod_center(m,j,i)) * (r(m) - ao_prod_center(m,j,i)) 
+    enddo
+    contrib_x2 = dsqrt(contrib_x2)
+    ao_prod_abs_r(j,i) += contrib * contrib_x2
+   enddo
+  enddo
+ enddo
+
+
+END_PROVIDER 
+
+ BEGIN_PROVIDER [double precision, ao_prod_sigma, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! Gaussian exponent reproducing the product |chi_i(r) chi_j(r)| 
+!
+! Therefore |chi_i(r) chi_j(r)|  \approx e^{-ao_prod_sigma(j,i) (r - ao_prod_center(1:3,j,i))**2}
+ END_DOC
+ integer :: i,j
+ double precision :: pi,alpha
+ pi = dacos(-1.d0)
+ do i = 1, ao_num
+  do j = 1, ao_num
+!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-5)then
+     alpha = 1.d0/pi * (2.d0*ao_overlap_abs_grid(j,i)/ao_prod_abs_r(j,i))**2
+     ao_prod_sigma(j,i) = alpha
+!   endif
+  enddo
+ enddo
+ END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! ao_prod_dist_grid(j,i,ipoint) = distance between the center of |phi_i(r) phi_j(r)| and the grid point r(ipoint)
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: distance,r(3)
+ do ipoint = 1, n_points_final_grid
+  r(:) = final_grid_points(:,ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    distance = 0.d0
+    do m = 1, 3
+     distance += (ao_prod_center(m,j,i) - r(m))*(ao_prod_center(m,j,i) - r(m))
+    enddo
+    distance = dsqrt(distance)
+    ao_prod_dist_grid(j,i,ipoint)  = distance
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+
+!BEGIN_PROVIDER [ double precision, ao_abs_prod_j1b, (ao_num, ao_num)]
+! implicit none
+!
+!END_PROVIDER 
diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f
index d7d8fa7d..cb698fbb 100644
--- a/src/hartree_fock/fock_matrix_hf.irp.f
+++ b/src/hartree_fock/fock_matrix_hf.irp.f
@@ -1,12 +1,27 @@
+! ---
 
  BEGIN_PROVIDER [ double precision, ao_two_e_integral_alpha, (ao_num, ao_num) ]
-&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta ,  (ao_num, ao_num) ]
- use map_module
- implicit none
+&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta , (ao_num, ao_num) ]
+
  BEGIN_DOC
- ! Alpha and Beta Fock matrices in AO basis set
+ !
+ ! 2-e part of alpha and beta Fock matrices (F^{a} & F^{b}) in AO basis set
+ ! 
+ ! F^{a} = h + G^{a}
+ ! F^{b} = h + G^{b}
+ ! 
+ ! where : 
+ !         F^{a} = J^{a} + J^{b} - K^{a} ==> G_{ij}^{a} = \sum_{k,l} P_{kl} (kl|ij) - P_{kl}^{a} (ki|lj)
+ !         F^{b} = J^{a} + J^{b} - K^{b} ==> G_{ij}^{b} = \sum_{k,l} P_{kl} (kl|ij) - P_{kl}^{b} (ki|lj)
+ ! 
+ ! and P_{kl} = P_{kl}^{a} + P_{kl}^{b} 
+ !
  END_DOC
 
+ use map_module
+
+ implicit none
+
  integer                        :: i,j,k,l,k1,r,s
  integer                        :: i0,j0,k0,l0
  integer*8                      :: p,q
@@ -153,6 +168,8 @@
 
 END_PROVIDER
 
+! ---
+
  BEGIN_PROVIDER [ double precision, Fock_matrix_ao_alpha, (ao_num, ao_num) ]
 &BEGIN_PROVIDER [ double precision, Fock_matrix_ao_beta,  (ao_num, ao_num) ]
  implicit none
diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f
index 3226073d..8cbf9dd0 100644
--- a/src/hartree_fock/scf.irp.f
+++ b/src/hartree_fock/scf.irp.f
@@ -68,20 +68,29 @@ subroutine create_guess
   endif
 end
 
-subroutine run
+! ---
+
+subroutine run()
 
   BEGIN_DOC
-!   Run SCF calculation
+  ! Run SCF calculation
   END_DOC
 
   use bitmasks
   implicit none
 
-  integer                        :: i_it, i, j, k
-
   mo_label = 'Orthonormalized'
 
-  call Roothaan_Hall_SCF
+  PROVIDE scf_algorithm
+
+  if(scf_algorithm .eq. "DIIS") then
+    call Roothaan_Hall_SCF()
+  elseif(scf_algorithm .eq. "Simple") then
+    call Roothaan_Hall_SCF_Simple()
+  else
+    print *, scf_algorithm, ' not implemented yet'
+  endif
+
   call ezfio_set_hartree_fock_energy(SCF_energy)
 
 end
diff --git a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
index bb585f63..5e7ef7e9 100644
--- a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
+++ b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
@@ -17,7 +17,7 @@ program debug_integ_jmu_modif
 
   PROVIDE mu_erf j1b_pen
 
-  call test_v_ij_u_cst_mu_j1b()
+!  call test_v_ij_u_cst_mu_j1b()
 !  call test_v_ij_erf_rk_cst_mu_j1b()
 !  call test_x_v_ij_erf_rk_cst_mu_j1b()
 !  call test_int2_u2_j1b2()
@@ -31,6 +31,9 @@ program debug_integ_jmu_modif
 !  call test_u12_grad1_u12_j1b_grad1_j1b()
 !  !call test_gradu_squared_u_ij_mu()
 
+  !call test_vect_overlap_gauss_r12_ao()
+  call test_vect_overlap_gauss_r12_ao_with1s()
+
 end
 
 ! ---
@@ -303,7 +306,7 @@ subroutine test_int2_grad1_u12_ao()
 
         call num_int2_grad1_u12_ao(i, j, ipoint, integ)
 
-        i_exc  = int2_grad1_u12_ao(1,i,j,ipoint) 
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,1) 
         i_num  = integ(1)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -315,7 +318,7 @@ subroutine test_int2_grad1_u12_ao()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = int2_grad1_u12_ao(2,i,j,ipoint) 
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,2) 
         i_num  = integ(2)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -327,7 +330,7 @@ subroutine test_int2_grad1_u12_ao()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = int2_grad1_u12_ao(3,i,j,ipoint) 
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,3) 
         i_num  = integ(3)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -379,7 +382,7 @@ subroutine test_int2_u_grad1u_total_j1b2()
 
         call num_int2_u_grad1u_total_j1b2(i, j, ipoint, integ)
 
-        i_exc  = x * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(1,i,j,ipoint) 
+        i_exc  = x * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,1)
         i_num  = integ(1)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -391,7 +394,7 @@ subroutine test_int2_u_grad1u_total_j1b2()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = y * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(2,i,j,ipoint) 
+        i_exc  = y * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,2) 
         i_num  = integ(2)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -403,7 +406,7 @@ subroutine test_int2_u_grad1u_total_j1b2()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = z * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(3,i,j,ipoint) 
+        i_exc  = z * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,3) 
         i_num  = integ(3)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -595,7 +598,183 @@ subroutine test_u12_grad1_u12_j1b_grad1_j1b()
   print*, ' normalz = ', normalz
 
   return
-end subroutine test_u12_grad1_u12_j1b_grad1_j1b,
+end subroutine test_u12_grad1_u12_j1b_grad1_j1b
 
 ! ---
 
+subroutine test_vect_overlap_gauss_r12_ao()
+
+  implicit none
+
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao
+
+  print *, ' test_vect_overlap_gauss_r12_ao ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+
+  expo_fit = expo_gauss_j_mu_x_2(1)
+
+  ! ---
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao(r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  = I_ref(i,j,ipoint) 
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao
+
+! ---
+
+subroutine test_vect_overlap_gauss_r12_ao_with1s()
+
+  implicit none
+
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3), beta, B_center(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print *, ' test_vect_overlap_gauss_r12_ao_with1s ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+
+  expo_fit    = expo_gauss_j_mu_x_2(1)
+  beta        = List_all_comb_b3_expo  (2)
+  B_center(1) = List_all_comb_b3_cent(1,2)
+  B_center(2) = List_all_comb_b3_cent(2,2)
+  B_center(3) = List_all_comb_b3_cent(3,2)
+
+  ! ---
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  = I_ref(i,j,ipoint) 
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao
+
diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f
index 4e70bc5c..81a8fe71 100644
--- a/src/non_h_ints_mu/grad_squared.irp.f
+++ b/src/non_h_ints_mu/grad_squared.irp.f
@@ -70,9 +70,9 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi
 
           gradu_squared_u_ij_mu(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint)            &
                                             + tmp2 * int2_u2_j1b2             (i,j,ipoint)            &
-                                            + tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(1,i,j,ipoint) &
-                                            + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(2,i,j,ipoint) &
-                                            + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+                                            + tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) &
+                                            + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) &
+                                            + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3)
         enddo
       enddo
     enddo
@@ -104,11 +104,11 @@ END_PROVIDER
 
 ! ---
 
-!BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+!BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
 !
 !  BEGIN_DOC
 !  !
-!  ! tc_grad_square_ao(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+!  ! tc_grad_square_ao_loop(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
 !  !
 !  END_DOC
 !
@@ -142,8 +142,8 @@ END_PROVIDER
 !    do l = 1, ao_num
 !      do i = 1, ao_num
 !        do k = 1, ao_num
-!          tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
-!          !write(11,*) tc_grad_square_ao(k,i,l,j)
+!          tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+!          !write(11,*) tc_grad_square_ao_loop(k,i,l,j)
 !        enddo
 !      enddo
 !    enddo
@@ -155,19 +155,23 @@ END_PROVIDER
 
 ! ---
 
-BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
 
   BEGIN_DOC
   !
-  ! tc_grad_square_ao(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+  ! tc_grad_square_ao_loop(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
   !
   END_DOC
 
   implicit none
   integer                       :: ipoint, i, j, k, l
   double precision              :: weight1, ao_ik_r, ao_i_r
+  double precision              :: time0, time1
   double precision, allocatable :: ac_mat(:,:,:,:), bc_mat(:,:,:,:)
 
+  print*, ' providing tc_grad_square_ao_loop ...'
+  call wall_time(time0)
+
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
   allocate(bc_mat(ao_num,ao_num,ao_num,ao_num))
@@ -177,10 +181,12 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
     weight1 = final_weight_at_r_vector(ipoint)
 
     do i = 1, ao_num
-      ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+      !ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+      ao_i_r = weight1 * aos_in_r_array(i,ipoint)
 
       do k = 1, ao_num
-        ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
+        !ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
+        ao_ik_r = ao_i_r * aos_in_r_array(k,ipoint)
 
         do j = 1, ao_num
           do l = 1, ao_num
@@ -196,7 +202,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
     do l = 1, ao_num
       do i = 1, ao_num
         do k = 1, ao_num
-          tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j)
+          tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j)
         enddo
       enddo
     enddo
@@ -205,6 +211,9 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
   deallocate(ac_mat)
   deallocate(bc_mat)
 
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_loop = ', time1 - time0
+
 END_PROVIDER 
 
 ! ---
@@ -328,9 +337,9 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num,
 
         tmp9 = int2_u_grad1u_j1b2(i,j,ipoint)
 
-        u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(1,i,j,ipoint) &
-                                                + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(2,i,j,ipoint) &
-                                                + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+        u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) &
+                                                + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) &
+                                                + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3)
       enddo
     enddo
   enddo
@@ -342,3 +351,86 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, ao_ik_r, ao_i_r
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao ...'
+  call wall_time(time0)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+
+  b_mat = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (i, k, ipoint) & 
+ !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  tmp = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (j, l, ipoint) & 
+ !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12)
+ !$OMP DO SCHEDULE (static)
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do l = 1, ao_num
+        tmp(l,j,ipoint) = u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) + 0.5d0 * grad12_j12(l,j,ipoint)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+
+  ac_mat = 0.d0
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+            , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+            , 1.d0, ac_mat, ao_num*ao_num)
+  deallocate(tmp, b_mat)
+
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
new file mode 100644
index 00000000..180c9588
--- /dev/null
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -0,0 +1,221 @@
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao_test(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, ao_ik_r, ao_i_r,contrib,contrib2
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao_test ...'
+  call wall_time(time0)
+
+  provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+
+  b_mat = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (i, k, ipoint) & 
+ !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  tmp = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (j, l, ipoint) & 
+ !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test)
+ !$OMP DO SCHEDULE (static)
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do l = 1, ao_num
+        tmp(l,j,ipoint) = u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) + 0.5d0 * grad12_j12_test(l,j,ipoint)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ac_mat = 0.d0
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+            , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+            , 1.d0, ac_mat, ao_num*ao_num)
+  deallocate(tmp, b_mat)
+
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_square_ao_test, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_final_grid) ]
+
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: tmp_x, tmp_y, tmp_z
+  double precision           :: tmp1
+  double precision           :: time0, time1
+
+  print*, ' providing u12sq_j1bsq_test ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+    tmp1  = -0.5d0 * (tmp_x * tmp_x + tmp_y * tmp_y + tmp_z * tmp_z)
+    do j = 1, ao_num
+      do i = 1, ao_num
+        u12sq_j1bsq_test(i,j,ipoint) = tmp1 * int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12sq_j1bsq_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: x, y, z
+  double precision           :: tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+
+  print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...'
+
+  provide int2_u_grad1u_x_j1b2_test
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+
+    x     = final_grid_points(1,ipoint)
+    y     = final_grid_points(2,ipoint)
+    z     = final_grid_points(3,ipoint)
+    tmp_v = v_1b       (ipoint)
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+
+    tmp3 = tmp_v * tmp_x
+    tmp4 = tmp_v * tmp_y
+    tmp5 = tmp_v * tmp_z
+
+    tmp6 = -x * tmp3
+    tmp7 = -y * tmp4
+    tmp8 = -z * tmp5
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        tmp9 = int2_u_grad1u_j1b2_test(i,j,ipoint)
+
+        u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,1) &
+                                                     + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,2) &
+                                                     + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: r(3), delta, coef
+  double precision           :: tmp1
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+  provide int2_grad1u2_grad2u2_j1b2_test
+  print*, ' providing grad12_j12_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      tmp1 = v_1b(ipoint)
+      tmp1 = tmp1 * tmp1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          grad12_j12_test(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+  else
+
+    grad12_j12_test = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            grad12_j12_test(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for grad12_j12_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f
index f3b68f43..a515e0b8 100644
--- a/src/non_h_ints_mu/j12_nucl_utils.irp.f
+++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f
@@ -237,6 +237,23 @@ end function j12_mu
 
 ! ---
 
+double precision function j12_mu_r12(r12)
+
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: r12
+  double precision             :: mu_r12
+
+  mu_r12 = mu_erf * r12
+
+  j12_mu_r12 = 0.5d0 * r12 * (1.d0 - derf(mu_r12)) - inv_sq_pi_2 * dexp(-mu_r12*mu_r12) / mu_erf
+
+  return
+end function j12_mu_r12
+
+! ---
+
 double precision function j12_mu_gauss(r1, r2)
 
   implicit none
diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f
index d34e629c..9aef436f 100644
--- a/src/non_h_ints_mu/new_grad_tc.irp.f
+++ b/src/non_h_ints_mu/new_grad_tc.irp.f
@@ -1,22 +1,21 @@
-  
 ! ---
 
-BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_points_final_grid)]
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)]
 
   BEGIN_DOC
   !
-  ! int2_grad1_u12_ao(:,i,j,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
   !
   ! where r1 = r(ipoint)
   !
   ! if J(r1,r2) = u12:
   !
-  ! int2_grad1_u12_ao(:,i,j,ipoint) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
+  ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
   !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
   !
   ! if J(r1,r2) = u12 x v1 x v2
   !
-  ! int2_grad1_u12_ao(:,i,j,ipoint) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  ! int2_grad1_u12_ao(i,j,ipoint,:) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
   !                                 - \grad_1 v1 x [       \int dr2                  u12              v2 \phi_i(r2) \phi_j(r2) ] 
   !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
   !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
@@ -25,6 +24,95 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_poin
   !
   END_DOC
 
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: time0, time1
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  print*, ' providing int2_grad1_u12_ao ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+  
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
+
+          int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
+          int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
+          int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+
+          int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,1)
+          int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,2)
+          int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,3)
+        enddo
+      enddo
+    enddo
+
+    int2_grad1_u12_ao *= 0.5d0
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for int2_grad1_u12_ao = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1)
+  !                                 = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !                                 = -int2_grad1_u12_ao(i,j,ipoint,:)
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) =      v2    x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] v1 \phi_i(r1) \phi_j(r1) ]
+  !                                 - \grad_2 v2 x [       \int dr1                   u12              v1 \phi_i(r1) \phi_j(r1) ] 
+  !                                 =   -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 +    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  !
+  END_DOC
+
   implicit none
   integer          :: ipoint, i, j
   double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
@@ -49,32 +137,16 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_poin
           tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
           tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
 
-          int2_grad1_u12_ao(1,i,j,ipoint) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint) - tmp2 * tmp_x
-          int2_grad1_u12_ao(2,i,j,ipoint) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint) - tmp2 * tmp_y
-          int2_grad1_u12_ao(3,i,j,ipoint) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint) - tmp2 * tmp_z
+          int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
+          int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
+          int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
         enddo
       enddo
     enddo
 
   else
 
-    do ipoint = 1, n_points_final_grid
-      x = final_grid_points(1,ipoint)
-      y = final_grid_points(2,ipoint)
-      z = final_grid_points(3,ipoint)
-
-      do j = 1, ao_num
-        do i = 1, ao_num
-          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
-
-          int2_grad1_u12_ao(1,i,j,ipoint) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(1,i,j,ipoint)
-          int2_grad1_u12_ao(2,i,j,ipoint) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(2,i,j,ipoint)
-          int2_grad1_u12_ao(3,i,j,ipoint) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(3,i,j,ipoint)
-        enddo
-      enddo
-    enddo
-
-    int2_grad1_u12_ao *= 0.5d0
+    int1_grad2_u12_ao = -1.d0 * int2_grad1_u12_ao
 
   endif
 
@@ -82,11 +154,11 @@ END_PROVIDER
 
 ! ---
 
-BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
 
   BEGIN_DOC
   !
-  ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij >
+  ! tc_grad_and_lapl_ao_loop(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
   !
   ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
   !
@@ -98,33 +170,48 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
   integer                       :: ipoint, i, j, k, l
   double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
   double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision              :: ao_j_r, ao_l_r, ao_l_dx, ao_l_dy, ao_l_dz
+  double precision              :: time0, time1
   double precision, allocatable :: ac_mat(:,:,:,:)
 
+  print*, ' providing tc_grad_and_lapl_ao_loop ...'
+  call wall_time(time0)
+
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
 
+  ! ---
+
   do ipoint = 1, n_points_final_grid
     weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
 
     do i = 1, ao_num
-      ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
-      ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
-      ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
-      ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+      !ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
+      !ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
+      !ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
+      !ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+      ao_i_r  = weight1 * aos_in_r_array     (i,ipoint)
+      ao_i_dx = weight1 * aos_grad_in_r_array(i,ipoint,1)
+      ao_i_dy = weight1 * aos_grad_in_r_array(i,ipoint,2)
+      ao_i_dz = weight1 * aos_grad_in_r_array(i,ipoint,3)
 
       do k = 1, ao_num
-        ao_k_r = aos_in_r_array_transp(ipoint,k)
+        !ao_k_r = aos_in_r_array_transp(ipoint,k)
+        ao_k_r = aos_in_r_array(k,ipoint)
 
-        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
-        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
-        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
+        !tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
+        !tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
+        !tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
+        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array(k,ipoint,1) 
+        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array(k,ipoint,2) 
+        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array(k,ipoint,3) 
 
         do j = 1, ao_num
           do l = 1, ao_num
 
-            contrib_x = int2_grad1_u12_ao(1,l,j,ipoint) * tmp_x 
-            contrib_y = int2_grad1_u12_ao(2,l,j,ipoint) * tmp_y 
-            contrib_z = int2_grad1_u12_ao(3,l,j,ipoint) * tmp_z 
+            contrib_x = int2_grad1_u12_ao(l,j,ipoint,1) * tmp_x 
+            contrib_y = int2_grad1_u12_ao(l,j,ipoint,2) * tmp_y 
+            contrib_z = int2_grad1_u12_ao(l,j,ipoint,3) * tmp_z 
 
             ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
           enddo
@@ -132,7 +219,122 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
       enddo
     enddo
   enddo
+
+  ! ---
+
+  !do ipoint = 1, n_points_final_grid
+  !  weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+
+  !  do l = 1, ao_num
+  !    ao_l_r  = weight1 * aos_in_r_array_transp         (ipoint,l)
+  !    ao_l_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,1)
+  !    ao_l_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,2)
+  !    ao_l_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,3)
+
+  !    do j = 1, ao_num
+  !      ao_j_r = aos_in_r_array_transp(ipoint,j)
+
+  !      tmp_x = ao_j_r * ao_l_dx - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,1) 
+  !      tmp_y = ao_j_r * ao_l_dy - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,2) 
+  !      tmp_z = ao_j_r * ao_l_dz - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,3) 
+
+  !      do i = 1, ao_num
+  !        do k = 1, ao_num
+
+  !          contrib_x = int2_grad1_u12_ao(k,i,ipoint,1) * tmp_x 
+  !          contrib_y = int2_grad1_u12_ao(k,i,ipoint,2) * tmp_y 
+  !          contrib_z = int2_grad1_u12_ao(k,i,ipoint,3) * tmp_z 
+
+  !          ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  ! ---
  
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+          !tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_loop = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l, m
+  double precision              :: weight1, ao_k_r, ao_i_r
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao ...'
+  call wall_time(time0)
+
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+
+  b_mat = 0.d0
+ !$OMP PARALLEL                                                              &
+ !$OMP DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+ !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+ !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+
+        weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+        ao_i_r  = aos_in_r_array_transp(ipoint,i)
+        ao_k_r  = aos_in_r_array_transp(ipoint,k)
+
+        b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) 
+        b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) 
+        b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ac_mat = 0.d0
+  do m = 1, 3
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0              &
+              , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num) 
+
+  enddo
+  deallocate(b_mat)
+
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao, ao_num)
+ !$OMP DO SCHEDULE (static)
   do j = 1, ao_num
     do l = 1, ao_num
       do i = 1, ao_num
@@ -142,10 +344,16 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
       enddo
     enddo
   enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
 
   deallocate(ac_mat)
 
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0
+
 END_PROVIDER 
 
 ! ---
 
+
diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
new file mode 100644
index 00000000..4d85e061
--- /dev/null
+++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
@@ -0,0 +1,174 @@
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
+  !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  !                                 - \grad_1 v1 x [       \int dr2                  u12              v2 \phi_i(r2) \phi_j(r2) ] 
+  !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: time0, time1
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  print*, ' providing int2_grad1_u12_ao_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+  
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+
+          int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1) - tmp2 * tmp_x
+          int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2) - tmp2 * tmp_y
+          int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+
+          int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,1)
+          int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,2)
+          int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,3)
+        enddo
+      enddo
+    enddo
+
+    int2_grad1_u12_ao_test *= 0.5d0
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for int2_grad1_u12_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l, m
+  double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
+  double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao_test ...'
+  call wall_time(time0)
+
+  provide int2_grad1_u12_ao_test 
+ 
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+
+  b_mat = 0.d0
+ !$OMP PARALLEL                                                              &
+ !$OMP DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+ !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+ !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+
+        weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+        ao_i_r  = aos_in_r_array_transp(ipoint,i)
+        ao_k_r  = aos_in_r_array_transp(ipoint,k)
+
+        b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1))
+        b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2))
+        b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3))
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ac_mat = 0.d0
+  do m = 1, 3
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0                   &
+              , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num)
+
+  enddo
+  deallocate(b_mat)
+
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f
index 979296d1..81747553 100644
--- a/src/non_h_ints_mu/total_tc_int.irp.f
+++ b/src/non_h_ints_mu/total_tc_int.irp.f
@@ -7,17 +7,22 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao
   integer          :: i, j, k, l
   double precision :: wall1, wall0
 
+  print *, ' providing ao_tc_int_chemist ...'
   call wall_time(wall0)
-
-  do j = 1, ao_num
-    do l = 1, ao_num
-      do i = 1, ao_num
-        do k = 1, ao_num
-          ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
-        enddo
-      enddo
-    enddo
-  enddo
+  
+  if(test_cycle_tc)then
+   ao_tc_int_chemist = ao_tc_int_chemist_test
+  else
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+  endif
 
   call wall_time(wall1)
   print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0
@@ -26,6 +31,32 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: wall1, wall0
+
+  print *, ' providing ao_tc_int_chemist_test ...'
+  call wall_time(wall0)
+
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_and_lapl_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for ao_tc_int_chemist_test ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
 BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num) ]
 
   BEGIN_DOC
diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f
index df1eb71d..78fddf54 100644
--- a/src/non_hermit_dav/biorthog.irp.f
+++ b/src/non_hermit_dav/biorthog.irp.f
@@ -283,16 +283,16 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   ! -------------------------------------------------------------------------------------
   !
 
-  print *, ' '
-  print *, ' Computing the left/right eigenvectors ...'
-  print *, ' '
+  !print *, ' '
+  !print *, ' Computing the left/right eigenvectors ...'
+  !print *, ' '
 
-  allocate( WR(n), WI(n), VL(n,n), VR(n,n) ) 
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n)) 
   
-  print *, ' fock matrix'
-  do i = 1, n
-    write(*, '(1000(F16.10,X))') A(i,:)
-  enddo
+  !print *, ' fock matrix'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') A(i,:)
+  !enddo
 
   !thr_cut = 1.d-15
   !call cancel_small_elmts(A, n, thr_cut)
@@ -301,11 +301,11 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   call lapack_diag_non_sym(n, A, WR, WI, VL, VR)
   !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR)
 
-  print *, ' '
-  print *, ' eigenvalues'
-  do i = 1, n
-    write(*, '(1000(F16.10,X))') WR(i), WI(i)
-  enddo
+  !print *, ' '
+  !print *, ' eigenvalues'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') WR(i), WI(i)
+  !enddo
   !print *, ' right eigenvect bef' 
   !do i = 1, n
   !  write(*, '(1000(F16.10,X))') VR(:,i)
@@ -328,9 +328,10 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   !                  track & sort the real eigenvalues 
 
   n_good = 0
-  thr    = 1.d-3
+  !thr    = 100d0
+  thr    = Im_thresh_tcscf
   do i = 1, n
-    print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    !print*, 'Re(i) + Im(i)', WR(i), WI(i)
     if(dabs(WI(i)) .lt. thr) then
       n_good += 1
     else
@@ -404,23 +405,24 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
 
   if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .lt. thr_d) ) then
 
-    print *, ' lapack vectors are normalized and bi-orthogonalized'
+    !print *, ' lapack vectors are normalized and bi-orthogonalized'
     deallocate(S)
     return
 
-  elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then
+  ! accu_nd is modified after adding the normalization
+  !elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then
 
-    print *, ' lapack vectors are not normalized but bi-orthogonalized'
-    call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
+  !  print *, ' lapack vectors are not normalized but bi-orthogonalized'
+  !  call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
 
-    call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
+  !  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
 
-    deallocate(S)
-    return
+  !  deallocate(S)
+  !  return
 
   else
 
-    print *, ' lapack vectors are not normalized neither bi-orthogonalized'
+    !print *, ' lapack vectors are not normalized neither bi-orthogonalized'
 
     ! ---
 
@@ -442,8 +444,8 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
     endif
     call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .true.)
 
-    !call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec)
-    !call impose_biorthog_lu(n, n_real_eigv, leigvec, reigvec)
+    !call impose_biorthog_qr(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec)
+    !call impose_biorthog_lu(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec)
 
     ! ---
 
@@ -609,7 +611,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva
   enddo
   accu_nd = dsqrt(accu_nd)
 
-  if(accu_nd .lt. 1d-8) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     ! L x R is already bi-orthogonal
 
     print *, ' L & T bi-orthogonality: ok'
@@ -621,7 +623,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva
 
     print *, ' L & T bi-orthogonality: not imposed yet'
     print *, ' accu_nd = ', accu_nd
-    call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec)
+    call impose_biorthog_qr(n, n_real_eigv, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec)
     deallocate( S )
   
   endif
@@ -631,7 +633,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva
 
   return
 
-end 
+end subroutine non_hrmt_bieig_random_diag
 
 ! ---
 
@@ -959,7 +961,7 @@ subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval)
   enddo
   accu_nd = dsqrt(accu_nd)
 
-  if( accu_nd .lt. 1d-8 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     ! L x R is already bi-orthogonal
 
     !print *, ' L & T bi-orthogonality: ok'
@@ -971,7 +973,7 @@ subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval)
 
     !print *, ' L & T bi-orthogonality: not imposed yet'
     !print *, ' accu_nd = ', accu_nd
-    call impose_biorthog_qr(n, n, leigvec, reigvec)
+    call impose_biorthog_qr(n, n, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec)
     deallocate( S )
   
   endif
diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
index 53c62ce8..0d652af4 100644
--- a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
+++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
@@ -930,7 +930,7 @@ subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, s
 
     tmp_abs = tmp_abs + tmp
     V_nrm   = V_nrm   + U_nrm 
-    write(*,'(I4,X,(100(F25.16,X)))')j,eigval(j), tmp, U_nrm
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
 
   enddo
 
@@ -973,7 +973,7 @@ subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, s
 
     tmp_abs = tmp_abs + tmp
     V_nrm   = V_nrm   + U_nrm 
-    write(*,'(I4,X,(100(F25.16,X)))')j,eigval(j), tmp, U_nrm
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
 
   enddo
 
@@ -1082,7 +1082,7 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
   double precision, allocatable   :: S(:,:), tmp(:,:)
   double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
 
-  print *, ' apply SVD to orthogonalize & normalize weighted vectors'
+  !print *, ' apply SVD to orthogonalize & normalize weighted vectors'
 
   ! ---
 
@@ -1097,10 +1097,10 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1160,10 +1160,10 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   deallocate(S)
 
@@ -1185,7 +1185,7 @@ subroutine impose_orthog_svd(n, m, C)
   double precision, allocatable   :: S(:,:), tmp(:,:)
   double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
 
-  print *, ' apply SVD to orthogonalize & normalize vectors'
+  !print *, ' apply SVD to orthogonalize & normalize vectors'
 
   ! ---
 
@@ -1196,10 +1196,10 @@ subroutine impose_orthog_svd(n, m, C)
             , C, size(C, 1), C, size(C, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' eigenvec overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1224,6 +1224,7 @@ subroutine impose_orthog_svd(n, m, C)
   if(num_linear_dependencies > 0) then
     write(*,*) ' linear dependencies = ', num_linear_dependencies
     write(*,*) ' m                   = ', m
+    write(*,*) ' try with Graham-Schmidt'
     stop
   endif
 
@@ -1256,10 +1257,10 @@ subroutine impose_orthog_svd(n, m, C)
             , C, size(C, 1), C, size(C, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' eigenvec overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   deallocate(S)
 
@@ -1296,10 +1297,10 @@ subroutine impose_orthog_svd_overlap(n, m, C, overlap)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' eigenvec overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1358,10 +1359,10 @@ subroutine impose_orthog_svd_overlap(n, m, C, overlap)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' eigenvec overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
   deallocate(S)
 
 end subroutine impose_orthog_svd_overlap
@@ -1528,11 +1529,11 @@ subroutine impose_orthog_degen_eigvec(n, e0, C0)
   enddo
 
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -1677,7 +1678,7 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
   double precision                :: accu_d, accu_nd, s_tmp
   double precision, allocatable   :: S(:,:)
 
-  print *, ' check bi-orthonormality'
+  !print *, ' check bi-orthonormality'
 
   ! ---
 
@@ -1714,15 +1715,19 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
     enddo
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
-  print*, '    diag acc: ', accu_d
-  print*, ' nondiag acc: ', accu_nd
+  !print*, '    diag acc bef = ', accu_d
+  !print*, ' nondiag acc bef = ', accu_nd
 
   ! ---
 
   if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
 
     do i = 1, m
-      print *, i, S(i,i)
+      if(S(i,i) <= 0.d0) then
+        print *, ' overap negative'
+        print *, i, S(i,i)
+        exit
+      endif
       if(dabs(S(i,i) - 1.d0) .gt. thr_d) then
         s_tmp = 1.d0 / dsqrt(S(i,i))
         do j = 1, n
@@ -1757,8 +1762,8 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
     enddo
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
-  print *, '    diag acc: ', accu_d
-  print *, ' nondiag acc: ', accu_nd
+  !print *, '    diag acc aft = ', accu_d
+  !print *, ' nondiag acc aft = ', accu_nd
 
   deallocate(S)
 
@@ -1801,10 +1806,10 @@ subroutine check_weighted_biorthog(n, m, W, Vl, Vr, thr_d, thr_nd, accu_d, accu_
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1852,17 +1857,18 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_
   integer                       :: i, j
   double precision, allocatable :: SS(:,:)
 
-  print *, ' check bi-orthogonality'
+  !print *, ' check bi-orthogonality'
 
   ! ---
 
   call dgemm( 'T', 'N', m, m, n, 1.d0          &
             , Vl, size(Vl, 1), Vr, size(Vr, 1) &
             , 0.d0, S, size(S, 1) )
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
+
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1877,12 +1883,12 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
 
-  print *, ' accu_nd = ', accu_nd
-  print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+  !print *, ' accu_nd = ', accu_nd
+  !print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
 
   ! ---
 
-  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+  if(stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) then
     print *, ' non bi-orthogonal vectors !'
     print *, ' accu_nd = ', accu_nd
     print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
@@ -1912,12 +1918,12 @@ subroutine check_orthog(n, m, V, accu_d, accu_nd, S)
             , V, size(V, 1), V, size(V, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ''
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
-  print *, ''
+  !print *, ''
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+  !print *, ''
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1981,11 +1987,11 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0)
     enddo
   enddo
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i), e0(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i), e0(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -2181,11 +2187,11 @@ subroutine impose_unique_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, C0, W0, L0,
     enddo
   enddo
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -2414,10 +2420,10 @@ subroutine impose_biorthog_svd(n, m, L, R)
             , L, size(L, 1), R, size(R, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -2489,10 +2495,11 @@ subroutine impose_biorthog_svd(n, m, L, R)
             , L, size(L, 1), R, size(R, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
   deallocate(S)
 
   ! ---
@@ -2806,10 +2813,10 @@ subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F25.16,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F25.16,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -2886,10 +2893,11 @@ subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' overlap aft SVD with overlap: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD with overlap: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
   deallocate(S)
 
   return
diff --git a/src/non_hermit_dav/new_routines.irp.f b/src/non_hermit_dav/new_routines.irp.f
index 07ac5917..4dea5f66 100644
--- a/src/non_hermit_dav/new_routines.irp.f
+++ b/src/non_hermit_dav/new_routines.irp.f
@@ -132,9 +132,9 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei
 
 !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
   !                               check bi-orthogonality
-  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else
     print *, ' '
@@ -149,14 +149,14 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei
     deallocate(S_nh_inv_half)
     call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp)
     call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp)
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
-    if( accu_nd .lt. 1d-10 ) then
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
      call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S)
-     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
-     if( accu_nd .lt. 1d-10 ) then
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+     if(accu_nd .lt. thresh_biorthog_nondiag) then
        print *, ' bi-orthogonality: ok'
      else 
       print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -200,10 +200,10 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei
   shift_current = max(1.d-10,shift_current)
   print*,'Thr for eigenvectors = ',shift_current
   call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
-  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
   
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else 
    print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
@@ -354,14 +354,14 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
 
 !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
   !                               check bi-orthogonality
-  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else
     print *, ' '
     print *, ' bi-orthogonality: not imposed yet'
-    if(complex_root)then 
+    if(complex_root) then 
      print *, ' '
      print *, ' '
      print *, ' orthog between degen eigenvect' 
@@ -369,9 +369,9 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
      ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
      call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) ! orthogonalization of reigvec
      call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) ! orthogonalization of leigvec
-     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) 
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S,  thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) 
 
-     if( accu_nd .lt. 1d-10 ) then
+     if(accu_nd .lt. thresh_biorthog_nondiag) then
        print *, ' bi-orthogonality: ok'
      else 
       print*,'New vectors not bi-orthonormals at ', accu_nd
@@ -387,8 +387,8 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
      print*,'S^{-1/2} exists !!'
      call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
     endif
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
-    if( accu_nd .lt. 1d-10 ) then
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -431,10 +431,10 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
   shift_current = max(1.d-10,shift_current)
   print*,'Thr for eigenvectors = ',shift_current
   call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
-  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
   
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else 
    print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
@@ -472,6 +472,7 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
   double precision :: accu,thr_cut
   double precision, allocatable :: S_nh_inv_half(:,:)
   logical :: complex_root
+  double precision :: thr_norm=1d0
 
 
   thr_cut = 1.d-15
@@ -580,9 +581,9 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
 
 !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
   !                               check bi-orthogonality
-  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else
     print *, ' '
@@ -593,9 +594,9 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
     print *, ' '
     ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
     call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, leigvec_tmp, reigvec_tmp)
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
     print*,'accu_nd = ',accu_nd
-    if( accu_nd .lt. 1d-10 ) then
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -608,8 +609,8 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
       call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
      endif
     endif
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
-    if( accu_nd .lt. 1d-10 ) then
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -651,11 +652,11 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
   print*,'Checking for final reigvec/leigvec'
   shift_current = max(1.d-10,shift_current)
   print*,'Thr for eigenvectors = ',shift_current
-  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec,shift_current)
-  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S)
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
   
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else 
    print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
diff --git a/src/scf_utils/diagonalize_fock.irp.f b/src/scf_utils/diagonalize_fock.irp.f
index 5188581a..008344f2 100644
--- a/src/scf_utils/diagonalize_fock.irp.f
+++ b/src/scf_utils/diagonalize_fock.irp.f
@@ -20,6 +20,12 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num)
      enddo
    enddo
 
+  !print *, ' Fock_matrix_MO :'
+  !do i = 1, mo_num
+  !  write(*, '(100(f15.7, 2x))') (Fock_matrix_MO(j,i), j = 1, mo_num)
+  !enddo
+
+
    if(frozen_orb_scf)then
      integer                        :: iorb,jorb
      do i = 1, n_core_orb
@@ -57,7 +63,6 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num)
    do i = elec_beta_num+1, elec_alpha_num
      F(i,i) += 0.5d0*level_shift
    enddo
-
    do i = elec_alpha_num+1, mo_num
      F(i,i) += level_shift
    enddo
@@ -90,6 +95,10 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num)
    call dsyevd( 'V', 'U', mo_num, F,                             &
        size(F,1), diag, work, lwork, iwork, liwork, info)
    deallocate(iwork)
+  !print*, ' Fock eigval:'
+  !do i = 1, mo_num
+  !  print *, diag(i)
+  !enddo
 
 
    if (info /= 0) then
diff --git a/src/scf_utils/diis.irp.f b/src/scf_utils/diis.irp.f
index 713de1b3..63a847ce 100644
--- a/src/scf_utils/diis.irp.f
+++ b/src/scf_utils/diis.irp.f
@@ -1,3 +1,5 @@
+! ---
+
 BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero ]
  implicit none
  BEGIN_DOC
@@ -12,6 +14,8 @@ BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero ]
 
 END_PROVIDER
 
+! ---
+
 BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO, (AO_num, AO_num)]
   implicit none
   BEGIN_DOC
@@ -60,6 +64,8 @@ BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO, (AO_num, AO_num)]
 
 END_PROVIDER
 
+! ---
+
 BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO, (mo_num, mo_num)]
   implicit none
   begin_doc
@@ -69,6 +75,7 @@ BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO, (mo_num, mo_num)]
      FPS_SPF_Matrix_MO, size(FPS_SPF_Matrix_MO,1))
 END_PROVIDER
 
+! ---
 
  BEGIN_PROVIDER [ double precision, eigenvalues_Fock_matrix_AO, (AO_num) ]
 &BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_AO, (AO_num,AO_num) ]
@@ -137,3 +144,175 @@ END_PROVIDER
 
 END_PROVIDER
 
+! ---
+
+!BEGIN_PROVIDER [double precision, error_diis_Fmo, (ao_num, ao_num)]
+!
+!  BEGIN_DOC
+!  !
+!  ! error_diis_Fmo = (S x C) x [F_mo x \eta_occ - \eta_occ x F_mo] x (S x C).T
+!  !
+!  ! \eta_occ is the matrix of occupation : \eta_occ = \eta_occ(alpha) + \eta_occ(beta)
+!  !
+!  END_DOC
+!
+!  implicit none
+!  integer                       :: i, j
+!  double precision, allocatable :: tmp(:,:)
+!
+!  provide Fock_matrix_mo
+!
+!  allocate(tmp(mo_num,mo_num))
+!  tmp = 0.d0
+!
+!  ! F_mo x \eta_occ(alpha) - \eta_occ x F_mo(alpha)
+!  do j = 1, elec_alpha_num
+!    do i = elec_alpha_num + 1, mo_num
+!      tmp(i,j) = Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!  do j = elec_alpha_num + 1, mo_num
+!    do i = 1, elec_alpha_num
+!      tmp(i,j) = -Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!
+!  ! F_mo x \eta_occ(beta) - \eta_occ x F_mo(beta)
+!  do j = 1, elec_beta_num
+!    do i = elec_beta_num + 1, mo_num
+!      tmp(i,j) += Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!  do j = elec_beta_num + 1, mo_num
+!    do i = 1, elec_beta_num
+!      tmp(i,j) -= Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!
+!  call mo_to_ao(tmp, size(tmp, 1), error_diis_Fmo, size(error_diis_Fmo, 1))
+!  
+!  deallocate(tmp)
+!
+!END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, error_diis_Fmo, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! error_diis_Fmo = [F_mo x \eta_occ - \eta_occ x F_mo]
+  !
+  ! \eta_occ is the matrix of occupation : \eta_occ = \eta_occ(alpha) + \eta_occ(beta)
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j
+  double precision, allocatable :: tmp(:,:)
+
+  provide Fock_matrix_mo
+
+  error_diis_Fmo = 0.d0
+
+  ! F_mo x \eta_occ(alpha) - \eta_occ x F_mo(alpha)
+  do j = 1, elec_alpha_num
+    do i = elec_alpha_num + 1, mo_num
+      error_diis_Fmo(i,j) += Fock_matrix_mo(i,j)
+    enddo
+  enddo
+  do j = elec_alpha_num + 1, mo_num
+    do i = 1, elec_alpha_num
+      error_diis_Fmo(i,j) -= Fock_matrix_mo(i,j)
+    enddo
+  enddo
+
+  ! F_mo x \eta_occ(beta) - \eta_occ x F_mo(beta)
+  do j = 1, elec_beta_num
+    do i = elec_beta_num + 1, mo_num
+      error_diis_Fmo(i,j) += Fock_matrix_mo(i,j)
+    enddo
+  enddo
+  do j = elec_beta_num + 1, mo_num
+    do i = 1, elec_beta_num
+      error_diis_Fmo(i,j) -= Fock_matrix_mo(i,j)
+    enddo
+  enddo
+
+  !allocate(tmp(ao_num,ao_num))
+  !call mo_to_ao(error_diis_Fmo, size(error_diis_Fmo, 1), tmp, size(tmp, 1))
+  !call ao_to_mo(tmp, size(tmp, 1), error_diis_Fmo, size(error_diis_Fmo, 1))
+  !deallocate(tmp)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO_a, (AO_num, AO_num)]
+
+  implicit none
+  double precision, allocatable  :: scratch(:,:)
+
+  allocate(scratch(AO_num, AO_num))
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                                                 &
+            , Fock_Matrix_AO_alpha, size(Fock_Matrix_AO_alpha, 1), SCF_density_matrix_ao_alpha, size(SCF_Density_Matrix_AO_alpha, 1) &
+            , 0.d0, scratch, size(scratch, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                     &
+            , scratch, size(scratch, 1), AO_Overlap, size(AO_Overlap, 1) &
+            , 0.d0, FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                             &
+            , AO_Overlap, size(AO_Overlap, 1), SCF_density_matrix_ao_alpha, size(SCF_density_matrix_ao_alpha, 1) & 
+            , 0.d0, scratch, size(scratch, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, -1.d0                                        &
+            , scratch, size(scratch, 1), Fock_Matrix_AO_alpha, size(Fock_Matrix_AO_alpha, 1) &
+            , 1.d0, FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO_b, (AO_num, AO_num)]
+
+  implicit none
+  double precision, allocatable  :: scratch(:,:)
+
+  allocate(scratch(AO_num, AO_num))
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                                             &
+            , Fock_Matrix_AO_beta, size(Fock_Matrix_AO_beta, 1), SCF_density_matrix_ao_beta, size(SCF_Density_Matrix_AO_beta, 1) &
+            , 0.d0, scratch, size(scratch, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                     &
+            , scratch, size(scratch, 1), AO_Overlap, size(AO_Overlap, 1) &
+            , 0.d0, FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                           &
+            , AO_Overlap, size(AO_Overlap, 1), SCF_density_matrix_ao_beta, size(SCF_density_matrix_ao_beta, 1) & 
+            , 0.d0, scratch, size(scratch, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, -1.d0                                      &
+            , scratch, size(scratch, 1), Fock_Matrix_AO_beta, size(Fock_Matrix_AO_beta, 1) &
+            , 1.d0, FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO_a, (mo_num, mo_num)]
+  implicit none
+  call ao_to_mo(FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1), FPS_SPF_Matrix_MO_a, size(FPS_SPF_Matrix_MO_a, 1))
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO_b, (mo_num, mo_num)]
+  implicit none
+  call ao_to_mo(FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1), FPS_SPF_Matrix_MO_b, size(FPS_SPF_Matrix_MO_b, 1))
+END_PROVIDER
+
+! ---
+
diff --git a/src/scf_utils/fock_matrix.irp.f b/src/scf_utils/fock_matrix.irp.f
index 539f1eb3..baefcd6c 100644
--- a/src/scf_utils/fock_matrix.irp.f
+++ b/src/scf_utils/fock_matrix.irp.f
@@ -267,3 +267,5 @@ BEGIN_PROVIDER [ double precision, SCF_energy ]
 
 END_PROVIDER
 
+! ---
+
diff --git a/src/scf_utils/rh_scf_simple.irp.f b/src/scf_utils/rh_scf_simple.irp.f
new file mode 100644
index 00000000..cd7ba66f
--- /dev/null
+++ b/src/scf_utils/rh_scf_simple.irp.f
@@ -0,0 +1,129 @@
+subroutine Roothaan_Hall_SCF_Simple
+
+BEGIN_DOC
+! Roothaan-Hall algorithm for SCF Hartree-Fock calculation
+END_DOC
+
+  implicit none
+
+  integer                        :: iteration_SCF, dim_DIIS
+  double precision               :: energy_SCF,energy_SCF_previous,Delta_energy_SCF
+  double precision               :: max_error_DIIS
+
+  integer                        :: i,j
+  logical, external              :: qp_stop
+  double precision, allocatable :: mo_coef_save(:,:)
+
+  PROVIDE ao_md5 mo_occ level_shift
+
+  allocate(mo_coef_save(ao_num,mo_num))
+
+
+  dim_DIIS     = 0
+  mo_coef_save = 0.d0
+
+  call write_time(6)
+
+  print*,'energy of the guess = ',SCF_energy
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'energy  ', 'energy diff  ',  'DIIS error  ', 'Level shift   '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+
+! Initialize energies and density matrices
+  energy_SCF_previous = SCF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  max_error_DIIS      = 1.d0
+
+  do while ( &
+    ( (max_error_DIIS > threshold_DIIS_nonzero) .or. &
+      (dabs(Delta_energy_SCF) > thresh_SCF) &
+    ) .and. (iteration_SCF < n_it_SCF_max) )
+
+    iteration_SCF += 1
+    if(frozen_orb_scf)then
+     call initialize_mo_coef_begin_iteration
+    endif
+
+    MO_coef = eigenvectors_Fock_matrix_MO
+    if(frozen_orb_scf)then
+     call reorder_core_orb
+     call initialize_mo_coef_begin_iteration
+    endif
+    TOUCH MO_coef
+
+!   Calculate error vectors
+    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
+
+!   SCF energy
+
+    energy_SCF = SCF_energy
+    Delta_energy_SCF = energy_SCF - energy_SCF_previous
+
+    !double precision :: level_shift_save
+    !level_shift_save = level_shift
+    !mo_coef_save(1:ao_num,1:mo_num) = mo_coef(1:ao_num,1:mo_num)
+    !do while (Delta_energy_SCF > 0.d0)
+    !  mo_coef(1:ao_num,1:mo_num) = mo_coef_save
+    !  if (level_shift <= .1d0) then
+    !    level_shift = 1.d0
+    !  else
+    !    level_shift = level_shift * 3.0d0
+    !  endif
+    !  TOUCH mo_coef level_shift
+    !  mo_coef(1:ao_num,1:mo_num) = eigenvectors_Fock_matrix_MO(1:ao_num,1:mo_num)
+    !  if(frozen_orb_scf)then
+    !    call reorder_core_orb
+    !    call initialize_mo_coef_begin_iteration
+    !  endif
+    !  TOUCH mo_coef
+    !  Delta_energy_SCF = SCF_energy - energy_SCF_previous
+    !  energy_SCF = SCF_energy
+    !  if (level_shift-level_shift_save > 40.d0) then
+    !    level_shift = level_shift_save * 4.d0
+    !    SOFT_TOUCH level_shift
+    !    exit
+    !  endif
+    !enddo
+    !level_shift = level_shift * 0.5d0
+    !SOFT_TOUCH level_shift
+
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS
+
+    if(Delta_energy_SCF < 0.d0) then
+      call save_mos()
+    endif
+    if(qp_stop()) exit
+
+  enddo
+
+  if (iteration_SCF < n_it_SCF_max) then
+    mo_label = 'Canonical'
+  endif
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,*)
+
+  if(.not.frozen_orb_scf)then
+   call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo,size(Fock_matrix_mo,1), &
+      size(Fock_matrix_mo,2),mo_label,1,.true.)
+   call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef,1), 1.d-10)
+   call orthonormalize_mos
+   call save_mos
+  endif
+
+  call write_double(6, energy_SCF, 'SCF energy')
+
+  call write_time(6)
+
+end
+
diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f
index 2c35fe0d..9ec61ced 100644
--- a/src/scf_utils/roothaan_hall_scf.irp.f
+++ b/src/scf_utils/roothaan_hall_scf.irp.f
@@ -29,11 +29,11 @@ END_DOC
 
   call write_time(6)
 
-  print*,'Energy of the guess = ',SCF_energy
+  print*,'energy of the guess = ',SCF_energy
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
     '====','================','================','================','================'
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
-    '  N ', 'Energy  ', 'Energy diff  ',  'DIIS error  ', 'Level shift   '
+    '  N ', 'energy  ', 'energy diff  ',  'DIIS error  ', 'Level shift   '
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
     '====','================','================','================','================'
 
@@ -66,7 +66,8 @@ END_DOC
 
     dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
 
-    if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-6) )  then
+    if( (scf_algorithm == 'DIIS') .and. (dabs(Delta_energy_SCF) > 1.d-6))  then
+    !if(scf_algorithm == 'DIIS') then
 
       ! Store Fock and error matrices at each iteration
       index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
@@ -85,10 +86,9 @@ END_DOC
           iteration_SCF,dim_DIIS                                       &
           )
 
-      Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
-      Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
+      Fock_matrix_AO_alpha = Fock_matrix_AO!*0.5d0
+      Fock_matrix_AO_beta  = Fock_matrix_AO!*0.5d0
       TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta
-
     endif
 
     MO_coef = eigenvectors_Fock_matrix_MO
@@ -99,18 +99,14 @@ END_DOC
 
     TOUCH MO_coef
 
-!   Calculate error vectors
-
-    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
-
 !   SCF energy
 
     energy_SCF = SCF_energy
-    Delta_Energy_SCF = energy_SCF - energy_SCF_previous
-    if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then
+    Delta_energy_SCF = energy_SCF - energy_SCF_previous
+    if ( (SCF_algorithm == 'DIIS').and.(Delta_energy_SCF > 0.d0) ) then
       Fock_matrix_AO(1:ao_num,1:ao_num) = Fock_matrix_DIIS (1:ao_num,1:ao_num,index_dim_DIIS)
-      Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
-      Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
+      Fock_matrix_AO_alpha = Fock_matrix_AO!*0.5d0
+      Fock_matrix_AO_beta  = Fock_matrix_AO!*0.5d0
       TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta
     endif
 
@@ -131,19 +127,24 @@ END_DOC
         call initialize_mo_coef_begin_iteration
       endif
       TOUCH mo_coef
-      Delta_Energy_SCF = SCF_energy - energy_SCF_previous
+      Delta_energy_SCF = SCF_energy - energy_SCF_previous
       energy_SCF = SCF_energy
       if (level_shift-level_shift_save > 40.d0) then
         level_shift = level_shift_save * 4.d0
         SOFT_TOUCH level_shift
         exit
       endif
+
       dim_DIIS=0
     enddo
+
     level_shift = level_shift * 0.5d0
     SOFT_TOUCH level_shift
     energy_SCF_previous = energy_SCF
 
+!   Calculate error vectors
+    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
+
 !   Print results at the end of each iteration
 
     write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
@@ -175,7 +176,7 @@ END_DOC
    call save_mos
   endif
 
-  call write_double(6, Energy_SCF, 'SCF energy')
+  call write_double(6, energy_SCF, 'SCF energy')
 
   call write_time(6)
 
diff --git a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
index 60201f5f..eb812401 100644
--- a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
+++ b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
@@ -14,21 +14,36 @@ program save_bitcpsileft_for_qmcchem
 
   e_ref = 0.d0
   iunit = 13
-  open(unit=iunit,file=trim(ezfio_filename)//'/simulation/e_ref',action='write')
-  call ezfio_has_fci_energy_pt2(exists)
-
-  if(.not.exists) then
-    call ezfio_has_fci_energy(exists)
+  open(unit=iunit, file=trim(ezfio_filename)//'/simulation/e_ref', action='write')
 
+    call ezfio_has_fci_energy_pt2(exists)
     if(.not.exists) then
-      call ezfio_has_tc_scf_bitc_energy(exists)
-      if(exists) then
-        call ezfio_get_tc_scf_bitc_energy(e_ref)
+
+      call ezfio_has_fci_energy(exists)
+      if(.not.exists) then
+
+        call ezfio_has_cisd_energy(exists)
+        if(.not.exists) then
+
+          call ezfio_has_tc_scf_bitc_energy(exists)
+          if(exists) then
+            call ezfio_get_tc_scf_bitc_energy(e_ref)
+          endif
+
+        else
+          call ezfio_get_cisd_energy(e_ref)
+        endif
+
+      else
+        call ezfio_get_fci_energy(e_ref)
       endif
+
+      else
+        call ezfio_get_fci_energy_pt2(e_ref)
     endif
 
-  endif
-  write(iunit,*) e_ref
+    write(iunit,*) e_ref
+
   close(iunit)
 
 end
diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f
new file mode 100644
index 00000000..291c52ef
--- /dev/null
+++ b/src/tc_bi_ortho/tc_som.irp.f
@@ -0,0 +1,70 @@
+! ---
+
+program tc_som
+
+  BEGIN_DOC
+  ! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  print *, ' starting ...'
+  print *, ' do not forget to do tc-scf first'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  PROVIDE mu_erf 
+  print *, ' mu = ', mu_erf
+  PROVIDE j1b_type
+  print *, ' j1b_type = ', j1b_type
+  print *, j1b_pen
+
+  read_wf = .true.
+  touch read_wf
+
+  call main()
+
+end
+
+! ---
+
+subroutine main()
+
+  implicit none
+  integer          :: i, i_HF, degree
+  double precision :: hmono_1, htwoe_1, hthree_1, htot_1
+  double precision :: hmono_2, htwoe_2, hthree_2, htot_2
+  double precision :: U_SOM
+
+  PROVIDE N_int N_det
+
+  do i = 1, N_det
+    call get_excitation_degree(HF_bitmask, psi_det(1,1,i), degree, N_int)
+    if(degree == 0) then
+      i_HF = i
+      exit
+    endif
+  enddo
+  print *, ' HF determinants:', i_HF
+  print *, '          N_det :', N_det
+
+  U_SOM = 0.d0 
+  do i = 1, N_det
+    if(i == i_HF) cycle
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1)
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2)
+    U_SOM += htot_1 * htot_2
+  enddo
+  U_SOM = 0.5d0 * U_SOM
+  print *, ' U_SOM = ', U_SOM
+  
+  return
+end subroutine main
+
+! ---
+
diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f
index a49a5958..ebd43a7a 100644
--- a/src/tc_bi_ortho/test_tc_fock.irp.f
+++ b/src/tc_bi_ortho/test_tc_fock.irp.f
@@ -15,7 +15,8 @@ program test_tc_fock
   !call routine_2
 !  call routine_3()
 
- call test_3e
+! call test_3e
+ call routine_tot
 end
 
 ! ---
@@ -32,7 +33,7 @@ subroutine test_3e
  print*,'htot  = ',htot
  print*,''
  print*,''
- print*,'TC_one= ',TC_HF_one_electron_energy
+ print*,'TC_one= ',tc_hf_one_e_energy
  print*,'TC_two= ',TC_HF_two_e_energy
  print*,'TC_3e = ',diag_three_elem_hf
  print*,'TC_tot= ',TC_HF_energy
@@ -84,8 +85,8 @@ subroutine routine_3()
         print*, i, a
         stop
        endif
-       !print*, ' excited det'
-       !call debug_det(det_i, N_int)
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
  
        call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
        if(dabs(hthree).lt.1.d-10)cycle
@@ -116,3 +117,78 @@ subroutine routine_3()
 end subroutine routine_3
 
 ! ---
+subroutine routine_tot()
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: i, a, i_ok, s1,other_spin(2)
+  double precision               :: hmono, htwoe, hthree, htilde_ij
+  double precision               :: err_ai, err_tot, ref, new
+  integer(bit_kind), allocatable :: det_i(:,:)
+
+  allocate(det_i(N_int,2))
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  err_tot = 0.d0
+ 
+!  do s1 = 1, 2
+   s1 = 2
+   det_i = ref_bitmask
+   call debug_det(det_i, N_int)
+   print*, ' HF det'
+   call debug_det(det_i, N_int)
+ 
+!   do i = 1, elec_num_tab(s1)
+!     do a = elec_num_tab(s1)+1, mo_num ! virtual 
+   do i = 1, elec_beta_num
+     do a = elec_beta_num+1, elec_alpha_num! virtual 
+!   do i = elec_beta_num+1, elec_alpha_num
+!     do a = elec_alpha_num+1, mo_num! virtual 
+       print*,i,a 
+ 
+       det_i = ref_bitmask
+       call do_single_excitation(det_i, i, a, s1, i_ok)
+       if(i_ok == -1) then
+        print*, 'PB !!'
+        print*, i, a
+        stop
+       endif
+ 
+       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       print*,htilde_ij
+       if(dabs(htilde_ij).lt.1.d-10)cycle
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
+
+       if(s1 == 1)then
+        new = Fock_matrix_tc_mo_alpha(a,i)
+       else
+        new = Fock_matrix_tc_mo_beta(a,i)
+       endif
+       ref = htilde_ij
+!       if(s1 == 1)then
+!        new = fock_a_tot_3e_bi_orth(a,i)
+!       else if(s1 == 2)then
+!        new = fock_b_tot_3e_bi_orth(a,i)
+!       endif
+       err_ai = dabs(dabs(ref) - dabs(new))
+       if(err_ai .gt. 1d-7) then
+         print*,'s1 = ',s1
+         print*, ' warning on', i, a
+         print*, ref,new,err_ai
+       endif
+       print*, ref,new,err_ai
+       err_tot += err_ai
+ 
+       write(22, *) htilde_ij
+     enddo
+   enddo
+!  enddo
+
+  print *, ' err_tot = ', err_tot
+
+  deallocate(det_i)
+
+end subroutine routine_3
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
index e506d1cc..eb8fa8be 100644
--- a/src/tc_keywords/EZFIO.cfg
+++ b/src/tc_keywords/EZFIO.cfg
@@ -86,7 +86,7 @@ default: False
 type: Threshold
 doc: Threshold on the convergence of the Hartree Fock energy.
 interface: ezfio,provider,ocaml
-default: 1.e-10
+default: 1.e-12
 
 [n_it_tcscf_max]
 type: Strictly_positive_int
@@ -134,5 +134,53 @@ default: False
 type: integer
 doc: nb of Gaussians used to fit Jastrow fcts
 interface: ezfio,provider,ocaml
-default: 6
+default: 20
+
+[max_dim_diis_tcscf]
+type: integer
+doc: Maximum size of the DIIS extrapolation procedure
+interface: ezfio,provider,ocaml
+default: 15
+
+[threshold_diis_tcscf]
+type: Threshold
+doc: Threshold on the convergence of the DIIS error vector during a TCSCF calculation. If 0. is chosen, the square root of thresh_tcscf will be used.
+interface: ezfio,provider,ocaml
+default: 0.
+
+[level_shift_tcscf]
+type: Positive_float
+doc: Energy shift on the virtual MOs to improve TCSCF convergence
+interface: ezfio,provider,ocaml
+default: 0.
+
+[tcscf_algorithm]
+type: character*(32)
+doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS]
+interface: ezfio,provider,ocaml
+default: Simple 
+
+[im_thresh_tcscf]
+type: Threshold
+doc: Thresholds on the Imag part of energy
+interface: ezfio,provider,ocaml
+default: 1.e-7
+
+[test_cycle_tc]
+type: logical
+doc: If |true|, the integrals of the three-body jastrow are computed with cycles
+interface: ezfio,provider,ocaml
+default: False
+
+[thresh_biorthog_diag]
+type: Threshold
+doc: Threshold to determine if diagonal elements of the bi-orthogonal condition L.T x R are close enouph to 1
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_biorthog_nondiag]
+type: Threshold
+doc: Threshold to determine if non-diagonal elements of L.T x R are close enouph to 0
+interface: ezfio,provider,ocaml
+default: 1.e-6
 
diff --git a/src/tc_scf/diago_bi_ort_tcfock.irp.f b/src/tc_scf/diago_bi_ort_tcfock.irp.f
index 856b7382..726169d9 100644
--- a/src/tc_scf/diago_bi_ort_tcfock.irp.f
+++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f
@@ -1,3 +1,5 @@
+! ---
+
  BEGIN_PROVIDER [ double precision, fock_tc_reigvec_mo, (mo_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, fock_tc_leigvec_mo, (mo_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, eigval_fock_tc_mo, (mo_num)]
@@ -9,32 +11,46 @@
 
   implicit none
   integer                       :: n_real_tc 
-  integer                       :: i, k, l
+  integer                       :: i, j, k, l
   double precision              :: accu_d, accu_nd, accu_tmp
-  double precision              :: thr_d, thr_nd
   double precision              :: norm
   double precision, allocatable :: eigval_right_tmp(:)
+  double precision, allocatable :: F_tmp(:,:)
 
-  thr_d  = 1d-6
-  thr_nd = 1d-6
-
-  allocate( eigval_right_tmp(mo_num) )
+  allocate( eigval_right_tmp(mo_num), F_tmp(mo_num,mo_num) )
 
   PROVIDE Fock_matrix_tc_mo_tot
 
-  call non_hrmt_bieig( mo_num, Fock_matrix_tc_mo_tot, thr_d, thr_nd &
-                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo       & 
+  do i = 1, mo_num
+    do j = 1, mo_num
+      F_tmp(j,i) = Fock_matrix_tc_mo_tot(j,i)
+    enddo
+  enddo
+  ! insert level shift here
+  do i = elec_beta_num+1, elec_alpha_num
+    F_tmp(i,i) += 0.5d0 * level_shift_tcscf
+  enddo
+  do i = elec_alpha_num+1, mo_num
+    F_tmp(i,i) += level_shift_tcscf
+  enddo
+
+  call non_hrmt_bieig( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag &
+                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo                       & 
                      , n_real_tc, eigval_right_tmp )
+
   !if(max_ov_tc_scf)then
-  ! call non_hrmt_fock_mat( mo_num, Fock_matrix_tc_mo_tot, thr_d, thr_nd &
-  !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
+  ! call non_hrmt_fock_mat( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag &
+  !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo                          & 
   !                    , n_real_tc, eigval_right_tmp )
   !else 
-  ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, Fock_matrix_tc_mo_tot &
+  ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, F_tmp     &
   !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
   !                    , n_real_tc, eigval_right_tmp )
   !endif
 
+  deallocate(F_tmp)
+
+
 !  if(n_real_tc .ne. mo_num)then
 !   print*,'n_real_tc ne mo_num ! ',n_real_tc
 !   stop
@@ -42,9 +58,12 @@
 
   eigval_fock_tc_mo = eigval_right_tmp
 !  print*,'Eigenvalues of Fock_matrix_tc_mo_tot'
-!  do i = 1, mo_num
+!  do i = 1, elec_alpha_num
 !    print*, i, eigval_fock_tc_mo(i)
 !  enddo
+!  do i = elec_alpha_num+1, mo_num 
+!    print*, i, eigval_fock_tc_mo(i) - level_shift_tcscf
+!  enddo
 !  deallocate( eigval_right_tmp )
 
   ! L.T x R 
@@ -53,6 +72,8 @@
             , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
             , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
 
+  ! ---
+
   accu_d  = 0.d0
   accu_nd = 0.d0
   do i = 1, mo_num
@@ -63,45 +84,80 @@
       else
         accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
         accu_nd += accu_tmp * accu_tmp
-        if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thr_nd)then
+        if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then
          print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
         endif
       endif
     enddo 
   enddo
-  accu_nd = dsqrt(accu_nd)/accu_d
-
-  if(accu_nd .gt. thr_nd) then
+  accu_nd = dsqrt(accu_nd) / accu_d
+  if(accu_nd .gt. thresh_biorthog_nondiag) then
     print *, ' bi-orthog failed'
-    print*,'accu_nd MO = ', accu_nd, thr_nd
-    print*,'overlap_fock_tc_eigvec_mo = '
+    print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag
+    print *, ' overlap_fock_tc_eigvec_mo = '
     do i = 1, mo_num
       write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:)
     enddo
-   stop
+    stop
   endif
 
-  if( dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thr_d ) then
-    print *, 'mo_num     = ', mo_num 
-    print *, 'accu_d  MO = ', accu_d, thr_d
-    print *, 'normalizing vectors ...'
+  ! ---
+
+  if(dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thresh_biorthog_diag) then
+
+    print *, ' mo_num     = ', mo_num 
+    print *, ' accu_d  MO = ', accu_d, thresh_biorthog_diag
+    print *, ' normalizing vectors ...'
     do i = 1, mo_num
       norm = dsqrt(dabs(overlap_fock_tc_eigvec_mo(i,i)))
-      if(norm .gt. thr_d) then
+      if(norm .gt. thresh_biorthog_diag) then
         do k = 1, mo_num
           fock_tc_reigvec_mo(k,i) *= 1.d0/norm
           fock_tc_leigvec_mo(k,i) *= 1.d0/norm
         enddo
       endif
     enddo
+
     call dgemm( "T", "N", mo_num, mo_num, mo_num, 1.d0          &
               , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) &
               , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
               , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
+
+    accu_d  = 0.d0
+    accu_nd = 0.d0
+    do i = 1, mo_num
+      do k = 1, mo_num
+        if(i==k) then
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_d  += dabs(accu_tmp)
+        else
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_nd += accu_tmp * accu_tmp
+          if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then
+           print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
+          endif
+        endif
+      enddo 
+    enddo
+    accu_nd = dsqrt(accu_nd) / accu_d
+    if(accu_nd .gt. thresh_biorthog_diag) then
+      print *, ' bi-orthog failed'
+      print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag
+      print *, ' overlap_fock_tc_eigvec_mo = '
+      do i = 1, mo_num
+        write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:)
+      enddo
+      stop
+    endif
+
   endif
  
+  ! ---
+
 END_PROVIDER 
 
+! ---
+
  BEGIN_PROVIDER [ double precision, fock_tc_reigvec_ao, (ao_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, fock_tc_leigvec_ao, (ao_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, overlap_fock_tc_eigvec_ao, (mo_num, mo_num) ]
@@ -117,6 +173,7 @@ END_PROVIDER
   double precision              :: accu, accu_d
   double precision, allocatable :: tmp(:,:)
 
+  PROVIDE mo_l_coef mo_r_coef
 
 !  ! MO_R x R
    call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f
new file mode 100644
index 00000000..ff1077f5
--- /dev/null
+++ b/src/tc_scf/diis_tcscf.irp.f
@@ -0,0 +1,186 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero_TCSCF ]
+ 
+  implicit none
+ 
+  if(threshold_DIIS_TCSCF == 0.d0) then
+    threshold_DIIS_nonzero_TCSCF = dsqrt(thresh_tcscf)
+  else
+    threshold_DIIS_nonzero_TCSCF = threshold_DIIS_TCSCF
+  endif
+  ASSERT(threshold_DIIS_nonzero_TCSCF >= 0.d0)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, Q_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_alpha = mo_r_coef x eta_occ_alpha x mo_l_coef.T
+  !
+  ! [Q_alpha]_ij = \sum_{k=1}^{elec_alpha_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  !
+  END_DOC
+
+  implicit none
+
+  Q_alpha = 0.d0
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0               &
+            , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, Q_alpha, size(Q_alpha, 1) )
+
+END_PROVIDER
+  
+! ---
+    
+BEGIN_PROVIDER [ double precision, Q_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_beta = mo_r_coef x eta_occ_beta x mo_l_coef.T
+  !
+  ! [Q_beta]_ij = \sum_{k=1}^{elec_beta_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  !
+  END_DOC
+
+  implicit none
+
+  Q_beta = 0.d0
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
+            , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, Q_beta, size(Q_beta, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Q_matrix, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_matrix = 2 mo_r_coef x eta_occ x mo_l_coef.T
+  ! 
+  ! with: 
+  !                        | 1   if i = j = 1, ..., nb of occ orbitals
+  !        [eta_occ]_ij =  |     
+  !                        | 0   otherwise
+  !
+  ! the diis error is defines as:
+  !                         e = F_ao x Q x ao_overlap - ao_overlap x Q x F_ao
+  ! with: 
+  !       mo_l_coef.T x ao_overlap x mo_r_coef = I
+  !       F_mo = mo_l_coef.T x F_ao x mo_r_coef
+  !       F_ao = (ao_overlap x mo_r_coef) x F_mo x (ao_overlap x mo_l_coef).T
+  !
+  ! ==> e = 2 ao_overlap x mo_r_coef x [ F_mo x eta_occ - eta_occ x F_mo ] x (ao_overlap x mo_l_coef).T
+  !
+  !      at convergence:
+  !                                      F_mo x eta_occ - eta_occ x F_mo = 0
+  !                                  ==> [F_mo]_ij ([eta_occ]_ii - [eta_occ]_jj) = 0  
+  !                                  ==> [F_mo]_ia = [F_mo]_ai = 0 where: i = occ and a = vir
+  !                                  ==> Brillouin conditions
+  !
+  END_DOC
+
+  implicit none
+
+  if(elec_alpha_num == elec_beta_num) then
+    Q_matrix = Q_alpha + Q_alpha
+  else
+    Q_matrix = Q_alpha + Q_beta
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)]
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  allocate(tmp(ao_num,ao_num))
+
+  ! F x Q
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                                             &
+            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), Q_matrix, size(Q_matrix, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! F x Q x S
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0             &
+            , tmp, size(tmp, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+
+  ! S x Q
+  tmp = 0.d0
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                       &
+            , ao_overlap, size(ao_overlap, 1), Q_matrix, size(Q_matrix, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! F x Q x S - S x Q x F
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, -1.d0                                  &
+            , tmp, size(tmp, 1), Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+            , 1.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+
+  deallocate(tmp)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)]
+
+  implicit none
+
+  call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) &
+                        , FQS_SQF_mo, size(FQS_SQF_mo, 1) )
+
+END_PROVIDER
+
+! ---
+
+! BEGIN_PROVIDER [ double precision, eigenval_Fock_tc_ao, (ao_num) ]
+!&BEGIN_PROVIDER [ double precision, eigenvec_Fock_tc_ao, (ao_num,ao_num) ]
+!
+!  BEGIN_DOC
+!  !
+!  ! Eigenvalues and eigenvectors of the Fock matrix over the ao basis
+!  !
+!  ! F' = X.T x F x X   where X = ao_overlap^(-1/2)
+!  ! 
+!  ! F'   x Cr' = Cr' x E ==> F   Cr = Cr x E with Cr = X x Cr'
+!  ! F'.T x Cl' = Cl' x E ==> F.T Cl = Cl x E with Cl = X x Cl'
+!  !
+!  END_DOC
+!
+!  implicit none
+!  double precision, allocatable :: tmp1(:,:), tmp2(:,:)
+!
+!  ! ---
+!  ! Fock matrix in orthogonal basis: F' = X.T x F x X
+!
+!  allocate(tmp1(ao_num,ao_num))
+!  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                                                 &
+!            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), S_half_inv, size(S_half_inv, 1) &
+!            , 0.d0, tmp1, size(tmp1, 1) )
+!
+!  allocate(tmp2(ao_num,ao_num))
+!  call dgemm( 'T', 'N', ao_num, ao_num, ao_num, 1.d0               &
+!            , S_half_inv, size(S_half_inv, 1), tmp1, size(tmp1, 1) &
+!            , 0.d0, tmp2, size(tmp2, 1) )
+!
+!  ! ---
+!
+!  ! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues
+!  ! TODO
+!
+!  ! Back-transform eigenvectors: C =X.C'
+!
+!END_PROVIDER
+
+! ---
+
+~                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
new file mode 100644
index 00000000..fccfd837
--- /dev/null
+++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
@@ -0,0 +1,405 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: a, b, i, j
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_cs ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_mo_cs = 0.d0
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+   
+      do j = 1, elec_beta_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_cs(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij &
+                                            +        I_bij_ija &
+                                            +        I_bij_jai &
+                                            - 2.d0 * I_bij_aji &
+                                            - 2.d0 * I_bij_iaj &
+                                            - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_a ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+
+  fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj &
+                                           - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           - 2.d0 * I_bij_iaj &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           + I_bij_ija &
+                                           + I_bij_jai &
+                                           - I_bij_aji &
+                                           - I_bij_iaj &
+                                           - I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_b ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+
+  fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           - I_bij_aji )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_b =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! Equations (B6) and (B7)
+  !
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
+
+  print *, ' PROVIDING fock_3e_uhf_ao_a ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_a = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a)
+
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+
+ !$OMP DO
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_a * dm_dk_a * i_mugd_eknu &
+                                      + dm_ge_a * dm_dk_a * i_mugd_knue &
+                                      - dm_ge_a * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_a * i_mugd_kenu &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! Equations (B6) and (B7)
+  !
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
+
+  print *, ' PROVIDING fock_3e_uhf_ao_b ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_b = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b)
+
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+
+ !$OMP DO
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_b * dm_dk_b * i_mugd_eknu &
+                                      + dm_ge_b * dm_dk_b * i_mugd_knue &
+                                      - dm_ge_b * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_b * i_mugd_kenu &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f
index 6b1c1d77..7403049c 100644
--- a/src/tc_scf/fock_tc.irp.f
+++ b/src/tc_scf/fock_tc.irp.f
@@ -1,63 +1,147 @@
 
 ! ---
 
- BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)]
-&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)]
- BEGIN_DOC
-! two_e_tc_non_hermit_integral_alpha(k,i) = <k| F^tc_alpha |i> 
-!
-! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
- END_DOC
+ BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_beta , (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! two_e_tc_non_hermit_integral_seq_alpha(k,i) = <k| F^tc_alpha |i> 
+  !
+  ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
+  !
+  END_DOC
+
   implicit none
   integer          :: i, j, k, l
   double precision :: density, density_a, density_b
+  double precision :: t0, t1
 
-  two_e_tc_non_hermit_integral_alpha = 0.d0
-  two_e_tc_non_hermit_integral_beta  = 0.d0
+  !print*, ' providing two_e_tc_non_hermit_integral_seq ...'
+  !call wall_time(t0)
+
+  two_e_tc_non_hermit_integral_seq_alpha = 0.d0
+  two_e_tc_non_hermit_integral_seq_beta  = 0.d0
 
-  !! TODO :: parallelization properly done
   do i = 1, ao_num
     do k = 1, ao_num
-!!$OMP PARALLEL                  &
-!!$OMP DEFAULT (NONE)            &
-!!$OMP PRIVATE (j,l,density_a,density_b,density) & 
-!!$OMP SHARED (i,k,ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,ao_non_hermit_term_chemist) & 
-!!$OMP SHARED (two_e_tc_non_hermit_integral_alpha,two_e_tc_non_hermit_integral_beta)
-!!$OMP DO SCHEDULE (dynamic)
       do j = 1, ao_num
         do l = 1, ao_num
 
           density_a = TCSCF_density_matrix_ao_alpha(l,j)
           density_b = TCSCF_density_matrix_ao_beta (l,j)
-          density   = density_a + density_b                      
+          density   = density_a + density_b
+
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho_a(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !!                                         rho_b(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
 
           !                                         rho(l,j)   *      < k l| T | i j>
-          two_e_tc_non_hermit_integral_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
           !                                         rho(l,j)   *      < k l| T | i j>
-          two_e_tc_non_hermit_integral_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
-          !                                         rho_a(l,j) *      < l k| T | i j>
-          two_e_tc_non_hermit_integral_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
-          !                                         rho_b(l,j) *      < l k| T | i j>
-          two_e_tc_non_hermit_integral_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
+          two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
+          !                                         rho_a(l,j) *      < k l| T | j i>
+          two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !                                         rho_b(l,j) *      < k l| T | j i>
+          two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
 
         enddo
       enddo
-!!$OMP END DO
-!!$OMP END PARALLEL
     enddo
   enddo
 
+  !call wall_time(t1)
+  !print*, ' wall time for two_e_tc_non_hermit_integral_seq after = ', t1 - t0
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! two_e_tc_non_hermit_integral_alpha(k,i) = <k| F^tc_alpha |i> 
+  !
+  ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l
+  double precision              :: density, density_a, density_b, I_coul, I_kjli
+  double precision              :: t0, t1
+  double precision, allocatable :: tmp_a(:,:), tmp_b(:,:)
+
+  !print*, ' providing two_e_tc_non_hermit_integral ...'
+  !call wall_time(t0)
+
+  two_e_tc_non_hermit_integral_alpha = 0.d0
+  two_e_tc_non_hermit_integral_beta  = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                        &
+ !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli)              &
+ !$OMP SHARED  (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, &
+ !$OMP         two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta)
+
+  allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num))
+  tmp_a = 0.d0
+  tmp_b = 0.d0
+
+ !$OMP DO
+  do j = 1, ao_num
+    do l = 1, ao_num
+      density_a = TCSCF_density_matrix_ao_alpha(l,j)
+      density_b = TCSCF_density_matrix_ao_beta (l,j)
+      density   = density_a + density_b                      
+      do i = 1, ao_num
+        do k = 1, ao_num
+
+          I_coul = density * ao_two_e_tc_tot(k,i,l,j)
+          I_kjli = ao_two_e_tc_tot(k,j,l,i)
+
+          tmp_a(k,i) += I_coul - density_a * I_kjli
+          tmp_b(k,i) += I_coul - density_b * I_kjli
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do i = 1, ao_num
+    do j = 1, ao_num
+      two_e_tc_non_hermit_integral_alpha(j,i) += tmp_a(j,i)
+      two_e_tc_non_hermit_integral_beta (j,i) += tmp_b(j,i)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(tmp_a, tmp_b)
+ !$OMP END PARALLEL
+
+  !call wall_time(t1)
+  !print*, ' wall time for two_e_tc_non_hermit_integral after = ', t1 - t0
+
 END_PROVIDER 
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_alpha, (ao_num, ao_num)]
-  implicit none
+
   BEGIN_DOC
- ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the AO basis
+  ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the AO basis
   END_DOC
-  Fock_matrix_tc_ao_alpha =  ao_one_e_integrals_tc_tot &
-                          + two_e_tc_non_hermit_integral_alpha 
+
+  implicit none
+
+  Fock_matrix_tc_ao_alpha =  ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha 
 
 END_PROVIDER 
 
@@ -66,102 +150,149 @@ END_PROVIDER
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_beta, (ao_num, ao_num)]
 
   BEGIN_DOC
- ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
+  ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
   END_DOC
+
   implicit none
 
-  Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot &
-                         + two_e_tc_non_hermit_integral_beta 
+  Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_beta 
 
 END_PROVIDER 
-! ---
-
-!BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ]
-!  implicit none
-!  BEGIN_DOC
-! ! Total alpha+beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
-!  END_DOC
-!  Fock_matrix_tc_ao_tot = 0.5d0 * (Fock_matrix_tc_ao_alpha + Fock_matrix_tc_ao_beta)
-!END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]
-  implicit none
+
   BEGIN_DOC
- ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis
+  ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis
   END_DOC
-  if(bi_ortho)then
-   call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
-                         , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
-   if(three_body_h_tc)then
-    Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
-   endif
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  if(bi_ortho) then
+
+    !allocate(tmp(ao_num,ao_num))
+    !tmp = Fock_matrix_tc_ao_alpha
+    !if(three_body_h_tc) then
+    !  tmp += fock_3e_uhf_ao_a
+    !endif
+    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1))
+    !deallocate(tmp)
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+                          , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+    if(three_body_h_tc) then
+      !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
+      Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a
+    endif
+
   else
-   call ao_to_mo(  Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+    call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
                  , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+
   endif
+
 END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ]
-  implicit none
+
   BEGIN_DOC
- ! Total beta  TC Fock matrix : h_c + Two-e^TC terms on the MO basis
+  ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the MO basis
   END_DOC
-  if(bi_ortho)then
-   call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
-                         , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
-   if(three_body_h_tc)then
-    Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
-   endif
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  if(bi_ortho) then
+
+    !allocate(tmp(ao_num,ao_num))
+    !tmp = Fock_matrix_tc_ao_beta
+    !if(three_body_h_tc) then
+    !  tmp += fock_3e_uhf_ao_b
+    !endif
+    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1))
+    !deallocate(tmp)
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+                          , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+    if(three_body_h_tc) then
+      !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
+      Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b
+    endif
+
   else
-   call ao_to_mo(  Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+
+    call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
                  , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+
   endif
+
 END_PROVIDER
 
-! ---
-
-!BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num, mo_num)]
-!  implicit none
-!  BEGIN_DOC
-! ! Total alpha+beta  TC Fock matrix : h_c + Two-e^TC terms on the MO basis
-!  END_DOC
-!  Fock_matrix_tc_mo_tot = 0.5d0 * (Fock_matrix_tc_mo_alpha + Fock_matrix_tc_mo_beta)
-!  if(three_body_h_tc) then
-!    Fock_matrix_tc_mo_tot += fock_3_mat
-!  endif
-!  !call restore_symmetry(mo_num, mo_num, Fock_matrix_tc_mo_tot, mo_num, 1.d-10)
-!END_PROVIDER 
-
 ! ---
 
  BEGIN_PROVIDER [ double precision, grad_non_hermit_left]
 &BEGIN_PROVIDER [ double precision, grad_non_hermit_right]
 &BEGIN_PROVIDER [ double precision, grad_non_hermit]
- implicit none
+
+  implicit none
   integer :: i, k
-  grad_non_hermit_left = 0.d0
+
+  grad_non_hermit_left  = 0.d0
   grad_non_hermit_right = 0.d0
+
   do i = 1, elec_beta_num ! doc --> SOMO
     do k = elec_beta_num+1, elec_alpha_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+      !grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
+      !grad_non_hermit_left  += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i)
+      !grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k)
     enddo
   enddo
+
   do i = 1, elec_beta_num ! doc --> virt 
     do k = elec_alpha_num+1, mo_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+      !grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i)
+      grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k)
     enddo
   enddo
+
   do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt 
     do k = elec_alpha_num+1, mo_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+      !grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i)
+      grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k)
     enddo
   enddo
- grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right
+
+  !grad_non_hermit = dsqrt(grad_non_hermit_left) + dsqrt(grad_non_hermit_right)
+  grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right
+
 END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ]
+
+  implicit none
+
+  call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) &
+                        , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) )
+
+END_PROVIDER
+
+! ---
+
+
diff --git a/src/tc_scf/fock_tc_mo_tot.irp.f b/src/tc_scf/fock_tc_mo_tot.irp.f
index a99c7698..2f33cd17 100644
--- a/src/tc_scf/fock_tc_mo_tot.irp.f
+++ b/src/tc_scf/fock_tc_mo_tot.irp.f
@@ -73,6 +73,29 @@
              + (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
        enddo
      enddo
+     if(three_body_h_tc)then
+      ! C-O
+      do j = 1, elec_beta_num
+       do i = elec_beta_num+1, elec_alpha_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! C-V
+      do j = 1, elec_beta_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! O-V
+      do j = elec_beta_num+1, elec_alpha_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+     endif
 
    endif
 
diff --git a/src/tc_scf/fock_three.irp.f b/src/tc_scf/fock_three.irp.f
index f73a5049..424eeffd 100644
--- a/src/tc_scf/fock_three.irp.f
+++ b/src/tc_scf/fock_three.irp.f
@@ -70,52 +70,76 @@ subroutine give_fock_ia_three_e_total(i,a,contrib)
 
 end
 
+! ---
+
 BEGIN_PROVIDER [double precision, diag_three_elem_hf]
- implicit none
- integer :: i,j,k,ipoint,mm
- double precision :: contrib,weight,four_third,one_third,two_third,exchange_int_231
- print*,'providing diag_three_elem_hf'
- if(.not.three_body_h_tc)then
-  diag_three_elem_hf = 0.d0
- else
-  if(.not.bi_ortho)then
-   one_third = 1.d0/3.d0
-   two_third = 2.d0/3.d0
-   four_third = 4.d0/3.d0
-   diag_three_elem_hf = 0.d0
-   do i = 1, elec_beta_num
-    do j = 1, elec_beta_num
-     do k = 1, elec_beta_num
-      call  give_integrals_3_body(k,j,i,j,i,k,exchange_int_231)   
-      diag_three_elem_hf += two_third * exchange_int_231
-     enddo
-    enddo
-   enddo
-   do mm = 1, 3
-    do ipoint = 1, n_points_final_grid
-     weight = final_weight_at_r_vector(ipoint)                                                                          
-     contrib   = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm)  & 
-                -2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm)                                 & 
-                -1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm)
-     contrib  *= four_third
-     contrib  += -two_third  * fock_3_rho_beta(ipoint)     * fock_3_w_kl_w_kl(ipoint,mm) & 
-                - four_third * fock_3_w_kk_sum(ipoint,mm)  * fock_3_w_kl_mo_k_mo_l(ipoint,mm)
-     diag_three_elem_hf += weight * contrib
-    enddo
-   enddo
-   diag_three_elem_hf = - diag_three_elem_hf
+
+  implicit none
+  integer          :: i, j, k, ipoint, mm
+  double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231
+  double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' providing diag_three_elem_hf'
+
+  if(.not. three_body_h_tc) then
+
+    diag_three_elem_hf = 0.d0
+
   else
-   double precision :: integral_aaa,hthree, integral_aab,integral_abb,integral_bbb
-   provide mo_l_coef mo_r_coef
-   call give_aaa_contrib(integral_aaa)
-   call give_aab_contrib(integral_aab)
-   call give_abb_contrib(integral_abb)
-   call give_bbb_contrib(integral_bbb)
-   diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+
+    if(.not. bi_ortho) then
+
+      ! ---
+
+      one_third  = 1.d0/3.d0
+      two_third  = 2.d0/3.d0
+      four_third = 4.d0/3.d0
+      diag_three_elem_hf = 0.d0
+      do i = 1, elec_beta_num
+        do j = 1, elec_beta_num
+          do k = 1, elec_beta_num
+            call give_integrals_3_body(k, j, i, j, i, k,exchange_int_231)   
+            diag_three_elem_hf += two_third * exchange_int_231
+          enddo
+        enddo
+      enddo
+      do mm = 1, 3
+        do ipoint = 1, n_points_final_grid
+          weight  = final_weight_at_r_vector(ipoint)                                                                          
+          contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & 
+                  - 2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm)                     & 
+                  - 1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm)
+          contrib *= four_third
+          contrib += -two_third  * fock_3_rho_beta(ipoint)    * fock_3_w_kl_w_kl(ipoint,mm) & 
+                     -four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm)
+          diag_three_elem_hf += weight * contrib
+       enddo
+      enddo
+
+      diag_three_elem_hf = - diag_three_elem_hf
+
+      ! ---
+
+    else
+
+      provide mo_l_coef mo_r_coef
+      call give_aaa_contrib(integral_aaa)
+      call give_aab_contrib(integral_aab)
+      call give_abb_contrib(integral_abb)
+      call give_bbb_contrib(integral_bbb)
+      diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+!      print*,'integral_aaa + integral_aab + integral_abb + integral_bbb'
+!      print*,integral_aaa , integral_aab , integral_abb , integral_bbb
+
+    endif
+
   endif
- endif
+
 END_PROVIDER 
 
+! ---
 
 BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh, (mo_num, mo_num)]
  implicit none 
diff --git a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
index b0345957..f73171a3 100644
--- a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
+++ b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
@@ -1,202 +1,286 @@
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo,contrib
- fock_a_tot_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   fock_a_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth(a,i)
-   fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp1_bi_ortho(a,i)
-   fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp2_bi_ortho(a,i)
+
+  implicit none
+  integer :: i, a
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_a_tot_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      fock_a_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth  (a,i)
+      fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp1_bi_ortho(a,i)
+      fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp2_bi_ortho(a,i)
+    enddo
   enddo
- enddo
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo,contrib
- fock_b_tot_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   fock_b_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth(a,i)
-   fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp2_bi_ortho(a,i)
-   fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp1_bi_ortho(a,i)
+
+  implicit none
+  integer :: i, a
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tot_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      fock_b_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth  (a,i)
+      fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp2_bi_ortho(a,i)
+      fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp1_bi_ortho(a,i)
+    enddo
   enddo
- enddo
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- double precision :: new
- fock_cs_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = 1, elec_beta_num
-    do k = 1, elec_beta_num
-!      call contrib_3e_sss(a,i,j,k,contrib_sss)
-!      call contrib_3e_soo(a,i,j,k,contrib_soo)
-!      call contrib_3e_sos(a,i,j,k,contrib_sos)
-!      contrib = 0.5d0 * (contrib_sss + contrib_soo) + contrib_sos
 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
-      call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
- ! negative terms :: exchange contrib
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
-      new  = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) & 
-             -1.5d0 * exch_13_int - exch_23_int
-      fock_cs_3e_bi_orth(a,i) += new
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_cs_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+    
+      do j = 1, elec_beta_num
+        do k = 1, elec_beta_num
+
+          !!call contrib_3e_sss(a,i,j,k,contrib_sss)
+          !!call contrib_3e_soo(a,i,j,k,contrib_soo)
+          !!call contrib_3e_sos(a,i,j,k,contrib_sos)
+          !!contrib = 0.5d0 * (contrib_sss + contrib_soo) + contrib_sos
+ 
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+
+          ! negative terms :: exchange contrib
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+
+          new = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) -1.5d0 * exch_13_int - exch_23_int
+
+          fock_cs_3e_bi_orth(a,i) += new
+        enddo
+      enddo
     enddo
-   enddo
-  
   enddo
- enddo
- fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth
+ 
+  fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth
 
 END_PROVIDER 
 
+! ---
 
 BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- double precision :: new
- fock_a_tmp1_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = elec_beta_num + 1, elec_alpha_num 
-    do k = 1, elec_beta_num
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
-      call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
-      fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) &
-      + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int)
-    enddo
-   enddo
 
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_a_tmp1_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+    
+      do j = elec_beta_num + 1, elec_alpha_num 
+        do k = 1, elec_beta_num
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+          
+          fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int)
+        enddo
+      enddo
+    enddo
   enddo
- enddo
- fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho
+
+  fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss
- fock_a_tmp2_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   do j = 1, elec_alpha_num
-    do k = elec_beta_num+1, elec_alpha_num
-      call contrib_3e_sss(a,i,j,k,contrib_sss)
-      fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss
+
+  PROVIDE mo_l_coef mo_r_coef
+ 
+  fock_a_tmp2_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = 1, elec_alpha_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call contrib_3e_sss(a, i, j, k, contrib_sss)
+
+          fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss
+        enddo
+      enddo
     enddo
-   enddo
   enddo
- enddo
+
 END_PROVIDER 
 
-
-
-
+! ---
 
 BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int
- double precision :: new
- fock_b_tmp1_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = 1, elec_beta_num
-    do k = elec_beta_num+1, elec_alpha_num
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int
-    enddo
-   enddo
 
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tmp1_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = 1, elec_beta_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+
+          fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int
+        enddo
+      enddo
+    enddo
   enddo
- enddo
- fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho
+
+  fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_soo
- fock_b_tmp2_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   do j = elec_beta_num + 1, elec_alpha_num 
-    do k = 1, elec_alpha_num
-      call contrib_3e_soo(a,i,j,k,contrib_soo)
-      fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_soo
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tmp2_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = elec_beta_num + 1, elec_alpha_num 
+        do k = 1, elec_alpha_num
+          call contrib_3e_soo(a, i, j, k, contrib_soo)
+
+          fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo
+        enddo
+      enddo
     enddo
-   enddo
   enddo
- enddo
+
 END_PROVIDER 
 
-subroutine contrib_3e_sss(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the pure same spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
- call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
- integral = direct_int + c_3_int + c_minus_3_int 
- ! negative terms :: exchange contrib
- call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
- call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
- call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
- integral += - exch_13_int - exch_23_int  - exch_12_int 
- integral = -integral
+! ---
+
+subroutine contrib_3e_sss(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! returns the pure same spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+  call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+  integral = direct_int + c_3_int + c_minus_3_int 
+
+  ! negative terms :: exchange contrib
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+  call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+  integral += - exch_13_int - exch_23_int  - exch_12_int 
+
+  integral = -integral
+
 end
 
+! ---
+
 subroutine contrib_3e_soo(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the same spin / opposite spin / opposite spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_23_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
- integral = direct_int - exch_23_int 
- integral = -integral
+
+  BEGIN_DOC
+  ! returns the same spin / opposite spin / opposite spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_23_int
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
+  integral = direct_int - exch_23_int 
+
+  integral = -integral
+
 end
 
-subroutine contrib_3e_sos(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_13_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 
- integral = direct_int - exch_13_int 
- integral = -integral
+! ---
+
+subroutine contrib_3e_sos(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 
+  integral = direct_int - exch_13_int 
+
+  integral = -integral
+
 end
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f
new file mode 100644
index 00000000..306c78b3
--- /dev/null
+++ b/src/tc_scf/rh_tcscf_diis.irp.f
@@ -0,0 +1,362 @@
+! ---
+
+subroutine rh_tcscf_diis()
+
+  implicit none
+
+  integer                       :: i, j, it
+  integer                       :: dim_DIIS, index_dim_DIIS
+  double precision              :: etc_tot, etc_1e, etc_2e, etc_3e, e_save, e_delta
+  double precision              :: tc_grad, g_save, g_delta, g_delta_th
+  double precision              :: level_shift_save, rate_th
+  double precision              :: t0, t1
+  double precision              :: er_DIIS, er_delta, er_save, er_delta_th
+  double precision, allocatable :: F_DIIS(:,:,:), E_DIIS(:,:,:)
+  double precision, allocatable :: mo_r_coef_save(:,:), mo_l_coef_save(:,:)
+
+  logical, external             :: qp_stop
+
+  it          = 0
+  e_save      = 0.d0
+  dim_DIIS    = 0
+  g_delta_th  = 1d0
+  er_delta_th = 1d0
+  rate_th     = 100.d0 !0.01d0 !0.2d0
+
+  allocate(mo_r_coef_save(ao_num,mo_num), mo_l_coef_save(ao_num,mo_num))
+  mo_l_coef_save = 0.d0
+  mo_r_coef_save = 0.d0
+
+  allocate(F_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF), E_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF))
+  F_DIIS = 0.d0
+  E_DIIS = 0.d0
+
+  call write_time(6)
+
+  ! ---
+
+  PROVIDE level_shift_TCSCF
+  PROVIDE mo_l_coef mo_r_coef
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '================', '====', '========'
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+          , '    gradient    ', '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '================', '====', '========'
+
+
+  ! first iteration (HF orbitals)
+  call wall_time(t0)
+
+  etc_tot = TC_HF_energy
+  etc_1e  = TC_HF_one_e_energy
+  etc_2e  = TC_HF_two_e_energy
+  etc_3e  = 0.d0
+  if(three_body_h_tc) then
+    etc_3e = diag_three_elem_hf
+  endif
+  tc_grad = grad_non_hermit
+  er_DIIS = maxval(abs(FQS_SQF_mo))
+  e_delta = dabs(etc_tot - e_save)
+
+  e_save  = etc_tot
+  g_save  = tc_grad
+  er_save = er_DIIS
+
+  call wall_time(t1)
+  write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+    it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+  ! ---
+
+  PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot
+
+  do while((tc_grad .gt. dsqrt(thresh_tcscf)) .and. (er_DIIS .gt. threshold_DIIS_nonzero_TCSCF))
+
+    call wall_time(t0)
+
+    it += 1
+    if(it > n_it_TCSCF_max) then
+      print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+      stop
+    endif
+
+    dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF)
+
+    ! ---
+
+    if(dabs(e_delta) > 1.d-12) then
+
+      index_dim_DIIS = mod(dim_DIIS-1, max_dim_DIIS_TCSCF) + 1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          F_DIIS(i,j,index_dim_DIIS) = Fock_matrix_tc_ao_tot(i,j)
+          E_DIIS(i,j,index_dim_DIIS) = FQS_SQF_ao           (i,j)
+        enddo
+      enddo
+
+      call extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), it, dim_DIIS)
+
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                            , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) )
+      TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot
+    endif
+
+    ! ---
+
+    mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+    mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+    !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+    !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    TOUCH mo_l_coef mo_r_coef
+
+    ! ---
+
+    g_delta  = grad_non_hermit         -  g_save
+    er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+    !if((g_delta > rate_th * g_delta_th) .and. (er_delta > rate_th * er_delta_th) .and. (it > 1)) then
+    if((g_delta > rate_th * g_delta_th) .and. (it > 1)) then
+    !if((g_delta > 0.d0) .and. (it > 1)) then
+
+      Fock_matrix_tc_ao_tot(1:ao_num,1:ao_num) = F_DIIS(1:ao_num,1:ao_num,index_dim_DIIS)
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                            , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) )
+      TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+    endif
+
+    ! ---
+
+    g_delta  = grad_non_hermit         -  g_save
+    er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+    mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num)
+    mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num)
+
+    !do while((g_delta > rate_th * g_delta_th) .and. (er_delta > rate_th * er_delta_th) .and. (it > 1))
+    do while((g_delta > rate_th * g_delta_th) .and. (it > 1))
+      print *, ' big or bad step : ', g_delta, rate_th * g_delta_th
+
+      mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num) 
+      mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num) 
+      if(level_shift_TCSCF <= .1d0) then
+        level_shift_TCSCF = 1.d0
+      else
+        level_shift_TCSCF = level_shift_TCSCF * 3.0d0
+      endif
+      TOUCH mo_l_coef mo_r_coef level_shift_TCSCF
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+      g_delta  = grad_non_hermit         -  g_save
+      er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+      if(level_shift_TCSCF - level_shift_save > 40.d0) then
+        level_shift_TCSCF = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift_TCSCF
+        exit
+      endif
+
+      dim_DIIS = 0
+    enddo
+
+    ! ---
+
+    level_shift_TCSCF = level_shift_TCSCF * 0.5d0
+    SOFT_TOUCH level_shift_TCSCF
+
+    etc_tot = TC_HF_energy
+    etc_1e  = TC_HF_one_e_energy
+    etc_2e  = TC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    tc_grad  = grad_non_hermit
+    er_DIIS  = maxval(abs(FQS_SQF_mo))
+    e_delta  = dabs(etc_tot - e_save)
+    g_delta  = tc_grad - g_save
+    er_delta = er_DIIS - er_save
+    
+    e_save           = etc_tot
+    g_save           = tc_grad
+    level_shift_save = level_shift_TCSCF
+    er_save          = er_DIIS
+
+    g_delta_th  = dabs(tc_grad) ! g_delta)
+    er_delta_th = dabs(er_DIIS) !er_delta)
+
+    call wall_time(t1)
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+    if(g_delta .lt. 0.d0) then
+      call ezfio_set_tc_scf_bitc_energy(etc_tot)
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    endif
+
+    if(qp_stop()) exit
+  enddo
+
+  ! ---
+
+  print *, ' TCSCF DIIS converged !'
+  call print_energy_and_mos()
+
+  call write_time(6)
+
+  deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS)
+
+  call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+
+end
+
+! ---
+
+subroutine extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, F_ao, size_F_ao, it, dim_DIIS)
+
+  BEGIN_DOC
+  !
+  ! Compute the extrapolated Fock matrix using the DIIS procedure
+  !
+  ! e = \sum_i c_i e_i and \sum_i c_i = 1 
+  ! ==> lagrange multiplier with L = |e|^2 - \lambda (\sum_i c_i = 1)
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)    :: it, size_F_ao
+  integer,          intent(inout) :: dim_DIIS
+  double precision, intent(in)    :: F_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(in)    :: E_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(inout) :: F_ao(size_F_ao,ao_num)
+
+  double precision, allocatable   :: B_matrix_DIIS(:,:), X_vector_DIIS(:), C_vector_DIIS(:)
+
+  integer                         :: i, j, k, l, i_DIIS, j_DIIS
+  integer                         :: lwork
+  double precision                :: rcond, ferr, berr
+  integer,          allocatable   :: iwork(:)
+  double precision, allocatable   :: scratch(:,:)
+
+  if(dim_DIIS < 1) then
+    return
+  endif
+
+  allocate( B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), X_vector_DIIS(dim_DIIS+1) &
+          , C_vector_DIIS(dim_DIIS+1), scratch(ao_num,ao_num) )
+
+  ! Compute the matrices B and X
+  B_matrix_DIIS(:,:) = 0.d0
+  do j = 1, dim_DIIS
+    j_DIIS = min(dim_DIIS, mod(it-j, max_dim_DIIS_TCSCF)+1)
+
+    do i = 1, dim_DIIS
+      i_DIIS = min(dim_DIIS, mod(it-i, max_dim_DIIS_TCSCF)+1)
+
+      ! Compute product of two errors vectors
+      do l = 1, ao_num
+        do k = 1, ao_num
+          B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + E_DIIS(k,l,i_DIIS) * E_DIIS(k,l,j_DIIS)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+  ! Pad B matrix and build the X matrix
+
+  C_vector_DIIS(:) = 0.d0
+  do i = 1, dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+  enddo
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+
+  deallocate(scratch)
+
+  ! Estimate condition number of B
+  integer                       :: info
+  double precision              :: anorm
+  integer,          allocatable :: ipiv(:)
+  double precision, allocatable :: AF(:,:)
+  double precision, external :: dlange
+
+  lwork = max((dim_DIIS+1)**2, (dim_DIIS+1)*5)
+  allocate(AF(dim_DIIS+1,dim_DIIS+1))
+  allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) )
+  allocate(scratch(lwork,1))
+  scratch(:,1) = 0.d0
+
+  anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, size(B_matrix_DIIS, 1), scratch(1,1))
+
+  AF(:,:) = B_matrix_DIIS(:,:)
+  call dgetrf(dim_DIIS+1, dim_DIIS+1, AF, size(AF, 1), ipiv, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  call dgecon('1', dim_DIIS+1, AF, size(AF, 1), anorm, rcond, scratch, iwork, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  if(rcond < 1.d-14) then
+    dim_DIIS = 0
+    return
+  endif
+
+  ! solve the linear system C = B x X
+
+  X_vector_DIIS = C_vector_DIIS
+  call dgesv(dim_DIIS+1, 1, B_matrix_DIIS, size(B_matrix_DIIS, 1), ipiv , X_vector_DIIS, size(X_vector_DIIS, 1), info)
+
+  deallocate(scratch, AF, iwork)
+  if(info < 0) then
+    stop ' bug in TC-DIIS'
+  endif
+
+  ! Compute extrapolated Fock matrix
+
+  !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num > 200)
+  do j = 1, ao_num
+    do i = 1, ao_num
+      F_ao(i,j) = 0.d0
+    enddo
+    do k = 1, dim_DIIS
+      if(dabs(X_vector_DIIS(k)) < 1.d-10) cycle
+      do i = 1,ao_num
+        ! FPE here
+        F_ao(i,j) = F_ao(i,j) + X_vector_DIIS(k) * F_DIIS(i,j,dim_DIIS-k+1)
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf_simple.irp.f b/src/tc_scf/rh_tcscf_simple.irp.f
new file mode 100644
index 00000000..30798e3d
--- /dev/null
+++ b/src/tc_scf/rh_tcscf_simple.irp.f
@@ -0,0 +1,129 @@
+! ---
+
+subroutine rh_tcscf_simple()
+
+  implicit none
+  integer                       :: i, j, it, dim_DIIS
+  double precision              :: t0, t1
+  double precision              :: e_save, e_delta, rho_delta
+  double precision              :: etc_tot, etc_1e, etc_2e, etc_3e, tc_grad
+  double precision              :: er_DIIS
+  double precision, allocatable :: rho_old(:,:), rho_new(:,:)
+
+  allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num))
+
+  it       = 0
+  e_save   = 0.d0
+  dim_DIIS = 0
+
+  ! ---
+
+  if(.not. bi_ortho) then
+   print *, ' grad_hermit = ', grad_hermit
+   call save_good_hermit_tc_eigvectors
+   TOUCH mo_coef 
+   call save_mos
+  endif
+
+  ! ---
+
+  if(bi_ortho) then
+
+    PROVIDE level_shift_tcscf
+    PROVIDE mo_l_coef mo_r_coef
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      '====', '================', '================', '================', '================', '================' &
+            , '================', '================', '================', '====', '========'
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+            , '    gradient    ', '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      '====', '================', '================', '================', '================', '================' &
+            , '================', '================', '================', '====', '========'
+
+
+    ! first iteration (HF orbitals)
+    call wall_time(t0)
+
+    etc_tot = TC_HF_energy
+    etc_1e  = TC_HF_one_e_energy
+    etc_2e  = TC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    tc_grad = grad_non_hermit
+    er_DIIS = maxval(abs(FQS_SQF_mo))
+    e_delta = dabs(etc_tot - e_save)
+    e_save  = etc_tot
+
+    call wall_time(t1)
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+    do while(tc_grad .gt. dsqrt(thresh_tcscf))
+      call wall_time(t0)
+
+      it += 1
+      if(it > n_it_tcscf_max) then
+        print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+        stop
+      endif
+
+      mo_l_coef = fock_tc_leigvec_ao
+      mo_r_coef = fock_tc_reigvec_ao
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+      etc_tot = TC_HF_energy
+      etc_1e  = TC_HF_one_e_energy
+      etc_2e  = TC_HF_two_e_energy
+      etc_3e  = 0.d0
+      if(three_body_h_tc) then
+        etc_3e = diag_three_elem_hf
+      endif
+      tc_grad = grad_non_hermit
+      er_DIIS = maxval(abs(FQS_SQF_mo))
+      e_delta = dabs(etc_tot - e_save)
+      e_save  = etc_tot
+
+      call ezfio_set_tc_scf_bitc_energy(etc_tot)
+
+      call wall_time(t1)
+      write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+        it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+    enddo
+
+  else
+
+   do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. (it.lt.n_it_tcscf_max) )
+      print*,'grad_hermit = ',grad_hermit
+      it += 1
+      print *, 'iteration = ', it
+      print *, '***'
+      print *, 'TC HF total energy = ', TC_HF_energy
+      print *, 'TC HF 1 e   energy = ', TC_HF_one_e_energy
+      print *, 'TC HF 2 e   energy = ', TC_HF_two_e_energy
+      print *, 'TC HF 3 body       = ', diag_three_elem_hf
+      print *, '***'
+      print *, ''
+      call save_good_hermit_tc_eigvectors
+      TOUCH mo_coef 
+      call save_mos
+    enddo
+
+  endif
+
+  print *, ' TCSCF Simple converged !'
+  call print_energy_and_mos()
+
+  deallocate(rho_old, rho_new)
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f
index d53991ed..fc4a7935 100644
--- a/src/tc_scf/rotate_tcscf_orbitals.irp.f
+++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f
@@ -260,14 +260,10 @@ subroutine fix_right_to_one()
   integer                       :: i, j, m, n, mm, tot_deg
   double precision              :: accu_d, accu_nd
   double precision              :: de_thr, ei, ej, de
-  double precision              :: thr_d, thr_nd
   integer,          allocatable :: deg_num(:)
   double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:)
   double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:)
 
-  thr_d  = 1d-7
-  thr_nd = 1d-7
-
   n = ao_num
   m = mo_num
 
@@ -340,7 +336,7 @@ subroutine fix_right_to_one()
       ! ---
 
       call impose_weighted_orthog_svd(n, mm, W, R)
-      call impose_weighted_biorthog_qr(n, mm, thr_d, thr_nd, R, W, L)
+      call impose_weighted_biorthog_qr(n, mm, thresh_biorthog_diag, thresh_biorthog_nondiag, R, W, L)
 
       ! ---
 
@@ -353,7 +349,7 @@ subroutine fix_right_to_one()
     endif
   enddo
 
-  call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thr_d, thr_nd, .true.)
+  call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thresh_biorthog_diag, thresh_biorthog_nondiag, .true.)
 
   deallocate(W, deg_num)
 
diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f
index 42925e41..596ae500 100644
--- a/src/tc_scf/routines_rotates.irp.f
+++ b/src/tc_scf/routines_rotates.irp.f
@@ -116,7 +116,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles)
     print *, ' ------------------------------------'
     call orthog_functions(ao_num, n_degen, mo_l_coef_tmp, ao_overlap)
 
-    print *, ' Overlap lef-right '
+    print *, ' Overlap left-right '
     call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp)
     do j = 1, n_degen
      write(*,'(100(F8.4,X))') stmp(:,j)
@@ -259,7 +259,7 @@ subroutine orthog_functions(m, n, coef, overlap)
   double precision, intent(in)    :: overlap(m,m)
   double precision, intent(inout) :: coef(m,n)
   double precision, allocatable   :: stmp(:,:)
-  integer                         :: j
+  integer                         :: j, k
 
   allocate(stmp(n,n))
   call build_s_matrix(m, n, coef, coef, overlap, stmp)
@@ -270,7 +270,13 @@ subroutine orthog_functions(m, n, coef, overlap)
   call impose_orthog_svd_overlap(m, n, coef, overlap)
   call build_s_matrix(m, n, coef, coef, overlap, stmp)
   do j = 1, n
-    coef(1,:m) *= 1.d0/dsqrt(stmp(j,j))
+    ! ---
+    ! TODO: MANU check ici
+    !coef(1,:m) *= 1.d0/dsqrt(stmp(j,j))
+    do k = 1, m
+      coef(k,j) *= 1.d0/dsqrt(stmp(j,j))
+    enddo
+    ! ---
   enddo
   call build_s_matrix(m, n, coef, coef, overlap, stmp)
 
diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f
index 48cbbdc0..187750ff 100644
--- a/src/tc_scf/tc_scf.irp.f
+++ b/src/tc_scf/tc_scf.irp.f
@@ -1,7 +1,9 @@
+! ---
+
 program tc_scf
 
   BEGIN_DOC
-! TODO : Put the documentation of the program here
+  ! TODO : Put the documentation of the program here
   END_DOC
 
   implicit none
@@ -15,43 +17,51 @@ program tc_scf
 !  my_n_pt_a_grid = 26 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 
-  !call create_guess
-  !call orthonormalize_mos
+  PROVIDE mu_erf 
+  print *, ' mu = ', mu_erf
+  PROVIDE j1b_type
+  print *, ' j1b_type = ', j1b_type
+  print *, j1b_pen
+
+  !call create_guess()
+  !call orthonormalize_mos()
+
+  PROVIDE tcscf_algorithm
+  if(tcscf_algorithm == 'DIIS') then
+    call rh_tcscf_diis()
+  elseif(tcscf_algorithm == 'Simple') then
+    call rh_tcscf_simple()
+  else
+    print *, ' not implemented yet', tcscf_algorithm
+    stop
+  endif
 
-  call routine_scf()
   call minimize_tc_orb_angles()
   call print_energy_and_mos()
 
-
 end
 
 ! ---
 
-subroutine create_guess
-
-  BEGIN_DOC
-  !   Create a MO guess if no MOs are present in the EZFIO directory
-  END_DOC
+subroutine create_guess()
 
   implicit none
   logical :: exists
 
   PROVIDE ezfio_filename
-  call ezfio_has_mo_basis_mo_coef(exists)
+  !call ezfio_has_mo_basis_mo_coef(exists)
+  exists = .false.
 
-  if (.not.exists) then
+  if(.not.exists) then
     mo_label = 'Guess'
-    if (mo_guess_type == "HCore") then
+    if(mo_guess_type == "HCore") then
       mo_coef = ao_ortho_lowdin_coef
       call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
       TOUCH mo_coef
-      call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals,     &
-          size(mo_one_e_integrals,1),                            &
-          size(mo_one_e_integrals,2),                            &
-          mo_label,1,.false.)
-      call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef,1), 1.d-10)
+      call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals, size(mo_one_e_integrals, 1), size(mo_one_e_integrals, 2), mo_label, 1, .false.)
+      call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
       SOFT_TOUCH mo_coef
-    else if (mo_guess_type == "Huckel") then
+    elseif (mo_guess_type == "Huckel") then
       call huckel_guess
     else
       print *,  'Unrecognized MO guess type : '//mo_guess_type
@@ -64,121 +74,3 @@ end subroutine create_guess
 
 ! ---
 
-subroutine routine_scf()
-
-  implicit none
-  integer                       :: i, j, it
-  double precision              :: e_save, e_delta, rho_delta
-  double precision, allocatable :: rho_old(:,:), rho_new(:,:)
-
-  allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num))
-
-  it = 0
-  print*,'iteration = ', it
-
-  !print*,'grad_hermit = ', grad_hermit
-  print*,'***'
-  print*,'TC HF total energy = ', TC_HF_energy
-  print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
-  print*,'TC HF 2 e   energy = ', TC_HF_two_e_energy
-  if(three_body_h_tc)then
-   print*,'TC HF 3 body       = ', diag_three_elem_hf
-  endif
-  print*,'***'
-  e_delta = 10.d0
-  e_save  = 0.d0 !TC_HF_energy
-  rho_delta = 10.d0
-
-
-  if(bi_ortho)then
-
-   mo_l_coef = fock_tc_leigvec_ao
-   mo_r_coef = fock_tc_reigvec_ao
-   rho_old   = TCSCF_bi_ort_dm_ao
-   call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
-   call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
-   TOUCH mo_l_coef mo_r_coef
-
-
-  else
-
-   print*,'grad_hermit = ',grad_hermit
-   call save_good_hermit_tc_eigvectors
-   TOUCH mo_coef 
-   call save_mos
-
-  endif
-
-  ! ---
-
-  if(bi_ortho) then
-
-    !do while( it .lt. n_it_tcscf_max .and. (e_delta .gt. dsqrt(thresh_tcscf)) )
-    !do while( it .lt. n_it_tcscf_max .and. (e_delta .gt. thresh_tcscf) )
-    !do while( it .lt. n_it_tcscf_max .and. (rho_delta .gt. thresh_tcscf) )
-    do while( it .lt. n_it_tcscf_max .and. (grad_non_hermit_right.gt. dsqrt(thresh_tcscf)) )
-
-      it += 1
-      print*,'iteration = ', it
-      print*,'***'
-      print*,'TC HF total energy = ', TC_HF_energy
-      print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
-      print*,'TC HF 2 non hermit = ', TC_HF_two_e_energy
-      if(three_body_h_tc)then
-       print*,'TC HF 3 body       = ', diag_three_elem_hf
-      endif
-      print*,'***'
-      e_delta = dabs( TC_HF_energy - e_save )
-      print*, 'it, delta E = ', it, e_delta
-      print*, 'it, gradient= ',grad_non_hermit_right
-      e_save    = TC_HF_energy
-      mo_l_coef = fock_tc_leigvec_ao
-      mo_r_coef = fock_tc_reigvec_ao
-
-      rho_new   = TCSCF_bi_ort_dm_ao
-      !print*, rho_new
-      rho_delta = 0.d0
-      do i = 1, ao_num 
-        do j = 1, ao_num 
-          rho_delta += dabs(rho_new(j,i) - rho_old(j,i))
-        enddo
-      enddo
-      print*, ' rho_delta =', rho_delta
-      rho_old = rho_new
-
-      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
-      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
-      TOUCH mo_l_coef mo_r_coef
-
-      call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
-
-    enddo
-
-  else
-   do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. it .lt. n_it_tcscf_max )
-      print*,'grad_hermit = ',grad_hermit
-      it += 1
-      print*,'iteration = ', it
-      print*,'***'
-      print*,'TC HF total energy = ', TC_HF_energy
-      print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
-      print*,'TC HF 2 e   energy = ', TC_HF_two_e_energy
-      print*,'TC HF 3 body       = ', diag_three_elem_hf
-      print*,'***'
-      call save_good_hermit_tc_eigvectors
-      TOUCH mo_coef 
-      call save_mos
-
-    enddo
-
-  endif
-
-  print*,'Energy converged !'
-  call print_energy_and_mos
-
-  deallocate(rho_old, rho_new)
-
-end subroutine routine_scf
-
-! ---
-
diff --git a/src/tc_scf/tc_scf_dm.irp.f b/src/tc_scf/tc_scf_dm.irp.f
index f6ae3e1f..4750199c 100644
--- a/src/tc_scf/tc_scf_dm.irp.f
+++ b/src/tc_scf/tc_scf_dm.irp.f
@@ -1,25 +1,39 @@
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) ]
- implicit none
- if(bi_ortho)then
-  TCSCF_density_matrix_ao_beta  = TCSCF_bi_ort_dm_ao_beta
- else
-  TCSCF_density_matrix_ao_beta  = SCF_density_matrix_ao_beta
- endif
+
+  implicit none
+
+  if(bi_ortho) then
+    PROVIDE mo_l_coef mo_r_coef
+    TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta
+  else
+    TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta
+  endif
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num) ]
- implicit none
- if(bi_ortho)then
-  TCSCF_density_matrix_ao_alpha  = TCSCF_bi_ort_dm_ao_alpha
- else
-  TCSCF_density_matrix_ao_alpha  = SCF_density_matrix_ao_alpha
- endif
+
+  implicit none
+
+  if(bi_ortho) then
+    PROVIDE mo_l_coef mo_r_coef
+    TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha
+  else
+    TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha
+  endif
+
 END_PROVIDER 
 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num) ]
- implicit none
-  TCSCF_density_matrix_ao_tot  = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha
+  implicit none
+  TCSCF_density_matrix_ao_tot = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha
 END_PROVIDER 
 
 
diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f
index aa2a16ff..611b8b4c 100644
--- a/src/tc_scf/tc_scf_energy.irp.f
+++ b/src/tc_scf/tc_scf_energy.irp.f
@@ -1,6 +1,6 @@
 
  BEGIN_PROVIDER [ double precision, TC_HF_energy]
-&BEGIN_PROVIDER [ double precision, TC_HF_one_electron_energy]
+&BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy]
 &BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy]
 
   BEGIN_DOC
@@ -10,20 +10,22 @@
   implicit none
   integer :: i, j
 
+  PROVIDE mo_l_coef mo_r_coef
+
   TC_HF_energy = nuclear_repulsion
-  TC_HF_one_electron_energy = 0.d0
+  TC_HF_one_e_energy = 0.d0
   TC_HF_two_e_energy = 0.d0
 
   do j = 1, ao_num
     do i = 1, ao_num
       TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_non_hermit_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) &
-                                    + two_e_tc_non_hermit_integral_beta(i,j)  * TCSCF_density_matrix_ao_beta(i,j) )
-      TC_HF_one_electron_energy += ao_one_e_integrals_tc_tot(i,j) &
-                                       * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
+                                    + two_e_tc_non_hermit_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) )
+      TC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) &
+                          * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
     enddo
   enddo
 
-  TC_HF_energy += TC_HF_one_electron_energy + TC_HF_two_e_energy
+  TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy
   TC_HF_energy += diag_three_elem_hf
 
 END_PROVIDER
diff --git a/src/tc_scf/tc_scf_utils.irp.f b/src/tc_scf/tc_scf_utils.irp.f
index 09a4a1b9..dde477c4 100644
--- a/src/tc_scf/tc_scf_utils.irp.f
+++ b/src/tc_scf/tc_scf_utils.irp.f
@@ -40,3 +40,4 @@ subroutine LTxSxR(n, m, L, S, R, C)
 end subroutine LTxR
 
 ! ---
+
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
new file mode 100644
index 00000000..6abeddf1
--- /dev/null
+++ b/src/tc_scf/test_int.irp.f
@@ -0,0 +1,1008 @@
+program test_ints
+
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  print *, ' starting test_ints ...'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 15 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  my_extra_grid_becke = .True.
+  my_n_pt_r_extra_grid = 30
+  my_n_pt_a_extra_grid = 50 ! small extra_grid for quick debug
+  touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
+
+!! OK 
+!call routine_int2_u_grad1u_j1b2 
+!! OK
+!call routine_v_ij_erf_rk_cst_mu_j1b
+!! OK 
+! call routine_x_v_ij_erf_rk_cst_mu_j1b
+!! OK
+! call routine_v_ij_u_cst_mu_j1b
+
+!! OK
+!call routine_int2_u2_j1b2
+
+!! OK
+!call routine_int2_u_grad1u_x_j1b2
+
+!! OK 
+! call routine_int2_grad1u2_grad2u2_j1b2
+! call routine_int2_u_grad1u_j1b2
+! call test_total_grad_lapl
+! call test_total_grad_square
+! call test_ao_tc_int_chemist
+! call test_grid_points_ao
+! call test_tc_scf
+ !call test_int_gauss
+
+  !call test_fock_3e_uhf_ao()
+  !call test_fock_3e_uhf_mo()
+
+  !call test_tc_grad_and_lapl_ao()
+  !call test_tc_grad_square_ao()
+
+  call test_two_e_tc_non_hermit_integral()
+
+end
+
+! ---
+
+subroutine test_tc_scf
+ implicit none
+ integer :: i
+! provide int2_u_grad1u_x_j1b2_test
+ provide x_v_ij_erf_rk_cst_mu_j1b_test
+! do i = 1, ng_fit_jast
+!  print*,expo_gauss_1_erf_x_2(i),coef_gauss_1_erf_x_2(i)
+! enddo
+! provide tc_grad_square_ao_test
+!  provide tc_grad_and_lapl_ao_test
+! provide int2_u_grad1u_x_j1b2_test
+! provide x_v_ij_erf_rk_cst_mu_j1b_test
+! print*,'TC_HF_energy = ',TC_HF_energy
+! print*,'grad_non_hermit = ',grad_non_hermit
+end
+
+subroutine test_ao_tc_int_chemist
+ implicit none
+ provide ao_tc_int_chemist
+! provide ao_tc_int_chemist_test
+! provide tc_grad_square_ao_test
+! provide tc_grad_and_lapl_ao_test
+end
+
+! ---
+
+subroutine routine_test_j1b
+ implicit none
+ integer :: i,icount,j
+ icount = 0
+ do i = 1, List_all_comb_b3_size
+  if(dabs(List_all_comb_b3_coef(i)).gt.1.d-10)then
+   print*,''
+   print*,List_all_comb_b3_expo(i),List_all_comb_b3_coef(i)
+   print*,List_all_comb_b3_cent(1:3,i)
+   print*,''
+   icount += 1
+  endif
+  
+ enddo
+ print*,'List_all_comb_b3_coef,icount = ',List_all_comb_b3_size,icount
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do icount = 1, List_comb_thr_b3_size(j,i)
+    print*,'',j,i
+    print*,List_comb_thr_b3_expo(icount,j,i),List_comb_thr_b3_coef(icount,j,i)
+    print*,List_comb_thr_b3_cent(1:3,icount,j,i)
+    print*,''
+   enddo
+!   enddo
+  enddo
+ enddo
+ print*,'max_List_comb_thr_b3_size = ',max_List_comb_thr_b3_size,List_all_comb_b3_size
+
+end
+
+subroutine routine_int2_u_grad1u_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u_grad1u_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u_grad1u_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_erf_rk_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_erf_rk_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_x_v_ij_erf_rk_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += x_v_ij_erf_rk_cst_mu_j1b     (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+
+subroutine routine_v_ij_u_cst_mu_j1b_test
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_int2_grad1u2_grad2u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ integer :: ii , jj
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+ double precision, allocatable :: ints(:,:,:)
+ allocate(ints(ao_num, ao_num, n_points_final_grid))
+! do ipoint = 1, n_points_final_grid
+!  do i = 1, ao_num
+!   do j = 1, ao_num
+!    read(33,*)ints(j,i,ipoint)
+!   enddo
+!  enddo
+! enddo
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     !array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     !array(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!       array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+!       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+!        print*,j,i,ipoint
+!        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint))
+!        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint))
+!        stop
+!       endif
+!      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ double precision :: e_ref, e_new
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+ e_ref = 0.d0
+ e_new = 0.d0
+ do ii = 1, elec_alpha_num
+  do jj = ii, elec_alpha_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     do i = 1, ao_num
+      do j = 1, ao_num
+       e_ref += mo_coef(j,ii) * mo_coef(i,ii) * array_ref(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       e_new += mo_coef(j,ii) * mo_coef(i,ii) * array(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+       accu_abs += contrib
+!       if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+!        accu_relat += contrib/dabs(array_ref(j,i,l,k))
+!       endif
+      enddo
+     enddo
+    enddo
+   enddo
+
+  enddo
+ enddo
+ print*,'e_ref = ',e_ref
+ print*,'e_new = ',e_new
+! print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+! print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_int2_u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_int2_u_grad1u_x_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += int2_u_grad1u_x_j1b2_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += int2_u_grad1u_x_j1b2     (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_u_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+end
+
+! ---
+
+subroutine test_fock_3e_uhf_ao()
+
+  implicit none
+  integer                       :: i, j
+  double precision              :: diff_tot, diff_ij, thr_ih, norm
+  double precision, allocatable :: fock_3e_uhf_ao_a_mo(:,:), fock_3e_uhf_ao_b_mo(:,:)
+
+  thr_ih = 1d-7
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_ao_a fock_3e_uhf_ao_b
+
+  ! ---
+
+  allocate(fock_3e_uhf_ao_a_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_a   , size(fock_3e_uhf_ao_a   , 1) &
+                        , fock_3e_uhf_ao_a_mo, size(fock_3e_uhf_ao_a_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_ao_a_mo(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_a_mo  (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_a = ', diff_tot / norm
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_a_mo)
+
+  ! ---
+
+  allocate(fock_3e_uhf_ao_b_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_b   , size(fock_3e_uhf_ao_b   , 1) &
+                        , fock_3e_uhf_ao_b_mo, size(fock_3e_uhf_ao_b_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_ao_b_mo(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_b_mo  (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_b = ', diff_tot/norm
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_b_mo)
+
+  ! ---
+
+end subroutine test_fock_3e_uhf_ao()
+
+! ---
+
+subroutine test_fock_3e_uhf_mo()
+
+  implicit none
+  integer          :: i, j
+  double precision :: diff_tot, diff_ij, thr_ih, norm
+
+  thr_ih = 1d-12
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_b
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_a     (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_a = ', diff_tot / norm
+  print *, '      norm_a = ', norm
+  print *, ' '
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_b     (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_b = ', diff_tot/norm
+  print *, '      norm_b = ', norm
+  print *, ' '
+
+  ! ---
+
+end subroutine test_fock_3e_uhf_mo
+
+! ---
+
+subroutine test_total_grad_lapl
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_and_lapl_ao_test(j,i,l,k) - tc_grad_and_lapl_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_and_lapl_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_and_lapl_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
+end
+
+subroutine test_total_grad_square
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_square_ao_test(j,i,l,k) - tc_grad_square_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_square_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_square_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
+end
+
+subroutine test_grid_points_ao
+ implicit none
+ integer :: i,j,ipoint,icount,icount_good, icount_bad,icount_full
+ double precision :: thr
+ thr = 1.d-10
+! print*,'max_n_pts_grid_ao_prod = ',max_n_pts_grid_ao_prod
+! print*,'n_pts_grid_ao_prod'
+ do i = 1, ao_num
+  do j = i, ao_num
+  icount = 0
+  icount_good = 0
+  icount_bad = 0
+  icount_full = 0
+  do ipoint = 1, n_points_final_grid
+!   if(dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,1)) & 
+!    + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,2)) &
+!    + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,3)) )
+!   if(dabs(int2_u2_j1b2_test(j,i,ipoint)).gt.thr)then
+!    icount += 1
+!   endif
+   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_full += 1
+   endif
+   if(dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)).gt.thr)then
+    icount += 1
+    if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_good += 1
+    else
+    print*,j,i,ipoint
+    print*,dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint))/dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint))
+    icount_bad  += 1
+    endif
+   endif
+!   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr)then
+!   endif
+  enddo
+   print*,''
+   print*,j,i
+   print*,icount,icount_full, icount_bad!,n_pts_grid_ao_prod(j,i)
+   print*,dble(icount)/dble(n_points_final_grid),dble(icount_full)/dble(n_points_final_grid)
+!          dble(n_pts_grid_ao_prod(j,i))/dble(n_points_final_grid)
+!   if(icount.gt.n_pts_grid_ao_prod(j,i))then
+!    print*,'pb !!'
+!   endif
+  enddo
+ enddo
+end
+
+subroutine test_int_gauss
+ implicit none
+ integer :: i,j
+ print*,'center'
+ do i = 1, ao_num
+  do j = i, ao_num
+   print*,j,i
+   print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i)
+   print*,ao_prod_center(1:3,j,i)
+  enddo
+ enddo
+ print*,''
+ double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2
+ center = 0.d0
+ pi = dacos(-1.d0)
+ integral_1 = 0.d0
+ integral_2 = 0.d0
+ alpha = 0.75d0
+ do i = 1,  n_points_final_grid
+  ! you get x, y and z of the ith grid point
+  r(1) = final_grid_points(1,i)
+  r(2) = final_grid_points(2,i)
+  r(3) = final_grid_points(3,i)
+  weight = final_weight_at_r_vector(i)
+  distance = dsqrt( (r(1) - center(1))**2 +  (r(2) - center(2))**2 + (r(3) - center(3))**2 )
+  f_r = dexp(-alpha * distance*distance)
+  ! you add the contribution of the grid point to the integral
+  integral_1 += f_r * weight
+  integral_2 += f_r * distance * weight
+ enddo
+ print*,'integral_1      =',integral_1
+ print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5
+ print*,'integral_2      =',integral_2
+ print*,'(pi/alpha)**1.5 =',2.d0*pi / (alpha)**2
+
+
+end
+
+! ---
+
+subroutine test_tc_grad_and_lapl_ao()
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE tc_grad_and_lapl_ao tc_grad_and_lapl_ao_loop
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          diff = dabs(tc_grad_and_lapl_ao_loop(l,k,j,i) - tc_grad_and_lapl_ao(l,k,j,i))
+          if(diff .gt. thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' loops : ', tc_grad_and_lapl_ao_loop(l,k,j,i)
+            print *, ' lapack: ', tc_grad_and_lapl_ao     (l,k,j,i)
+            !stop
+          endif
+
+          norm     += dabs(tc_grad_and_lapl_ao_loop(l,k,j,i))
+          diff_tot += diff
+        enddo
+      enddo
+    enddo
+  enddo
+
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+  return
+
+end
+
+! ---
+
+subroutine test_tc_grad_square_ao()
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE tc_grad_square_ao tc_grad_square_ao_loop
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          diff = dabs(tc_grad_square_ao_loop(l,k,j,i) - tc_grad_square_ao(l,k,j,i))
+          if(diff .gt. thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' loops : ', tc_grad_square_ao_loop(l,k,j,i)
+            print *, ' lapack: ', tc_grad_square_ao     (l,k,j,i)
+            !stop
+          endif
+
+          norm     += dabs(tc_grad_square_ao_loop(l,k,j,i))
+          diff_tot += diff
+        enddo
+      enddo
+    enddo
+  enddo
+
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+  return
+
+end
+
+! ---
+
+subroutine test_two_e_tc_non_hermit_integral()
+
+  implicit none
+  integer          :: i, j
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE two_e_tc_non_hermit_integral_beta two_e_tc_non_hermit_integral_alpha
+  PROVIDE two_e_tc_non_hermit_integral_seq_beta two_e_tc_non_hermit_integral_seq_alpha
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      diff = dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i) - two_e_tc_non_hermit_integral_alpha(j,i))
+      if(diff .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' seq         : ', two_e_tc_non_hermit_integral_seq_alpha(j,i)
+        print *, ' //          : ', two_e_tc_non_hermit_integral_alpha    (j,i)
+        !stop
+      endif
+
+      norm     += dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i))
+      diff_tot += diff
+    enddo
+  enddo
+
+  print *, ' diff tot a = ', diff_tot / norm
+  print *, '     norm a = ', norm
+  print *, ' '
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      diff = dabs(two_e_tc_non_hermit_integral_seq_beta(j,i) - two_e_tc_non_hermit_integral_beta(j,i))
+      if(diff .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' seq         : ', two_e_tc_non_hermit_integral_seq_beta(j,i)
+        print *, ' //          : ', two_e_tc_non_hermit_integral_beta    (j,i)
+        !stop
+      endif
+
+      norm     += dabs(two_e_tc_non_hermit_integral_seq_beta(j,i))
+      diff_tot += diff
+    enddo
+  enddo
+
+  print *, ' diff tot b = ', diff_tot / norm
+  print *, '     norm b = ', norm
+  print *, ' '
+
+  ! ---
+
+  return
+
+end
+
+! ---
+
diff --git a/src/tools/print_he_energy.irp.f b/src/tools/print_he_energy.irp.f
index 87488fba..8daa2b8b 100644
--- a/src/tools/print_he_energy.irp.f
+++ b/src/tools/print_he_energy.irp.f
@@ -7,8 +7,8 @@ program print_he_energy
 
   call print_overlap()
 
-  call print_energy1()
-  call print_energy2()
+  !call print_energy1()
+  !call print_energy2()
 
 end