diff --git a/src/csf/cfgCI_interface.f90 b/src/csf/cfgCI_interface.f90
index b701f0ec..73bd600d 100644
--- a/src/csf/cfgCI_interface.f90
+++ b/src/csf/cfgCI_interface.f90
@@ -46,6 +46,24 @@ module cfunctions
            real   (kind=C_DOUBLE ),intent(out) :: csftodetmatrix(rowsmax,colsmax)
          end subroutine getCSFtoDETTransformationMatrix
       end interface
+       interface
+         subroutine gramSchmidt(A, m, n, B) bind(C, name='gramSchmidt')
+           import C_INT32_T, C_INT64_T, C_DOUBLE
+           integer(kind=C_INT32_T),value,intent(in) :: m
+           integer(kind=C_INT32_T),value,intent(in) :: n
+           real (kind=C_DOUBLE ),intent(in) :: A(m,n)
+           real (kind=C_DOUBLE ),intent(out) :: B(m,n)
+         end subroutine gramSchmidt
+      end interface
+       interface
+         subroutine gramSchmidt_qp(A, m, n, B) bind(C, name='gramSchmidt_qp')
+           import C_INT32_T, C_INT64_T, C_DOUBLE
+           integer(kind=C_INT32_T),value,intent(in) :: m
+           integer(kind=C_INT32_T),value,intent(in) :: n
+           real (kind=C_DOUBLE ),intent(in) :: A(m,n)
+           real (kind=C_DOUBLE ),intent(out) :: B(m,n)
+         end subroutine gramSchmidt_qp
+      end interface
 end module cfunctions
 
 subroutine f_dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC) &
diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c
index 746de04e..76b64dd0 100644
--- a/src/csf/cfgCI_utils.c
+++ b/src/csf/cfgCI_utils.c
@@ -1,5 +1,6 @@
 #include <stdint.h>
 #include <stdio.h>
+#include <assert.h>
 #include "tree_utils.h"
 
 void int_to_bin_digit(int64_t in, int count, int* out)
@@ -28,19 +29,19 @@ void getncsfs1(int *inpnsomo, int *inpms, int *outncsfs){
     int nsomo = *inpnsomo;
     int ms = *inpms;
     int nparcoupl = (nsomo + ms)/2;
-    *outncsfs = binom(nsomo, nparcoupl);
+    *outncsfs = binom((double)nsomo, (double)nparcoupl);
 }
 
 void getncsfs(int NSOMO, int MS, int *outncsfs){
-    int nparcoupl = (NSOMO + MS)/2;
-    int nparcouplp1 = ((NSOMO + MS)/2)+1;
+    int nparcoupl = (NSOMO + MS)/2; // n_alpha
+    int nparcouplp1 = ((NSOMO + MS)/2)+1; // n_alpha + 1
     double tmpndets=0.0;
     if(NSOMO == 0){
         (*outncsfs) = 1;
         return;
     }
-    tmpndets = binom(NSOMO, nparcoupl);
-    (*outncsfs) = round(tmpndets - binom(NSOMO, nparcouplp1));
+    tmpndets = binom((double)NSOMO, (double)nparcoupl);
+    (*outncsfs) = round(tmpndets - binom((double)NSOMO, (double)nparcouplp1));
 }
 
 #include <stdint.h>
@@ -252,6 +253,26 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM
     buildTreeDriver(bftree, *NSOMO, MS, NBF);
 }
 
+void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols);
+
+void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix){
+  int i,j;
+  //for(j=0;j<cols;++j){
+  //  for(i=0;i<rows;++i){
+  //    printf(" %3.2f ",overlapMatrix[j*rows + i]);
+  //  }
+  //  printf("\n");
+  //}
+  // Call the function ortho_qr from qp
+  ortho_qr_csf(overlapMatrix, rows, orthoMatrix, rows, cols);
+  //for(j=0;j<cols;++j){
+  //  for(i=0;i<rows;++i){
+  //    printf(" %3.2f ",orthoMatrix[j*rows + i]);
+  //  }
+  //  printf("\n");
+  //}
+}
+
 void gramSchmidt(double *overlapMatrix, int rows, int cols, double *orthoMatrix){
 
     // vector
@@ -341,8 +362,12 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl
          Get BFtoDeterminant Matrix
     ************************************/
 
-    printf("In convertcsftodet\n");
+
+    //printf(" --- In convet ----\n");
     convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI);
+    //printf(" --- done bf det basis ---- row=%d col=%d\n",rowsbftodetI,colsbftodetI);
+
+    //printRealMatrix(bftodetmatrixI,rowsbftodetI,colsbftodetI);
 
     int rowsI = 0;
     int colsI = 0;
@@ -350,6 +375,8 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl
     //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
     getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
 
+    //printf("Overlap matrix\n");
+    //printRealMatrix(overlapMatrixI,rowsI,colsI);
 
     /***********************************
          Get Orthonormalization Matrix
@@ -359,6 +386,9 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl
 
     gramSchmidt(overlapMatrixI, rowsI, colsI, orthoMatrixI);
 
+    //printf("Ortho matrix\n");
+    //printRealMatrix(orthoMatrixI,rowsI,colsI);
+
     /***********************************
          Get Final CSF to Det Matrix
     ************************************/
@@ -1340,11 +1370,11 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv
     for(int i = 0; i < npairs; i++){
         for(int j = 0; j < NSOMO; j++) {
             inpdet[j] = detslist[i*NSOMO + j];
-            printf(" %d ",inpdet[j]);
+            //printf(" %d ",inpdet[j]);
         }
-        printf("\n");
+        //printf("\n");
         findAddofDetDriver(dettree, NSOMO, inpdet, &addr);
-        printf("(%d) - addr  = %d\n",i,addr);
+        //printf("(%d) - addr  = %d\n",i,addr);
         // Calculate the phase for cfg to QP2 conversion
         //get_phase_cfg_to_qp_inpList(inpdet, NSOMO, &phase_cfg_to_qp);
         //rowvec[addr] = 1.0 * phaselist[i]*phase_cfg_to_qp/sqrt(fac);
@@ -1363,12 +1393,23 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv
 void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *rows, int *cols){
 
     int NSOMO=0;
+    //printf("before getSetBits Isomo=%ld, NSOMO=%ld\n",Isomo,NSOMO);
     getSetBits(Isomo, &NSOMO);
+    //printf("Isomo=%ld, NSOMO=%ld\n",Isomo,NSOMO);
     int ndets = 0;
     int NBF = 0;
-    double dNSOMO = NSOMO*1.0;
-    double nalpha = (NSOMO + MS)/2.0;
-    ndets = (int)binom(dNSOMO, nalpha);
+    //double dNSOMO = NSOMO*1.0;
+    // MS = alpha_num - beta_num
+    int nalpha = (NSOMO + MS)/2;
+    //printf(" in convertbftodet : MS=%d nalpha=%3.2f\n",MS,nalpha);
+    //ndets = (int)binom(dNSOMO, nalpha);
+    if(NSOMO > 0){
+      ndets = (int)binom((double)NSOMO, (double)nalpha);
+    }
+    else if(NSOMO == 0){
+      ndets = 1;
+    }
+    else printf("Something is wrong in calcMEdetpair\n");
 
     Tree dettree = (Tree){  .rootNode = NULL, .NBF = -1 };
     dettree.rootNode = malloc(sizeof(Node));
@@ -1389,16 +1430,6 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *
     }
     else{
 
-    //int addr = -1;
-    //int inpdet[NSOMO];
-    //inpdet[0] = 1;
-    //inpdet[1] = 1;
-    //inpdet[2] = 1;
-    //inpdet[3] = 0;
-    //inpdet[4] = 0;
-    //inpdet[5] = 0;
-
-    //findAddofDetDriver(&dettree, NSOMO, inpdet, &addr);
 
     int detlist[ndets];
     getDetlistDriver(&dettree, NSOMO, detlist);
@@ -1411,6 +1442,9 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *
     generateAllBFs(Isomo, MS, &bftree, &NBF, &NSOMO);
 
     // Initialize transformation matrix
+    //printf("MS=%d NBF=%d ndets=%d NSOMO=%d\n",MS,NBF,ndets,NSOMO);
+    assert( NBF > 0);
+    assert( ndets > 0);
     (*bftodetmatrixptr) = malloc(NBF*ndets*sizeof(double));
     (*rows) = NBF;
     (*cols) = ndets;
@@ -1465,9 +1499,10 @@ void convertBFtoDetBasisWithArrayDims(int64_t Isomo, int MS, int rowsmax, int co
     getSetBits(Isomo, &NSOMO);
     int ndets = 0;
     int NBF = 0;
-    double dNSOMO = NSOMO*1.0;
-    double nalpha = (NSOMO + MS)/2.0;
-    ndets = (int)binom(dNSOMO, nalpha);
+    //double dNSOMO = NSOMO*1.0;
+    //double nalpha = (NSOMO + MS)/2.0;
+    int nalpha = (NSOMO + MS)/2;
+    ndets = (int)binom((double)NSOMO, (double)nalpha);
 
     Tree dettree = (Tree){  .rootNode = NULL, .NBF = -1 };
     dettree.rootNode = malloc(sizeof(Node));
@@ -1551,6 +1586,7 @@ void getApqIJMatrixDims(int64_t Isomo, int64_t Jsomo, int64_t MS, int32_t *rowso
     getncsfs(NSOMOJ, MS, &NBFJ);
     (*rowsout) = NBFI;
     (*colsout) = NBFJ;
+  //exit(0);
 }
 
 void getApqIJMatrixDriver(int64_t Isomo, int64_t Jsomo, int orbp, int orbq, int64_t MS, int64_t NMO, double **CSFICSFJApqIJptr, int *rowsout, int *colsout){
@@ -1669,6 +1705,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
 
     int rowsbftodetI, colsbftodetI;
 
+    //printf(" 1Calling convertBFtoDetBasis Isomo=%ld MS=%ld\n",Isomo,MS);
     convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI);
 
     // Fill matrix
@@ -1676,8 +1713,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
     int colsI = 0;
 
     //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
-    //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
+    //printf("Isomo=%ld\n",Isomo);
     getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
+    if(Isomo == 0){
+      rowsI = 1;
+      colsI = 1;
+    }
+
+    //printf("Isomo=%ld\n",Isomo);
 
     orthoMatrixI = malloc(rowsI*colsI*sizeof(double));
 
@@ -1689,6 +1732,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
 
     int rowsbftodetJ, colsbftodetJ;
 
+    //printf(" 2Calling convertBFtoDetBasis Jsomo=%ld MS=%ld\n",Jsomo,MS);
     convertBFtoDetBasis(Jsomo, MS, &bftodetmatrixJ, &rowsbftodetJ, &colsbftodetJ);
 
     int rowsJ = 0;
@@ -1696,6 +1740,10 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
     // Fill matrix
     //getOverlapMatrix(Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO);
     getOverlapMatrix_withDet(bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO);
+    if(Jsomo == 0){
+      rowsJ = 1;
+      colsJ = 1;
+    }
 
     orthoMatrixJ = malloc(rowsJ*colsJ*sizeof(double));
 
@@ -1713,18 +1761,25 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
 
     int transA=false;
     int transB=false;
+    //printf("1Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsbftodetI,colsbftodetI,rowsA,colsA);
     callBlasMatxMat(bftodetmatrixI, rowsbftodetI, colsbftodetI, ApqIJ, rowsA, colsA, bfIApqIJ, transA, transB);
+    //printf("done\n");
 
     // now transform I in csf basis
     double *CSFIApqIJ = malloc(rowsI*colsA*sizeof(double));
     transA = false;
     transB = false;
+    //printf("2Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsI,colsI,colsA);
     callBlasMatxMat(orthoMatrixI, rowsI, colsI, bfIApqIJ, colsI, colsA, CSFIApqIJ, transA, transB);
 
     // now transform J in BF basis
     double *CSFIbfJApqIJ = malloc(rowsI*rowsbftodetJ*sizeof(double));
     transA = false;
     transB = true;
+    //printf("3Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsA,rowsbftodetJ,colsbftodetJ);
     callBlasMatxMat(CSFIApqIJ, rowsI, colsA, bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, CSFIbfJApqIJ, transA, transB);
 
     // now transform J in CSF basis
@@ -1735,13 +1790,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
     double *tmpCSFICSFJApqIJ = malloc(rowsI*rowsJ*sizeof(double));
     transA = false;
     transB = true;
+    //printf("4Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,rowsbftodetJ,rowsJ,colsJ);
     callBlasMatxMat(CSFIbfJApqIJ, rowsI, rowsbftodetJ, orthoMatrixJ, rowsJ, colsJ, tmpCSFICSFJApqIJ, transA, transB);
     // Transfer to actual buffer in Fortran order
     for(int i = 0; i < rowsI; i++)
         for(int j = 0; j < rowsJ; j++)
             CSFICSFJApqIJ[j*rowsI + i] = tmpCSFICSFJApqIJ[i*rowsJ + j];
 
-
     // Garbage collection
     free(overlapMatrixI);
     free(overlapMatrixJ);
diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index f73362eb..cea7640c 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -1,3 +1,592 @@
+use bitmasks
+
+ BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*(mo_num))]
+&BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ]
+  implicit none
+  !use bitmasks
+  BEGIN_DOC
+  ! Documentation for alphasI
+  ! Returns the associated alpha's for
+  ! the input configuration Icfg.
+  END_DOC
+
+  integer                        :: idxI ! The id of the Ith CFG
+  integer(bit_kind)              :: Icfg(N_int,2)
+  integer                        :: NalphaIcfg
+  logical,dimension(:,:),allocatable :: tableUniqueAlphas
+  integer                        :: listholes(mo_num)
+  integer                        :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer                        :: nholes
+  integer                        :: nvmos
+  integer                        :: listvmos(mo_num)
+  integer                        :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer*8                      :: Idomo
+  integer*8                      :: Isomo
+  integer*8                      :: Jdomo
+  integer*8                      :: Jsomo
+  integer*8                      :: diffSOMO
+  integer*8                      :: diffDOMO
+  integer*8                      :: xordiffSOMODOMO
+  integer                        :: ndiffSOMO
+  integer                        :: ndiffDOMO
+  integer                        :: nxordiffSOMODOMO
+  integer                        :: ndiffAll
+  integer                        :: i,ii
+  integer                        :: j,jj
+  integer                        :: k,kk
+  integer                        :: kstart
+  integer                        :: kend
+  integer                        :: Nsomo_I
+  integer                        :: hole
+  integer                        :: p
+  integer                        :: q
+  integer                        :: countalphas
+  logical                        :: pqAlreadyGenQ
+  logical                        :: pqExistsQ
+  logical                        :: ppExistsQ
+  integer*8                      :: MS
+
+  double precision               :: t0, t1
+  call wall_time(t0)
+
+  MS = elec_alpha_num-elec_beta_num
+
+  allocate(tableUniqueAlphas(mo_num,mo_num))
+  NalphaIcfg_list = 0
+
+  do idxI = 1, N_configuration
+
+    Icfg  = psi_configuration(:,:,idxI)
+
+    Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+    Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+
+    ! find out all pq holes possible
+    nholes = 0
+    ! holes in SOMO
+    do ii = 1,n_act_orb
+      i = list_act(ii)
+      if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 1
+      endif
+    end do
+    ! holes in DOMO
+    do ii = 1,n_act_orb
+      i = list_act(ii)
+      if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 2
+      endif
+    end do
+
+    ! find vmos
+    listvmos = -1
+    vmotype = -1
+    nvmos = 0
+    do ii = 1,n_act_orb
+      i = list_act(ii)
+      if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then
+        if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then
+          nvmos += 1
+          listvmos(nvmos) = i
+          vmotype(nvmos) = 1
+        else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then
+          nvmos += 1
+          listvmos(nvmos) = i
+          vmotype(nvmos) = 2
+        end if
+      end if
+    end do
+
+    tableUniqueAlphas = .FALSE.
+
+    ! Now find the allowed (p,q) excitations
+    Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+    Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+    Nsomo_I = POPCNT(Isomo)
+    if(Nsomo_I .EQ. 0) then
+      kstart = 1
+    else
+      kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))
+    endif
+    kend = idxI-1
+
+    do i = 1,nholes
+      p = listholes(i)
+      do j = 1,nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+          ! SOMO -> VMO
+          Jsomo = IBCLR(Isomo,p-1)
+          Jsomo = IBSET(Jsomo,q-1)
+          Jdomo = Idomo
+          kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+          kend = idxI-1
+        else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+          ! SOMO -> SOMO
+          Jsomo = IBCLR(Isomo,p-1)
+          Jsomo = IBCLR(Jsomo,q-1)
+          Jdomo = IBSET(Idomo,q-1)
+          ! Check for Minimal alpha electrons (MS)
+          if(POPCNT(Jsomo).ge.MS)then
+            kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+            kend = idxI-1
+          else
+            cycle
+          endif
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+          ! DOMO -> VMO
+          Jsomo = IBSET(Isomo,p-1)
+          Jsomo = IBSET(Jsomo,q-1)
+          Jdomo = IBCLR(Idomo,p-1)
+          kstart = cfg_seniority_index(Nsomo_I)
+          kend = idxI-1
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+          ! DOMO -> SOMO
+          Jsomo = IBSET(Isomo,p-1)
+          Jsomo = IBCLR(Jsomo,q-1)
+          Jdomo = IBCLR(Idomo,p-1)
+          Jdomo = IBSET(Jdomo,q-1)
+          kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+          kend = idxI-1
+        else
+          print*,"Something went wrong in obtain_associated_alphaI"
+        endif
+        ! Check for Minimal alpha electrons (MS)
+        if(POPCNT(Jsomo).lt.MS)then
+          cycle
+        endif
+
+        ! Again, we don't have to search from 1
+        ! we just use seniority to find the
+        ! first index with NSOMO - 2 to NSOMO + 2
+        ! this is what is done in kstart, kend
+
+        pqAlreadyGenQ = .FALSE.
+        ! First check if it can be generated before
+        do k = kstart, kend
+          diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)))
+          ndiffSOMO = POPCNT(diffSOMO)
+          if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
+          diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)))
+          xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+          ndiffDOMO = POPCNT(diffDOMO)
+          nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+          nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+          !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then
+          if((ndiffSOMO+ndiffDOMO) .EQ. 0) then
+            pqAlreadyGenQ = .TRUE.
+            ppExistsQ = .TRUE.
+            EXIT
+          endif
+          if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+            pqAlreadyGenQ = .TRUE.
+            EXIT
+          endif
+        end do
+
+        if(pqAlreadyGenQ) cycle
+
+        pqExistsQ = .FALSE.
+
+        if(.NOT. pqExistsQ) then
+          tableUniqueAlphas(p,q) = .TRUE.
+        endif
+      end do
+    end do
+
+    !print *,tableUniqueAlphas(:,:)
+
+    ! prune list of alphas
+    Isomo = Icfg(1,1)
+    Idomo = Icfg(1,2)
+    Jsomo = Icfg(1,1)
+    Jdomo = Icfg(1,2)
+    NalphaIcfg = 0
+    do i = 1, nholes
+      p = listholes(i)
+      do j = 1, nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(tableUniqueAlphas(p,q)) then
+          if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+            ! SOMO -> VMO
+            Jsomo = IBCLR(Isomo,p-1)
+            Jsomo = IBSET(Jsomo,q-1)
+            Jdomo = Idomo
+          else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+            ! SOMO -> SOMO
+            Jsomo = IBCLR(Isomo,p-1)
+            Jsomo = IBCLR(Jsomo,q-1)
+            Jdomo = IBSET(Idomo,q-1)
+            if(POPCNT(Jsomo).ge.MS)then
+              kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+              kend = idxI-1
+            else
+              cycle
+            endif
+          else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+            ! DOMO -> VMO
+            Jsomo = IBSET(Isomo,p-1)
+            Jsomo = IBSET(Jsomo,q-1)
+            Jdomo = IBCLR(Idomo,p-1)
+          else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+            ! DOMO -> SOMO
+            Jsomo = IBSET(Isomo,p-1)
+            Jsomo = IBCLR(Jsomo,q-1)
+            Jdomo = IBCLR(Idomo,p-1)
+            Jdomo = IBSET(Jdomo,q-1)
+          else
+            print*,"Something went wrong in obtain_associated_alphaI"
+          endif
+
+          ! SOMO
+          !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+          if(POPCNT(Jsomo) .ge. NSOMOMin) then
+            NalphaIcfg += 1
+            alphasIcfg_list(1,1,idxI,NalphaIcfg) = Jsomo
+            alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+            NalphaIcfg_list(idxI) = NalphaIcfg
+          endif
+        endif
+      end do
+    end do
+
+    ! Check if this Icfg has been previously generated as a mono
+    ppExistsQ = .False.
+    Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1))
+    Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2))
+    kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+    do k = kstart, idxI-1
+      diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
+      ndiffSOMO = POPCNT(diffSOMO)
+      if (ndiffSOMO /= 2) cycle
+      diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
+      xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+      ndiffDOMO = POPCNT(diffDOMO)
+      nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+      if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then
+        ppExistsQ = .TRUE.
+        EXIT
+      endif
+    end do
+    ! Diagonal part (pp,qq)
+    if(nholes > 0 .AND. (.NOT. ppExistsQ))then
+      ! SOMO
+      if(POPCNT(Jsomo) .ge. NSOMOMin) then
+        NalphaIcfg += 1
+        alphasIcfg_list(1,1,idxI,NalphaIcfg) = Icfg(1,1)
+        alphasIcfg_list(1,2,idxI,NalphaIcfg) = Icfg(1,2)
+        NalphaIcfg_list(idxI) = NalphaIcfg
+      endif
+    endif
+
+    NalphaIcfg = 0
+  enddo ! end loop idxI
+  call wall_time(t1)
+  print *, 'Preparation : ', t1 - t0
+
+END_PROVIDER
+
+  subroutine obtain_associated_alphaI(idxI, Icfg, alphasIcfg, NalphaIcfg)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for alphasI
+  ! Returns the associated alpha's for
+  ! the input configuration Icfg.
+  END_DOC
+
+  integer,intent(in)                 :: idxI ! The id of the Ith CFG
+  integer(bit_kind),intent(in)       :: Icfg(N_int,2)
+  integer,intent(out)                :: NalphaIcfg
+  integer(bit_kind),intent(out)      :: alphasIcfg(N_int,2,*)
+  logical,dimension(:,:),allocatable :: tableUniqueAlphas
+  integer                            :: listholes(mo_num)
+  integer                            :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer                            :: nholes
+  integer                            :: nvmos
+  integer                            :: listvmos(mo_num)
+  integer                            :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer*8                          :: Idomo
+  integer*8                          :: Isomo
+  integer*8                          :: Jdomo
+  integer*8                          :: Jsomo
+  integer*8                          :: diffSOMO
+  integer*8                          :: diffDOMO
+  integer*8                          :: xordiffSOMODOMO
+  integer                            :: ndiffSOMO
+  integer                            :: ndiffDOMO
+  integer                            :: nxordiffSOMODOMO
+  integer                            :: ndiffAll
+  integer                            :: i, ii
+  integer                            :: j, jj
+  integer                            :: k, kk
+  integer                            :: kstart
+  integer                            :: kend
+  integer                            :: Nsomo_I
+  integer                            :: hole
+  integer                            :: p
+  integer                            :: q
+  integer                            :: countalphas
+  logical                            :: pqAlreadyGenQ
+  logical                            :: pqExistsQ
+  logical                            :: ppExistsQ
+  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  !print*,"Input cfg"
+  !call debug_spindet(Isomo,1)
+  !call debug_spindet(Idomo,1)
+
+  ! find out all pq holes possible
+  nholes = 0
+  ! holes in SOMO
+  do ii = 1,n_act_orb
+    i = list_act(ii)
+     if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 1
+     endif
+  end do
+  ! holes in DOMO
+  do ii = 1,n_act_orb
+    i = list_act(ii)
+     if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 2
+     endif
+  end do
+
+  ! find vmos
+  listvmos = -1
+  vmotype = -1
+  nvmos = 0
+  do ii = 1,n_act_orb
+    i = list_act(ii)
+     !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
+     if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0) then
+        nvmos += 1
+        listvmos(nvmos) = i
+        vmotype(nvmos) = 1
+     else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0 ) then
+        nvmos += 1
+        listvmos(nvmos) = i
+        vmotype(nvmos) = 2
+     end if
+  end do
+
+  !print *,"Nvmo=",nvmos
+  !print *,listvmos
+  !print *,vmotype
+
+  allocate(tableUniqueAlphas(mo_num,mo_num))
+  tableUniqueAlphas = .FALSE.
+
+  ! Now find the allowed (p,q) excitations
+  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  Nsomo_I = POPCNT(Isomo)
+  if(Nsomo_I .EQ. 0) then
+    kstart = 1
+  else
+    kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))
+  endif
+  kend = idxI-1
+  !print *,"Isomo"
+  !call debug_spindet(Isomo,1)
+  !call debug_spindet(Idomo,1)
+
+  !print *,"Nholes=",nholes," Nvmos=",nvmos, " idxi=",idxI
+  !do i = 1,nholes
+  !   print *,i,"->",listholes(i)
+  !enddo
+  !do i = 1,nvmos
+  !   print *,i,"->",listvmos(i)
+  !enddo
+
+  do i = 1,nholes
+     p = listholes(i)
+     do j = 1,nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+           ! SOMO -> VMO
+           Jsomo = IBCLR(Isomo,p-1)
+           Jsomo = IBSET(Jsomo,q-1)
+           Jdomo = Idomo
+           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+           kend = idxI-1
+        else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+           ! SOMO -> SOMO
+           Jsomo = IBCLR(Isomo,p-1)
+           Jsomo = IBCLR(Jsomo,q-1)
+           Jdomo = IBSET(Idomo,q-1)
+           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+           kend = idxI-1
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+           ! DOMO -> VMO
+           Jsomo = IBSET(Isomo,p-1)
+           Jsomo = IBSET(Jsomo,q-1)
+           Jdomo = IBCLR(Idomo,p-1)
+           kstart = cfg_seniority_index(Nsomo_I)
+           kend = idxI-1
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+           ! DOMO -> SOMO
+           Jsomo = IBSET(Isomo,p-1)
+           Jsomo = IBCLR(Jsomo,q-1)
+           Jdomo = IBCLR(Idomo,p-1)
+           Jdomo = IBSET(Jdomo,q-1)
+           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+           kend = idxI-1
+        else
+           print*,"Something went wrong in obtain_associated_alphaI"
+        endif
+
+        ! Again, we don't have to search from 1
+        ! we just use seniortiy to find the
+        ! first index with NSOMO - 2 to NSOMO + 2
+        ! this is what is done in kstart, kend
+
+        pqAlreadyGenQ = .FALSE.
+        ! First check if it can be generated before
+        do k = kstart, kend
+           diffSOMO = IEOR(Jsomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
+           ndiffSOMO = POPCNT(diffSOMO)
+           if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
+           diffDOMO = IEOR(Jdomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
+           xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+           ndiffDOMO = POPCNT(diffDOMO)
+           nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+           nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+           !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then
+           if((ndiffSOMO+ndiffDOMO) .EQ. 0) then
+              pqAlreadyGenQ = .TRUE.
+              ppExistsQ = .TRUE.
+              EXIT
+           endif
+           if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+              pqAlreadyGenQ = .TRUE.
+              !EXIT
+              !ppExistsQ = .TRUE.
+              !print *,i,k,ndiffSOMO,ndiffDOMO
+              !call debug_spindet(Jsomo,1)
+              !call debug_spindet(Jdomo,1)
+              !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)),1)
+              !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)),1)
+              EXIT
+           endif
+        end do
+
+        !print *,"(,",p,",",q,")",pqAlreadyGenQ
+
+        if(pqAlreadyGenQ) cycle
+
+        pqExistsQ = .FALSE.
+        ! now check if this exists in the selected list
+        !do k = idxI+1, N_configuration
+        !   diffSOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jsomo),psi_configuration(1,1,k))
+        !   diffDOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jdomo),psi_configuration(1,2,k))
+        !   ndiffSOMO = POPCNT(diffSOMO)
+        !   ndiffDOMO = POPCNT(diffDOMO)
+        !   if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+        !      pqExistsQ = .TRUE.
+        !      EXIT
+        !   endif
+        !end do
+
+        if(.NOT. pqExistsQ) then
+           tableUniqueAlphas(p,q) = .TRUE.
+           !print *,p,q
+           !call debug_spindet(Jsomo,1)
+           !call debug_spindet(Jdomo,1)
+        endif
+     end do
+  end do
+
+  !print *,tableUniqueAlphas(:,:)
+
+  ! prune list of alphas
+  Isomo = Icfg(1,1)
+  Idomo = Icfg(1,2)
+  Jsomo = Icfg(1,1)
+  Jdomo = Icfg(1,2)
+  NalphaIcfg = 0
+  do i = 1, nholes
+     p = listholes(i)
+     do j = 1, nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(tableUniqueAlphas(p,q)) then
+           if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+              ! SOMO -> VMO
+              Jsomo = IBCLR(Isomo,p-1)
+              Jsomo = IBSET(Jsomo,q-1)
+              Jdomo = Idomo
+           else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+              ! SOMO -> SOMO
+              Jsomo = IBCLR(Isomo,p-1)
+              Jsomo = IBCLR(Jsomo,q-1)
+              Jdomo = IBSET(Idomo,q-1)
+           else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+              ! DOMO -> VMO
+              Jsomo = IBSET(Isomo,p-1)
+              Jsomo = IBSET(Jsomo,q-1)
+              Jdomo = IBCLR(Idomo,p-1)
+           else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+              ! DOMO -> SOMO
+              Jsomo = IBSET(Isomo,p-1)
+              Jsomo = IBCLR(Jsomo,q-1)
+              Jdomo = IBCLR(Idomo,p-1)
+              Jdomo = IBSET(Jdomo,q-1)
+           else
+              print*,"Something went wrong in obtain_associated_alphaI"
+           endif
+
+           ! SOMO
+           NalphaIcfg += 1
+           !print *,i,j,"|",NalphaIcfg
+           alphasIcfg(1,1,NalphaIcfg) = Jsomo
+           alphasIcfg(1,2,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+           !print *,"I = ",idxI, " Na=",NalphaIcfg," - ",Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+        endif
+     end do
+  end do
+
+  ! Check if this Icfg has been previously generated as a mono
+  ppExistsQ = .False.
+  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  do k = 1, idxI-1
+     diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
+     diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffSOMO = POPCNT(diffSOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+        ppExistsQ = .TRUE.
+        EXIT
+     endif
+  end do
+  ! Diagonal part (pp,qq)
+  if(nholes > 0 .AND. (.NOT. ppExistsQ))then
+     ! SOMO
+     NalphaIcfg += 1
+     !print *,p,q,"|",holetype(i),vmotype(j),NalphaIcfg
+     !call debug_spindet(Idomo,1)
+     !call debug_spindet(Jdomo,1)
+     alphasIcfg(1,1,NalphaIcfg) = Icfg(1,1)
+     alphasIcfg(1,2,NalphaIcfg) = Icfg(1,2)
+  endif
+
+  end subroutine
+
   function getNSOMO(Icfg) result(NSOMO)
     implicit none
     integer(bit_kind),intent(in)   :: Icfg(N_int,2)
@@ -8,98 +597,3 @@
        NSOMO += POPCNT(Icfg(i,1))
     enddo
   end function getNSOMO
-
-subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel)
-  implicit none
-  BEGIN_DOC
-  ! This function converts the orbital ids
-  ! in real space to those used in model space
-  ! in order to identify the matrices required
-  ! for the calculation of MEs.
-  !
-  ! The type of excitations are ordered as follows:
-  ! Type 1 - SOMO -> SOMO
-  ! Type 2 - DOMO -> VMO
-  ! Type 3 - SOMO -> VMO
-  ! Type 4 - DOMO -> SOMO
-  END_DOC
-  integer(bit_kind),intent(in)         :: Ialpha(N_int,2)
-  integer(bit_kind),intent(in)         :: Jcfg(N_int,2)
-  integer,intent(in)                   :: p,q
-  integer,intent(in)                   :: extype
-  integer,intent(out)                  :: pmodel,qmodel
-  integer*8                            :: Isomo
-  integer*8                            :: Idomo
-  integer*8                            :: Jsomo
-  integer*8                            :: Jdomo
-  integer*8                            :: mask
-  integer*8                            :: Isomotmp
-  integer*8                            :: Jsomotmp
-  integer                              :: pos0,pos0prev
-
-  ! TODO Flag (print) when model space indices is > 64
-  Isomo = Ialpha(1,1)
-  Idomo = Ialpha(1,2)
-  Jsomo = Jcfg(1,1)
-  Jdomo = Jcfg(1,2)
-  pos0prev = 0
-  pmodel = p
-  qmodel = q
-
-  if(p .EQ. q) then
-     pmodel = 1
-     qmodel = 1
-  else
-     !print *,"input pq=",p,q,"extype=",extype
-     !call debug_spindet(Isomo,1)
-     !call debug_spindet(Idomo,1)
-     !call debug_spindet(Jsomo,1)
-     !call debug_spindet(Jdomo,1)
-     select case(extype)
-       case (1)
-          ! SOMO -> SOMO
-          ! remove all domos
-          !print *,"type -> SOMO -> SOMO"
-          mask = ISHFT(1_8,p) - 1
-          Isomotmp = IAND(Isomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
-          mask = ISHFT(1_8,q) - 1
-          Isomotmp = IAND(Isomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
-       case (2)
-          ! DOMO -> VMO
-          ! remove all domos except one at p
-          !print *,"type -> DOMO -> VMO"
-          mask = ISHFT(1_8,p) - 1
-          Jsomotmp = IAND(Jsomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
-          mask = ISHFT(1_8,q) - 1
-          Jsomotmp = IAND(Jsomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
-       case (3)
-          ! SOMO -> VMO
-          !print *,"type -> SOMO -> VMO"
-          !Isomo = IEOR(Isomo,Jsomo)
-          mask = ISHFT(1_8,p) - 1
-          Isomo = IAND(Isomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
-          mask = ISHFT(1_8,q) - 1
-          Jsomo = IAND(Jsomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
-       case (4)
-          ! DOMO -> SOMO
-          ! remove all domos except one at p
-          !print *,"type -> DOMO -> SOMO"
-          !Isomo = IEOR(Isomo,Jsomo)
-          mask = ISHFT(1_8,p) - 1
-          Jsomo = IAND(Jsomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
-          mask = ISHFT(1_8,q) - 1
-          Isomo = IAND(Isomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
-       case default
-          print *,"something is wrong in convertOrbIdsToModelSpaceIds"
-     end select
-  endif
-     !print *,p,q,"model ids=",pmodel,qmodel
-end subroutine convertOrbIdsToModelSpaceIds
diff --git a/src/csf/configurations.irp.f b/src/csf/configurations.irp.f
index ce5d48ab..aebf53d9 100644
--- a/src/csf/configurations.irp.f
+++ b/src/csf/configurations.irp.f
@@ -458,8 +458,9 @@ end
 
 END_PROVIDER
 
- BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num) ]
+ BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num+2) ]
 &BEGIN_PROVIDER [ integer, cfg_nsomo_max ]
+&BEGIN_PROVIDER [ integer, cfg_nsomo_min ]
   implicit none
   BEGIN_DOC
  ! Returns the index in psi_configuration of the first cfg with
@@ -467,9 +468,10 @@ END_PROVIDER
  !
  ! cfg_nsomo_max : Max number of SOMO in the current wave function
  END_DOC
- integer :: i, k, s, sold
+ integer :: i, k, s, sold, soldmin
  cfg_seniority_index(:) = -1
  sold = -1
+ soldmin = 2000
  cfg_nsomo_max = 0
  do i=1,N_configuration
    s = 0
@@ -482,6 +484,10 @@ END_PROVIDER
      cfg_seniority_index(s) = i
      cfg_nsomo_max = s
    endif
+   if (soldmin .GT. s ) then
+     soldmin = s
+     cfg_nsomo_min = s
+   endif
  enddo
 END_PROVIDER
 
@@ -743,41 +749,112 @@ BEGIN_PROVIDER [ integer(bit_kind), dominant_dets_of_cfgs, (N_int,2,N_dominant_d
  enddo
 END_PROVIDER
 
-subroutine binary_search_cfg(cfgInp,addcfg)
+subroutine binary_search_cfg(cfgInp,addcfg,bit_tmp)
   use bitmasks
   implicit none
   BEGIN_DOC
   ! Documentation for binary_search
-  ! 
-  ! Does a binary search to find 
+  !
+  ! Does a binary search to find
   ! the address of a configuration in a list of
   ! configurations.
   END_DOC
   integer(bit_kind), intent(in)  :: cfgInp(N_int,2)
   integer          , intent(out) :: addcfg
-  integer :: i,j,k,r,l
-  integer*8 :: key, key2
-  logical :: found
-  !integer*8, allocatable :: bit_tmp(:)
-  !integer*8, external            :: configuration_search_key
+  integer*8,          intent(in) :: bit_tmp(0:N_configuration+1)
 
-  !allocate(bit_tmp(0:N_configuration))
-  !bit_tmp(0) = 0
-  do i=1,N_configuration
-    !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int)
-    found = .True.
-    do k=1,N_int
-      found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) &
-                    .and. (psi_configuration(k,2,i) == cfgInp(k,2))
-    enddo
-    if (found) then
-      addcfg = i
-      exit
+  logical                        :: found
+  integer                        :: l, r, j, k
+  integer*8                      :: key
+
+  integer*8, external            :: configuration_search_key
+
+  key = configuration_search_key(cfgInp,N_int)
+
+  ! Binary search
+  l = 0
+  r = N_configuration+1
+IRP_IF WITHOUT_SHIFTRL
+  j = ishft(r-l,-1)
+IRP_ELSE
+  j = shiftr(r-l,1)
+IRP_ENDIF
+  do while (j>=1)
+    j = j+l
+    if (bit_tmp(j) == key) then
+      ! Find 1st element which matches the key
+      if (j > 1) then
+        do while (j>1 .and. bit_tmp(j-1) == key)
+          j = j-1
+        enddo
+      endif
+      ! Find correct element matching the key
+      do while (bit_tmp(j) == key)
+        found = .True.
+        do k=1,N_int
+          found = found .and. (psi_configuration(k,1,j) == cfgInp(k,1))&
+                        .and. (psi_configuration(k,2,j) == cfgInp(k,2))
+        enddo
+        if (found) then
+          addcfg = j
+          return
+        endif
+        j = j+1
+      enddo
+      addcfg = -1
+      return
+    else if (bit_tmp(j) > key) then
+      r = j
+    else
+      l = j
     endif
+IRP_IF WITHOUT_SHIFTRL
+    j = ishft(r-l,-1)
+IRP_ELSE
+    j = shiftr(r-l,1)
+IRP_ENDIF
   enddo
 
+  addcfg = -1
+  return
+
 end subroutine
 
+!subroutine binary_search_cfg(cfgInp,addcfg)
+!  use bitmasks
+!  implicit none
+!  BEGIN_DOC
+!  ! Documentation for binary_search
+!  ! 
+!  ! Does a binary search to find 
+!  ! the address of a configuration in a list of
+!  ! configurations.
+!  END_DOC
+!  integer(bit_kind), intent(in)  :: cfgInp(N_int,2)
+!  integer          , intent(out) :: addcfg
+!  integer :: i,j,k,r,l
+!  integer*8 :: key, key2
+!  logical :: found
+!  !integer*8, allocatable :: bit_tmp(:)
+!  !integer*8, external            :: configuration_search_key
+!
+!  !allocate(bit_tmp(0:N_configuration))
+!  !bit_tmp(0) = 0
+!  do i=1,N_configuration
+!    !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int)
+!    found = .True.
+!    do k=1,N_int
+!      found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) &
+!                    .and. (psi_configuration(k,2,i) == cfgInp(k,2))
+!    enddo
+!    if (found) then
+!      addcfg = i
+!      exit
+!    endif
+!  enddo
+!
+!end subroutine
+!
  BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det, (2,N_configuration) ]
 &BEGIN_PROVIDER [ integer, psi_configuration_n_det, (N_configuration) ]
 &BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det_data, (N_det) ]
diff --git a/src/csf/conversion.irp.f b/src/csf/conversion.irp.f
index 75f6e539..494c3bfa 100644
--- a/src/csf/conversion.irp.f
+++ b/src/csf/conversion.irp.f
@@ -24,7 +24,7 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out)
   double precision, intent(out)  :: psi_coef_cfg_out(n_CSF,N_st)
   integer*8                      :: Isomo, Idomo, mask
   integer(bit_kind)              :: Ialpha(N_int) ,Ibeta(N_int)
-  integer                        :: rows, cols, i, j, k
+  integer                        :: rows, cols, i, j, k, salpha
   integer                        :: startdet, enddet
   integer                        :: ndetI
   integer                        :: getNSOMO
@@ -65,9 +65,11 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out)
     enddo
 
     if(iand(s,1) .EQ. 0) then
-      bfIcfg = max(1,nint((binom(s,s/2)-binom(s,(s/2)+1))))
+      salpha = (s + MS)/2
+      bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))
     else
-      bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1))))
+      salpha = (s + MS)/2
+      bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))
     endif
 
     ! perhaps blocking with CFGs of same seniority
@@ -99,7 +101,7 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
   double precision,intent(in)    :: psi_coef_cfg_in(n_CSF,N_st)
   double precision,intent(out)   :: psi_coef_det(N_det,N_st)
   double precision               :: tmp_psi_coef_det(maxDetDimPerBF,N_st)
-  integer                        :: s, bfIcfg
+  integer                        :: s, bfIcfg, salpha
   integer                        :: countcsf
   integer(bit_kind)              :: Ialpha(N_int), Ibeta(N_int)
   integer                        :: rows, cols, i, j, k
@@ -110,6 +112,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
   double precision,allocatable   :: tempCoeff (:,:)
   double precision               :: phasedet
   integer                        :: idx
+  integer MS
+  MS = elec_alpha_num-elec_beta_num
 
   countcsf = 0
 
@@ -123,7 +127,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
       if (psi_configuration(k,1,i) == 0_bit_kind) cycle
       s = s + popcnt(psi_configuration(k,1,i))
     enddo
-    bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1))))
+    salpha = (s + MS)/2
+    bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))
 
     allocate(tempCoeff(bfIcfg,N_st))
 
diff --git a/src/csf/create_excitations.irp.f b/src/csf/create_excitations.irp.f
index c1560148..1c59d579 100644
--- a/src/csf/create_excitations.irp.f
+++ b/src/csf/create_excitations.irp.f
@@ -226,7 +226,7 @@ subroutine generate_all_singles_cfg(cfg,singles,n_singles,Nint)
   enddo
 end
 
-subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint)
+subroutine generate_all_singles_cfg_with_type(bit_tmp,cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint)
   implicit none
   use bitmasks
   BEGIN_DOC
@@ -238,6 +238,7 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin
   ! ex_type_singles : on output contains type of excitations  :
   !
   END_DOC
+  integer*8, intent(in)          :: bit_tmp(0:N_configuration+1)
   integer, intent(in)            :: Nint
   integer, intent(inout)         :: n_singles
   integer, intent(out)           :: idxs_singles(*)
@@ -248,20 +249,26 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin
   integer(bit_kind)              :: Jdet(Nint,2)
 
   integer           :: i,k, n_singles_ma, i_hole, i_particle, ex_type, addcfg
+  integer           :: ii,kk
   integer(bit_kind) :: single(Nint,2)
   logical           :: i_ok
 
+
   n_singles = 0
   !TODO
   !Make list of Somo  and Domo for holes
   !Make list of Unocc and Somo for particles
-  do i_hole = 1+n_core_orb, n_core_orb + n_act_orb
-    do i_particle = 1+n_core_orb, n_core_orb + n_act_orb
+  !do i_hole = 1+n_core_orb, n_core_orb + n_act_orb
+  do ii = 1, n_act_orb
+    i_hole = list_act(ii)
+    !do i_particle = 1+n_core_orb, n_core_orb + n_act_orb
+    do kk = 1, n_act_orb
+      i_particle = list_act(kk)
       if(i_hole .EQ. i_particle) cycle
       addcfg = -1
       call do_single_excitation_cfg_with_type(cfgInp,single,i_hole,i_particle,ex_type,i_ok)
       if (i_ok) then
-        call binary_search_cfg(single,addcfg)
+        call binary_search_cfg(single,addcfg,bit_tmp)
         if(addcfg .EQ. -1) cycle
         n_singles = n_singles + 1
         do k=1,Nint
diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f
new file mode 100644
index 00000000..7d7ae09b
--- /dev/null
+++ b/src/csf/obtain_I_foralpha.irp.f
@@ -0,0 +1,397 @@
+subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, nconnectedI,ntotalconnectedI)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for obtain_connected_I_foralpha
+  ! This function returns all those selected configurations
+  ! which are connected to the input configuration
+  ! givenI by a single excitation.
+  !
+  ! The type of excitations are ordered as follows:
+  ! Type 1 - SOMO -> SOMO
+  ! Type 2 - DOMO -> VMO
+  ! Type 3 - SOMO -> VMO
+  ! Type 4 - DOMO -> SOMO
+  !
+  ! Order of operators
+  ! \alpha> = a^\dag_p a_q |I> = E_pq |I>
+  END_DOC
+  integer          ,intent(in)             :: idxI
+  integer(bit_kind),intent(in)             :: givenI(N_int,2)
+  integer(bit_kind),intent(out)            :: connectedI(N_int,2,*)
+  integer          ,intent(out)            :: idxs_connectedI(*)
+  integer,intent(out)                      :: nconnectedI
+  integer,intent(out)                      :: ntotalconnectedI
+  integer*8                                :: Idomo
+  integer*8                                :: Isomo
+  integer*8                                :: Jdomo
+  integer*8                                :: Jsomo
+  integer*8                                :: IJsomo
+  integer*8                                :: diffSOMO
+  integer*8                                :: diffDOMO
+  integer*8                                :: xordiffSOMODOMO
+  integer                                  :: ndiffSOMO
+  integer                                  :: ndiffDOMO
+  integer                                  :: nxordiffSOMODOMO
+  integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes
+  integer :: listholes(mo_num)
+  integer :: holetype(mo_num)
+  integer :: end_index
+  integer :: Nsomo_I
+
+  ! 
+  ! 2 2 1 1 0 0 : 1 1 0 0 0 0
+  !               0 0 1 1 0 0
+  !
+  ! 2 1 1 1 1 0 : 1 0 0 0 0 0
+  !               0 1 1 1 1 0
+  !xorS           0 1 0 0 1 0 : 2
+  !xorD           0 1 0 0 0 0 : 1 
+  !xorSD          0 0 0 0 1 0 : 1
+  !                           -----
+  !                             4
+  ! 1 1 1 1 1 1 : 0 0 0 0 0 0
+  !               1 1 1 1 1 1
+  !               1 1 0 0 1 1 : 4
+  !               1 1 0 0 0 0 : 2 
+  !               0 0 0 0 1 1 : 2
+  !                           -----
+  !                             8
+  !
+
+  nconnectedI = 0
+  ntotalconnectedI = 0
+  end_index = N_configuration
+
+  ! Since CFGs are sorted wrt to seniority
+  ! we don't have to search the full CFG list
+  Isomo = givenI(1,1)
+  Idomo = givenI(1,2)
+  Nsomo_I = POPCNT(Isomo)
+  end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_I+6,elec_num))-1)
+  if(end_index .LT. 0) end_index= N_configuration
+  !end_index = N_configuration
+  !print *,"Start and End = ",idxI, end_index
+
+
+  p = 0
+  q = 0
+  do i=idxI,end_index
+     !if(.True.) then
+     !  nconnectedI += 1
+     !  connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+     !  idxs_connectedI(nconnectedI)=i
+     !  cycle
+     !endif
+     Isomo = givenI(1,1)
+     Idomo = givenI(1,2)
+     Jsomo = psi_configuration(1,1,i)
+     Jdomo = psi_configuration(1,2,i)
+     diffSOMO = IEOR(Isomo,Jsomo)
+     ndiffSOMO = POPCNT(diffSOMO)
+     diffDOMO = IEOR(Idomo,Jdomo)
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+       !-------
+       ! MONO |
+       !-------
+        nconnectedI += 1
+        connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+        idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 8) .AND. ndiffSOMO .EQ. 4) then
+       !----------------------------
+       ! DOMO -> VMO + DOMO -> VMO |
+       !----------------------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 6) .AND. ndiffSOMO .EQ. 2) then
+       !----------------------------
+       ! DOUBLE
+       !----------------------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 2) .AND. ndiffSOMO .EQ. 3) then
+       !-----------------
+       ! DOUBLE
+       !-----------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 0) then
+       !-----------------
+       ! DOUBLE
+       !-----------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+       !--------
+       ! I = I |
+       !--------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)= i
+        ! find out all pq holes possible
+        nholes = 0
+        ! holes in SOMO
+        Isomo = psi_configuration(1,1,i)
+        Idomo = psi_configuration(1,2,i)
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 1
+           endif
+        end do
+        ! holes in DOMO
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 2
+           endif
+        end do
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)*nholes)
+     endif
+  end do
+
+end subroutine obtain_connected_J_givenI
+
+subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for obtain_connected_I_foralpha
+  ! This function returns all those selected configurations
+  ! which are connected to the input configuration
+  ! Ialpha by a single excitation.
+  !
+  ! The type of excitations are ordered as follows:
+  ! Type 1 - SOMO -> SOMO
+  ! Type 2 - DOMO -> VMO
+  ! Type 3 - SOMO -> VMO
+  ! Type 4 - DOMO -> SOMO
+  !
+  ! Order of operators
+  ! \alpha> = a^\dag_p a_q |I> = E_pq |I>
+  END_DOC
+  integer          ,intent(in)             :: idxI
+  integer(bit_kind),intent(in)             :: Ialpha(N_int,2)
+  integer(bit_kind),intent(out)            :: connectedI(N_int,2,*)
+  integer          ,intent(out)            :: idxs_connectedI(*)
+  integer,intent(out)                      :: nconnectedI
+  integer,intent(out)                      :: excitationIds(2,*)
+  integer,intent(out)                      :: excitationTypes(*)
+  real*8 ,intent(out)                      :: diagfactors(*)
+  integer*8                                :: Idomo
+  integer*8                                :: Isomo
+  integer*8                                :: Jdomo
+  integer*8                                :: Jsomo
+  integer*8                                :: IJsomo
+  integer*8                                :: diffSOMO
+  integer*8                                :: diffDOMO
+  integer*8                                :: xordiffSOMODOMO
+  integer                                  :: ndiffSOMO
+  integer                                  :: ndiffDOMO
+  integer                        :: nxordiffSOMODOMO
+  integer                        :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes
+  integer                        :: listholes(mo_num)
+  integer                        :: holetype(mo_num)
+  integer                        :: end_index
+  integer                        :: Nsomo_alpha
+  integer*8                      :: MS
+  MS = elec_alpha_num-elec_beta_num
+
+  nconnectedI = 0
+  end_index = N_configuration
+
+  ! Since CFGs are sorted wrt to seniority
+  ! we don't have to search the full CFG list
+  Isomo = Ialpha(1,1)
+  Idomo = Ialpha(1,2)
+  Nsomo_alpha = POPCNT(Isomo)
+  end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1)
+  if(end_index .LT. 0) end_index= N_configuration
+  end_index = N_configuration
+
+
+  p = 0
+  q = 0
+  if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1'
+  do i=idxI,end_index
+     Isomo = Ialpha(1,1)
+     Idomo = Ialpha(1,2)
+     Jsomo = psi_configuration(1,1,i)
+     Jdomo = psi_configuration(1,2,i)
+     ! Check for Minimal alpha electrons (MS)
+     if(POPCNT(Isomo).lt.MS)then
+       cycle
+     endif
+     diffSOMO = IEOR(Isomo,Jsomo)
+     ndiffSOMO = POPCNT(diffSOMO)
+     !if(idxI.eq.1)then
+     !  print *," \t idxI=",i," diffS=",ndiffSOMO," popJs=", POPCNT(Jsomo)," popIs=",POPCNT(Isomo)
+     !endif
+     diffDOMO = IEOR(Idomo,Jdomo)
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+        select case(ndiffDOMO)
+        case (0)
+           ! SOMO -> VMO
+           !print *,"obt SOMO -> VMO"
+           extyp = 3
+           IJsomo = IEOR(Isomo, Jsomo)
+!IRP_IF WITHOUT_TRAILZ
+!           p = (popcnt(ieor( IAND(Isomo,IJsomo) , IAND(Isomo,IJsomo) -1))-1) + 1
+!IRP_ELSE
+           p = TRAILZ(IAND(Isomo,IJsomo)) + 1
+!IRP_ENDIF
+           IJsomo = IBCLR(IJsomo,p-1)
+!IRP_IF WITHOUT_TRAILZ
+!           q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1
+!IRP_ELSE
+           q = TRAILZ(IJsomo) + 1
+!IRP_ENDIF
+        case (1)
+           ! DOMO -> VMO
+           ! or
+           ! SOMO -> SOMO
+           nsomoJ = POPCNT(Jsomo)
+           nsomoalpha = POPCNT(Isomo)
+           if(nsomoJ .GT. nsomoalpha) then
+              ! DOMO -> VMO
+              !print *,"obt DOMO -> VMO"
+              extyp = 2
+!IRP_IF WITHOUT_TRAILZ
+!              p = (popcnt(ieor( IEOR(Idomo,Jdomo),IEOR(Idomo,Jdomo) -1))-1) + 1
+!IRP_ELSE
+              p = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+!IRP_ENDIF
+              Isomo = IEOR(Isomo, Jsomo)
+              Isomo = IBCLR(Isomo,p-1)
+!IRP_IF WITHOUT_TRAILZ
+!              q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
+!IRP_ELSE
+              q = TRAILZ(Isomo) + 1
+!IRP_ENDIF
+           else
+              ! SOMO -> SOMO
+              !print *,"obt SOMO -> SOMO"
+              extyp = 1
+!IRP_IF WITHOUT_TRAILZ
+!              q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1
+!IRP_ELSE
+              q = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+!IRP_ENDIF
+              Isomo = IEOR(Isomo, Jsomo)
+              Isomo = IBCLR(Isomo,q-1)
+!IRP_IF WITHOUT_TRAILZ
+!              p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
+!IRP_ELSE
+              p = TRAILZ(Isomo) + 1
+!IRP_ENDIF
+              ! Check for Minimal alpha electrons (MS)
+              !if(POPCNT(Isomo).lt.MS)then
+              !  cycle
+              !endif
+           end if
+        case (2)
+           ! DOMO -> SOMO
+           !print *,"obt DOMO -> SOMO"
+           extyp = 4
+           IJsomo = IEOR(Isomo, Jsomo)
+!IRP_IF WITHOUT_TRAILZ
+!           p = (popcnt(ieor( IAND(Jsomo,IJsomo), IAND(Jsomo,IJsomo)-1))-1) + 1
+!IRP_ELSE
+           p = TRAILZ(IAND(Jsomo,IJsomo)) + 1
+!IRP_ENDIF
+           IJsomo = IBCLR(IJsomo,p-1)
+!IRP_IF WITHOUT_TRAILZ
+!           q = (popcnt(ieor( IJsomo , IJsomo -1))-1) + 1
+!IRP_ELSE
+           q = TRAILZ(IJsomo) + 1
+!IRP_ENDIF
+        case default
+           print *,"something went wront in get connectedI"
+        end select
+        starti = psi_config_data(i,1)
+        endi   = psi_config_data(i,2)
+        nconnectedI += 1
+        do k=1,N_int
+          connectedI(k,1,nconnectedI) = psi_configuration(k,1,i)
+          connectedI(k,2,nconnectedI) = psi_configuration(k,2,i)
+        enddo
+        idxs_connectedI(nconnectedI)=starti
+        excitationIds(1,nconnectedI)=p
+        excitationIds(2,nconnectedI)=q
+        excitationTypes(nconnectedI) = extyp
+        diagfactors(nconnectedI) = 1.0d0
+     else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+        ! find out all pq holes possible
+        nholes = 0
+        ! holes in SOMO
+        Isomo = psi_configuration(1,1,i)
+        Idomo = psi_configuration(1,2,i)
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 1
+           endif
+        end do
+        ! holes in DOMO
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 2
+           endif
+        end do
+
+        do k=1,nholes
+           p = listholes(k)
+           q = p
+           extyp = 1
+           if(holetype(k) .EQ. 1) then
+              starti = psi_config_data(i,1)
+              endi   = psi_config_data(i,2)
+              nconnectedI += 1
+              connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+              idxs_connectedI(nconnectedI)=starti
+              excitationIds(1,nconnectedI)=p
+              excitationIds(2,nconnectedI)=q
+              excitationTypes(nconnectedI) = extyp
+              diagfactors(nconnectedI) = 1.0d0
+           else
+              starti = psi_config_data(i,1)
+              endi   = psi_config_data(i,2)
+              nconnectedI += 1
+              connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+              idxs_connectedI(nconnectedI)=starti
+              excitationIds(1,nconnectedI)=p
+              excitationIds(2,nconnectedI)=q
+              excitationTypes(nconnectedI) = extyp
+              diagfactors(nconnectedI) = 2.0d0
+           endif
+        enddo
+     endif
+  end do
+
+end subroutine obtain_connected_I_foralpha
diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f
index 5aaba9a3..21c19aaa 100644
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@@ -8,6 +8,7 @@
  end function logabsgamma
 
   BEGIN_PROVIDER [ integer, NSOMOMax]
+ &BEGIN_PROVIDER [ integer, NSOMOMin]
  &BEGIN_PROVIDER [ integer, NCSFMax]
  &BEGIN_PROVIDER [ integer*8, NMO]
  &BEGIN_PROVIDER [ integer, NBFMax]
@@ -19,11 +20,21 @@
   ! The maximum number of SOMOs for the current calculation.
   ! required for the calculation of prototype arrays.
   END_DOC
+  integer MS, ialpha
+  MS = elec_alpha_num-elec_beta_num
   NSOMOMax = min(elec_num, cfg_nsomo_max + 2)
+  print *,'cfg_nsomo_min=',cfg_nsomo_min
+  print *,'cfg_nsomo_max=',cfg_nsomo_max
+  if(AND(cfg_nsomo_min , 1) .eq. 0)then
+    NSOMOMin = max(0,cfg_nsomo_min-2)
+  else
+    NSOMOMin = max(1,cfg_nsomo_min-2)
+  endif
   ! Note that here we need NSOMOMax + 2 sizes
-  NCSFMax = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)-binom(NSOMOMax,((NSOMOMax+1)/2)+1)))) ! TODO: NCSFs for MS=0
+  ialpha = (NSOMOMax + MS)/2
+  NCSFMax = max(1,nint((binom(NSOMOMax,ialpha)-binom(NSOMOMax,ialpha+1)))) ! TODO: NCSFs for MS=0 (CHECK)
   NBFMax = NCSFMax
-  maxDetDimPerBF = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2))))
+  maxDetDimPerBF = max(1,nint((binom(NSOMOMax,ialpha))))
   NMO = n_act_orb
   integer i,j,k,l
   integer startdet,enddet
@@ -32,81 +43,113 @@
   integer dimcsfpercfg
   integer detDimperBF
   real*8 :: coeff, binom1, binom2
-  integer MS
   integer ncfgpersomo
   real*8, external :: logabsgamma
   detDimperBF = 0
-  MS = elec_alpha_num-elec_beta_num
   ! number of cfgs = number of dets for 0 somos
+  n_CSF = cfg_seniority_index(NSOMOMin)-1
+  ncfgprev = cfg_seniority_index(NSOMOMin)
+  !do i = 0-iand(MS,1)+2, NSOMOMax,2
+  !!print *," i=",0," dimcsf=",1," ncfg=",ncfgprev, " senor=",cfg_seniority_index(0)
+  !!do i = NSOMOMin+2, NSOMOMax,2
+  !!   if(cfg_seniority_index(i) .EQ. -1)then
+  !!      ncfgpersomo = N_configuration + 1
+  !!   else
+  !!      ncfgpersomo = cfg_seniority_index(i)
+  !!   endif
+  !!ncfg = ncfgpersomo - ncfgprev
+  !!!detDimperBF = max(1,nint((binom(i,(i+1)/2))))
+  !!!dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1))))
+  !!n_CSF += ncfg * dimcsfpercfg
+  !!!if(cfg_seniority_index(i+2) == -1) EXIT
+  !!!if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF
+  !!ncfgprev = cfg_seniority_index(i)
+  !!print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " senor=",cfg_seniority_index(i)
+  !!enddo
+  !!print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration
   n_CSF = 0
-  ncfgprev = cfg_seniority_index(0)
-  ncfgpersomo = ncfgprev
-  do i = iand(MS,1), NSOMOMax-2,2
-    if(cfg_seniority_index(i) .EQ. -1) then
-      cycle
-    endif
-    if(cfg_seniority_index(i+2) .EQ. -1) then
-      ncfgpersomo = N_configuration + 1
-    else
-      if(cfg_seniority_index(i+2) > ncfgpersomo) then
-          ncfgpersomo = cfg_seniority_index(i+2)
-      else
-        ! l = i+k+2 
-        ! Loop over l with a constraint to ensure that l <= size(cfg_seniority_index,1)-1
-        ! Old version commented just below
-        do l = min(size(cfg_seniority_index,1)-1, i+2), size(cfg_seniority_index,1)-1, 2
-          if (cfg_seniority_index(l) >= ncfgpersomo) then
-            ncfgpersomo = cfg_seniority_index(l)
-          endif
-        enddo
-        !k = 0
-        !if ((i+2+k) < size(cfg_seniority_index,1)) then
-        !  do while(cfg_seniority_index(i+2+k) < ncfgpersomo)
-        !    k = k + 2
-        !    if ((i+2+k) >= size(cfg_seniority_index,1)) then
-        !      exit
-        !    endif
-        !    ncfgpersomo = cfg_seniority_index(i+2+k)
-        !  enddo
-        !endif
+  !ncfgprev = cfg_seniority_index(0)
+  !ncfgpersomo = ncfgprev
+  !do i = iand(MS,1), NSOMOMax-2,2
+  !  if(cfg_seniority_index(i) .EQ. -1) then
+  !    cycle
+  !  endif
+  !  if(cfg_seniority_index(i+2) .EQ. -1) then
+  !    ncfgpersomo = N_configuration + 1
+  !  else
+  !    if(cfg_seniority_index(i+2) > ncfgpersomo) then
+  !        ncfgpersomo = cfg_seniority_index(i+2)
+  !    else
+  !      k = 0
+  !      do while(cfg_seniority_index(i+2+k) < ncfgpersomo)
+  !        k = k + 2
+  !        ncfgpersomo = cfg_seniority_index(i+2+k)
+  !      enddo
+  !    endif
+  !  endif
+  !  ncfg = ncfgpersomo - ncfgprev
+  !  if(i .EQ. 0 .OR. i .EQ. 1) then
+  !    dimcsfpercfg = 1
+  !  elseif( i .EQ. 3) then
+  !    dimcsfpercfg = 2
+  !  else
+  !    if(iand(MS,1) .EQ. 0) then
+  !      dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1))))
+  !    else
+  !      dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2))))
+  !    endif
+  !  endif
+  !  n_CSF += ncfg * dimcsfpercfg
+  !  print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " senor=",cfg_seniority_index(i)
+  !  if(cfg_seniority_index(i+2) > ncfgprev) then
+  !    ncfgprev = cfg_seniority_index(i+2)
+  !  else
+  !    k = 0
+  !    do while(cfg_seniority_index(i+2+k) < ncfgprev)
+  !      k = k + 2
+  !      ncfgprev = cfg_seniority_index(i+2+k)
+  !    enddo
+  !  endif
+  !enddo
+  n_CSF = 0
+  print *," -9(((((((((((((( NSOMOMin=",NSOMOMin
+  ncfgprev = cfg_seniority_index(NSOMOMin) ! can be -1 
+  if(ncfgprev.eq.-1)then
+    ncfgprev=1
+  endif
+  do i=NSOMOMin,NSOMOMax+2,2
+    !k=0
+    !do while((cfg_seniority_index(i+2+k) .eq. -1) .and. (k.le.NSOMOMax))
+    !  k=k+2
+    !end do
+    if(cfg_seniority_index(i).eq.-1)cycle
+    if(cfg_seniority_index(i+2).eq.-1)then
+      ncfg = N_configuration - ncfgprev + 1
+      if(ncfg .eq. 0)then
+        ncfg=1
       endif
+    else
+      ncfg = cfg_seniority_index(i+2) - ncfgprev
     endif
-    ncfg = ncfgpersomo - ncfgprev
     if(i .EQ. 0 .OR. i .EQ. 1) then
       dimcsfpercfg = 1
     elseif( i .EQ. 3) then
       dimcsfpercfg = 2
     else
       if(iand(MS,1) .EQ. 0) then
-        dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1))))
+        ialpha = (i + MS)/2
+        dimcsfpercfg = max(1,nint((binom(i,ialpha)-binom(i,ialpha+1))))
       else
-        dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2))))
+        ialpha = (i + MS)/2
+        dimcsfpercfg = max(1,nint((binom(i,ialpha)-binom(i,ialpha+1))))
       endif
     endif
-    n_CSF += ncfg * dimcsfpercfg
-    if(cfg_seniority_index(i+2) > ncfgprev) then
-      ncfgprev = cfg_seniority_index(i+2)
-    else
-      ! l = i+k+2 
-      ! Loop over l with a constraint to ensure that l <= size(cfg_seniority_index,1)-1
-      ! Old version commented just below
-      do l = min(size(cfg_seniority_index,1)-1, i+2), size(cfg_seniority_index,1)-1, 2
-        if (cfg_seniority_index(l) >= ncfgprev) then
-          ncfgprev = cfg_seniority_index(l)
-        endif
-      enddo
-      !k = 0
-      !if ((i+2+k) < size(cfg_seniority_index,1)) then
-      !  do while(cfg_seniority_index(i+2+k) < ncfgprev)
-      !    k = k + 2
-      !    if ((i+2+k) >= size(cfg_seniority_index,1)) then
-      !      exit
-      !    endif
-      !    ncfgprev = cfg_seniority_index(i+2+k)
-      !  enddo
-      !endif
-    endif
-  enddo
+    n_CSF += ncfg*dimcsfpercfg
+    print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " ncfgprev=",ncfgprev, " senor=",cfg_seniority_index(i)
+    ncfgprev = cfg_seniority_index(i+2)
+  end do
+  print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration
+
 END_PROVIDER
 
 
@@ -127,9 +170,9 @@ subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout)
   real*8,intent(out)             :: phaseout
   integer(bit_kind)              :: mask, deta(N_int), detb(N_int)
   integer                        :: nbetas
-  integer                        :: k
+  integer                        :: count, k
 
-  ! Initialize deta and detb
+  ! Initliaze deta and detb
   deta = Ialpha
   detb = Ibeta
 
@@ -167,7 +210,117 @@ end subroutine get_phase_qp_to_cfg
 
 
 
-  BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,2)]
+  BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)]
+ &BEGIN_PROVIDER [ real*8, psi_coef_config,  (n_CSF,1)]
+ &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)]
+ &BEGIN_PROVIDER [ integer, psi_csf_to_config_data, (n_CSF)]
+  use cfunctions
+  implicit none
+  BEGIN_DOC
+  ! Documentation for DetToCSFTransformationMatrix
+  ! Provides the matrix of transformatons for the
+  ! conversion between determinant to CSF basis (in BFs)
+  END_DOC
+  integer*8 :: Isomo, Idomo
+  integer(bit_kind) :: Ialpha(N_int),Ibeta(N_int)
+  integer   :: rows, cols, i, j, k
+  integer   :: startdet, enddet, idx
+  integer*8 MS, salpha
+  integer ndetI
+  integer :: getNSOMO
+  real*8,dimension(:,:),allocatable    :: tempBuffer
+  real*8,dimension(:),allocatable    :: tempCoeff
+  real*8  :: norm_det1, phasedet
+
+  integer :: nt
+
+
+  norm_det1 = 0.d0
+  MS = elec_alpha_num - elec_beta_num
+  ! initialization
+  psi_coef_config = 0.d0
+  DetToCSFTransformationMatrix(0,:,:) = 1.d0
+  do i = 2-iand(MS,1), NSOMOMax,2
+    Isomo = IBSET(0_8, i) - 1_8
+    ! rows = Ncsfs
+    ! cols = Ndets
+    salpha = (i+MS)/2
+    bfIcfg = max(1,nint((binom(i,salpha)-binom(i,salpha+1))))
+    ndetI =  max(1,nint((binom(i,salpha))))
+    !bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1))))
+    !ndetI =  max(1,nint((binom(i,(i+1)/2))))
+
+    allocate(tempBuffer(bfIcfg,ndetI))
+    call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer)
+    DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) =  tempBuffer(1:bfIcfg,1:ndetI)
+    deallocate(tempBuffer)
+  enddo
+
+  integer s, bfIcfg
+  integer countcsf
+  countcsf = 0
+  integer countdet
+  countdet = 0
+  integer istate
+  istate = 1
+  psi_csf_to_config_data(1) = 1
+  phasedet = 1.0d0
+  call omp_set_max_active_levels(1)
+  !$OMP PARALLEL
+  !$OMP MASTER
+  do i = 1,N_configuration
+      startdet = psi_configuration_to_psi_det(1,i)
+      enddet = psi_configuration_to_psi_det(2,i)
+      ndetI = enddet-startdet+1
+
+      allocate(tempCoeff(ndetI))
+      countdet = 1
+      do j = startdet, enddet
+         idx = psi_configuration_to_psi_det_data(j)
+         Ialpha(:) = psi_det(:,1,idx)
+         Ibeta(:)  = psi_det(:,2,idx)
+         call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet)
+         tempCoeff(countdet) = psi_coef(idx, istate)*phasedet
+         norm_det1 += tempCoeff(countdet)*tempCoeff(countdet)
+         countdet += 1
+      enddo
+
+       !print *,"dimcoef=",bfIcfg,norm_det1
+       !call printMatrix(tempCoeff,ndetI,1)
+
+      s = 0
+      do k=1,N_int
+        if (psi_configuration(k,1,i) == 0_bit_kind) cycle
+        s = s + popcnt(psi_configuration(k,1,i))
+      enddo
+      salpha = (s+MS)/2
+      bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))
+      !bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1))))
+
+      ! perhaps blocking with CFGs of same seniority
+      ! can be more efficient
+      allocate(tempBuffer(bfIcfg,ndetI))
+      tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI)
+
+       call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1,1), size(psi_coef_config,1))
+       !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1)
+
+      deallocate(tempCoeff)
+      deallocate(tempBuffer)
+      psi_config_data(i,1) = countcsf + 1
+      do k=1,bfIcfg
+        psi_csf_to_config_data(countcsf+k) = i
+      enddo
+      countcsf += bfIcfg
+      psi_config_data(i,2) = countcsf
+  enddo
+  !$OMP END MASTER
+  !$OMP END PARALLEL
+  call omp_set_max_active_levels(4)
+
+  END_PROVIDER
+
+  BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (NSOMOMin:NSOMOMax,4,NSOMOMax+1,NSOMOMax+1,2)]
  &BEGIN_PROVIDER [ integer, rowsmax]
  &BEGIN_PROVIDER [ integer, colsmax]
   use cfunctions
@@ -186,14 +339,18 @@ end subroutine get_phase_qp_to_cfg
   cols = -1
   integer*8 MS
   MS = elec_alpha_num-elec_beta_num
-  integer nsomomin
   nsomomin = elec_alpha_num-elec_beta_num
   rowsmax = 0
   colsmax = 0
+  print *,"NSOMOMax = ",NSOMOMax
+  print *,"NSOMOMin = ",NSOMOMin
   !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2))
   ! Type
   ! 1. SOMO -> SOMO
-  do i = 2-iand(nsomomin,1), NSOMOMax, 2
+  !print *,"Doing SOMO->SOMO"
+  AIJpqMatrixDimsList(NSOMOMin,1,1,1,1) = 1
+  AIJpqMatrixDimsList(NSOMOMin,1,1,1,2) = 1
+  do i = NSOMOMin, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      do j = i-2,i-2, 2
         Jsomo = ISHFT(1_8,j)-1
@@ -220,6 +377,7 @@ end subroutine get_phase_qp_to_cfg
                    MS,                       &
                    rows,                     &
                    cols)
+              !print *, "SOMO->SOMO \t",i,j,k,l,">",Isomo,Jsomo,">",rows, cols
               if(rowsmax .LT. rows) then
                  rowsmax = rows
               end if
@@ -227,15 +385,18 @@ end subroutine get_phase_qp_to_cfg
                  colsmax = cols
               end if
               ! i -> j
-              AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,1) = rows
-              AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,2) = cols
+              AIJpqMatrixDimsList(nsomoi,1,k,l,1) = rows
+              AIJpqMatrixDimsList(nsomoi,1,k,l,2) = cols
            end do
         end do
      end do
   end do
   ! Type
   ! 2. DOMO -> VMO
-  do i = 0+iand(nsomomin,1), NSOMOMax, 2
+  !print *,"Doing DOMO->VMO"
+  AIJpqMatrixDimsList(NSOMOMin,2,1,1,1) = 1
+  AIJpqMatrixDimsList(NSOMOMin,2,1,1,2) = 1
+  do i = NSOMOMin, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      tmpsomo = ISHFT(1_8,i+2)-1
      do j = i+2,i+2, 2
@@ -268,6 +429,7 @@ end subroutine get_phase_qp_to_cfg
                    MS,                       &
                    rows,                     &
                    cols)
+              !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols
               if(rowsmax .LT. rows) then
                  rowsmax = rows
               end if
@@ -275,8 +437,8 @@ end subroutine get_phase_qp_to_cfg
                  colsmax = cols
               end if
               ! i -> j
-              AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,1) = rows
-              AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,2) = cols
+              AIJpqMatrixDimsList(nsomoi,2,k,l,1) = rows
+              AIJpqMatrixDimsList(nsomoi,2,k,l,2) = cols
            end do
         end do
      end do
@@ -284,15 +446,17 @@ end subroutine get_phase_qp_to_cfg
   ! Type
   ! 3. SOMO -> VMO
   !print *,"Doing SOMO->VMO"
-  do i = 2-iand(nsomomin,1), NSOMOMax, 2
+  AIJpqMatrixDimsList(NSOMOMin,3,1,1,1) = 1
+  AIJpqMatrixDimsList(NSOMOMin,3,1,1,2) = 1
+  do i = NSOMOMin, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      do j = i,i, 2
         Jsomo = ISHFT(1_8,j)-1
         if(j .GT. NSOMOMax .OR. j .LE. 0) then
            cycle
         end if
-        do k = 1,i
-           do l = 1,i
+        do k = 1,i+1
+           do l = 1,i+1
               if(k .NE. l) then
                  Isomo = ISHFT(1_8,i+1)-1
                  Isomo = IBCLR(Isomo,l-1)
@@ -307,6 +471,7 @@ end subroutine get_phase_qp_to_cfg
                    MS,                       &
                    rows,                     &
                    cols)
+              !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols
               if(rowsmax .LT. rows) then
                  rowsmax = rows
               end if
@@ -314,25 +479,27 @@ end subroutine get_phase_qp_to_cfg
                  colsmax = cols
               end if
               ! i -> j
-              AIJpqMatrixDimsList(i,j,3,k,l,1) = rows
-              AIJpqMatrixDimsList(i,j,3,k,l,2) = cols
+              AIJpqMatrixDimsList(i,3,k,l,1) = rows
+              AIJpqMatrixDimsList(i,3,k,l,2) = cols
            end do
         end do
      end do
   end do
   ! Type
-  ! 4. DOMO -> VMO
+  ! 4. DOMO -> SOMO
   !print *,"Doing DOMO->SOMO"
-  do i = 2-iand(nsomomin,1), NSOMOMax, 2
+  AIJpqMatrixDimsList(NSOMOMin,4,1,1,1) = 1
+  AIJpqMatrixDimsList(NSOMOMin,4,1,1,2) = 1
+  do i = NSOMOMin, NSOMOMax, 2
      do j = i,i, 2
         if(j .GT. NSOMOMax .OR. j .LE. 0) then
            cycle
         end if
-        do k = 1,i
-           do l = 1,i
+        do k = 1,i+1
+           do l = 1,i+1
               if(k .NE. l) then
                  Isomo = ISHFT(1_8,i+1)-1
-                 Isomo = IBCLR(Isomo,k+1-1)
+                 Isomo = IBCLR(Isomo,k-1)
                  Jsomo = ISHFT(1_8,j+1)-1
                  Jsomo = IBCLR(Jsomo,l-1)
               else
@@ -344,6 +511,7 @@ end subroutine get_phase_qp_to_cfg
                    MS,                       &
                    rows,                     &
                    cols)
+              !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols
               if(rowsmax .LT. rows) then
                  rowsmax = rows
               end if
@@ -351,15 +519,16 @@ end subroutine get_phase_qp_to_cfg
                  colsmax = cols
               end if
               ! i -> j
-              AIJpqMatrixDimsList(i,j,4,k,l,1) = rows
-              AIJpqMatrixDimsList(i,j,4,k,l,2) = cols
+              AIJpqMatrixDimsList(i,4,k,l,1) = rows
+              AIJpqMatrixDimsList(i,4,k,l,2) = cols
            end do
         end do
      end do
   end do
+  print *,"Rowsmax=",rowsmax," Colsmax=",colsmax
   END_PROVIDER
 
-  BEGIN_PROVIDER [ real*8, AIJpqContainer, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,NBFMax,NBFMax)]
+  BEGIN_PROVIDER [ real*8, AIJpqContainer, (NBFMax,NBFmax,NSOMOMax+1,NSOMOMax+1,4,NSOMOMin:NSOMOMax)]
   use cfunctions
   implicit none
   BEGIN_DOC
@@ -389,70 +558,79 @@ end subroutine get_phase_qp_to_cfg
   rows = -1
   cols = -1
   integer*8 MS
-  MS = 0
-  touch AIJpqMatrixDimsList
+  MS = elec_alpha_num-elec_beta_num
   real*8,dimension(:,:),allocatable :: meMatrix
   integer maxdim
-  !maxdim = max(rowsmax,colsmax)
-  ! allocate matrix
-  !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2))
+
   ! Type
   ! 1. SOMO -> SOMO
-  do i = 2, NSOMOMax, 2
+  AIJpqContainer = 0.d0
+  AIJpqContainer(1,1,1,1,1,NSOMOMin) = 1.0d0
+  integer :: rows_old, cols_old
+  rows_old = -1
+  cols_old = -1
+  allocate(meMatrix(1,1))
+  do i = NSOMOMin+2, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
-     do j = i-2,i-2, 2
-        if(j .GT. NSOMOMax .OR. j .LT. 0) cycle
-        do k = 1,i
-           do l = 1,i
+     j=i-2
+     if(j .GT. NSOMOMax .OR. j .LT. 0) cycle
+     nsomoi = i
+     do k = 1,i
+        orbp = k
+        do l = 1,i
 
-              ! Define Jsomo
-              if(k .NE. l) then
-                 Jsomo = IBCLR(Isomo, k-1)
-                 Jsomo = IBCLR(Jsomo, l-1)
-                 nsomoi = i
-                 nsomoj = j
-              else
-                 Isomo = ISHFT(1_8,i)-1
-                 Jsomo = ISHFT(1_8,i)-1
-                 nsomoi = i
-                 nsomoj = i
-              endif
+           ! Define Jsomo
+           if(k .NE. l) then
+              Jsomo = IBCLR(Isomo, k-1)
+              Jsomo = IBCLR(Jsomo, l-1)
+              nsomoj = j
+           else
+              Isomo = ISHFT(1_8,i)-1
+              Jsomo = ISHFT(1_8,i)-1
+              nsomoj = i
+           endif
 
-              AIJpqContainer(nsomoi,nsomoj,1,k,l,:,:) = 0.0d0
-              call getApqIJMatrixDims(Isomo,           &
-                   Jsomo, &
-                   MS,                       &
-                   rows,                     &
-                   cols)
+           call getApqIJMatrixDims(Isomo,           &
+                Jsomo, &
+                MS,                       &
+                rows,                     &
+                cols)
 
-              orbp = k
-              orbq = l
-              allocate(meMatrix(rows,cols))
-              meMatrix = 0.0d0
-              ! fill matrix
-              call getApqIJMatrixDriver(Isomo,           &
-                   Jsomo, &
-                   orbp,                     &
-                   orbq,                     &
-                   MS,                       &
-                   NMO,                      &
-                   meMatrix,                 &
-                   rows,                     &
-                   cols)
-              ! i -> j
-             do ri = 1,rows
-                 do ci = 1,cols
-                    AIJpqContainer(nsomoi,nsomoj,1,k,l,ri,ci) = meMatrix(ri, ci)
-                 end do
+           orbq = l
+           if ((rows /= rows_old).or.(cols /= cols_old)) then
+             deallocate(meMatrix)
+             allocate(meMatrix(rows,cols))
+             rows_old = rows
+             cols_old = cols
+           endif
+           meMatrix = 0.0d0
+           ! fill matrix
+           call getApqIJMatrixDriver(Isomo,           &
+                Jsomo, &
+                orbp,                     &
+                orbq,                     &
+                MS,                       &
+                NMO,                      &
+                meMatrix,                 &
+                rows,                     &
+                cols)
+           ! i -> j
+          do ri = 1,rows
+              do ci = 1,cols
+                 AIJpqContainer(ri,ci,k,l,1,nsomoi) = meMatrix(ri, ci)
               end do
-              deallocate(meMatrix)
            end do
         end do
      end do
   end do
+  deallocate(meMatrix)
+
   ! Type
   ! 2. DOMO -> VMO
-  do i = 0, NSOMOMax, 2
+  !print *,"Doing DOMO -> VMO"
+  !AIJpqContainer(NSOMOMin,2,1,1,1,1) = 1.0d0
+  AIJpqContainer(1,1,1,1,2,NSOMOMin) = 1.0d0
+  do i = NSOMOMin, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      tmpsomo = ISHFT(1_8,i+2)-1
      do j = i+2,i+2, 2
@@ -477,7 +655,12 @@ end subroutine get_phase_qp_to_cfg
                  nsomoj = j
               endif
 
-              AIJpqContainer(nsomoi,nsomoj,2,k,l,:,:) = 0.0d0
+              !print *,"k,l=",k,l
+              !call debug_spindet(Jsomo,1)
+              !call debug_spindet(Isomo,1)
+
+              !AIJpqContainer(nsomoi,2,k,l,:,:) = 0.0d0
+              AIJpqContainer(:,:,k,l,2,nsomoi) = 0.0d0
               call getApqIJMatrixDims(Isomo,           &
                    Jsomo, &
                    MS,                       &
@@ -498,10 +681,13 @@ end subroutine get_phase_qp_to_cfg
                    meMatrix,                 &
                    rows,                     &
                    cols)
+             !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax
+             !call printMatrix(meMatrix,rows,cols)
               ! i -> j
              do ri = 1,rows
                  do ci = 1,cols
-                    AIJpqContainer(nsomoi,nsomoj,2,k,l,ri,ci) = meMatrix(ri, ci)
+                    !AIJpqContainer(nsomoi,2,k,l,ri,ci) = meMatrix(ri, ci)
+                    AIJpqContainer(ri,ci,k,l,2,nsomoi) = meMatrix(ri, ci)
                  end do
               end do
               deallocate(meMatrix)
@@ -511,13 +697,16 @@ end subroutine get_phase_qp_to_cfg
   end do
   ! Type
   ! 3. SOMO -> VMO
-  do i = 2, NSOMOMax, 2
+  !print *,"Doing SOMO -> VMO"
+  !AIJpqContainer(NSOMOMin,3,1,1,1,1) = 1.0d0
+  AIJpqContainer(1,1,1:2,1:2,3,NSOMOMin) = 1.0d0
+  do i = NSOMOMin, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      do j = i,i, 2
         Jsomo = ISHFT(1_8,j)-1
         if(j .GT. NSOMOMax .OR. j .LE. 0) cycle
-        do k = 1,i
-           do l = 1,i
+        do k = 1,i+1
+           do l = 1,i+1
               if(k .NE. l) then
                  Isomo = ISHFT(1_8,i+1)-1
                  Isomo = IBCLR(Isomo,l-1)
@@ -528,7 +717,12 @@ end subroutine get_phase_qp_to_cfg
                  Jsomo = ISHFT(1_8,j)-1
               endif
 
-              AIJpqContainer(i,j,3,k,l,:,:) = 0.0d0
+              !print *,"k,l=",k,l
+              !call debug_spindet(Jsomo,1)
+              !call debug_spindet(Isomo,1)
+
+              !AIJpqContainer(i,3,k,l,:,:) = 0.0d0
+              AIJpqContainer(:,:,k,l,3,i) = 0.0d0
               call getApqIJMatrixDims(Isomo,           &
                    Jsomo, &
                    MS,                       &
@@ -549,10 +743,13 @@ end subroutine get_phase_qp_to_cfg
                    meMatrix,                 &
                    rows,                     &
                    cols)
+             !call printMatrix(meMatrix,rows,cols)
+             !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax
               ! i -> j
              do ri = 1,rows
                  do ci = 1,cols
-                    AIJpqContainer(i,j,3,k,l,ri,ci) = meMatrix(ri, ci)
+                    !AIJpqContainer(i,3,k,l,ri,ci) = meMatrix(ri, ci)
+                    AIJpqContainer(ri,ci,k,l,3,i) = meMatrix(ri, ci)
                  end do
               end do
               deallocate(meMatrix)
@@ -562,24 +759,35 @@ end subroutine get_phase_qp_to_cfg
   end do
   ! Type
   ! 4. DOMO -> SOMO
-  do i = 2, NSOMOMax, 2
+  !print *,"Doing DOMO -> SOMO"
+  !AIJpqContainer(NSOMOMin,4,1,1,1,1) = 1.0d0
+  AIJpqContainer(1,1,1,1,4,NSOMOMin) = 1.0d0
+  AIJpqContainer(1,1,2,2,4,NSOMOMin) = 1.0d0
+  AIJpqContainer(1,1,2,1,4,NSOMOMin) =-1.0d0
+  AIJpqContainer(1,1,1,2,4,NSOMOMin) =-1.0d0
+  do i = NSOMOMin+2, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      do j = i,i, 2
         Jsomo = ISHFT(1_8,i)-1
         if(j .GT. NSOMOMax .OR. j .LE. 0) cycle
-        do k = 1,i
-           do l = 1,i
+        do k = 1,i+1
+           do l = 1,i+1
               if(k .NE. l) then
                  Isomo = ISHFT(1_8,i+1)-1
                  Isomo = IBCLR(Isomo,k-1)
                  Jsomo = ISHFT(1_8,j+1)-1
-                 Jsomo = IBCLR(Jsomo,l+1-1)
+                 Jsomo = IBCLR(Jsomo,l-1)
               else
                  Isomo = ISHFT(1_8,i)-1
                  Jsomo = ISHFT(1_8,j)-1
               endif
 
-              AIJpqContainer(i,j,4,k,l,:,:) = 0.0d0
+              !print *,"k,l=",k,l
+              !call debug_spindet(Jsomo,1)
+              !call debug_spindet(Isomo,1)
+
+              !AIJpqContainer(i,4,k,l,:,:) = 0.0d0
+              AIJpqContainer(:,:,k,l,4,i) = 0.0d0
               call getApqIJMatrixDims(Isomo,           &
                    Jsomo, &
                    MS,                       &
@@ -601,10 +809,13 @@ end subroutine get_phase_qp_to_cfg
                    meMatrix,                 &
                    rows,                     &
                    cols)
+             !call printMatrix(meMatrix,rows,cols)
+             !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax
               ! i -> j
              do ri = 1,rows
                  do ci = 1,cols
-                    AIJpqContainer(i,j,4,k,l,ri,ci) = meMatrix(ri, ci)
+                    !AIJpqContainer(i,4,k,l,ri,ci) = meMatrix(ri, ci)
+                    AIJpqContainer(ri,ci,k,l,4,i) = meMatrix(ri, ci)
                  end do
               end do
               deallocate(meMatrix)
@@ -614,93 +825,1215 @@ end subroutine get_phase_qp_to_cfg
   end do
   END_PROVIDER
 
-
-!!!!!!
-
-  BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)]
- &BEGIN_PROVIDER [ real*8, psi_coef_config,  (n_CSF)]
- &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)]
-  use cfunctions
+subroutine calculate_preconditioner_cfg(diag_energies)
+  implicit none
   use bitmasks
+  BEGIN_DOC
+  ! Documentation for calculate_preconditioner
+  !
+  ! Calculates the diagonal energies of
+  ! the configurations in psi_configuration
+  ! returns : diag_energies :
+  END_DOC
+  integer :: i,j,k,kk,l,p,q,noccp,noccq, ii, jj
+  real*8,intent(out) :: diag_energies(n_CSF)
+  integer                            :: nholes
+  integer                            :: nvmos
+  integer                            :: listvmos(mo_num)
+  integer                            :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer                            :: listholes(mo_num)
+  integer                            :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer*8                          :: Idomo
+  integer*8                          :: Isomo
+  integer*8                          :: Jdomo
+  integer*8                          :: Jsomo
+  integer*8                          :: diffSOMO
+  integer*8                          :: diffDOMO
+  integer                            :: NSOMOI
+  integer                            :: NSOMOJ
+  integer                            :: ndiffSOMO
+  integer                            :: ndiffDOMO
+  integer                            :: starti, endi, cnti, cntj, rows,cols
+  integer                            :: extype,pmodel,qmodel
+  integer(bit_kind) :: Icfg(N_INT,2)
+  integer(bit_kind) :: Jcfg(N_INT,2)
+  integer,external  :: getNSOMO
+  real*8, external  :: mo_two_e_integral
+  real*8            :: hpp
+  real*8            :: meCC
+  real*8            :: ecore
+  real*8            :: core_act_contrib
+
+  !PROVIDE h_core_ri
+  PROVIDE core_fock_operator
+  PROVIDE h_act_ri
+  ! initialize energies
+  diag_energies = 0.d0
+  !print *,"Core energy=",core_energy," nucler rep=",nuclear_repulsion, " n_core_orb=",n_core_orb," n_act_orb=",n_act_orb," mo_num=",mo_num
+
+  ! calculate core energy
+  !call get_core_energy(ecore)
+  diag_energies = core_energy - nuclear_repulsion
+
+  ! calculate the core energy
+  !print *,"Core 2energy=",ref_bitmask_energy
+
+  do i=1,N_configuration
+
+     Isomo = psi_configuration(1,1,i)
+     Idomo = psi_configuration(1,2,i)
+     Icfg(1,1) = psi_configuration(1,1,i)
+     Icfg(1,2) = psi_configuration(1,2,i)
+     NSOMOI = getNSOMO(psi_configuration(:,:,i))
+
+     starti = psi_config_data(i,1)
+     endi   = psi_config_data(i,2)
+
+     core_act_contrib = 0.0d0
+
+     ! find out all pq holes possible
+     nholes = 0
+     ! holes in SOMO
+     !do k = 1,mo_num
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+        if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then
+           nholes += 1
+           listholes(nholes) = k
+           holetype(nholes) = 1
+        endif
+     enddo
+     ! holes in DOMO
+     !do k = n_core_orb+1,n_core_orb + n_act_orb
+     !do k = 1+n_core_inact_orb,n_core_orb+n_core_inact_act_orb
+     !do k = 1,mo_num
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+        if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then
+           nholes += 1
+           listholes(nholes) = k
+           holetype(nholes) = 2
+        endif
+     enddo
+
+     ! find vmos
+     listvmos = -1
+     vmotype = -1
+     nvmos = 0
+     !do k = n_core_orb+1,n_core_orb + n_act_orb
+     !do k = 1,mo_num
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+        !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
+        if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then
+           nvmos += 1
+           listvmos(nvmos) = k
+           vmotype(nvmos) = 0
+        else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then
+           nvmos += 1
+           listvmos(nvmos) = k
+           vmotype(nvmos) = 1
+        end if
+     enddo
+     !print *,"I=",i
+     !call debug_spindet(psi_configuration(1,1,i),N_int)
+     !call debug_spindet(psi_configuration(1,2,i),N_int)
+
+     do k=1,nholes
+        p = listholes(k)
+        noccp = holetype(k)
+
+
+        ! core-virtual
+        do l = 1, n_core_orb
+         jj = list_core(l)
+         core_act_contrib += noccp * (2.d0 * mo_two_e_integrals_jj(jj,p) - mo_two_e_integrals_jj_exchange(jj,p))
+        enddo
+
+        ! Calculate one-electron
+        ! and two-electron coulomb terms
+        do l=1,nholes
+           q = listholes(l)
+           noccq = holetype(l)
+           !print *,"--------------- K=",p," L=",q
+
+           ! one-electron term
+           if(p.EQ.q) then
+              hpp = noccq * h_act_ri(p,q)!mo_one_e_integrals(q,q)
+           else
+              hpp = 0.d0
+           endif
+
+
+           do j=starti,endi
+              ! coulomb term
+              ! (pp,qq) = <pq|pq>
+              if(p.EQ.q) then
+                 diag_energies(j) += hpp !+ 0.5d0 * (noccp * noccq * mo_two_e_integral(p,q,p,q))
+                 !print *,"hpp=",hpp,"diga= ",diag_energies(j)
+!             else
+!                diag_energies(j) +=     !  0.5d0 * noccp * noccq * mo_two_e_integral(p,q,p,q)
+!                print *,"diga= ",diag_energies(j)
+              endif
+           enddo
+        enddo
+
+     enddo
+     !print *,"I=",i," core_act=",core_act_contrib
+     do j=starti,endi
+       diag_energies(j) += core_act_contrib
+     end do
+  enddo
+
+end subroutine calculate_preconditioner_cfg
+
+subroutine obtain_connected_I_foralpha_fromfilterdlist(idxI, nconnectedJ, idslistconnectedJ, listconnectedJ, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for obtain_connected_I_foralpha
+  ! This function returns all those selected configurations
+  ! which are connected to the input configuration
+  ! Ialpha by a single excitation.
+  !
+  ! The type of excitations are ordered as follows:
+  ! Type 1 - SOMO -> SOMO
+  ! Type 2 - DOMO -> VMO
+  ! Type 3 - SOMO -> VMO
+  ! Type 4 - DOMO -> SOMO
+  !
+  ! Order of operators
+  ! \alpha> = a^\dag_p a_q |I> = E_pq |I>
+  END_DOC
+  integer          ,intent(in)             :: idxI
+  integer          ,intent(in)             :: nconnectedJ
+  integer(bit_kind),intent(in)             :: listconnectedJ(N_int,2,*)
+  integer(bit_kind),intent(in)             :: Ialpha(N_int,2)
+  integer(bit_kind),intent(out)            :: connectedI(N_int,2,*)
+  integer          ,intent(in)             :: idslistconnectedJ(*)
+  integer          ,intent(out)            :: idxs_connectedI(*)
+  integer,intent(out)                      :: nconnectedI
+  integer,intent(out)                      :: excitationIds(2,*)
+  integer,intent(out)                      :: excitationTypes(*)
+  real*8 ,intent(out)                      :: diagfactors(*)
+  integer*8                                :: Idomo
+  integer*8                                :: Isomo
+  integer*8                                :: Jdomo
+  integer*8                                :: Jsomo
+  integer*8                                :: IJsomo
+  integer*8                                :: diffSOMO
+  integer*8                                :: diffDOMO
+  integer*8                                :: xordiffSOMODOMO
+  integer                                  :: ndiffSOMO
+  integer                                  :: ndiffDOMO
+  integer                                  :: nxordiffSOMODOMO
+  integer :: ii,i,j,k,kk,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes, idxJ
+  integer :: listholes(mo_num)
+  integer :: holetype(mo_num)
+  integer :: end_index
+  integer :: Nsomo_alpha
+  logical :: isOKlistJ
+
+  PROVIDE DetToCSFTransformationMatrix
+
+  isOKlistJ = .False.
+
+  nconnectedI = 0
+  end_index = N_configuration
+
+  ! Since CFGs are sorted wrt to seniority
+  ! we don't have to search the full CFG list
+  Isomo = Ialpha(1,1)
+  Idomo = Ialpha(1,2)
+  Nsomo_alpha = POPCNT(Isomo)
+  end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1)
+  if(end_index .LT. 0) end_index= N_configuration
+  !end_index = N_configuration
+
+
+  p = 0
+  q = 0
+  do i=1,nconnectedJ
+     idxJ = idslistconnectedJ(i)
+     Isomo = Ialpha(1,1)
+     Idomo = Ialpha(1,2)
+     Jsomo = listconnectedJ(1,1,i)
+     Jdomo = listconnectedJ(1,2,i)
+     diffSOMO = IEOR(Isomo,Jsomo)
+     ndiffSOMO = POPCNT(diffSOMO)
+     diffDOMO = IEOR(Idomo,Jdomo)
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+        select case(ndiffDOMO)
+        case (0)
+           ! SOMO -> VMO
+           !print *,"obt SOMO -> VMO"
+           extyp = 3
+           IJsomo = IEOR(Isomo, Jsomo)
+IRP_IF WITHOUT_TRAILZ
+           p = (popcnt(ieor( IAND(Isomo,IJsomo), IAND(Isomo,IJsomo)-1)) -1) + 1
+IRP_ELSE
+           p = TRAILZ(IAND(Isomo,IJsomo)) + 1
+IRP_ENDIF
+           IJsomo = IBCLR(IJsomo,p-1)
+IRP_IF WITHOUT_TRAILZ
+           q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1
+IRP_ELSE
+           q = TRAILZ(IJsomo) + 1
+IRP_ENDIF
+        case (1)
+           ! DOMO -> VMO
+           ! or
+           ! SOMO -> SOMO
+           nsomoJ = POPCNT(Jsomo)
+           nsomoalpha = POPCNT(Isomo)
+           if(nsomoJ .GT. nsomoalpha) then
+              ! DOMO -> VMO
+              !print *,"obt DOMO -> VMO"
+              extyp = 2
+IRP_IF WITHOUT_TRAILZ
+              p = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1
+IRP_ELSE
+              p = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+IRP_ENDIF
+              Isomo = IEOR(Isomo, Jsomo)
+              Isomo = IBCLR(Isomo,p-1)
+IRP_IF WITHOUT_TRAILZ
+              q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
+IRP_ELSE
+              q = TRAILZ(Isomo) + 1
+IRP_ENDIF
+           else
+              ! SOMO -> SOMO
+              !print *,"obt SOMO -> SOMO"
+              extyp = 1
+IRP_IF WITHOUT_TRAILZ
+              q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1
+IRP_ELSE
+              q = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+IRP_ENDIF
+              Isomo = IEOR(Isomo, Jsomo)
+              Isomo = IBCLR(Isomo,q-1)
+IRP_IF WITHOUT_TRAILZ
+              p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
+IRP_ELSE
+              p = TRAILZ(Isomo) + 1
+IRP_ENDIF
+           end if
+        case (2)
+           ! DOMO -> SOMO
+           !print *,"obt DOMO -> SOMO"
+           extyp = 4
+           IJsomo = IEOR(Isomo, Jsomo)
+IRP_IF WITHOUT_TRAILZ
+           p = (popcnt(ieor(IAND(Jsomo,IJsomo) ,IAND(Jsomo,IJsomo) -1))-1) + 1
+IRP_ELSE
+           p = TRAILZ(IAND(Jsomo,IJsomo)) + 1
+IRP_ENDIF
+           IJsomo = IBCLR(IJsomo,p-1)
+IRP_IF WITHOUT_TRAILZ
+           q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1
+IRP_ELSE
+           q = TRAILZ(IJsomo) + 1
+IRP_ENDIF
+        case default
+           print *,"something went wront in get connectedI"
+        end select
+        starti = psi_config_data(idxJ,1)
+        endi   = psi_config_data(idxJ,2)
+        nconnectedI += 1
+        connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i)
+        idxs_connectedI(nconnectedI)=starti
+        excitationIds(1,nconnectedI)=p
+        excitationIds(2,nconnectedI)=q
+        excitationTypes(nconnectedI) = extyp
+        diagfactors(nconnectedI) = 1.0d0
+     else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+        ! find out all pq holes possible
+        nholes = 0
+        ! holes in SOMO
+        Isomo = listconnectedJ(1,1,i)
+        Idomo = listconnectedJ(1,2,i)
+        do ii = 1,mo_num
+           if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 1
+           endif
+        end do
+        ! holes in DOMO
+        do ii = 1,mo_num
+           if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 2
+           endif
+        end do
+
+        do k=1,nholes
+           p = listholes(k)
+           q = p
+           extyp = 1
+           if(holetype(k) .EQ. 1) then
+              starti = psi_config_data(idxJ,1)
+              endi   = psi_config_data(idxJ,2)
+              nconnectedI += 1
+              connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i)
+              idxs_connectedI(nconnectedI)=starti
+              excitationIds(1,nconnectedI)=p
+              excitationIds(2,nconnectedI)=q
+              excitationTypes(nconnectedI) = extyp
+              diagfactors(nconnectedI) = 1.0d0
+           else
+              starti = psi_config_data(idxJ,1)
+              endi   = psi_config_data(idxJ,2)
+              nconnectedI += 1
+              connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i)
+              idxs_connectedI(nconnectedI)=starti
+              excitationIds(1,nconnectedI)=p
+              excitationIds(2,nconnectedI)=q
+              excitationTypes(nconnectedI) = extyp
+              diagfactors(nconnectedI) = 2.0d0
+           endif
+        enddo
+     endif
+  end do
+
+end subroutine obtain_connected_I_foralpha_fromfilterdlist
+
+
+subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel)
   implicit none
   BEGIN_DOC
-  ! Documentation for DetToCSFTransformationMatrix
-  ! Provides the matrix of transformatons for the
-  ! conversion between determinant to CSF basis (in BFs)
+  ! This function converts the orbital ids
+  ! in real space to those used in model space
+  ! in order to identify the matrices required
+  ! for the calculation of MEs.
+  !
+  ! The type of excitations are ordered as follows:
+  ! Type 1 - SOMO -> SOMO
+  ! Type 2 - DOMO -> VMO
+  ! Type 3 - SOMO -> VMO
+  ! Type 4 - DOMO -> SOMO
   END_DOC
-  integer(bit_kind) :: mask(N_int), Ialpha(N_int),Ibeta(N_int)
-  integer   :: rows, cols, i, j, k
-  integer   :: startdet, enddet
-  integer*8 MS, Isomo, Idomo
-  integer ndetI
-  integer :: getNSOMO
-  real*8,dimension(:,:),allocatable    :: tempBuffer
-  real*8,dimension(:),allocatable    :: tempCoeff
-  real*8  :: norm_det1, phasedet
-  norm_det1 = 0.d0
-  MS = elec_alpha_num - elec_beta_num
-  ! initialization
-  psi_coef_config = 0.d0
-  DetToCSFTransformationMatrix(0,:,:) = 1.d0
-  do i = 2-iand(elec_alpha_num-elec_beta_num,1), NSOMOMax,2
-    Isomo = IBSET(0_8, i) - 1_8
-    ! rows = Ncsfs
-    ! cols = Ndets
-    bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1))))
-    ndetI = max(1,nint((binom(i,(i+1)/2))))
+  integer(bit_kind),intent(in)   :: Ialpha(N_int,2)
+  integer(bit_kind),intent(in)   :: Jcfg(N_int,2)
+  integer,intent(in)             :: p,q
+  integer,intent(in)             :: extype
+  integer,intent(out)            :: pmodel,qmodel
+  !integer(bit_kind)              :: Isomo(N_int)
+  !integer(bit_kind)              :: Idomo(N_int)
+  !integer(bit_kind)              :: Jsomo(N_int)
+  !integer(bit_kind)              :: Jdomo(N_int)
+  integer*8                       :: Isomo
+  integer*8                       :: Idomo
+  integer*8                       :: Jsomo
+  integer*8                       :: Jdomo
+  integer*8                      :: mask
+  integer                        :: iint, ipos
+  !integer(bit_kind)              :: Isomotmp(N_int)
+  !integer(bit_kind)              :: Jsomotmp(N_int)
+  integer*8             :: Isomotmp
+  integer*8             :: Jsomotmp
+  integer                        :: pos0,pos0prev
 
-    allocate(tempBuffer(bfIcfg,ndetI))
-    call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer)
-    DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) =  tempBuffer(1:bfIcfg,1:ndetI)
-    deallocate(tempBuffer)
+  ! TODO Flag (print) when model space indices is > 64
+  Isomo = Ialpha(1,1)
+  Idomo = Ialpha(1,2)
+  Jsomo = Jcfg(1,1)
+  Jdomo = Jcfg(1,2)
+  pos0prev = 0
+  pmodel = p
+  qmodel = q
+
+  if(p .EQ. q) then
+     pmodel = 1
+     qmodel = 1
+  else
+     select case(extype)
+       case (1)
+          ! SOMO -> SOMO
+          ! remove all domos
+          !print *,"type -> SOMO -> SOMO"
+          mask = ISHFT(1_8,p) - 1
+          Isomotmp = IAND(Isomo,mask)
+          pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          mask = ISHFT(1_8,q) - 1
+          Isomotmp = IAND(Isomo,mask)
+          qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+       case (2)
+          ! DOMO -> VMO
+          ! remove all domos except one at p
+          !print *,"type -> DOMO -> VMO"
+          mask = ISHFT(1_8,p) - 1
+          Jsomotmp = IAND(Jsomo,mask)
+          pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+          mask = ISHFT(1_8,q) - 1
+          Jsomotmp = IAND(Jsomo,mask)
+          qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+       case (3)
+          ! SOMO -> VMO
+          !print *,"type -> SOMO -> VMO"
+          !Isomo = IEOR(Isomo,Jsomo)
+          if(p.LT.q) then
+             mask = ISHFT(1_8,p) - 1
+             Isomo = IAND(Isomo,mask)
+             pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
+             mask = ISHFT(1_8,q) - 1
+             Jsomo = IAND(Jsomo,mask)
+             qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1
+          else
+             mask = ISHFT(1_8,p) - 1
+             Isomo = IAND(Isomo,mask)
+             pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1
+             mask = ISHFT(1_8,q) - 1
+             Jsomo = IAND(Jsomo,mask)
+             qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
+          endif
+       case (4)
+          ! DOMO -> SOMO
+          ! remove all domos except one at p
+          !print *,"type -> DOMO -> SOMO"
+          !Isomo = IEOR(Isomo,Jsomo)
+          if(p.LT.q) then
+             mask = ISHFT(1_8,p) - 1
+             Jsomo = IAND(Jsomo,mask)
+             pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
+             mask = ISHFT(1_8,q) - 1
+             Isomo = IAND(Isomo,mask)
+             qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1
+          else
+             mask = ISHFT(1_8,p) - 1
+             Jsomo = IAND(Jsomo,mask)
+             pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1
+             mask = ISHFT(1_8,q) - 1
+             Isomo = IAND(Isomo,mask)
+             qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
+          endif
+       case default
+          print *,"something is wrong in convertOrbIdsToModelSpaceIds"
+     end select
+  endif
+  !print *,p,q,"model ids=",pmodel,qmodel
+end subroutine convertOrbIdsToModelSpaceIds
+
+subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep)
+  implicit none
+  use bitmasks
+  use omp_lib
+  BEGIN_DOC
+  ! Documentation for sigma-vector calculation
+  !
+  ! Calculates the result of the
+  ! application of the hamiltonian to the
+  ! wavefunction in CFG basis once
+  ! TODO : Things prepare outside this routine
+  !  1. Touch the providers for
+  !     a. ApqIJ containers
+  !     b. DET to CSF transformation matrices
+  !  2. DET to CSF transcormation
+  !  2. CSF to DET back transcormation
+  ! returns : psi_coef_out_det :
+  END_DOC
+  integer,intent(in)             :: sze, istart,iend, istep, ishift, n_st
+  real*8,intent(in)              :: psi_in(n_st,sze)
+  real*8,intent(out)             :: psi_out(n_st,sze)
+  integer(bit_kind)              :: Icfg(N_INT,2)
+  integer                        :: i,j,k,l,p,q,noccp,noccq, m, n, idxI, nocck,orbk
+  integer                        :: ii,jj,kk,ll,pp,qq
+  integer(bit_kind),dimension(:,:,:),allocatable :: listconnectedJ
+  integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg
+  integer(bit_kind),dimension(:,:,:),allocatable :: singlesI
+  integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha
+  integer,dimension(:),allocatable :: idxs_singlesI
+  integer,dimension(:),allocatable :: idxs_connectedI_alpha
+  integer,dimension(:,:),allocatable :: excitationIds_single
+  integer,dimension(:),allocatable :: excitationTypes_single
+  integer,dimension(:,:),allocatable :: excitationIds
+  integer,dimension(:),allocatable :: excitationTypes
+  integer,dimension(:),allocatable :: idslistconnectedJ
+  real*8,dimension(:),allocatable :: diagfactors
+  integer                        :: nholes
+  integer                        :: nvmos
+  integer                        :: listvmos(mo_num)
+  integer                        :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer                        :: listholes(mo_num)
+  integer                        :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer                        :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI
+  integer                        :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel
+  integer                        :: getNSOMO
+  integer                        :: totcolsTKI
+  integer                        :: rowsTKI
+  integer                        :: noccpp
+  integer                        :: istart_cfg, iend_cfg, num_threads_max
+  integer                        :: nconnectedJ,nconnectedtotalmax,nconnectedmaxJ,maxnalphas,ntotJ
+  integer*8                      :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta
+  integer                        :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk
+  real*8                         :: norm_coef_cfg, fac2eints
+  real*8                         :: norm_coef_det
+  real*8                         :: meCC1, meCC2, diagfac
+  real*8,dimension(:,:,:),allocatable :: TKI
+  real*8,dimension(:,:),allocatable :: GIJpqrs
+  real*8,dimension(:,:,:),allocatable :: TKIGIJ
+  real*8,dimension(:),allocatable :: psi_out_tmp
+  real*8,dimension(:,:),allocatable :: CCmattmp
+  real*8, external               :: mo_two_e_integral
+  real*8, external               :: get_two_e_integral
+  real*8,dimension(:),allocatable:: diag_energies
+  real*8                         :: tmpvar, tmptot
+  real*8                         :: core_act_contrib
+
+  integer(omp_lock_kind), allocatable :: lock(:)
+  call omp_set_max_active_levels(1)
+
+  !print *," sze = ",sze
+  allocate(lock(sze))
+  do i=1,sze
+    call omp_init_lock(lock(i))
+  enddo
+  !do i=1,size(psi_config_data,1)
+  !  print *,"i=",i," psi_cfg_data_1=",psi_config_data(i,1)," psi_cfg_data_2=",psi_config_data(i,2)
+  !end do
+
+  allocate(diag_energies(n_CSF))
+  call calculate_preconditioner_cfg(diag_energies)
+  !print *," diag energy =",diag_energies(1)
+
+  MS = 0
+  norm_coef_cfg=0.d0
+
+  psi_out=0.d0
+
+  istart_cfg = psi_csf_to_config_data(istart)
+  iend_cfg   = psi_csf_to_config_data(iend)
+
+  !nconnectedtotalmax = 1000
+  !nconnectedmaxJ = 1000
+  maxnalphas = elec_num*mo_num
+  Icfg(1,1) = psi_configuration(1,1,1)
+  Icfg(1,2) = psi_configuration(1,2,1)
+  allocate(listconnectedJ(N_INT,2,max(sze,10000)))
+  allocate(idslistconnectedJ(max(sze,10000)))
+  call obtain_connected_J_givenI(1, Icfg, listconnectedJ, idslistconnectedJ, nconnectedmaxJ, nconnectedtotalmax)
+  deallocate(listconnectedJ)
+  deallocate(idslistconnectedJ)
+
+  integer*8, allocatable :: bit_tmp(:)
+  integer*8, external :: configuration_search_key
+  double precision :: diagfactors_0
+  allocate( bit_tmp(0:N_configuration+1))
+  do j=1,N_configuration
+    bit_tmp(j) = configuration_search_key(psi_configuration(1,1,j),N_int)
   enddo
 
-  integer s, bfIcfg
-  integer countcsf
-  countcsf = 0
-  integer countdet
-  countdet = 0
-  integer idx
-  integer istate
-  istate = 1
-  phasedet = 1.0d0
-  do i = 1,N_configuration
-      startdet = psi_configuration_to_psi_det(1,i)
-      enddet = psi_configuration_to_psi_det(2,i)
-      ndetI = enddet-startdet+1
+  call omp_set_max_active_levels(1)
+  !$OMP PARALLEL                                              & 
+      !$OMP DEFAULT(NONE)                               &
+      !$OMP private(i,icfg, isomo, idomo, NSOMOI, NSOMOJ, nholes, k, listholes,&
+      !$OMP    holetype, vmotype, nvmos, listvmos, starti, endi,     &
+      !$OMP    nsinglesI, singlesI,idxs_singlesI,excitationIds_single,&
+      !$OMP    excitationTypes_single, idxI, p, q, extype, pmodel, qmodel,&
+      !$OMP    Jsomo, Jdomo, startj, endj, kk, jj, ii, cnti, cntj, meCC1,&
+      !$OMP   nconnectedJ,listconnectedJ,idslistconnectedJ,ntotJ,       &
+      !$OMP   Nalphas_Icfg,alphas_Icfg,connectedI_alpha,             &
+      !$OMP   idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors,&
+      !$OMP   totcolsTKI,rowsTKI,NSOMOalpha,rowsikpq,                &
+      !$OMP   colsikpq, GIJpqrs,TKIGIJ,j,l,m,TKI,CCmattmp, moi, moj, mok, mol,&
+      !$OMP   diagfac, tmpvar, diagfactors_0)                                            &
+      !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,&
+      !$OMP    N_int, N_st, psi_out, psi_in, h_core_ri, core_energy, h_act_ri, AIJpqContainer,&
+      !$OMP     pp, sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp,       &
+      !$OMP     AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,&
+      !$OMP     n_core_orb, n_act_orb, list_act, n, list_core,  list_core_is_built,core_act_contrib, num_threads_max,&
+      !$OMP     n_core_orb_is_built, mo_integrals_map, mo_integrals_map_is_built)
 
-      allocate(tempCoeff(ndetI))
-      countdet = 1
-      do j = startdet, enddet
-         idx = psi_configuration_to_psi_det_data(j)
-         Ialpha(:) = psi_det(:,1,idx)
-         Ibeta(:)  = psi_det(:,2,idx)
-         call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet)
-         tempCoeff(countdet) = psi_coef(idx, istate)*phasedet
-         norm_det1 += tempCoeff(countdet)*tempCoeff(countdet)
-         countdet += 1
+  allocate(singlesI(N_INT,2,max(sze,10000)))
+  allocate(idxs_singlesI(max(sze,10000)))
+  allocate(excitationIds_single(2,max(sze,10000)))
+  allocate(excitationTypes_single(max(sze,10000)))
+!
+
+  !!!====================!!!
+  !!! Single Excitations !!!
+  !!!====================!!!
+
+  !$OMP DO SCHEDULE(dynamic,16)
+  do i=istart_cfg,iend_cfg
+
+    ! if Seniority_range > 8 then
+    ! continue
+    ! else
+    ! cycle
+
+     Icfg(1,1) = psi_configuration(1,1,i)
+     Icfg(1,2) = psi_configuration(1,2,i)
+     Isomo = Icfg(1,1)
+     Idomo = Icfg(1,2)
+     NSOMOI = getNSOMO(Icfg)
+
+     ! find out all pq holes possible
+     nholes = 0
+     ! holes in SOMO
+     ! list_act
+     ! list_core
+     ! list_core_inact
+     ! bitmasks
+     !do k = 1,mo_num
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+       if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then
+         nholes += 1
+         listholes(nholes) = k
+         holetype(nholes) = 1
+       endif
+     enddo
+     ! holes in DOMO
+     !do k = 1,mo_num
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+       if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then
+         nholes += 1
+         listholes(nholes) = k
+         holetype(nholes) = 2
+       endif
+     enddo
+
+     ! find vmos
+     listvmos = -1
+     vmotype = -1
+     nvmos = 0
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+       !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
+       if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then
+         nvmos += 1
+         listvmos(nvmos) = k
+         vmotype(nvmos) = 0
+       else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then
+         nvmos += 1
+         listvmos(nvmos) = k
+         vmotype(nvmos) = 1
+       end if
+     enddo
+
+
+     ! Icsf ids
+     starti = psi_config_data(i,1)
+     endi   = psi_config_data(i,2)
+     NSOMOI = getNSOMO(Icfg)
+
+     call generate_all_singles_cfg_with_type(bit_tmp,Icfg,singlesI,idxs_singlesI,excitationIds_single,&
+         excitationTypes_single,nsinglesI,N_int)
+
+     do j = 1,nsinglesI
+       idxI = idxs_singlesI(j)
+       NSOMOJ = getNSOMO(singlesI(1,1,j))
+      p = excitationIds_single(1,j)
+      q = excitationIds_single(2,j)
+      extype = excitationTypes_single(j)
+      ! Off diagonal terms
+      call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel)
+      Jsomo = singlesI(1,1,j)
+      Jdomo = singlesI(1,2,j)
+
+      ! Add the hole on J
+      if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1  .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+        nholes += 1
+        listholes(nholes) = q
+        holetype(nholes) = 1
+      endif
+      if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+        nholes += 1
+        listholes(nholes) = q
+        holetype(nholes) = 2
+      endif
+
+      startj = psi_config_data(idxI,1)
+      endj   = psi_config_data(idxI,2)
+      !print *,"i=",i," idxI=",idxI," startj=",startj," endj=",endj," sze=",sze
+
+      !!! One-electron contribution !!!
+      do ii = starti, endi
+        cnti = ii-starti+1
+        do jj = startj, endj
+          cntj = jj-startj+1
+          !meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q)
+          core_act_contrib = 0.0d0
+          if(p.ne.q)then
+            do pp=1,n_core_orb
+              n=list_core(pp)
+              core_act_contrib += 2.d0 * get_two_e_integral(p,n,q,n,mo_integrals_map) - get_two_e_integral(p,n,n,q,mo_integrals_map)
+            end do
+          endif
+          meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* (h_act_ri(p,q) + core_act_contrib)
+          !if(jj.eq.1.and.ii.eq.1)then
+          !  print *,"CC=",AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI), " p=",p," q=",q
+          !endif
+          call omp_set_lock(lock(jj))
+          do kk = 1,n_st
+            psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii)
+          enddo
+          call omp_unset_lock(lock(jj))
+        enddo
       enddo
 
-      s = 0
-      do k=1,N_int
-        if (psi_configuration(k,1,i) == 0_bit_kind) cycle
-        s = s + popcnt(psi_configuration(k,1,i))
-      enddo
-      bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1))))
-
-      ! perhaps blocking with CFGs of same seniority
-      ! can be more efficient
-      allocate(tempBuffer(bfIcfg,ndetI))
-      tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI)
-
-       call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1), size(psi_coef_config,1))
-       !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1)
-
-      deallocate(tempCoeff)
-      deallocate(tempBuffer)
-      psi_config_data(i,1) = countcsf + 1
-      countcsf += bfIcfg
-      psi_config_data(i,2) = countcsf
+      ! Undo setting in listholes
+      if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1  .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+        nholes -= 1
+      endif
+      if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+        nholes -= 1
+      endif
+    enddo
   enddo
+  !$OMP END DO
+  deallocate(singlesI)
+  deallocate(idxs_singlesI)
+  deallocate(excitationIds_single)
+  deallocate(excitationTypes_single)
 
-  END_PROVIDER
+  !print *," singles part psi(1,1)=",psi_out(1,1)
+  
+  allocate(listconnectedJ(N_INT,2,max(sze,10000)))
+  allocate(alphas_Icfg(N_INT,2,max(sze,10000)))
+  allocate(connectedI_alpha(N_INT,2,max(sze,10000)))
+  allocate(idxs_connectedI_alpha(max(sze,10000)))
+  allocate(excitationIds(2,max(sze,10000)))
+  allocate(excitationTypes(max(sze,10000)))
+  allocate(diagfactors(max(sze,10000)))
+  allocate(idslistconnectedJ(max(sze,10000)))
+  allocate(CCmattmp(n_st,NBFmax))
+
+  !!!====================!!!
+  !!! Double Excitations !!!
+  !!!====================!!!
+
+  ! Loop over all selected configurations
+  !$OMP DO SCHEDULE(static)
+  do i = istart_cfg,iend_cfg
+
+     ! if Seniority_range > 8 then
+     ! continue
+     ! else
+     ! cycle
+
+     Icfg(1,1) = psi_configuration(1,1,i)
+     Icfg(1,2) = psi_configuration(1,2,i)
+     starti = psi_config_data(i,1)
+     endi   = psi_config_data(i,2)
+
+     ! Returns all unique (checking the past) singly excited cfgs connected to I
+     Nalphas_Icfg = 0
+     ! TODO:
+     ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate
+
+     Nalphas_Icfg = NalphaIcfg_list(i)
+     alphas_Icfg(1:n_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg)
+     if(Nalphas_Icfg .GT. maxnalphas) then
+       print *,"Nalpha > maxnalpha"
+     endif
+
+     call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ)
+
+     ! TODO : remove doubly excited for return
+     !print *,"I=",i," isomo=",psi_configuration(1,1,i)," idomo=",psi_configuration(1,2,i), " psiout=",psi_out(1,5)
+     do k = 1,Nalphas_Icfg
+        ! Now generate all singly excited with respect to a given alpha CFG
+
+        !call obtain_connected_I_foralpha_fromfilterdlist(i,nconnectedJ, idslistconnectedJ, &
+        !  listconnectedJ, alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI, &
+        !  excitationIds,excitationTypes,diagfactors)
+
+        call obtain_connected_I_foralpha(i, alphas_Icfg(1,1,k), connectedI_alpha, idxs_connectedI_alpha, &
+                                         nconnectedI, excitationIds, excitationTypes, diagfactors)
+
+        
+        if(nconnectedI .EQ. 0) then
+           cycle
+        endif
+
+        !if(i .EQ. 1) then
+        !   print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k))
+        !endif
+
+        ! Here we do 2x the loop. One to count for the size of the matrix, then we compute.
+        totcolsTKI = 0
+        rowsTKI = -1
+        NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k))
+        do j = 1,nconnectedI
+           NSOMOI = getNSOMO(connectedI_alpha(:,:,j))
+           p = excitationIds(1,j)
+           q = excitationIds(2,j)
+           extype = excitationTypes(j)
+           call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
+           ! for E_pp E_rs and E_ppE_rr case
+           rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+           colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+           !print *,"j=",j," Nsomo=",NSOMOalpha," rowsikpq=",rowsikpq," colsikpq=",colsikpq, " p=",pmodel," q=",qmodel, " extyp=",extype
+           totcolsTKI += colsikpq
+           rowsTKI = rowsikpq
+        enddo
+
+        allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF
+        ! Initialize the integral container
+        ! dims : (totcolsTKI, nconnectedI)
+        allocate(GIJpqrs(totcolsTKI,nconnectedI))  ! gpqrs
+        allocate(TKIGIJ(n_st,rowsTKI,nconnectedI))  ! TKI * gpqrs
+        !print *,"\t---rowsTKI=",rowsTKI," totCols=",totcolsTKI
+        TKI = 0.d0
+        GIJpqrs = 0.d0
+        TKIGIJ = 0.d0
+
+        totcolsTKI = 0
+        do j = 1,nconnectedI
+           NSOMOI = getNSOMO(connectedI_alpha(:,:,j))
+           p = excitationIds(1,j)
+           q = excitationIds(2,j)
+           extype = excitationTypes(j)
+           call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel)
+           rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+           colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+           rowsTKI = rowsikpq
+           !if(i.eq.1) then
+           !  print *,rowsTKI,colsikpq," | ",pmodel,qmodel,extype,NSOMOalpha
+           !endif
+           do m = 1,colsikpq
+              do l = 1,rowsTKI
+              do kk = 1,n_st
+                 TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) &
+                    * psi_in(kk,idxs_connectedI_alpha(j)+m-1)
+              enddo
+           !if(i.eq.1) then
+           !      print *,AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha)
+           !endif
+           enddo
+           enddo
+
+           diagfactors_0 = diagfactors(j)*0.5d0
+           moi = excitationIds(1,j) ! p
+           mok = excitationIds(2,j) ! q
+           do l=1,nconnectedI
+            moj = excitationIds(2,l) ! s
+            mol = excitationIds(1,l) ! r
+            diagfac =  diagfactors_0 * diagfactors(l)* mo_two_e_integral(mok,mol,moi,moj)! g(pq,sr) = <ps,qr>
+            !print *,"p=",mok,"q=",mol,"r=",moi,"s=",moj
+            do m = 1,colsikpq
+               ! <ij|kl> = (ik|jl)
+               GIJpqrs(totcolsTKI+m,l) = diagfac
+            enddo
+           enddo
+           totcolsTKI += colsikpq
+        enddo
+
+
+        ! Do big BLAS
+        call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0,  &
+             TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0, &
+             TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) )
+
+
+        ! Collect the result
+        totcolsTKI = 0
+        do j = 1,nconnectedI
+           NSOMOI     = getNSOMO(connectedI_alpha(1,1,j))
+           p = excitationIds(1,j)
+           q = excitationIds(2,j)
+           extype = excitationTypes(j)
+           call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
+           rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+           colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+           rowsTKI = rowsikpq
+           CCmattmp = 0.d0
+
+           call dgemm('N','N', n_st, colsikpq, rowsTKI, 1.d0,        &
+               TKIGIJ(1,1,j), size(TKIGIJ,1),                        &
+               AIJpqContainer(1,1,pmodel,qmodel,extype,NSOMOalpha),  &
+               size(AIJpqContainer,1), 0.d0,                         &
+               CCmattmp, size(CCmattmp,1) )
+
+           do m = 1,colsikpq
+              call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1))
+              do kk = 1,n_st
+                 psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m)
+              enddo
+              call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1))
+           enddo
+           totcolsTKI += colsikpq
+        enddo
+
+        deallocate(TKI) ! coefficients of CSF
+        deallocate(GIJpqrs)  ! gpqrs
+        deallocate(TKIGIJ)  ! gpqrs
+
+     enddo ! loop over alphas
+  enddo ! loop over I
+  !$OMP END DO
+  call omp_set_max_active_levels(4)
+  deallocate(CCmattmp)
+  deallocate(connectedI_alpha)
+  deallocate(idxs_connectedI_alpha)
+  deallocate(excitationIds)
+  deallocate(excitationTypes)
+  deallocate(diagfactors)
+
+  !print *," psi(1,823)=",psi_out(1,823), " g(1 8, 3 15)=",mo_two_e_integral(1,8,3,15), " ncore=",n_core_orb
+  !print *," psi(1,1)=",psi_out(1,1)
+
+  ! Add the diagonal contribution
+  !$OMP DO
+  do i = 1,n_CSF
+    do kk=1,n_st
+     psi_out(kk,i) += diag_energies(i)*psi_in(kk,i)
+    enddo
+  enddo
+  !$OMP END DO
+
+  !$OMP END PARALLEL
+  call omp_set_max_active_levels(4)
+
+  deallocate(diag_energies)
+  deallocate(bit_tmp)
+
+end subroutine calculate_sigma_vector_cfg_nst_naive_store
+
+
+
+
+subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for sigma-vector calculation
+  !
+  ! Calculates the result of the
+  ! application of the hamiltonian to the
+  ! wavefunction in CFG basis once
+  ! TODO : Things prepare outside this routine
+  !  1. Touch the providers for
+  !     a. ApqIJ containers
+  !     b. DET to CSF transformation matrices
+  !  2. DET to CSF transcormation
+  !  2. CSF to DET back transcormation
+  ! returns : psi_coef_out_det :
+  END_DOC
+  integer,intent(in)             :: sze, istart,iend, istep, ishift, n_st
+  real*8,intent(in)              :: psi_in(sze,n_st)
+  real*8,intent(out)             :: psi_out(sze,n_st)
+  integer(bit_kind)              :: Icfg(N_INT,2)
+  integer                        :: i,j,k,l,p,q,noccp,noccq, ii, jj, m, n, idxI, kk, nocck,orbk
+  integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg
+  integer(bit_kind),dimension(:,:,:),allocatable :: singlesI
+  integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha
+  integer,dimension(:),allocatable :: idxs_singlesI
+  integer,dimension(:),allocatable :: idxs_connectedI_alpha
+  integer,dimension(:,:),allocatable :: excitationIds_single
+  integer,dimension(:),allocatable :: excitationTypes_single
+  integer,dimension(:,:),allocatable :: excitationIds
+  integer,dimension(:),allocatable :: excitationTypes
+  real*8,dimension(:),allocatable :: diagfactors
+  integer                        :: nholes
+  integer                        :: nvmos
+  integer                        :: listvmos(mo_num)
+  integer                        :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer                        :: listholes(mo_num)
+  integer                        :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer                        :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI
+  integer                        :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel
+  integer                        :: getNSOMO
+  integer                        :: totcolsTKI
+  integer                        :: rowsTKI
+  integer                        :: noccpp
+  integer                        :: istart_cfg, iend_cfg
+  integer*8                      :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta
+  integer                        :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk
+  real*8                         :: norm_coef_cfg, fac2eints
+  real*8                         :: norm_coef_det
+  real*8                         :: meCC1, meCC2, diagfac
+  real*8,dimension(:,:,:),allocatable :: TKI
+  real*8,dimension(:,:),allocatable :: GIJpqrs
+  real*8,dimension(:,:,:),allocatable :: TKIGIJ
+  real*8, external               :: mo_two_e_integral
+  real*8, external               :: get_two_e_integral
+  real*8                         :: diag_energies(n_CSF)
+
+  ! allocate
+  allocate(alphas_Icfg(N_INT,2,max(sze/2,100)))
+  allocate(singlesI(N_INT,2,max(sze/2,100)))
+  allocate(connectedI_alpha(N_INT,2,max(sze/2,100)))
+  allocate(idxs_singlesI(max(sze/2,100)))
+  allocate(idxs_connectedI_alpha(max(sze/2,100)))
+  allocate(excitationIds_single(2,max(sze/2,100)))
+  allocate(excitationTypes_single(max(sze/2,100)))
+  allocate(excitationIds(2,max(sze/2,100)))
+  allocate(excitationTypes(max(sze/2,100)))
+  allocate(diagfactors(max(sze/2,100)))
+
+
+  !print *," sze = ",sze
+  call calculate_preconditioner_cfg(diag_energies)
+
+  MS = 0
+  norm_coef_cfg=0.d0
+
+  psi_out=0.d0
+
+  istart_cfg = psi_csf_to_config_data(istart)
+  iend_cfg   = psi_csf_to_config_data(iend)
+
+
+  !!! Single Excitations !!!
+  do i=istart_cfg,iend_cfg
+    print *,"I=",i
+
+    ! if Seniority_range > 8 then
+    ! continue
+    ! else
+    ! cycle
+
+    Icfg(1,1) = psi_configuration(1,1,i)
+    Icfg(1,2) = psi_configuration(1,2,i)
+    starti = psi_config_data(i,1)
+    endi   = psi_config_data(i,2)
+
+    ! Returns all unique (checking the past) singly excited cfgs connected to I
+    Nalphas_Icfg = 0
+    ! TODO:
+    ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate
+    !call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg)
+    Nalphas_Icfg = NalphaIcfg_list(i)
+    alphas_Icfg(1:N_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg)
+
+    ! TODO : remove doubly excited for return
+    ! Here we do 2x the loop. One to count for the size of the matrix, then we compute.
+    do k = 1,Nalphas_Icfg
+      ! Now generate all singly excited with respect to a given alpha CFG
+      call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors)
+
+      totcolsTKI = 0
+      rowsTKI = -1
+      do j = 1,nconnectedI
+        NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k))
+        NSOMOI = getNSOMO(connectedI_alpha(1,1,j))
+        p = excitationIds(1,j)
+        q = excitationIds(2,j)
+        extype = excitationTypes(j)
+        call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
+        ! for E_pp E_rs and E_ppE_rr case
+        if(p.EQ.q) then
+          NSOMOalpha = NSOMOI
+        endif
+        rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+        colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+        totcolsTKI += colsikpq
+!        if(rowsTKI .LT. rowsikpq .AND. rowsTKI .NE. -1) then
+!          print *,">",j,"Something is wrong in sigma-vector", rowsTKI, rowsikpq, "(p,q)=",pmodel,qmodel,"ex=",extype,"na=",NSOMOalpha," nI=",NSOMOI
+!          !rowsTKI = rowsikpq
+!        else
+          rowsTKI = rowsikpq
+!        endif
+      enddo
+
+      allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF
+      ! Initialize the inegral container
+      ! dims : (totcolsTKI, nconnectedI)
+      allocate(GIJpqrs(totcolsTKI,nconnectedI))  ! gpqrs
+      allocate(TKIGIJ(n_st,rowsTKI,nconnectedI))  ! TKI * gpqrs
+
+      totcolsTKI = 0
+      do j = 1,nconnectedI
+        NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k))
+        NSOMOI = getNSOMO(connectedI_alpha(1,1,j))
+        p = excitationIds(1,j)
+        q = excitationIds(2,j)
+        extype = excitationTypes(j)
+        call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
+        rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+        colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+        do m = 1,colsikpq
+          do l = 1,rowsTKI
+            do kk = 1,n_st
+              TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1)
+            enddo
+          enddo
+        enddo
+        do m = 1,colsikpq
+          do l = 1,nconnectedI
+            ! <ij|kl> = (ik|jl)
+            moi = excitationIds(1,j) ! p
+            mok = excitationIds(2,j) ! q
+            moj = excitationIds(2,l) ! s
+            mol = excitationIds(1,l) ! r
+            if(moi.EQ.mok .AND. moj.EQ.mol)then
+              diagfac = diagfactors(j)
+              diagfac *= diagfactors(l)
+              !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac
+              GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = <ps,qr>
+            else
+              diagfac = diagfactors(j)*diagfactors(l)
+              !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac
+              GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = <ps,qr>
+              !endif
+            endif
+          enddo
+        enddo
+        totcolsTKI += colsikpq
+      enddo
+
+
+
+      ! Do big BLAS
+      ! TODO TKI, size(TKI,1)*size(TKI,2)
+      call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0,&
+          TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0,&
+          TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) )
+
+
+      ! Collect the result
+      totcolsTKI = 0
+      do j = 1,nconnectedI
+        NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k))
+        NSOMOI     = getNSOMO(connectedI_alpha(1,1,j))
+        p = excitationIds(1,j)
+        q = excitationIds(2,j)
+        extype = excitationTypes(j)
+        call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel)
+        rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+        colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+        do m = 1,colsikpq
+          do l = 1,rowsTKI
+            do kk = 1,n_st
+              psi_out(kk,idxs_connectedI_alpha(j)+m-1) = psi_out(kk,idxs_connectedI_alpha(j)+m-1) + &
+                AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * TKIGIJ(kk,l,j)
+            enddo
+          enddo
+        enddo
+        totcolsTKI += colsikpq
+      enddo
+
+      deallocate(TKI) ! coefficients of CSF
+      ! Initialize the inegral container
+      ! dims : (totcolsTKI, nconnectedI)
+      deallocate(GIJpqrs)  ! gpqrs
+      deallocate(TKIGIJ)  ! gpqrs
+
+    enddo ! loop over alphas
+  enddo ! loop over I
+  deallocate(connectedI_alpha)
+  deallocate(idxs_connectedI_alpha)
+  deallocate(excitationIds)
+  deallocate(excitationTypes)
+  deallocate(diagfactors)
+
+
+  ! Add the diagonal contribution
+  do i = 1,n_CSF
+    do kk=1,n_st
+      psi_out(kk,i) += diag_energies(i)*psi_in(kk,i)
+    enddo
+  enddo
+  call omp_set_max_active_levels(4)
+
+end subroutine calculate_sigma_vector_cfg_nst_naive_store
diff --git a/src/csf/tree_utils.c b/src/csf/tree_utils.c
index 1266b890..91bae8b2 100644
--- a/src/csf/tree_utils.c
+++ b/src/csf/tree_utils.c
@@ -1,3 +1,4 @@
+#include <assert.h>
 #include "tree_utils.h"
 
 void buildTree(Tree *bftree,
@@ -52,6 +53,7 @@ void buildTreeDriver(Tree *bftree, int NSOMO, int MS, int *NBF){
     int icpl  = 0; // keep track of the ith ms (cannot be -ve)
     int addr  = 0; // Counts the total BF's
 
+    assert(bftree->rootNode->addr == 0);
     buildTree(bftree, &(bftree->rootNode), isomo, izeros, icpl, NSOMO, MS);
 
     *NBF = bftree->rootNode->addr;
@@ -264,6 +266,8 @@ void genDetBasis(Tree *dettree, int Isomo, int MS, int *ndets){
     int NSOMO=0;
     getSetBits(Isomo, &NSOMO);
     genDetsDriver(dettree, NSOMO, MS, ndets);
+    // Closed shell case
+    if(NSOMO==0) (*ndets) = 1;
 
 }
 
@@ -311,3 +315,13 @@ void callBlasMatxMat(double *A, int rowA, int colA, double *B, int rowB, int col
             break;
     }
 }
+
+void printRealMatrix(double *orthoMatrix, int rows, int cols){
+  int i,j;
+  for(i=0;i<rows;++i){
+    for(j=0;j<cols;++j){
+      printf(" %3.5f ",orthoMatrix[i*cols + j]);
+    }
+    printf("\n");
+  }
+}
diff --git a/src/csf/tree_utils.h b/src/csf/tree_utils.h
index 5a788149..42fce59c 100644
--- a/src/csf/tree_utils.h
+++ b/src/csf/tree_utils.h
@@ -47,6 +47,7 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM
 void getSetBits(int64_t n, int *nsetbits);
 void getOverlapMatrix(int64_t Isomo, int64_t MS, double **overlapMatrixptr, int *rows, int *cols, int *NSOMOout);
 void getOverlapMatrix_withDet(double *bftodetmatrixI, int rowsbftodetI, int colsbftodetI, int64_t Isomo, int64_t MS, double **overlapMatrixI, int *rowsI, int *colsI, int *NSOMO);
+void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix);
 void gramSchmidt(double *overlapMatrix, int rows, int cols, double *orthoMatrix);
 
 
diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f
new file mode 100644
index 00000000..659602a1
--- /dev/null
+++ b/src/davidson/diagonalization_hcfg.irp.f
@@ -0,0 +1,624 @@
+subroutine davidson_diag_h_cfg(dets_in,u_in,dim_in,energies,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Davidson diagonalization.
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten
+  !   on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  END_DOC
+  integer, intent(in)            :: dim_in, sze, sze_csf, N_st, N_st_diag, Nint
+  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
+  double precision, intent(inout) :: u_in(dim_in,N_st_diag)
+  double precision, intent(out)  :: energies(N_st_diag)
+  integer, intent(in)            :: dressing_state
+  logical, intent(out)           :: converged
+  double precision, allocatable  :: H_jj(:)
+
+  double precision, external     :: diag_H_mat_elem, diag_S_mat_elem
+  integer                        :: i,k
+  ASSERT (N_st > 0)
+  ASSERT (sze > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  PROVIDE mo_two_e_integrals_in_map
+  allocate(H_jj(sze))
+
+  H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint)
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP  SHARED(sze,H_jj, dets_in,Nint)                    &
+      !$OMP  PRIVATE(i)
+  !$OMP DO SCHEDULE(static)
+  do i=2,sze
+    H_jj(i)  = diag_H_mat_elem(dets_in(1,1,i),Nint)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  if (dressing_state > 0) then
+    do k=1,N_st
+      do i=1,sze
+        H_jj(i)  += u_in(i,k) * dressing_column_h(i,k)
+      enddo
+    enddo
+  endif
+
+  call davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged)
+  deallocate(H_jj)
+end
+
+
+subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag_in,Nint,dressing_state,converged)
+  use bitmasks
+  use mmap_module
+  implicit none
+  BEGIN_DOC
+  ! Davidson diagonalization with specific diagonal elements of the H matrix
+  !
+  ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten
+  !   on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze
+  !
+  END_DOC
+  integer, intent(in)            :: dim_in, sze, sze_csf, N_st, N_st_diag_in, Nint
+  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
+  double precision,  intent(in)  :: H_jj(sze)
+  integer, intent(in)            :: dressing_state
+  double precision, intent(inout) :: u_in(dim_in,N_st_diag_in)
+  double precision, intent(out)  :: energies(N_st_diag_in)
+
+  integer                        :: iter, N_st_diag
+  integer                        :: i,j,k,l,m,kk,ii,ll
+  logical, intent(inout)         :: converged
+
+  double precision, external     :: u_dot_v, u_dot_u
+
+  integer                        :: k_pairs, kl
+
+  integer                        :: iter2, itertot
+  double precision, allocatable  :: y(:,:), h(:,:), lambda(:)
+  double precision, allocatable  :: s_tmp(:,:)
+  double precision               :: diag_h_mat_elem
+  double precision, allocatable  :: residual_norm(:)
+  character*(16384)              :: write_buffer
+  double precision               :: to_print(2,N_st)
+  double precision               :: cpu, wall
+  integer                        :: shift, shift2, itermax, istate
+  double precision               :: r1, r2, alpha
+  logical                        :: state_ok(N_st_diag_in*davidson_sze_max)
+  integer                        :: nproc_target
+  integer                        :: order(N_st_diag_in)
+  double precision               :: cmax
+  double precision, allocatable  :: U(:,:), U_csf(:,:), overlap(:,:)
+  double precision, allocatable  :: tmpU(:,:), tmpW(:,:)
+  double precision, pointer      :: W(:,:), W_csf(:,:)
+  logical                        :: disk_based
+  double precision               :: energy_shift(N_st_diag_in*davidson_sze_max)
+
+  include 'constants.include.F'
+
+  N_st_diag = N_st_diag_in
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda
+  if (N_st_diag*3 > sze) then
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_full to ', N_st_diag*3
+    stop -1
+  endif
+
+  itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1
+  itertot = 0
+
+  if (state_following) then
+    allocate(overlap(N_st_diag*itermax, N_st_diag*itermax))
+  else
+    allocate(overlap(1,1))  ! avoid 'if' for deallocate
+  endif
+  overlap = 0.d0
+
+  PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2
+
+  call write_time(6)
+  write(6,'(A)') ''
+  write(6,'(A)') 'Davidson Diagonalization'
+  write(6,'(A)') '------------------------'
+  write(6,'(A)') ''
+
+  ! Find max number of cores to fit in memory
+  ! -----------------------------------------
+
+  nproc_target = nproc
+  double precision :: rss
+  integer :: maxab
+  maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
+
+  m=1
+  disk_based = .False.
+  call resident_memory(rss)
+  do
+    r1 = 8.d0 *                                   &! bytes
+         ( dble(sze)*(N_st_diag)                  &! U
+         + dble(sze_csf)*(N_st_diag*itermax)      &! U_csf
+         + dble(sze)*(N_st_diag)                  &! W
+         + dble(sze_csf)*(N_st_diag*itermax)      &! W_csf
+         + 3.0d0*(N_st_diag*itermax)**2           &! h,y,s_tmp
+         + 1.d0*(N_st_diag*itermax)               &! lambda
+         + 1.d0*(N_st_diag)                       &! residual_norm
+                                                   ! In H_u_0_nstates_zmq
+         + 2.d0*(N_st_diag*N_det)                 &! u_t, v_t, on collector
+         + 2.d0*(N_st_diag*N_det)                 &! u_t, v_t, on slave
+         + 0.5d0*maxab                            &! idx0 in H_u_0_nstates_openmp_work_*
+         + nproc_target *                         &! In OMP section
+           ( 1.d0*(N_int*maxab)                   &! buffer
+           + 3.5d0*(maxab) )                      &! singles_a, singles_b, doubles, idx
+         ) / 1024.d0**3
+
+    if (nproc_target == 0) then
+      call check_mem(r1,irp_here)
+      nproc_target = 1
+      exit
+    endif
+
+    if (r1+rss < qp_max_mem) then
+      exit
+    endif
+
+    if (itermax > 4) then
+      itermax = itermax - 1
+    else if (m==1.and.disk_based_davidson) then
+      m=0
+      disk_based = .True.
+      itermax = 6
+    else
+      nproc_target = nproc_target - 1
+    endif
+
+  enddo
+  nthreads_davidson = nproc_target
+  TOUCH nthreads_davidson
+  call write_int(6,N_st,'Number of states')
+  call write_int(6,N_st_diag,'Number of states in diagonalization')
+  call write_int(6,sze,'Number of determinants')
+  call write_int(6,sze_csf,'Number of CSFs')
+  call write_int(6,nproc_target,'Number of threads for diagonalization')
+  call write_double(6, r1, 'Memory(Gb)')
+  if (disk_based) then
+    print *, 'Using swap space to reduce RAM'
+  endif
+
+  !---------------
+
+  write(6,'(A)') ''
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = 'Iter'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//'       Energy        Residual '
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+
+
+  if (disk_based) then
+    ! Create memory-mapped files for W and S
+    type(c_ptr) :: ptr_w, ptr_s
+    integer :: fd_s, fd_w
+    call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),&
+        8, fd_w, .False., ptr_w)
+    call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/))
+  else
+    allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax))
+  endif
+
+  allocate(                                                          &
+      ! Large
+      U(sze,N_st_diag),                                              &
+      U_csf(sze_csf,N_st_diag*itermax),                              &
+
+      ! Small
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      s_tmp(N_st_diag*itermax,N_st_diag*itermax),                    &
+      residual_norm(N_st_diag),                                      &
+      lambda(N_st_diag*itermax))
+
+
+  h = 0.d0
+  U = 0.d0
+  y = 0.d0
+  s_tmp = 0.d0
+
+
+  ASSERT (N_st > 0)
+  ASSERT (N_st_diag >= N_st)
+  ASSERT (sze > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+
+  ! Davidson iterations
+  ! ===================
+
+  converged = .False.
+  call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),U_csf(1,1))
+  do k=N_st+1,N_st_diag
+    do i=1,sze_csf
+        call random_number(r1)
+        call random_number(r2)
+        r1 = dsqrt(-2.d0*dlog(r1))
+        r2 = dtwo_pi*r2
+        U_csf(i,k) = r1*dcos(r2) * u_csf(i,k-N_st)
+    enddo
+    U_csf(k,k) = u_csf(k,k) + 10.d0
+  enddo
+  do k=1,N_st_diag
+    call normalize(U_csf(1,k),sze_csf)
+  enddo
+  call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1))
+
+  do while (.not.converged)
+    itertot = itertot+1
+    if (itertot == 8) then
+      exit
+    endif
+
+    do iter=1,itermax-1
+
+      shift  = N_st_diag*(iter-1)
+      shift2 = N_st_diag*iter
+
+!      if ((iter > 1).or.(itertot == 1)) then
+        ! Compute |W_k> = \sum_i |i><i|H|u_k>
+        ! -----------------------------------
+
+        !call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U)
+        PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals
+        if ((sze > 100000).and.distributed_davidson) then
+            !call H_u_0_nstates_zmq   (W,U,N_st_diag,sze)
+            allocate(tmpW(N_st_diag,sze_csf))
+            allocate(tmpU(N_st_diag,sze_csf))
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                tmpU(kk,ii) = U_csf(ii,shift+kk)
+              enddo
+            enddo
+            call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1)
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                W_csf(ii,shift+kk)=tmpW(kk,ii)
+              enddo
+            enddo
+            deallocate(tmpW)
+            deallocate(tmpU)
+        else
+            !call H_u_0_nstates_openmp(W,U,N_st_diag,sze)
+            allocate(tmpW(N_st_diag,sze_csf))
+            allocate(tmpU(N_st_diag,sze_csf))
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                tmpU(kk,ii) = U_csf(ii,shift+kk)
+              enddo
+            enddo
+            !tmpU     =0.0d0
+            !tmpU(1,2)=1.0d0
+            double precision               :: irp_rdtsc
+            double precision               :: ticks_0, ticks_1
+            integer*8                      :: irp_imax
+            irp_imax = 1
+            !ticks_0 = irp_rdtsc()
+            call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1)
+            !ticks_1 = irp_rdtsc()
+            !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----"
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                W_csf(ii,shift+kk)=tmpW(kk,ii)
+              enddo
+            enddo
+
+            !U_csf = 0.0d0
+            !U_csf(1,1) = 1.0d0
+            !u_in = 0.0d0
+            !call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
+            !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
+            !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
+            !do i=1,sze_csf
+            !  print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+            !  if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
+            !    print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+            !  endif
+            !end do
+            !stop
+            deallocate(tmpW)
+            deallocate(tmpU)
+        endif
+!      else
+!         ! Already computed in update below
+!         continue
+!      endif
+
+      if (dressing_state > 0) then
+
+        if (N_st == 1) then
+
+          l = dressed_column_idx(1)
+          double precision :: f
+          f = 1.0d0/psi_coef(l,1)
+          do istate=1,N_st_diag
+            do i=1,sze
+              W(i,istate) += dressing_column_h(i,1) *f * U(l,istate)
+              W(l,istate) += dressing_column_h(i,1) *f * U(i,istate)
+            enddo
+
+          enddo
+
+        else
+
+          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
+            psi_coef, size(psi_coef,1), &
+            U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+
+          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
+            dressing_column_h, size(dressing_column_h,1), s_tmp, size(s_tmp,1), &
+            1.d0, W(1,1), size(W,1))
+
+
+          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
+            dressing_column_h, size(dressing_column_h,1), &
+            U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+
+          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
+            psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), &
+            1.d0, W(1,1), size(W,1))
+
+        endif
+      endif
+
+      !call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1))
+
+      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
+      ! -------------------------------------------
+
+      call dgemm('T','N', shift2, shift2, sze_csf,                       &
+          1.d0, U_csf, size(U_csf,1), W_csf, size(W_csf,1),                          &
+          0.d0, h, size(h,1))
+      call dgemm('T','N', shift2, shift2, sze_csf,                       &
+          1.d0, U_csf, size(U_csf,1), U_csf, size(U_csf,1),                          &
+          0.d0, s_tmp, size(s_tmp,1))
+
+      ! Diagonalize h
+      ! ---------------
+
+       integer :: lwork, info
+       double precision, allocatable :: work(:)
+
+       y = h
+       lwork = -1
+       allocate(work(1))
+       call dsygv(1,'V','U',shift2,y,size(y,1), &
+          s_tmp,size(s_tmp,1), lambda, work,lwork,info)
+       lwork = int(work(1))
+       deallocate(work)
+       allocate(work(lwork))
+       call dsygv(1,'V','U',shift2,y,size(y,1), &
+          s_tmp,size(s_tmp,1), lambda, work,lwork,info)
+       deallocate(work)
+       if (info /= 0) then
+         stop 'DSYGV Diagonalization failed'
+       endif
+
+      ! Compute Energy for each eigenvector
+      ! -----------------------------------
+
+      call dgemm('N','N',shift2,shift2,shift2,                       &
+          1.d0, h, size(h,1), y, size(y,1),                          &
+          0.d0, s_tmp, size(s_tmp,1))
+
+      call dgemm('T','N',shift2,shift2,shift2,                       &
+          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+          0.d0, h, size(h,1))
+
+      do k=1,shift2
+        lambda(k) = h(k,k)
+      enddo
+
+      if (state_following) then
+
+        overlap = -1.d0
+        do i=1,shift2
+          do k=1,shift2
+            overlap(k,i) = dabs(y(k,i))
+          enddo
+        enddo
+        do k=1,N_st
+          cmax = -1.d0
+          do i=1,N_st
+            if (overlap(i,k) > cmax) then
+              cmax = overlap(i,k)
+              order(k) = i
+            endif
+          enddo
+          do i=1,N_st_diag
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        overlap = y
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = overlap(1:shift2,l)
+          endif
+        enddo
+        do k=1,N_st
+          overlap(k,1) = lambda(k)
+        enddo
+
+      endif
+
+
+      ! Express eigenvectors of h in the csf basis
+      ! ------------------------------------------
+
+      call dgemm('N','N', sze_csf, N_st_diag, shift2,                    &
+          1.d0, U_csf, size(U_csf,1), y, size(y,1), 0.d0, U_csf(1,shift2+1), size(U_csf,1))
+      call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift2+1),U)
+
+      call dgemm('N','N', sze_csf, N_st_diag, shift2,                    &
+          1.d0, W_csf, size(W_csf,1), y, size(y,1), 0.d0, W_csf(1,shift2+1), size(W_csf,1))
+      call convertWFfromCSFtoDET(N_st_diag,W_csf(1,shift2+1),W)
+
+      ! Compute residual vector and davidson step
+      ! -----------------------------------------
+
+      !if (without_diagonal) then
+      !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      do k=1,N_st_diag
+        do i=1,sze
+           U(i,k) =  (lambda(k) * U(i,k) - W(i,k) )      &
+              /max(H_jj(i) - lambda (k),1.d-2)
+        enddo
+      enddo
+      !$OMP END PARALLEL DO
+      !else
+      !  !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      !  do k=1,N_st_diag
+      !    do i=1,sze
+      !       U(i,k) =  (lambda(k) * U(i,k) - W(i,k) )  
+      !    enddo
+      !  enddo
+      !  !$OMP END PARALLEL DO
+      !endif
+
+      do k=1,N_st
+        residual_norm(k) = u_dot_u(U(1,k),sze)
+        to_print(1,k) = lambda(k) + nuclear_repulsion
+        to_print(2,k) = residual_norm(k)
+      enddo
+      call convertWFfromDETtoCSF(N_st_diag,U,U_csf(1,shift2+1))
+
+      if ((itertot>1).and.(iter == 1)) then
+        !don't print
+        continue
+      else
+        write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
+      endif
+
+      ! Check convergence
+      if (iter > 1) then
+        if (threshold_davidson_from_pt2) then
+          converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2
+        else
+          converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson
+        endif
+      endif
+
+      do k=1,N_st
+        if (residual_norm(k) > 1.d8) then
+          print *, 'Davidson failed'
+          stop -1
+        endif
+      enddo
+      if (converged) then
+        exit
+      endif
+
+      logical, external :: qp_stop
+      if (qp_stop()) then
+        converged = .True.
+        exit
+      endif
+
+
+    enddo
+
+    ! Re-contract U
+    ! -------------
+
+    call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0,      &
+        W_csf, size(W_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
+    do k=1,N_st_diag
+      do i=1,sze_csf
+        W_csf(i,k) = u_in(i,k)
+      enddo
+    enddo
+    call convertWFfromCSFtoDET(N_st_diag,W_csf,W)
+
+    call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0,      &
+        U_csf, size(U_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
+    do k=1,N_st_diag
+      do i=1,sze_csf
+        U_csf(i,k) = u_in(i,k)
+      enddo
+    enddo
+    call convertWFfromCSFtoDET(N_st_diag,U_csf,U)
+
+  enddo
+
+
+  call nullify_small_elements(sze,N_st_diag,U,size(U,1),threshold_davidson_pt2)
+  do k=1,N_st_diag
+    do i=1,sze
+      u_in(i,k) = U(i,k)
+    enddo
+  enddo
+
+  do k=1,N_st_diag
+    energies(k) = lambda(k)
+  enddo
+  write_buffer = '======'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') trim(write_buffer)
+  write(6,'(A)') ''
+  call write_time(6)
+
+  if (disk_based)then
+    ! Remove temp files
+    integer, external :: getUnitAndOpen
+    call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w )
+    fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r')
+    close(fd_w,status='delete')
+  else
+    deallocate(W, W_csf)
+  endif
+
+  deallocate (                                                       &
+      residual_norm,                                                 &
+      U, U_csf, overlap,                                             &
+      h, y, s_tmp,                                                   &
+      lambda                                                         &
+      )
+  FREE nthreads_davidson
+end
+
+
+
+
+
+
+
diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f
index a88330f6..3c5224a2 100644
--- a/src/davidson/diagonalization_hcsf_dressed.irp.f
+++ b/src/davidson/diagonalization_hcsf_dressed.irp.f
@@ -89,7 +89,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
   double precision, intent(out)  :: energies(N_st_diag_in)
 
   integer                        :: iter, N_st_diag
-  integer                        :: i,j,k,l,m
+  integer                        :: i,j,k,l,m,kk
   logical, intent(inout)         :: converged
 
   double precision, external     :: u_dot_v, u_dot_u
diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f
index d37b7386..816f16ec 100644
--- a/src/davidson/diagonalization_hs2_dressed.irp.f
+++ b/src/davidson/diagonalization_hs2_dressed.irp.f
@@ -149,7 +149,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
   character*(16384)              :: write_buffer
   double precision               :: to_print(3,N_st)
   double precision               :: cpu, wall
-  integer                        :: shift, shift2, itermax, istate
+  integer                        :: shift, shift2, itermax, istate, ii
   double precision               :: r1, r2, alpha
   logical                        :: state_ok(N_st_diag_in*davidson_sze_max)
   integer                        :: nproc_target
@@ -356,7 +356,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
         if ((sze > 100000).and.distributed_davidson) then
             call H_S2_u_0_nstates_zmq   (W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
         else
+            double precision               :: irp_rdtsc
+            double precision               :: ticks_0, ticks_1
+            integer*8                      :: irp_imax
+            irp_imax = 1
+            !ticks_0 = irp_rdtsc()
             call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
+            !ticks_1 = irp_rdtsc()
+            !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----"
         endif
         S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag))
 !      else
diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f
index befd1907..8ec6cd7e 100644
--- a/src/davidson/diagonalize_ci.irp.f
+++ b/src/davidson/diagonalize_ci.irp.f
@@ -5,7 +5,8 @@ BEGIN_PROVIDER [ character*(3), sigma_vector_algorithm ]
  !
  ! If 'cfg', use <Psi_csf|H|Psi_csf> in Davidson
  END_DOC
- sigma_vector_algorithm = 'det'
+ !sigma_vector_algorithm = 'det'
+ sigma_vector_algorithm = 'cfg'
 END_PROVIDER
 
 BEGIN_PROVIDER [ double precision, CI_energy, (N_states_diag) ]
@@ -71,7 +72,7 @@ END_PROVIDER
    if (diag_algorithm == 'Davidson') then
 
      if (do_csf) then
-!      if (sigma_vector_algorithm == 'det') then
+       if (sigma_vector_algorithm == 'det') then
          call davidson_diag_H_csf (psi_det,                  &
                                    CI_eigenvectors,          &
                                    size(CI_eigenvectors,1),  &
@@ -83,14 +84,14 @@ END_PROVIDER
                                    N_int,                    &
                                    0,                        &
                                    converged)
-!       else if (sigma_vector_algorithm == 'cfg') then
-!       call davidson_diag_H_csf(psi_det,CI_eigenvectors, &
-!         size(CI_eigenvectors,1),CI_electronic_energy,               &
-!         N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged)
-!       else
-!         print *, irp_here
-!         stop 'bug'
-!      endif
+       else if (sigma_vector_algorithm == 'cfg') then
+          call davidson_diag_H_cfg(psi_det,CI_eigenvectors, &
+          size(CI_eigenvectors,1),CI_electronic_energy,               &
+          N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged)
+       else
+          print *, irp_here
+          stop 'bug'
+       endif
      else
        call davidson_diag_HS2(psi_det,                  &
                               CI_eigenvectors,          &
diff --git a/src/mo_two_e_ints/core_quantities.irp.f b/src/mo_two_e_ints/core_quantities.irp.f
index b764a1a6..ef99bd5a 100644
--- a/src/mo_two_e_ints/core_quantities.irp.f
+++ b/src/mo_two_e_ints/core_quantities.irp.f
@@ -59,3 +59,45 @@ BEGIN_PROVIDER [ double precision, h_core_ri, (mo_num, mo_num) ]
  enddo
 END_PROVIDER
 
+
+BEGIN_PROVIDER [ double precision, h_act_ri, (mo_num, mo_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Active Hamiltonian with 3-index exchange integrals:
+ !
+ ! $\tilde{h}{pq} = h_{pq} - \frac{1}{2}\sum_{k} g(pk,kq)$
+ END_DOC
+
+ integer :: i,j, k
+ integer :: p,q, r
+ ! core-core contribution
+ h_act_ri = core_fock_operator
+ !print *,' Bef----hact(1,14)=',h_act_ri(4,14)
+ ! act-act contribution
+ do p=1,n_act_orb
+   j=list_act(p)
+   do q=1,n_act_orb
+     i=list_act(q)
+     h_act_ri(i,j) = mo_one_e_integrals(i,j)
+   enddo
+   do r=1,n_act_orb
+     k=list_act(r)
+     do q=1,n_act_orb
+       i=list_act(q)
+       h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j)
+     enddo
+   enddo
+ enddo
+ ! core-act contribution
+ !do p=1,n_act_orb
+ !  j=list_core(p)
+ !  do k=1,n_core_orb
+ !    do q=1,n_act_orb
+ !      i=list_act(q)
+ !      h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j)
+ !    enddo
+ !  enddo
+ !enddo
+ !print *,' Aft----hact(1,14)=',h_act_ri(4,14), mo_one_e_integrals(4,14)
+END_PROVIDER
+
diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index 405d2d20..3463370f 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1133,6 +1133,103 @@ subroutine ortho_svd(A,LDA,m,n)
 
 end
 
+subroutine ortho_qr_withB(A,LDA,B,m,n)
+  implicit none
+  BEGIN_DOC
+  ! Orthogonalization using Q.R factorization
+  !
+  ! A : Overlap Matrix
+  !
+  ! LDA : leftmost dimension of A
+  !
+  ! m : Number of rows of A
+  !
+  ! n : Number of columns of A
+  !
+  ! B : Output orthogonal basis
+  !
+  END_DOC
+  integer, intent(in)            :: m,n, LDA
+  double precision, intent(inout) :: A(LDA,n)
+  double precision, intent(inout) :: B(LDA,n)
+
+  integer                        :: LWORK, INFO
+  integer, allocatable           :: jpvt(:)
+  double precision, allocatable  :: TAU(:), WORK(:)
+  double precision, allocatable  :: C(:,:)
+  double precision :: norm
+  integer :: i,j
+
+  allocate (TAU(min(m,n)), WORK(1))
+  allocate (jpvt(n))
+  !print *," In function ortho"
+  B = A
+
+  jpvt(1:n)=1
+
+  LWORK=-1
+  call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
+
+  ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+  LWORK=max(n,int(WORK(1)))
+
+  deallocate(WORK)
+  allocate(WORK(LWORK))
+  call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
+  print *,A
+  print *,jpvt
+  deallocate(WORK,TAU)
+  !stop
+
+  !LWORK=-1
+  !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
+  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+  !LWORK=max(n,int(WORK(1)))
+
+  !deallocate(WORK)
+  !allocate(WORK(LWORK))
+  !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
+
+  !LWORK=-1
+  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
+  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+  !LWORK=max(n,int(WORK(1)))
+
+  !deallocate(WORK)
+  !allocate(WORK(LWORK))
+  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
+  !
+  !allocate(C(LDA,n))
+  !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA)
+  !norm = 0.0d0
+  !B = 0.0d0
+  !!print *,C
+  !do i=1,m
+  !  norm = 0.0d0
+  !  do j=1,n
+  !    norm = norm + C(j,i)*C(j,i)
+  !  end do
+  !  norm = 1.0d0/dsqrt(norm)
+  !  do j=1,n
+  !    B(j,i) = C(j,i)
+  !  end do
+  !end do
+  !print *,B
+
+
+  !deallocate(WORK,TAU)
+end
+
+subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf")
+  use iso_c_binding
+  integer(c_int32_t), value      :: LDA
+  integer(c_int32_t), value      :: m
+  integer(c_int32_t), value      :: n
+  integer(c_int16_t)             :: A(LDA,n)
+  integer(c_int16_t)             :: B(LDA,n)
+  call ortho_qr_withB(A,LDA,B,m,n)
+end subroutine ortho_qr_csf
+
 subroutine ortho_qr(A,LDA,m,n)
   implicit none
   BEGIN_DOC