diff --git a/src/ao_basis/aos_cplx.irp.f b/src/ao_basis/aos_cplx.irp.f
index f571b28d..da1adb94 100644
--- a/src/ao_basis/aos_cplx.irp.f
+++ b/src/ao_basis/aos_cplx.irp.f
@@ -5,3 +5,19 @@
 ! END_DOC
 ! ao_num_per_kpt = ao_num/kpt_num
 !END_PROVIDER
+
+subroutine get_kpt_idx_ao(idx_full,k,i)
+  implicit none
+  BEGIN_DOC
+  ! idx_full is ao index in full range (up to ao_num)
+  ! k is index of the k-point for this ao
+  ! i is index of this ao within k-point k
+  ! this assumes that all kpts have the same number of aos
+  END_DOC
+
+  integer, intent(in) :: idx_full
+  integer, intent(out) :: i,k
+  i = mod(idx_full-1,ao_num_per_kpt)+1
+  k = (idx_full-1)/ao_num_per_kpt+1
+  ASSERT (k <= kpt_num)
+end
diff --git a/src/ao_one_e_ints/ao_overlap.irp.f b/src/ao_one_e_ints/ao_overlap.irp.f
index b6191b86..7b51fb54 100644
--- a/src/ao_one_e_ints/ao_overlap.irp.f
+++ b/src/ao_one_e_ints/ao_overlap.irp.f
@@ -122,6 +122,22 @@ BEGIN_PROVIDER [ complex*16, ao_overlap_kpts, (ao_num_per_kpt, ao_num_per_kpt, k
 END_PROVIDER
 
 
+BEGIN_PROVIDER [ double precision, ao_overlap_kpts_real, (ao_num_per_kpt, ao_num_per_kpt, kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Overlap for complex AOs
+  END_DOC
+  integer :: i,j,k
+  do k=1,kpt_num
+    do j=1,ao_num_per_kpt
+      do i=1,ao_num_per_kpt
+        ao_overlap_kpts_real(i,j,k) = dble(ao_overlap_kpts(i,j,k))
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+
 
 
 BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ]
diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f
index a3703a62..8bbf41c7 100644
--- a/src/cipsi/selection.irp.f
+++ b/src/cipsi/selection.irp.f
@@ -2918,14 +2918,10 @@ subroutine get_d1_kpts(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, c
     hfix = h(1,ma)
     p1 = p(1,ma)
     p2 = p(2,ma)
-    kputi = (puti-1)/mo_num_per_kpt + 1
-    khfix = (hfix-1)/mo_num_per_kpt + 1
-    kp1   =   (p1-1)/mo_num_per_kpt + 1
-    kp2   =   (p2-1)/mo_num_per_kpt + 1
-    iputi = mod(puti-1,mo_num_per_kpt) + 1
-    ihfix = mod(hfix-1,mo_num_per_kpt) + 1
-    ip1   = mod(p1-1,  mo_num_per_kpt) + 1
-    ip2   = mod(p2-1,  mo_num_per_kpt) + 1
+    call get_kpt_idx_mo(puti,kputi,iputi)
+    call get_kpt_idx_mo(hfix,khfix,ihfix)
+    call get_kpt_idx_mo(p1,kp1,ip1)
+    call get_kpt_idx_mo(p2,kp2,ip2)
 
     if(.not. bannedOrb(puti, mi)) then
       !==================
@@ -3059,8 +3055,7 @@ subroutine get_d1_kpts(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, c
 
     !MOVE MI
     pfix = p(1,mi)
-    kpfix = (pfix-1)/mo_num_per_kpt + 1
-    ipfix = mod(pfix-1,mo_num_per_kpt) + 1
+    call get_kpt_idx_mo(pfix,kpfix,ipfix)
     tmp_row = (0.d0,0.d0)
     tmp_row2 = (0.d0,0.d0)
     !tmp_row_kpts = (0.d0,0.d0)
@@ -3270,14 +3265,10 @@ subroutine get_d1_kpts(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, c
         puti = p(i, ma)
         p1 = p(turn3(1,i), ma)
         p2 = p(turn3(2,i), ma)
-        kputi = (puti-1)/mo_num_per_kpt + 1
-        khfix = (hfix-1)/mo_num_per_kpt + 1
-        kp1   =   (p1-1)/mo_num_per_kpt + 1
-        kp2   =   (p2-1)/mo_num_per_kpt + 1
-        iputi = mod(puti-1,mo_num_per_kpt) + 1
-        ihfix = mod(hfix-1,mo_num_per_kpt) + 1
-        ip1   = mod(p1-1,  mo_num_per_kpt) + 1
-        ip2   = mod(p2-1,  mo_num_per_kpt) + 1
+        call get_kpt_idx_mo(puti,kputi,iputi)
+        call get_kpt_idx_mo(hfix,khfix,ihfix)
+        call get_kpt_idx_mo(p1,kp1,ip1)
+        call get_kpt_idx_mo(p2,kp2,ip2)
         call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
         call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
         call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p1,ip1,kp1,p2,ip2,kp2,mo_num_per_kpt,hij_cache2(1,1),mo_integrals_map,mo_integrals_map_2)
@@ -3425,14 +3416,10 @@ subroutine get_d1_kpts(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, c
       pfix = p(1,mi)
       p1 = p(1,ma)
       p2 = p(2,ma)
-      kpfix = (pfix-1)/mo_num_per_kpt + 1
-      khfix = (hfix-1)/mo_num_per_kpt + 1
-      kp1   =   (p1-1)/mo_num_per_kpt + 1
-      kp2   =   (p2-1)/mo_num_per_kpt + 1
-      ipfix = mod(pfix-1,mo_num_per_kpt) + 1
-      ihfix = mod(hfix-1,mo_num_per_kpt) + 1
-      ip1   = mod(p1-1,  mo_num_per_kpt) + 1
-      ip2   = mod(p2-1,  mo_num_per_kpt) + 1
+      call get_kpt_idx_mo(pfix,kpfix,ipfix)
+      call get_kpt_idx_mo(hfix,khfix,ihfix)
+      call get_kpt_idx_mo(p1,kp1,ip1)
+      call get_kpt_idx_mo(p2,kp2,ip2)
       tmp_row = (0.d0,0.d0)
       tmp_row2 = (0.d0,0.d0)
       !tmp_row_kpts = (0.d0,0.d0)
diff --git a/src/cisd/cisd.irp.f b/src/cisd/cisd.irp.f
index c3c9f821..cf19e629 100644
--- a/src/cisd/cisd.irp.f
+++ b/src/cisd/cisd.irp.f
@@ -56,10 +56,14 @@ subroutine run
   double precision               :: cisdq(N_states), delta_e
   double precision,external      :: diag_h_mat_elem
 
-  if(pseudo_sym)then
-   call H_apply_cisd_sym
+  if (is_complex) then
+    call H_apply_cisd_kpts
   else
-   call H_apply_cisd
+    if(pseudo_sym)then
+     call H_apply_cisd_sym
+    else
+     call H_apply_cisd
+    endif
   endif
   if (is_complex) then
     psi_coef_complex = ci_eigenvectors_complex
diff --git a/src/cisd/kpts_cisd.irp.f b/src/cisd/kpts_cisd.irp.f
new file mode 100644
index 00000000..8e37956f
--- /dev/null
+++ b/src/cisd/kpts_cisd.irp.f
@@ -0,0 +1,666 @@
+
+subroutine H_apply_cisd_kpts_diexc(key_in, key_prev, hole_1,particl_1, hole_2, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+  implicit none
+  integer(bit_kind), intent(in)         :: key_in(N_int, 2), hole_1(N_int, 2), hole_2(N_int, 2)
+  integer(bit_kind), intent(in)         :: particl_1(N_int, 2), particl_2(N_int, 2)
+  integer(bit_kind)                     :: p1_mask(N_int, 2), p2_mask(N_int, 2), tmp
+  integer,intent(in)                    :: i_generator,iproc_in
+  integer                               :: status(N_int*bit_kind_size, 2)
+  integer                               :: highest, p1,p2,sp,ni,i,mi,nt,ns,k
+  double precision, intent(in)          :: fock_diag_tmp(2,mo_num+1)
+  integer(bit_kind), intent(in)         :: key_prev(N_int, 2, *)
+  PROVIDE N_int
+  PROVIDE N_det
+
+  
+
+  highest = 0
+  do k=1,N_int*bit_kind_size
+    status(k,1) = 0
+    status(k,2) = 0
+  enddo
+  do sp=1,2
+    do ni=1,N_int
+      do i=1,bit_kind_size
+        if(iand(1_bit_kind,shiftr(key_in(ni, sp), (i-1))) == 0) then
+          cycle
+        end if
+        mi = (ni-1)*bit_kind_size+i
+        status(mi, sp) = int(iand(1_bit_kind,shiftr(hole_1(ni,sp),(i-1))),4)
+        status(mi, sp) = status(mi, sp) + 2*int(iand(1_bit_kind,shiftr(hole_2(ni,sp),(i-1))),4)
+        if(status(mi, sp) /= 0 .and. mi > highest) then
+          highest = mi
+        end if
+      end do
+    end do
+  end do
+
+  do sp=1,2
+    do p1=1,highest
+      if(status(p1, sp) == 0) then
+        cycle
+      end if
+      do p2=1,highest
+        if(status(p2, sp) == 0) then
+          cycle
+        end if
+        if((status(p1, sp) == 1 .and. status(p2, sp) > 1) .or. &
+            (status(p1, sp) == 2 .and. status(p2, sp) == 3) .or. &
+            (status(p1, sp) == 3 .and. status(p2, sp) == 3 .and. p2 > p1)) then
+          call H_apply_cisd_kpts_diexcP(key_in, sp, p1, particl_1, sp, p2, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+        end if
+      end do
+    end do
+  end do
+  do p1=1,highest
+    if(status(p1, 1) == 0) then
+      cycle
+    end if
+    do p2=1,highest
+      if(status(p2, 2) == 0) then
+        cycle
+      end if
+      if((status(p1, 1) == 3) .or. &
+          (status(p1, 1) == 1 .and. status(p2, 2) >= 2) .or. &
+          (status(p1, 1) == 2 .and. status(p2, 2) /= 2)) then
+
+          call H_apply_cisd_kpts_diexcP(key_in, 1, p1, particl_1, 2, p2, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+      end if
+    end do
+  end do
+end subroutine
+
+
+subroutine H_apply_cisd_kpts_diexcP(key_in, fs1, fh1, particl_1, fs2, fh2, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+  implicit none
+  integer(bit_kind), intent(in)         :: key_in(N_int, 2), particl_1(N_int, 2), particl_2(N_int, 2)
+  double precision, intent(in)          :: fock_diag_tmp(2,mo_num+1)
+  integer(bit_kind)                     :: p1_mask(N_int, 2), p2_mask(N_int, 2), key_mask(N_int, 2)
+  integer,intent(in)                    :: fs1,fs2,i_generator,iproc_in, fh1,fh2
+  integer(bit_kind)                     :: miniList(N_int, 2, N_det)
+  integer                               :: n_minilist, n_alpha, n_beta, deg(2), i, ni, k
+  
+  integer(bit_kind), parameter :: one = 1_bit_kind
+
+  do k=1,N_int
+    p1_mask(k,1) = 0_bit_kind
+    p1_mask(k,2) = 0_bit_kind
+    p2_mask(k,1) = 0_bit_kind
+    p2_mask(k,2) = 0_bit_kind
+  enddo
+  p1_mask(shiftr(fh1-1,bit_kind_shift) + 1, fs1) = shiftl(one,iand(fh1-1,bit_kind_size-1))
+  p2_mask(shiftr(fh2-1,bit_kind_shift) + 1, fs2) = shiftl(one,iand(fh2-1,bit_kind_size-1))
+
+  do k=1,N_int
+    key_mask(k,1) = key_in(k,1)
+    key_mask(k,2) = key_in(k,2)
+  enddo
+
+  key_mask(shiftr(fh1-1,bit_kind_shift) + 1, fs1) -= shiftl(one,iand(fh1-1,bit_kind_size-1))
+  key_mask(shiftr(fh2-1,bit_kind_shift) + 1, fs2) -= shiftl(one,iand(fh2-1,bit_kind_size-1))
+
+
+  call H_apply_cisd_kpts_diexcOrg(key_in, key_mask, p1_mask, particl_1, p2_mask, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+end subroutine
+
+
+subroutine H_apply_cisd_kpts_diexcOrg(key_in,key_mask,hole_1,particl_1,hole_2, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+  use omp_lib
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Generate all double excitations of key_in using the bit masks of holes and
+  ! particles.
+  ! Assume N_int is already provided.
+  END_DOC
+  integer,parameter              :: size_max = 8192
+  
+  integer          ,intent(in)   :: i_generator
+  integer(bit_kind),intent(in)   :: key_in(N_int,2), key_mask(N_int, 2)
+  integer(bit_kind),allocatable  :: keys_out(:,:,:)
+  integer(bit_kind), intent(in)  :: hole_1(N_int,2), particl_1(N_int,2)
+  integer(bit_kind), intent(in)  :: hole_2(N_int,2), particl_2(N_int,2)
+  integer, intent(in)            :: iproc_in
+  double precision, intent(in)   :: fock_diag_tmp(2,mo_num+1)
+  integer(bit_kind), allocatable :: hole_save(:,:)
+  integer(bit_kind), allocatable :: key(:,:),hole(:,:), particle(:,:)
+  integer(bit_kind), allocatable :: hole_tmp(:,:), particle_tmp(:,:)
+  integer(bit_kind), allocatable :: key_union_hole_part(:)
+  integer                        :: ii,i,jj,j,k,ispin,l
+  integer, allocatable           :: occ_particle(:,:), occ_hole(:,:)
+  integer, allocatable           :: occ_particle_tmp(:,:), occ_hole_tmp(:,:)
+  integer                        :: kk,pp,other_spin,key_idx
+  integer                        :: N_elec_in_key_hole_1(2),N_elec_in_key_part_1(2)
+  integer                        :: N_elec_in_key_hole_2(2),N_elec_in_key_part_2(2)
+
+  double precision               :: mo_two_e_integral
+  logical                        :: is_a_two_holes_two_particles
+  integer, allocatable           :: ia_ja_pairs(:,:,:)
+  integer, allocatable           :: ib_jb_pairs(:,:)
+  double precision               :: diag_H_mat_elem
+  integer                        :: iproc
+  integer                        :: jtest_vvvv
+
+  logical :: check_double_excitation
+  logical :: is_a_1h1p
+  logical :: is_a_1h2p
+  logical :: is_a_1h
+  logical :: is_a_1p
+  logical :: is_a_2p
+  logical :: is_a_2h1p
+  logical :: is_a_2h
+  logical :: b_cycle
+  logical :: yes_no
+  check_double_excitation = .True.
+  iproc = iproc_in
+
+
+  
+
+  
+!$ iproc = omp_get_thread_num()
+  allocate (keys_out(N_int,2,size_max), hole_save(N_int,2),          &
+      key(N_int,2),hole(N_int,2), particle(N_int,2), hole_tmp(N_int,2),&
+      particle_tmp(N_int,2), occ_particle(N_int*bit_kind_size,2),    &
+      occ_hole(N_int*bit_kind_size,2), occ_particle_tmp(N_int*bit_kind_size,2),&
+      occ_hole_tmp(N_int*bit_kind_size,2),key_union_hole_part(N_int))
+
+  
+
+
+
+  !!!! First couple hole particle
+  do j = 1, N_int
+    hole(j,1) = iand(hole_1(j,1),key_in(j,1))
+    hole(j,2) = iand(hole_1(j,2),key_in(j,2))
+    particle(j,1) = iand(xor(particl_1(j,1),key_in(j,1)),particl_1(j,1))
+    particle(j,2) = iand(xor(particl_1(j,2),key_in(j,2)),particl_1(j,2))
+  enddo
+  call bitstring_to_list_ab(particle,occ_particle,N_elec_in_key_part_1,N_int)
+  call bitstring_to_list_ab(hole,occ_hole,N_elec_in_key_hole_1,N_int)
+  allocate (ia_ja_pairs(2,0:(elec_alpha_num)*mo_num,2),          &
+            ib_jb_pairs(2,0:(elec_alpha_num)*mo_num))
+
+  do ispin=1,2
+    i=0
+    do ii=N_elec_in_key_hole_1(ispin),1,-1             ! hole
+      i_a = occ_hole(ii,ispin)
+      ASSERT (i_a > 0)
+      ASSERT (i_a <= mo_num)
+
+      do jj=1,N_elec_in_key_part_1(ispin)              !particle
+        j_a = occ_particle(jj,ispin)
+        ASSERT (j_a > 0)
+        ASSERT (j_a <= mo_num)
+        i += 1
+        ia_ja_pairs(1,i,ispin) = i_a
+        ia_ja_pairs(2,i,ispin) = j_a
+      enddo
+    enddo
+    ia_ja_pairs(1,0,ispin) = i
+  enddo
+
+  key_idx = 0
+
+  integer                        :: i_a,j_a,i_b,j_b,k_a,l_a,k_b,l_b
+  integer(bit_kind)              :: test(N_int,2)
+  double precision               :: accu
+  logical, allocatable           :: array_pairs(:,:)
+  allocate(array_pairs(mo_num,mo_num))
+  accu = 0.d0
+  do ispin=1,2
+    other_spin = iand(ispin,1)+1
+    
+    do ii=1,ia_ja_pairs(1,0,ispin)
+      i_a = ia_ja_pairs(1,ii,ispin)
+      ASSERT (i_a > 0)
+      ASSERT (i_a <= mo_num)
+      j_a = ia_ja_pairs(2,ii,ispin)
+      ASSERT (j_a > 0)
+      ASSERT (j_a <= mo_num)
+      hole = key_in
+      k = shiftr(i_a-1,bit_kind_shift)+1
+      j = i_a-shiftl(k-1,bit_kind_shift)-1
+      hole(k,ispin) = ibclr(hole(k,ispin),j)
+      k_a = shiftr(j_a-1,bit_kind_shift)+1
+      l_a = j_a-shiftl(k_a-1,bit_kind_shift)-1
+      hole(k_a,ispin) = ibset(hole(k_a,ispin),l_a)
+
+      !!!! Second couple hole particle
+      do j = 1, N_int
+        hole_tmp(j,1) = iand(hole_2(j,1),hole(j,1))
+        hole_tmp(j,2) = iand(hole_2(j,2),hole(j,2))
+        particle_tmp(j,1) = iand(xor(particl_2(j,1),hole(j,1)),particl_2(j,1))
+        particle_tmp(j,2) = iand(xor(particl_2(j,2),hole(j,2)),particl_2(j,2))
+      enddo
+
+      call bitstring_to_list_ab(particle_tmp,occ_particle_tmp,N_elec_in_key_part_2,N_int)
+      call bitstring_to_list_ab(hole_tmp,occ_hole_tmp,N_elec_in_key_hole_2,N_int)
+
+      !   hole = a^(+)_j_a(ispin) a_i_a(ispin)|key_in> : single exc :: orb(i_a,ispin) --> orb(j_a,ispin)
+      hole_save = hole
+
+      ! Build array of the non-zero integrals of second excitation
+      array_pairs = .True.
+
+        if (ispin == 1) then
+        integer                        :: jjj
+
+        i=0
+        do kk = 1,N_elec_in_key_hole_2(other_spin)
+          i_b = occ_hole_tmp(kk,other_spin)
+          ASSERT (i_b > 0)
+          ASSERT (i_b <= mo_num)
+          do jjj=1,N_elec_in_key_part_2(other_spin)     ! particle
+            j_b = occ_particle_tmp(jjj,other_spin)
+            ASSERT (j_b > 0)
+            ASSERT (j_b <= mo_num)
+            if (array_pairs(i_b,j_b)) then
+              
+              i+= 1
+              ib_jb_pairs(1,i) = i_b
+              ib_jb_pairs(2,i) = j_b
+            endif
+          enddo
+        enddo
+        ib_jb_pairs(1,0) = i
+
+        do kk = 1,ib_jb_pairs(1,0)
+          hole = hole_save
+          i_b = ib_jb_pairs(1,kk)
+          j_b = ib_jb_pairs(2,kk)
+          k = shiftr(i_b-1,bit_kind_shift)+1
+          j = i_b-shiftl(k-1,bit_kind_shift)-1
+          hole(k,other_spin) = ibclr(hole(k,other_spin),j)
+          key = hole
+          k = shiftr(j_b-1,bit_kind_shift)+1
+          l = j_b-shiftl(k-1,bit_kind_shift)-1
+          key(k,other_spin) = ibset(key(k,other_spin),l)
+          
+          
+          
+          
+          
+          
+          
+          
+          
+          
+          key_idx += 1
+          do k=1,N_int
+            keys_out(k,1,key_idx) = key(k,1)
+            keys_out(k,2,key_idx) = key(k,2)
+          enddo
+          ASSERT (key_idx <= size_max)
+          if (key_idx == size_max) then
+            call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+            key_idx = 0
+          endif
+        enddo
+      endif
+
+      !   does all the single excitations of the same spin
+      i=0
+      do kk = 1,N_elec_in_key_hole_2(ispin)
+        i_b = occ_hole_tmp(kk,ispin)
+        if (i_b <= i_a.or.i_b == j_a) cycle
+        ASSERT (i_b > 0)
+        ASSERT (i_b <= mo_num)
+        do jjj=1,N_elec_in_key_part_2(ispin)     ! particule
+          j_b = occ_particle_tmp(jjj,ispin)
+          ASSERT (j_b > 0)
+          ASSERT (j_b <= mo_num)
+          if (j_b <= j_a) cycle
+          if (array_pairs(i_b,j_b)) then
+            
+            i+= 1
+            ib_jb_pairs(1,i) = i_b
+            ib_jb_pairs(2,i) = j_b
+          endif
+        enddo
+      enddo
+      ib_jb_pairs(1,0) = i
+
+      do kk = 1,ib_jb_pairs(1,0)
+        hole = hole_save
+        i_b = ib_jb_pairs(1,kk)
+        j_b = ib_jb_pairs(2,kk)
+        k = shiftr(i_b-1,bit_kind_shift)+1
+        j = i_b-shiftl(k-1,bit_kind_shift)-1
+        hole(k,ispin) = ibclr(hole(k,ispin),j)
+        key = hole
+        k = shiftr(j_b-1,bit_kind_shift)+1
+        l = j_b-shiftl(k-1,bit_kind_shift)-1
+        key(k,ispin) = ibset(key(k,ispin),l)
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        key_idx += 1
+        do k=1,N_int
+          keys_out(k,1,key_idx) = key(k,1)
+          keys_out(k,2,key_idx) = key(k,2)
+        enddo
+        ASSERT (key_idx <= size_max)
+        if (key_idx == size_max) then
+          call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+          key_idx = 0
+        endif
+      enddo ! kk
+
+    enddo  ! ii
+    
+  enddo   ! ispin
+  call fill_h_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+  
+  deallocate (ia_ja_pairs, ib_jb_pairs,                              &
+      keys_out, hole_save,                                           &
+      key,hole, particle, hole_tmp,                                  &
+      particle_tmp, occ_particle,                                    &
+      occ_hole, occ_particle_tmp,                                    &
+      occ_hole_tmp,array_pairs,key_union_hole_part)
+  
+  
+end
+
+subroutine H_apply_cisd_kpts_monoexc(key_in, hole_1,particl_1,fock_diag_tmp,i_generator,iproc_in  )
+  use omp_lib
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Generate all single excitations of key_in using the bit masks of holes and
+  ! particles.
+  ! Assume N_int is already provided.
+  END_DOC
+  integer,parameter              :: size_max = 8192
+  
+  integer          ,intent(in)   :: i_generator
+  integer(bit_kind),intent(in)   :: key_in(N_int,2)
+  integer(bit_kind),intent(in)   :: hole_1(N_int,2), particl_1(N_int,2)
+  integer, intent(in)            :: iproc_in
+  double precision, intent(in)   :: fock_diag_tmp(2,mo_num+1)
+  integer(bit_kind),allocatable  :: keys_out(:,:,:)
+  integer(bit_kind),allocatable  :: hole_save(:,:)
+  integer(bit_kind),allocatable  :: key(:,:),hole(:,:), particle(:,:)
+  integer(bit_kind),allocatable  :: hole_tmp(:,:), particle_tmp(:,:)
+  integer(bit_kind),allocatable  :: hole_2(:,:), particl_2(:,:)
+  integer                        :: ii,i,jj,j,k,ispin,l
+  integer,allocatable            :: occ_particle(:,:), occ_hole(:,:)
+  integer,allocatable            :: occ_particle_tmp(:,:), occ_hole_tmp(:,:)
+  integer,allocatable            :: ib_jb_pairs(:,:)
+  integer                        :: kk,pp,other_spin,key_idx
+  integer                        :: N_elec_in_key_hole_1(2),N_elec_in_key_part_1(2)
+  integer                        :: N_elec_in_key_hole_2(2),N_elec_in_key_part_2(2)
+  logical                        :: is_a_two_holes_two_particles
+  integer(bit_kind), allocatable :: key_union_hole_part(:)
+
+  integer, allocatable           :: ia_ja_pairs(:,:,:)
+  logical, allocatable           :: array_pairs(:,:)
+  double precision               :: diag_H_mat_elem
+  integer                        :: iproc
+
+  integer(bit_kind)              :: key_mask(N_int, 2)
+
+  logical :: check_double_excitation
+  logical :: is_a_2h1p
+  logical :: is_a_2h
+  logical :: is_a_1h1p
+  logical :: is_a_1h2p
+  logical :: is_a_1h
+  logical :: is_a_1p
+  logical :: is_a_2p
+  logical :: yes_no
+
+  do k=1,N_int
+    key_mask(k,1) = 0_bit_kind
+    key_mask(k,2) = 0_bit_kind
+  enddo
+
+  iproc = iproc_in
+
+  check_double_excitation = .True.
+  
+
+
+  
+
+  
+!$ iproc = omp_get_thread_num()
+  allocate (keys_out(N_int,2,size_max), hole_save(N_int,2),          &
+      key(N_int,2),hole(N_int,2), particle(N_int,2), hole_tmp(N_int,2),&
+      particle_tmp(N_int,2), occ_particle(N_int*bit_kind_size,2),    &
+      occ_hole(N_int*bit_kind_size,2), occ_particle_tmp(N_int*bit_kind_size,2),&
+      occ_hole_tmp(N_int*bit_kind_size,2),key_union_hole_part(N_int))
+  
+  !!!! First couple hole particle
+  do j = 1, N_int
+    hole(j,1) = iand(hole_1(j,1),key_in(j,1))
+    hole(j,2) = iand(hole_1(j,2),key_in(j,2))
+    particle(j,1) = iand(xor(particl_1(j,1),key_in(j,1)),particl_1(j,1))
+    particle(j,2) = iand(xor(particl_1(j,2),key_in(j,2)),particl_1(j,2))
+  enddo
+
+  call bitstring_to_list_ab(particle,occ_particle,N_elec_in_key_part_1,N_int)
+  call bitstring_to_list_ab(hole,occ_hole,N_elec_in_key_hole_1,N_int)
+  allocate (ia_ja_pairs(2,0:(elec_alpha_num)*mo_num,2))
+
+  do ispin=1,2
+    i=0
+    do ii=N_elec_in_key_hole_1(ispin),1,-1             ! hole
+      i_a = occ_hole(ii,ispin)
+      do jj=1,N_elec_in_key_part_1(ispin)                            !particule
+        j_a = occ_particle(jj,ispin)
+        i += 1
+        ia_ja_pairs(1,i,ispin) = i_a
+        ia_ja_pairs(2,i,ispin) = j_a
+      enddo
+    enddo
+    ia_ja_pairs(1,0,ispin) = i
+  enddo
+
+  key_idx = 0
+
+  integer                        :: i_a,j_a,i_b,j_b,k_a,l_a,k_b,l_b
+  integer(bit_kind)              :: test(N_int,2)
+  double precision               :: accu
+  accu = 0.d0
+  do ispin=1,2
+    other_spin = iand(ispin,1)+1
+    
+    do ii=1,ia_ja_pairs(1,0,ispin)
+      i_a = ia_ja_pairs(1,ii,ispin)
+      j_a = ia_ja_pairs(2,ii,ispin)
+      hole = key_in
+      k = shiftr(i_a-1,bit_kind_shift)+1
+      j = i_a-shiftl(k-1,bit_kind_shift)-1
+  
+      hole(k,ispin) = ibclr(hole(k,ispin),j)
+      k_a = shiftr(j_a-1,bit_kind_shift)+1
+      l_a = j_a-shiftl(k_a-1,bit_kind_shift)-1
+  
+      hole(k_a,ispin) = ibset(hole(k_a,ispin),l_a)
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      key_idx += 1
+      do k=1,N_int
+        keys_out(k,1,key_idx) = hole(k,1)
+        keys_out(k,2,key_idx) = hole(k,2)
+      enddo
+      if (key_idx == size_max) then
+        call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+        key_idx = 0
+      endif
+    enddo  ! ii
+    
+  enddo   ! ispin
+  call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+  
+  deallocate (ia_ja_pairs, &
+      keys_out, hole_save,          &
+      key,hole, particle, hole_tmp,&
+      particle_tmp, occ_particle,    &
+      occ_hole, occ_particle_tmp,&
+      occ_hole_tmp,key_union_hole_part)
+  
+  
+
+end
+
+subroutine H_apply_cisd_kpts()
+  implicit none
+  use omp_lib
+  use bitmasks
+  BEGIN_DOC
+  ! Calls H_apply on the |HF| determinant and selects all connected single and double
+  ! excitations (of the same symmetry). Auto-generated by the ``generate_h_apply`` script.
+  END_DOC
+
+  
+
+  integer                        :: i_generator
+  double precision               :: wall_0, wall_1
+  integer(bit_kind), allocatable :: mask(:,:,:)
+  integer                        :: ispin, k
+  integer                        :: iproc
+  double precision, allocatable  :: fock_diag_tmp(:,:)
+
+  integer :: kk,kh1,kh2,kp1,kp2
+  integer(bit_kind), allocatable :: mask_kpts(:,:,:,:)
+  
+  if (is_complex) then
+    PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators_complex
+  else
+  PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators
+  endif
+
+  call wall_time(wall_0)
+
+  iproc = 0
+  !allocate( mask(N_int,2,6), fock_diag_tmp(2,mo_num+1) )
+  allocate( mask_kpts(N_int,2,6,kpt_num), fock_diag_tmp(2,mo_num+1) )
+  do i_generator=1,N_det_generators
+
+    ! Compute diagonal of the Fock matrix
+    call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int)
+
+    ! Create bit masks for holes and particles
+    do kk=1,kpt_num
+    do ispin=1,2
+      do k=1,N_int
+        mask_kpts(k,ispin,s_hole,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,s_hole,kk),  &
+            psi_det_generators(k,ispin,i_generator) )
+        mask_kpts(k,ispin,s_part,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,s_part,kk),  &
+            not(psi_det_generators(k,ispin,i_generator)) )
+        mask_kpts(k,ispin,d_hole1,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,d_hole1,kk),  &
+            psi_det_generators(k,ispin,i_generator) )
+        mask_kpts(k,ispin,d_part1,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,d_part1,kk),  &
+            not(psi_det_generators(k,ispin,i_generator)) )
+        mask_kpts(k,ispin,d_hole2,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,d_hole2,kk),  &
+            psi_det_generators(k,ispin,i_generator) )
+        mask_kpts(k,ispin,d_part2,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,d_part2,kk),  &
+            not(psi_det_generators(k,ispin,i_generator)) )
+      enddo
+    enddo
+    enddo
+    if(.True.)then
+      do kh1=1,kpt_num
+        do kh2=1,kpt_num
+          do kp1=1,kpt_num
+            kp2=kconserv(kh1,kh2,kp1)
+            !print*,'kh1h2p1p1',kh1,kh2,kp1,kp2
+            !print*,'size_before: ',h_apply_buffer(iproc)%n_det
+     call H_apply_cisd_kpts_diexc(psi_det_generators(1,1,i_generator),      &
+         psi_det_generators(1,1,1),                                   &
+         mask_kpts(1,1,d_hole1,kh1), mask_kpts(1,1,d_part1,kp1),                        &
+         mask_kpts(1,1,d_hole2,kh2), mask_kpts(1,1,d_part2,kp2),                        &
+         fock_diag_tmp, i_generator, iproc )
+            !print*,'size_after: ',h_apply_buffer(iproc)%n_det
+          enddo
+        enddo
+      enddo
+    endif
+    if(.True.)then
+      do kk=1,kpt_num
+     call H_apply_cisd_kpts_monoexc(psi_det_generators(1,1,i_generator),    &
+         mask_kpts(1,1,s_hole,kk), mask_kpts(1,1,s_part,kk),                        &
+         fock_diag_tmp, i_generator, iproc )
+      enddo
+    endif
+    call wall_time(wall_1)
+    
+    if (wall_1 - wall_0 > 2.d0) then
+        write(6,*)  &
+       100.*float(i_generator)/float(N_det_generators), '% in ', wall_1-wall_0, 's'
+        wall_0 = wall_1
+    endif
+  enddo
+
+  !deallocate( mask, fock_diag_tmp )
+  deallocate( mask_kpts, fock_diag_tmp )
+
+  call copy_H_apply_buffer_to_wf
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  if (is_complex) then
+    SOFT_TOUCH psi_det psi_coef_complex N_det
+  else
+    SOFT_TOUCH psi_det psi_coef N_det
+  endif
+
+  
+  ! Sort H_jj to find the N_states lowest states
+  integer                        :: i
+  integer, allocatable           :: iorder(:)
+  double precision, allocatable  :: H_jj(:)
+  double precision, external     :: diag_h_mat_elem
+  allocate(H_jj(N_det),iorder(N_det))
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP SHARED(psi_det,N_int,H_jj,iorder,N_det)                  &
+      !$OMP PRIVATE(i)
+  !$OMP DO
+  do i = 1, N_det
+    H_jj(i) = diag_h_mat_elem(psi_det(1,1,i),N_int)
+    iorder(i) = i
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dsort(H_jj,iorder,N_det)
+  if (is_complex) then
+    do k=1,N_states
+      psi_coef_complex(iorder(k),k) = (1.d0,0.d0)
+    enddo
+  else
+    do k=1,N_states
+      psi_coef(iorder(k),k) = 1.d0
+    enddo
+  endif
+  deallocate(H_jj,iorder)
+    
+
+end
+
diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f
index a94bec2e..9926dd37 100644
--- a/src/davidson/diagonalization_hs2_dressed.irp.f
+++ b/src/davidson/diagonalization_hs2_dressed.irp.f
@@ -1070,9 +1070,10 @@ subroutine davidson_diag_hjj_sjj_complex(dets_in,u_in,H_jj,s2_out,energies,dim_i
       r1 = dsqrt(-2.d0*dlog(r1))
       r2 = dtwo_pi*r2
       !todo: real or complex? rescale for complex? sqrt(2)?
-      !u_in(i,k) = dcmplx(r1*dcos(r2),0.d0)
-      u_in(i,k) = dcmplx(r1*dcos(r2),r1*dsin(r2))
+      u_in(i,k) = dcmplx(r1*dcos(r2),0.d0)
+      !u_in(i,k) = dcmplx(r1*dcos(r2),r1*dsin(r2))
     enddo
+    u_in(k,k) = (10.d0,0.d0)
   enddo
   do k=1,N_st_diag
     call normalize_complex(u_in(1,k),sze)
diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f
index 63b084fc..d49b0690 100644
--- a/src/davidson/diagonalize_ci.irp.f
+++ b/src/davidson/diagonalize_ci.irp.f
@@ -411,6 +411,15 @@ END_PROVIDER
 
 END_PROVIDER
 
+subroutine diagonalize_ci
+  implicit none
+  if (is_complex) then
+    call diagonalize_ci_complex
+  else
+    call diagonalize_ci_real
+  endif
+end
+
 subroutine diagonalize_CI_complex
   implicit none
   BEGIN_DOC
@@ -429,7 +438,7 @@ subroutine diagonalize_CI_complex
   SOFT_TOUCH psi_coef_complex CI_electronic_energy_complex ci_energy CI_eigenvectors_complex CI_s2_complex psi_energy psi_s2
 end
 
-subroutine diagonalize_CI
+subroutine diagonalize_CI_real
   implicit none
   BEGIN_DOC
 !  Replace the coefficients of the |CI| states by the coefficients of the
diff --git a/src/determinants/density_matrix_cplx.irp.f b/src/determinants/density_matrix_cplx.irp.f
index 882b73ee..e5d74347 100644
--- a/src/determinants/density_matrix_cplx.irp.f
+++ b/src/determinants/density_matrix_cplx.irp.f
@@ -495,10 +495,8 @@ END_PROVIDER
         call decode_exc_spin(exc,h1,p1,h2,p2)
         ! h1 occ in k
         ! p1 occ in l
-        ih1 = mod(h1-1,mo_num_per_kpt)+1
-        ip1 = mod(p1-1,mo_num_per_kpt)+1
-        kh1 = (h1-1)/mo_num_per_kpt + 1
-        kp1 = (p1-1)/mo_num_per_kpt + 1
+        call get_kpt_idx_mo(h1,kh1,ih1)
+        call get_kpt_idx_mo(p1,kp1,ip1)
         if (kh1.ne.kp1) then
           print *,'problem in: ',irp_here,'a'
           print *,' h1 = ',h1
@@ -577,10 +575,8 @@ END_PROVIDER
         exc = 0
         call get_single_excitation_spin(tmp_det(1,2),tmp_det2,exc,phase,N_int)
         call decode_exc_spin(exc,h1,p1,h2,p2)
-        ih1 = mod(h1-1,mo_num_per_kpt)+1
-        ip1 = mod(p1-1,mo_num_per_kpt)+1
-        kh1 = (h1-1)/mo_num_per_kpt + 1
-        kp1 = (p1-1)/mo_num_per_kpt + 1
+        call get_kpt_idx_mo(h1,kh1,ih1)
+        call get_kpt_idx_mo(p1,kp1,ip1)
         if (kh1.ne.kp1) then
           print *,'problem in: ',irp_here,'b'
           print *,' h1 = ',h1
diff --git a/src/determinants/single_excitations.irp.f b/src/determinants/single_excitations.irp.f
index 044c7d06..7199ef35 100644
--- a/src/determinants/single_excitations.irp.f
+++ b/src/determinants/single_excitations.irp.f
@@ -449,12 +449,14 @@ subroutine get_single_excitation_from_fock_kpts(det_1,det_2,ih,ip,spin,phase,hij
   integer :: occ_partcl(N_int*bit_kind_size,2)
   integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
   integer :: i0,i,h,p
-  integer :: ki,khp
+  integer :: ki,khp,kh
   complex*16 :: buffer_c(mo_num_per_kpt),buffer_x(mo_num_per_kpt)
-  khp = (ip-1)/mo_num_per_kpt+1
-  p = mod(ip-1,mo_num_per_kpt)+1
-  h = mod(ih-1,mo_num_per_kpt)+1
+
+  call get_kpt_idx_mo(ip,khp,p)
+  call get_kpt_idx_mo(ih,kh,h)
+  ASSERT (kh==khp)
   !todo: omp kpts
+  hij = fock_op_cshell_ref_bitmask_kpts(h,p,khp)
   do ki=1,kpt_num
     do i=1, mo_num_per_kpt
       !<hi|pi>
@@ -476,7 +478,6 @@ subroutine get_single_excitation_from_fock_kpts(det_1,det_2,ih,ip,spin,phase,hij
     enddo
     call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int)
     call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int)
-    hij = fock_op_cshell_ref_bitmask_kpts(h,p,khp)
     ! holes :: direct terms
     do i0 = 1, n_occ_ab_hole(1)
       i = occ_hole(i0,1) - (ki-1)*mo_num_per_kpt
diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f
index e9b9aca9..fb77fb45 100644
--- a/src/determinants/slater_rules.irp.f
+++ b/src/determinants/slater_rules.irp.f
@@ -2443,18 +2443,19 @@ subroutine i_H_j_complex(key_i,key_j,Nint,hij)
       if (exc(0,1,1) == 1) then
         call double_allowed_mo_kpts(exc(1,1,1),exc(1,1,2),exc(1,2,1),exc(1,2,2),is_allowed)
         if (.not.is_allowed) then
+          ! excitation doesn't conserve momentum
           hij = (0.d0,0.d0)
           return
         endif
         ! Single alpha, single beta
         if(exc(1,1,1) == exc(1,2,2) )then
-          ih1 = mod(exc(1,1,1)-1,mo_num_per_kpt)+1
-          ih2 = mod(exc(1,1,2)-1,mo_num_per_kpt)+1
-          kh1 = (exc(1,1,1)-1)/mo_num_per_kpt+1
-          kh2 = (exc(1,1,2)-1)/mo_num_per_kpt+1
-          ip1 = mod(exc(1,2,1)-1,mo_num_per_kpt)+1
-          kp1 = (exc(1,2,1)-1)/mo_num_per_kpt+1
+          !h1(a) = p2(b)
+          call get_kpt_idx_mo(exc(1,1,1),kh1,ih1)
+          call get_kpt_idx_mo(exc(1,1,2),kh2,ih2)
+          call get_kpt_idx_mo(exc(1,2,1),kp1,ip1)
+
           if(kp1.ne.kh2) then
+            !if h1==p2 then kp1==kh2
             print*,'problem with hij kpts: ',irp_here
             print*,is_allowed
             print*,exc(1,1,1),exc(1,1,2),exc(1,2,1),exc(1,2,2)
@@ -2464,12 +2465,10 @@ subroutine i_H_j_complex(key_i,key_j,Nint,hij)
           hij = phase * big_array_exchange_integrals_kpts(ih1,kh1,ih2,ip1,kp1)
           !hij = phase * big_array_exchange_integrals_complex(exc(1,1,1),exc(1,1,2),exc(1,2,1))
         else if (exc(1,2,1) ==exc(1,1,2))then
-          ih1 = mod(exc(1,1,1)-1,mo_num_per_kpt)+1
-          kh1 = (exc(1,1,1)-1)/mo_num_per_kpt+1
-          ip1 = mod(exc(1,2,1)-1,mo_num_per_kpt)+1
-          kp1 = (exc(1,2,1)-1)/mo_num_per_kpt+1
-          ip2 = mod(exc(1,2,2)-1,mo_num_per_kpt)+1
-          kp2 = (exc(1,2,2)-1)/mo_num_per_kpt+1
+          !p1(a)==h2(b)
+          call get_kpt_idx_mo(exc(1,1,1),kh1,ih1)
+          call get_kpt_idx_mo(exc(1,2,1),kp1,ip1)
+          call get_kpt_idx_mo(exc(1,2,2),kp2,ip2)
           if(kp2.ne.kh1) then
             print*,'problem with hij kpts: ',irp_here
             print*,is_allowed
diff --git a/src/hartree_fock/scf_k_real.irp.f b/src/hartree_fock/scf_k_real.irp.f
new file mode 100644
index 00000000..c666b989
--- /dev/null
+++ b/src/hartree_fock/scf_k_real.irp.f
@@ -0,0 +1,92 @@
+program scf_k_real
+  BEGIN_DOC
+! 
+! The :ref:`scf` program performs *Restricted* Hartree-Fock
+! calculations (the spatial part of the |MOs| is common for alpha and beta
+! spinorbitals).
+! 
+! It performs the following actions:
+! 
+! #. Compute/Read all the one- and two-electron integrals, and store them
+!    in memory
+! #. Check in the |EZFIO| database if there is a set of |MOs|.
+!    If there is, it will read them as initial guess. Otherwise, it will
+!    create a guess.
+! #. Perform the |SCF| iterations
+! 
+! For the keywords related to the |SCF| procedure, see the ``scf_utils``
+! directory where you will find all options.
+! 
+! At each iteration, the |MOs| are saved in the |EZFIO| database. Hence,
+! if the calculation crashes for any unexpected reason, the calculation
+! can be restarted by running again the |SCF| with the same |EZFIO|
+! database.
+! 
+! To start again a fresh |SCF| calculation, the |MOs| can be reset by
+! running the :ref:`qp_reset` command.
+! 
+! The `DIIS`_ algorithm is implemented, as well as the `level-shifting`_ 
+! method. If the |SCF| does not converge, try again with a higher value of
+! :option:`level_shift`.
+! 
+! .. _DIIS: https://en.wikipedia.org/w/index.php?title=DIIS
+! .. _level-shifting: https://doi.org/10.1002/qua.560070407
+!
+  END_DOC
+  call create_guess_k_real
+  call orthonormalize_mos_k_real
+  call run_k_real
+end
+
+subroutine create_guess_k_real
+  implicit none
+  BEGIN_DOC
+!   Create a MO guess if no MOs are present in the EZFIO directory
+  END_DOC
+  logical                        :: exists
+  PROVIDE ezfio_filename
+  call ezfio_has_mo_basis_mo_coef_kpts(exists)
+  if (.not.exists) then
+    if (mo_guess_type == "HCore") then
+        !mo_coef_complex = ao_ortho_lowdin_coef_complex
+        mo_coef_kpts = ao_ortho_lowdin_coef_kpts_real
+        TOUCH mo_coef_kpts
+        mo_label = 'Guess'
+        !call mo_as_eigvectors_of_mo_matrix_complex(mo_one_e_integrals_kpts,     &
+        call mo_as_eigvectors_of_mo_matrix_kpts_real(mo_one_e_integrals_kpts_real,     &
+            size(mo_one_e_integrals_kpts_real,1),                            &
+            size(mo_one_e_integrals_kpts_real,2),                            &
+            size(mo_one_e_integrals_kpts_real,3),                            &
+            mo_label,1,.false.)
+        SOFT_TOUCH mo_coef_kpts mo_label
+    else if (mo_guess_type == "Huckel") then
+        call huckel_guess_kpts_real
+    else
+      print *,  'Unrecognized MO guess type : '//mo_guess_type
+      stop 1
+    endif
+  endif
+end
+
+subroutine run_k_real
+
+  BEGIN_DOC
+!   Run SCF calculation
+  END_DOC
+
+  use bitmasks
+  implicit none
+
+  integer                        :: i_it, i, j, k
+
+  mo_label = "Orthonormalized"
+  call roothaan_hall_scf_kpts_real
+  call ezfio_set_hartree_fock_energy(SCF_energy)
+  print*,'hf 1e,2e,total energy'
+  print*,hf_one_electron_energy
+  print*,hf_two_electron_energy
+  print*,hf_energy
+
+end
+
+
diff --git a/src/mo_basis/mos.irp.f b/src/mo_basis/mos.irp.f
index 440d1703..f5310696 100644
--- a/src/mo_basis/mos.irp.f
+++ b/src/mo_basis/mos.irp.f
@@ -80,6 +80,22 @@ BEGIN_PROVIDER [ integer, mo_num_per_kpt ]
 
 END_PROVIDER
 
+subroutine get_kpt_idx_mo(idx_full,k,i)
+  implicit none
+  BEGIN_DOC
+  ! idx_full is mo index in full range (up to mo_num)
+  ! k is index of the k-point for this mo
+  ! i is index of this mo within k-point k
+  ! this assumes that all kpts have the same number of mos
+  END_DOC
+
+  integer, intent(in) :: idx_full
+  integer, intent(out) :: i,k
+  i = mod(idx_full-1,mo_num_per_kpt)+1
+  k = (idx_full-1)/mo_num_per_kpt+1
+  ASSERT (k <= kpt_num)
+end
+
 
 BEGIN_PROVIDER [ double precision, mo_coef, (ao_num,mo_num) ]
   implicit none
diff --git a/src/mo_basis/utils_cplx.irp.f b/src/mo_basis/utils_cplx.irp.f
index 936d09cc..4d28911d 100644
--- a/src/mo_basis/utils_cplx.irp.f
+++ b/src/mo_basis/utils_cplx.irp.f
@@ -327,6 +327,85 @@ subroutine mo_as_eigvectors_of_mo_matrix_kpts(matrix,n,m,nk,label,sign,output)
     endif
 end
 
+subroutine mo_as_eigvectors_of_mo_matrix_kpts_real(matrix,n,m,nk,label,sign,output)
+  !TODO: test this
+  implicit none
+  integer,intent(in)             :: n,m,nk, sign
+  character*(64), intent(in)     :: label
+  double precision, intent(in)   :: matrix(n,m,nk)
+  logical, intent(in)            :: output
+
+  integer :: i,j,k
+  double precision, allocatable  :: eigvalues(:)
+  !complex*16, allocatable  :: mo_coef_new(:,:)
+  double precision, allocatable  :: mo_coef_new(:,:),mo_coef_tmp(:,:),R(:,:), A(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: mo_coef_new, R
+
+  call write_time(6)
+  if (m /= mo_num_per_kpt) then
+    print *, irp_here, ': Error : m/= mo_num_per_kpt'
+    stop 1
+  endif
+  if (nk /= kpt_num) then
+    print *, irp_here, ': Error : nk/= kpt_num'
+    stop 1
+  endif
+  allocate(A(n,m),R(n,m),mo_coef_tmp(ao_num_per_kpt,m),mo_coef_new(ao_num_per_kpt,m),eigvalues(m))
+  do k=1,nk
+    if (sign == -1) then
+      do j=1,m
+        do i=1,n
+          A(i,j) = -matrix(i,j,k)
+        enddo
+      enddo
+    else
+      do j=1,m
+        do i=1,n
+          A(i,j) = matrix(i,j,k)
+        enddo
+      enddo
+    endif
+    mo_coef_new = dble(mo_coef_kpts(:,:,k))
+
+    call lapack_diag(eigvalues,R,A,n,m)
+    if (sign == -1) then
+      do i=1,m
+        eigvalues(i) = -eigvalues(i)
+      enddo
+    endif
+    if (output) then
+      do i=1,m
+        write (6,'(2(I8),1X,F16.10)') k,i,eigvalues(i)
+      enddo
+      write (6,'(A)') '======== ================'
+      write (6,'(A)')  ''
+      !write (6,'(A)')  'Fock Matrix'
+      !write (6,'(A)') '-----------'
+      !do i=1,n
+      !  write(*,'(200(E24.15))') A(i,:)
+      !enddo
+    endif
+
+    call dgemm('N','N',ao_num_per_kpt,m,m,1.d0, &
+        mo_coef_new,size(mo_coef_new,1),R,size(R,1),0.d0, &
+        mo_coef_tmp,size(mo_coef_tmp,1))
+    call zlacp2('N',ao_num_per_kpt,m,mo_coef_tmp,size(mo_coef_tmp,1), &
+                mo_coef_kpts(:,:,k),size(mo_coef_kpts,1))
+  enddo
+  deallocate(A,mo_coef_new,mo_coef_tmp,R,eigvalues)
+  call write_time(6)
+
+  mo_label = label
+    if (output) then
+      write (6,'(A)')  'MOs are now **'//trim(label)//'**'
+      write (6,'(A)') ''
+      write (6,'(A)')  'Eigenvalues'
+      write (6,'(A)') '-----------'
+      write (6,'(A)')  ''
+      write (6,'(A)') '======== ================'
+    endif
+end
+
 subroutine mo_as_svd_vectors_of_mo_matrix_kpts(matrix,lda,m,n,label)
   !TODO: implement
   print *, irp_here, ' not implemented for kpts'
diff --git a/src/mo_guess/mo_ortho_lowdin_cplx.irp.f b/src/mo_guess/mo_ortho_lowdin_cplx.irp.f
index b3b64ce4..ced9a63a 100644
--- a/src/mo_guess/mo_ortho_lowdin_cplx.irp.f
+++ b/src/mo_guess/mo_ortho_lowdin_cplx.irp.f
@@ -107,3 +107,35 @@ BEGIN_PROVIDER [complex*16, ao_ortho_lowdin_overlap_kpts, (ao_num_per_kpt,ao_num
     enddo
   enddo
 END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts_real               !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [ double precision, ao_ortho_lowdin_coef_kpts_real, (ao_num_per_kpt,ao_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+! matrix of the coefficients of the mos generated by the
+! orthonormalization by the S^{-1/2} canonical transformation of the aos
+! ao_ortho_lowdin_coef(i,j) = coefficient of the ith ao on the jth ao_ortho_lowdin orbital
+  END_DOC
+  integer                        :: i,j,k,l
+  double precision, allocatable  :: tmp_matrix(:,:)
+  allocate (tmp_matrix(ao_num,ao_num))
+  do k=1,kpt_num
+    tmp_matrix(:,:) = 0.d0
+    do j=1, ao_num
+      tmp_matrix(j,j) = 1.d0
+    enddo
+    call ortho_lowdin(ao_overlap_kpts_real(:,:,k),ao_num_per_kpt,ao_num_per_kpt,tmp_matrix,ao_num_per_kpt,ao_num_per_kpt,lin_dep_cutoff)
+    do i=1, ao_num_per_kpt
+      do j=1, ao_num_per_kpt
+        ao_ortho_lowdin_coef_kpts_real(j,i,k) = tmp_matrix(i,j)
+      enddo
+    enddo
+  enddo
+  deallocate(tmp_matrix)
+END_PROVIDER
+
diff --git a/src/mo_one_e_ints/mo_one_e_ints_cplx.irp.f b/src/mo_one_e_ints/mo_one_e_ints_cplx.irp.f
index 7a9568c9..59088f6e 100644
--- a/src/mo_one_e_ints/mo_one_e_ints_cplx.irp.f
+++ b/src/mo_one_e_ints/mo_one_e_ints_cplx.irp.f
@@ -59,3 +59,20 @@ BEGIN_PROVIDER [ complex*16, mo_one_e_integrals_kpts,(mo_num_per_kpt,mo_num_per_
   print*,'Provided the one-electron integrals'
 
 END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, mo_one_e_integrals_kpts_real,(mo_num_per_kpt,mo_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! array of the one-electron Hamiltonian on the |MO| basis :
+  ! sum of the kinetic and nuclear electronic potentials (and pseudo potential if needed)
+  END_DOC
+
+  integer :: i,j,k
+  do k=1,kpt_num
+    do j=1,mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        mo_one_e_integrals_kpts_real(i,j,k) = dble(mo_one_e_integrals_kpts(i,j,k))
+      enddo
+    enddo
+  enddo
+END_PROVIDER
diff --git a/src/mo_one_e_ints/mo_overlap.irp.f b/src/mo_one_e_ints/mo_overlap.irp.f
index f004e1f4..9d31bddb 100644
--- a/src/mo_one_e_ints/mo_overlap.irp.f
+++ b/src/mo_one_e_ints/mo_overlap.irp.f
@@ -128,3 +128,18 @@ BEGIN_PROVIDER [ complex*16, mo_overlap_kpts,(mo_num_per_kpt,mo_num_per_kpt,kpt_
   endif
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, mo_overlap_kpts_real, (mo_num_per_kpt, mo_num_per_kpt, kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Overlap for complex MOs
+  END_DOC
+  integer :: i,j,k
+  do k=1,kpt_num
+    do j=1,mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        mo_overlap_kpts_real(i,j,k) = dble(mo_overlap_kpts(i,j,k))
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
diff --git a/src/mo_one_e_ints/orthonormalize.irp.f b/src/mo_one_e_ints/orthonormalize.irp.f
index 4818a7aa..dd6ee8ee 100644
--- a/src/mo_one_e_ints/orthonormalize.irp.f
+++ b/src/mo_one_e_ints/orthonormalize.irp.f
@@ -19,3 +19,23 @@ subroutine orthonormalize_mos
 end
 
 
+subroutine orthonormalize_mos_k_real
+  implicit none
+  integer :: m,p,s,k
+  double precision, allocatable :: mo_coef_tmp(:,:)
+  
+  allocate(mo_coef_tmp(ao_num_per_kpt,mo_num_per_kpt))
+  do k=1,kpt_num
+    m = size(mo_coef_kpts,1)
+    p = size(mo_overlap_kpts,1)
+    mo_coef_tmp = dble(mo_coef_kpts(:,:,k))
+    call ortho_lowdin(mo_overlap_kpts_real(1,1,k),p,mo_num_per_kpt,mo_coef_tmp,m,ao_num_per_kpt,lin_dep_cutoff)
+    call zlacp2('X',ao_num_per_kpt,mo_num_per_kpt,mo_coef_tmp,size(mo_coef_tmp,1), &
+           mo_coef_kpts(1,1,k),size(mo_coef_kpts,1))
+  enddo
+  deallocate(mo_coef_tmp)
+  mo_label = 'Orthonormalized'
+  SOFT_TOUCH mo_coef_kpts mo_label
+end
+
+
diff --git a/src/scf_utils/diagonalize_fock_cplx.irp.f b/src/scf_utils/diagonalize_fock_cplx.irp.f
index 82353ed0..d8bb1b6e 100644
--- a/src/scf_utils/diagonalize_fock_cplx.irp.f
+++ b/src/scf_utils/diagonalize_fock_cplx.irp.f
@@ -112,4 +112,71 @@ BEGIN_PROVIDER [ complex*16, eigenvectors_Fock_matrix_mo_kpts, (ao_num_per_kpt,m
   deallocate(F, diag)
 
 
+END_PROVIDER
+BEGIN_PROVIDER [ complex*16, eigenvectors_Fock_matrix_mo_kpts_real, (ao_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Eigenvectors of the Fock matrix in the |MO| basis obtained with level shift.
+  END_DOC
+
+  integer                        :: i,j,k
+  integer                        :: n
+  !complex*16, allocatable :: F(:,:)
+  double precision, allocatable :: F(:,:)
+  double precision, allocatable  :: diag(:), mo_coef_tmp(:,:), eigvecs_tmp(:,:)
+
+  allocate( F(mo_num_per_kpt,mo_num_per_kpt) )
+  allocate (diag(mo_num_per_kpt) )
+  allocate (mo_coef_tmp(ao_num_per_kpt,mo_num_per_kpt) )
+  allocate (eigvecs_tmp(ao_num_per_kpt,mo_num_per_kpt) )
+
+  do k=1,kpt_num 
+    do j=1,mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        !F(i,j) = fock_matrix_mo_complex(i,j)
+        F(i,j) = dble(fock_matrix_mo_kpts(i,j,k))
+      enddo
+    enddo
+  
+    if(frozen_orb_scf)then
+      integer                        :: iorb,jorb
+      !todo: core/act per kpt
+      do i = 1, n_core_orb
+       iorb = list_core(i)
+       do j = 1, n_act_orb
+        jorb = list_act(j)
+        F(iorb,jorb) = 0.d0
+        F(jorb,iorb) = 0.d0
+       enddo
+      enddo
+    endif
+  
+    ! Insert level shift here
+    !todo: elec per kpt
+    do i = elec_beta_num_kpts(k)+1, elec_alpha_num_kpts(k)
+      F(i,i) += 0.5d0*level_shift
+    enddo
+  
+    do i = elec_alpha_num_kpts(k)+1, mo_num_per_kpt
+      F(i,i) += level_shift
+    enddo
+  
+    n = mo_num_per_kpt
+    call lapack_diagd_diag_in_place(diag,F,n,n)
+    
+    mo_coef_tmp = dble(mo_coef_kpts(:,:,k))
+    call dgemm('N','N',ao_num_per_kpt,mo_num_per_kpt,mo_num_per_kpt, 1.d0,            &
+        mo_coef_tmp, size(mo_coef_tmp,1), F, size(F,1),                       &
+        0.d0, eigvecs_tmp, size(eigvecs_tmp,1))
+
+    call zlacp2('X',ao_num_per_kpt,mo_num_per_kpt,eigvecs_tmp,size(eigvecs_tmp,1), &
+        eigenvectors_fock_matrix_mo_kpts_real(:,:,k), size(eigenvectors_Fock_matrix_mo_kpts_real,1))
+
+!    call zgemm('N','N',ao_num_per_kpt,mo_num_per_kpt,mo_num_per_kpt, (1.d0,0.d0),            &
+!        mo_coef_kpts(:,:,k), size(mo_coef_kpts,1), F, size(F,1),                       &
+!        (0.d0,0.d0), eigenvectors_Fock_matrix_mo_kpts(:,:,k), size(eigenvectors_Fock_matrix_mo_kpts,1))
+  enddo
+  deallocate(F, diag,mo_coef_tmp,eigvecs_tmp)
+
+
 END_PROVIDER
diff --git a/src/scf_utils/diis_cplx.irp.f b/src/scf_utils/diis_cplx.irp.f
index 4a0cdabf..601b9b97 100644
--- a/src/scf_utils/diis_cplx.irp.f
+++ b/src/scf_utils/diis_cplx.irp.f
@@ -164,7 +164,7 @@ BEGIN_PROVIDER [complex*16, FPS_SPF_Matrix_AO_kpts, (AO_num_per_kpt, AO_num_per_
     call zgemm('N','N',AO_num_per_kpt,AO_num_per_kpt,AO_num_per_kpt,                           &
         (1.d0,0.d0),                                                          &
         Fock_Matrix_AO_kpts(1,1,k),Size(Fock_Matrix_AO_kpts,1),                &
-        SCF_Density_Matrix_AO_kpts(1,1,k),Size(SCF_Density_Matrix_AO_kpts,1),  &
+        scf_density_matrix_ao_kpts(1,1,k),Size(SCF_Density_Matrix_AO_kpts,1),  &
         (0.d0,0.d0),                                                          &
         scratch,Size(scratch,1))
 
diff --git a/src/scf_utils/fock_matrix_cplx.irp.f b/src/scf_utils/fock_matrix_cplx.irp.f
index cc0dc4af..e2ada6fc 100644
--- a/src/scf_utils/fock_matrix_cplx.irp.f
+++ b/src/scf_utils/fock_matrix_cplx.irp.f
@@ -360,6 +360,24 @@ END_PROVIDER
 
 END_PROVIDER
 
+!============================================!
+!                                            !
+!                    kpts_real               !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_mo_kpts_real, (mo_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+  implicit none
+  integer :: i,j,k
+  do k=1,kpt_num
+    do j=1,mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        fock_matrix_mo_kpts_real(i,j,k) = dble(fock_matrix_mo_kpts(i,j,k))
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
 !============================================!
 !                                            !
 !                    kpts                    !
@@ -593,14 +611,10 @@ END_PROVIDER
           j = jj(k2)
           k = kk(k2)
           l = ll(k2)
-          kpt_i = (i-1)/ao_num_per_kpt +1
-          kpt_j = (j-1)/ao_num_per_kpt +1
-          kpt_k = (k-1)/ao_num_per_kpt +1
-          kpt_l = (l-1)/ao_num_per_kpt +1
-          idx_i = mod(i-1,ao_num_per_kpt)+1
-          idx_j = mod(j-1,ao_num_per_kpt)+1
-          idx_k = mod(k-1,ao_num_per_kpt)+1
-          idx_l = mod(l-1,ao_num_per_kpt)+1
+          call get_kpt_idx_ao(i,kpt_i,idx_i)
+          call get_kpt_idx_ao(j,kpt_j,idx_j)
+          call get_kpt_idx_ao(k,kpt_k,idx_k)
+          call get_kpt_idx_ao(l,kpt_l,idx_l)
           integral = i_sign(k2)*values(k1) !for klij and lkji, take complex conjugate
 
           !G_a(i,k) += D_{ab}(l,j)*(<ij|kl>)
@@ -636,14 +650,10 @@ END_PROVIDER
           j = jj(k2)
           k = kk(k2)
           l = ll(k2)
-          kpt_i = (i-1)/ao_num_per_kpt +1
-          kpt_j = (j-1)/ao_num_per_kpt +1
-          kpt_k = (k-1)/ao_num_per_kpt +1
-          kpt_l = (l-1)/ao_num_per_kpt +1
-          idx_i = mod(i-1,ao_num_per_kpt)+1
-          idx_j = mod(j-1,ao_num_per_kpt)+1
-          idx_k = mod(k-1,ao_num_per_kpt)+1
-          idx_l = mod(l-1,ao_num_per_kpt)+1
+          call get_kpt_idx_ao(i,kpt_i,idx_i)
+          call get_kpt_idx_ao(j,kpt_j,idx_j)
+          call get_kpt_idx_ao(k,kpt_k,idx_k)
+          call get_kpt_idx_ao(l,kpt_l,idx_l)
           integral = values(k1)
 
           if (kpt_l.eq.kpt_j) then
@@ -714,14 +724,10 @@ END_PROVIDER
           j = jj(k2)
           k = kk(k2)
           l = ll(k2)
-          kpt_i = (i-1)/ao_num_per_kpt +1
-          kpt_j = (j-1)/ao_num_per_kpt +1
-          kpt_k = (k-1)/ao_num_per_kpt +1
-          kpt_l = (l-1)/ao_num_per_kpt +1
-          idx_i = mod(i-1,ao_num_per_kpt)+1
-          idx_j = mod(j-1,ao_num_per_kpt)+1
-          idx_k = mod(k-1,ao_num_per_kpt)+1
-          idx_l = mod(l-1,ao_num_per_kpt)+1
+          call get_kpt_idx_ao(i,kpt_i,idx_i)
+          call get_kpt_idx_ao(j,kpt_j,idx_j)
+          call get_kpt_idx_ao(k,kpt_k,idx_k)
+          call get_kpt_idx_ao(l,kpt_l,idx_l)
           integral = i_sign(k2)*values(k1) ! for klij and lkji, take conjugate
 
           !G_a(i,k) += D_{ab}(l,j)*(<ij|kl>)
@@ -757,14 +763,10 @@ END_PROVIDER
           j = jj(k2)
           k = kk(k2)
           l = ll(k2)
-          kpt_i = (i-1)/ao_num_per_kpt +1
-          kpt_j = (j-1)/ao_num_per_kpt +1
-          kpt_k = (k-1)/ao_num_per_kpt +1
-          kpt_l = (l-1)/ao_num_per_kpt +1
-          idx_i = mod(i-1,ao_num_per_kpt)+1
-          idx_j = mod(j-1,ao_num_per_kpt)+1
-          idx_k = mod(k-1,ao_num_per_kpt)+1
-          idx_l = mod(l-1,ao_num_per_kpt)+1
+          call get_kpt_idx_ao(i,kpt_i,idx_i)
+          call get_kpt_idx_ao(j,kpt_j,idx_j)
+          call get_kpt_idx_ao(k,kpt_k,idx_k)
+          call get_kpt_idx_ao(l,kpt_l,idx_l)
           integral = values(k1)
 
           if (kpt_l.eq.kpt_j) then
diff --git a/src/scf_utils/huckel_cplx.irp.f b/src/scf_utils/huckel_cplx.irp.f
index f5dee3a4..346999df 100644
--- a/src/scf_utils/huckel_cplx.irp.f
+++ b/src/scf_utils/huckel_cplx.irp.f
@@ -89,3 +89,52 @@ subroutine huckel_guess_kpts
   deallocate(A)
 
 end
+subroutine huckel_guess_kpts_real
+  implicit none
+  BEGIN_DOC
+! Build the MOs using the extended Huckel model
+  END_DOC
+  integer                        :: i,j,k
+  double precision               :: accu
+  double precision               :: c
+  character*(64)                 :: label
+  !complex*16, allocatable  :: A(:,:)
+  double precision, allocatable  :: A(:,:)
+  label = "Guess"
+  c = 0.5d0 * 1.75d0
+
+  allocate (A(ao_num_per_kpt, ao_num_per_kpt))
+  do k=1,kpt_num
+    A = 0.d0
+    do j=1,ao_num_per_kpt
+      do i=1,ao_num_per_kpt
+        A(i,j) = c * ao_overlap_kpts_real(i,j,k) * (ao_one_e_integrals_diag_kpts(i,k) + ao_one_e_integrals_diag_kpts(j,k))
+      enddo
+      A(j,j) = ao_one_e_integrals_diag_kpts(j,k) + dble(ao_two_e_integral_alpha_kpts(j,j,k))
+      if (dabs(dimag(ao_two_e_integral_alpha_kpts(j,j,k))) .gt. 1.0d-10) then
+        stop 'diagonal elements of ao_two_e_integral_alpha should be real'
+      endif
+    enddo
+
+!    Fock_matrix_ao_alpha(1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num)
+!    Fock_matrix_ao_beta (1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num)
+    call zlacp2('X', ao_num_per_kpt, ao_num_per_kpt, A, size(A,1), &
+           Fock_matrix_ao_alpha_kpts(:,:,k), size(Fock_matrix_ao_alpha_kpts,1))
+    call zlacp2('X', ao_num_per_kpt, ao_num_per_kpt, A, size(A,1), &
+           Fock_matrix_ao_beta_kpts(:,:,k),  size(Fock_matrix_ao_beta_kpts, 1))
+    !call zlacpy('X', ao_num_per_kpt, ao_num_per_kpt, A, size(A,1), &
+    !       Fock_matrix_ao_alpha_kpts(:,:,k), size(Fock_matrix_ao_alpha_kpts,1))
+    !call zlacpy('X', ao_num_per_kpt, ao_num_per_kpt, A, size(A,1), &
+    !       Fock_matrix_ao_beta_kpts(:,:,k),  size(Fock_matrix_ao_beta_kpts, 1))
+  enddo  
+
+!  TOUCH mo_coef
+
+  !TOUCH fock_matrix_ao_alpha_complex fock_matrix_ao_beta_kpts
+  TOUCH fock_matrix_ao_alpha_kpts fock_matrix_ao_beta_kpts
+  mo_coef_kpts = eigenvectors_fock_matrix_mo_kpts_real
+  SOFT_TOUCH mo_coef_kpts
+  call save_mos
+  deallocate(A)
+
+end
diff --git a/src/scf_utils/roothaan_hall_scf_cplx.irp.f b/src/scf_utils/roothaan_hall_scf_cplx.irp.f
index 87c33cb5..64c3b16f 100644
--- a/src/scf_utils/roothaan_hall_scf_cplx.irp.f
+++ b/src/scf_utils/roothaan_hall_scf_cplx.irp.f
@@ -653,3 +653,192 @@ END_DOC
   endif
 
 end
+
+!============================================!
+!                                            !
+!                    kpts_real               !
+!                                            !
+!============================================!
+
+subroutine Roothaan_Hall_SCF_kpts_real
+
+BEGIN_DOC
+! Roothaan-Hall algorithm for SCF Hartree-Fock calculation
+END_DOC
+
+  implicit none
+
+  double precision               :: energy_SCF,energy_SCF_previous,Delta_energy_SCF
+  double precision               :: max_error_DIIS,max_error_DIIS_alpha,max_error_DIIS_beta
+  complex*16, allocatable  :: Fock_matrix_DIIS(:,:,:,:),error_matrix_DIIS(:,:,:,:)
+
+  integer                        :: iteration_SCF,dim_DIIS,index_dim_DIIS
+
+  integer                        :: i,j,k,kk
+  logical, external              :: qp_stop
+  complex*16, allocatable :: mo_coef_save(:,:,:)
+
+  PROVIDE ao_md5 mo_occ_kpts level_shift
+
+  allocate(mo_coef_save(ao_num_per_kpt,mo_num_per_kpt,kpt_num),                          &
+      Fock_matrix_DIIS (ao_num_per_kpt,ao_num_per_kpt,max_dim_DIIS,kpt_num),                 &
+      error_matrix_DIIS(ao_num_per_kpt,ao_num_per_kpt,max_dim_DIIS,kpt_num)                  &
+      )
+      !todo: add kpt_num dim to diis mats? (3 or 4)
+  call write_time(6)
+
+  print*,'Energy of the guess = ',scf_energy
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'Energy  ', 'Energy diff  ',  'DIIS error  ', 'Level shift   '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+
+! Initialize energies and density matrices
+  energy_SCF_previous = SCF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  dim_DIIS            = 0
+  max_error_DIIS      = 1.d0
+
+
+!
+! Start of main SCF loop
+!
+  !PROVIDE fps_spf_matrix_ao_complex fock_matrix_ao_complex 
+  PROVIDE fps_spf_matrix_ao_kpts fock_matrix_ao_kpts 
+
+  do while ( &
+    ( (max_error_DIIS > threshold_DIIS_nonzero) .or. &
+      (dabs(Delta_energy_SCF) > thresh_SCF) &
+    ) .and. (iteration_SCF < n_it_SCF_max) )
+
+! Increment cycle number
+
+    iteration_SCF += 1
+    if(frozen_orb_scf)then
+     call initialize_mo_coef_begin_iteration
+    endif
+
+! Current size of the DIIS space
+
+    dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
+
+    if (scf_algorithm == 'DIIS') then
+
+      do kk=1,kpt_num
+        ! Store Fock and error matrices at each iteration
+        do j=1,ao_num_per_kpt
+          do i=1,ao_num_per_kpt
+            index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
+            Fock_matrix_DIIS (i,j,index_dim_DIIS,kk) = fock_matrix_ao_kpts(i,j,kk)
+            error_matrix_DIIS(i,j,index_dim_DIIS,kk) = fps_spf_matrix_ao_kpts(i,j,kk)
+          enddo
+        enddo
+
+        ! Compute the extrapolated Fock matrix
+
+        call extrapolate_fock_matrix_kpts(                            &
+            error_matrix_DIIS(1,1,1,kk),Fock_matrix_DIIS(1,1,1,kk),        &
+            Fock_matrix_AO_kpts(1,1,kk),size(Fock_matrix_AO_kpts,1),       &
+            iteration_SCF,dim_DIIS                                       &
+            )
+      enddo
+      Fock_matrix_AO_alpha_kpts = Fock_matrix_AO_kpts*0.5d0
+      Fock_matrix_AO_beta_kpts  = Fock_matrix_AO_kpts*0.5d0
+      TOUCH Fock_matrix_AO_alpha_kpts Fock_matrix_AO_beta_kpts
+
+    endif
+
+    mo_coef_kpts = eigenvectors_fock_matrix_mo_kpts
+    if(frozen_orb_scf)then
+     call reorder_core_orb
+     call initialize_mo_coef_begin_iteration
+    endif
+
+    TOUCH mo_coef_kpts
+
+!   Calculate error vectors
+
+    max_error_DIIS = maxval(cdabs(FPS_SPF_Matrix_MO_kpts))
+
+!   SCF energy
+!    call print_debug_scf_complex
+    energy_SCF = scf_energy
+    Delta_Energy_SCF = energy_SCF - energy_SCF_previous
+    if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then
+      do kk=1,kpt_num
+        Fock_matrix_AO_kpts(1:ao_num_per_kpt,1:ao_num_per_kpt,kk) = &
+            Fock_matrix_DIIS (1:ao_num_per_kpt,1:ao_num_per_kpt,index_dim_DIIS,kk)
+      enddo
+      Fock_matrix_AO_alpha_kpts = Fock_matrix_AO_kpts*0.5d0
+      Fock_matrix_AO_beta_kpts  = Fock_matrix_AO_kpts*0.5d0
+      TOUCH fock_matrix_ao_alpha_kpts Fock_matrix_AO_beta_kpts
+    endif
+
+    double precision :: level_shift_save
+    level_shift_save = level_shift
+    mo_coef_save(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num) = mo_coef_kpts(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num)
+    do while (Delta_energy_SCF > 0.d0)
+      mo_coef_kpts(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num) = mo_coef_save
+      if (level_shift <= .1d0) then
+        level_shift = 1.d0
+      else
+        level_shift = level_shift * 3.0d0
+      endif
+      TOUCH mo_coef_kpts level_shift
+      mo_coef_kpts(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num) = &
+          eigenvectors_fock_matrix_mo_kpts_real(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num)
+      if(frozen_orb_scf)then
+        call reorder_core_orb
+        call initialize_mo_coef_begin_iteration
+      endif
+      TOUCH mo_coef_kpts
+      Delta_Energy_SCF = SCF_energy - energy_SCF_previous
+      energy_SCF = SCF_energy
+      if (level_shift-level_shift_save > 40.d0) then
+        level_shift = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift
+        exit
+      endif
+      dim_DIIS=0
+    enddo
+    level_shift = level_shift * 0.5d0
+    SOFT_TOUCH level_shift
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_scf, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS
+
+    if (Delta_energy_SCF < 0.d0) then
+      call save_mos
+    endif
+    if (qp_stop()) exit
+
+  enddo
+
+ if (iteration_SCF < n_it_SCF_max) then
+   mo_label = "Canonical"
+ endif
+!
+! End of Main SCF loop
+!
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,*)
+
+  if(.not.frozen_orb_scf)then
+   call mo_as_eigvectors_of_mo_matrix_kpts_real(fock_matrix_mo_kpts_real,size(Fock_matrix_mo_kpts_real,1),size(Fock_matrix_mo_kpts_real,2),size(Fock_matrix_mo_kpts_real,3),mo_label,1,.true.)
+   call save_mos
+  endif
+
+  call write_double(6, Energy_SCF, 'SCF energy')
+
+  call write_time(6)
+
+end
+
diff --git a/src/tools/diagonalize_h.irp.f b/src/tools/diagonalize_h.irp.f
index c9ae2033..ee9531e9 100644
--- a/src/tools/diagonalize_h.irp.f
+++ b/src/tools/diagonalize_h.irp.f
@@ -17,7 +17,11 @@ end
 
 subroutine routine
  implicit none
- call diagonalize_CI
+ call diagonalize_ci
  print*,'N_det = ',N_det
- call save_wavefunction_general(N_det,N_states,psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted)
+ if (is_complex) then
+  call save_wavefunction_general_complex(N_det,N_states,psi_det_sorted,size(psi_coef_sorted_complex,1),psi_coef_sorted_complex)
+ else
+  call save_wavefunction_general(N_det,N_states,psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted)
+ endif
 end
diff --git a/src/tools/print_hamiltonian.irp.f b/src/tools/print_hamiltonian.irp.f
index 207161dd..183fd502 100644
--- a/src/tools/print_hamiltonian.irp.f
+++ b/src/tools/print_hamiltonian.irp.f
@@ -9,7 +9,11 @@ program print_hamiltonian
  ! psi_coef_sorted are the wave function stored in the |EZFIO| directory.
  read_wf = .True.
  touch read_wf
- call run
+ if (is_complex) then
+  call run_complex
+ else
+  call run
+ endif
 end
 
 subroutine run
@@ -27,3 +31,42 @@ subroutine run
  enddo
 
 end
+
+subroutine run_complex
+ implicit none
+ integer :: i, j
+ complex*16 :: hij
+ double precision :: s2
+
+ print*,'i,j,Hij'
+ do j=1,N_det
+   do i=1,N_det
+     call i_h_j_complex(psi_det(1,1,i), psi_det(1,1,j), N_int, hij)
+     if (cdabs(hij) > 1.d-20) then
+       print *, i, j, dble(hij), dimag(hij)
+     endif
+   enddo
+ enddo
+ print*,'i,j,S2ij'
+ do j=1,N_det
+   do i=1,N_det
+     call get_s2(psi_det(1,1,i), psi_det(1,1,j), N_int, s2)
+     if (dabs(s2) > 1.d-20) then
+       print *, i, j, s2
+     endif
+   enddo
+ enddo
+!  use bitmasks
+  integer                        :: degree
+
+ print*,'i,j,degij'
+ do j=1,N_det
+   do i=1,N_det
+     call get_excitation_degree(psi_det(1,1,i), psi_det(1,1,j), degree, N_int)
+     if (degree.le.2) then
+       print *, i, j, degree
+     endif
+   enddo
+ enddo
+
+end
diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index f8d9e7c0..42ccfe0b 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1277,3 +1277,77 @@ subroutine lapack_diag(eigvalues,eigvectors,H,nmax,n)
   deallocate(A,eigenvalues)
 end
 
+subroutine lapack_diagd_diag_in_place(eigvalues,eigvectors,nmax,n)
+  implicit none
+  BEGIN_DOC
+  ! Diagonalize matrix H(complex)
+  !
+  ! H is untouched between input and ouptut
+  !
+  ! eigevalues(i) = ith lowest eigenvalue of the H matrix
+  !
+  ! eigvectors(i,j) = <i|psi_j> where i is the basis function and psi_j is the j th eigenvector
+  !
+  END_DOC
+  integer, intent(in)            :: n,nmax
+  double precision, intent(out)  :: eigvectors(nmax,n)
+!  complex*16, intent(inout)        :: eigvectors(nmax,n)
+  double precision, intent(out)  :: eigvalues(n)
+!  double precision, intent(in)   :: H(nmax,n)
+  double precision,allocatable         :: work(:)
+  integer         ,allocatable   :: iwork(:)
+!  complex*16,allocatable         :: A(:,:)
+  integer                        :: lwork, info, i,j,l,k, liwork
+
+! print*,'Diagonalization by jacobi'
+! print*,'n = ',n
+
+  lwork = 2*n*n + 6*n + 1
+  liwork = 5*n + 3
+  allocate (work(lwork),iwork(liwork))
+
+  lwork = -1
+  liwork = -1
+  ! get optimal work size
+  call DSYEVD( 'V', 'U', n, eigvectors, nmax, eigvalues, work, lwork, &
+    iwork, liwork, info )
+  if (info < 0) then
+    print *, irp_here, ': DSYEVD: the ',-info,'-th argument had an illegal value'
+    stop 2
+  endif
+  lwork  = int( real(work(1)))
+  liwork = iwork(1)
+  deallocate (work,iwork)
+
+  allocate (work(lwork),iwork(liwork))
+  call DSYEVD( 'V', 'U', n, eigvectors, nmax, eigvalues, work, lwork, &
+    iwork, liwork, info )
+  deallocate(work,iwork)
+
+
+  if (info < 0) then
+    print *, irp_here, ': DSYEVD: the ',-info,'-th argument had an illegal value'
+    stop 2
+  else if( info > 0  ) then
+     write(*,*)'DSYEVD Failed; calling DSYEV'
+     lwork = 3*n - 1
+     allocate(work(lwork))
+     lwork = -1
+     call DSYEV('V','L',n,eigvectors,nmax,eigvalues,work,lwork,info)
+     if (info < 0) then
+       print *, irp_here, ': DSYEV: the ',-info,'-th argument had an illegal value'
+       stop 2
+     endif
+     lwork = int(work(1))
+     deallocate(work)
+     allocate(work(lwork))
+     call DSYEV('V','L',n,eigvectors,nmax,eigvalues,work,lwork,info)
+     if (info /= 0 ) then
+       write(*,*)'DSYEV Failed'
+       stop 1
+     endif
+    deallocate(work)
+  end if
+
+end
+