diff --git a/src/bitmask/core_inact_act_virt.irp.f b/src/bitmask/core_inact_act_virt.irp.f
index d2efef89..ae00d774 100644
--- a/src/bitmask/core_inact_act_virt.irp.f
+++ b/src/bitmask/core_inact_act_virt.irp.f
@@ -448,7 +448,7 @@ BEGIN_PROVIDER [ integer, n_core_orb_kpts, (kpt_num)]
 
   do k=1,kpt_num
     n_core_orb_kpts(k) = 0
-    kshift = (1-k)*mo_num_per_kpt
+    kshift = (k-1)*mo_num_per_kpt
     do i = 1, mo_num_per_kpt
       if(mo_class(i+kshift) == 'Core')then
         n_core_orb_kpts(k) += 1
@@ -469,7 +469,7 @@ BEGIN_PROVIDER [ integer, n_inact_orb_kpts, (kpt_num)]
 
   do k=1,kpt_num
     n_inact_orb_kpts(k) = 0
-    kshift = (1-k)*mo_num_per_kpt
+    kshift = (k-1)*mo_num_per_kpt
     do i = 1, mo_num_per_kpt
       if(mo_class(i+kshift) == 'Inactive')then
         n_inact_orb_kpts(k) += 1
@@ -490,7 +490,7 @@ BEGIN_PROVIDER [ integer, n_act_orb_kpts, (kpt_num)]
 
   do k=1,kpt_num
     n_act_orb_kpts(k) = 0
-    kshift = (1-k)*mo_num_per_kpt
+    kshift = (k-1)*mo_num_per_kpt
     do i = 1, mo_num_per_kpt
       if(mo_class(i+kshift) == 'Active')then
         n_act_orb_kpts(k) += 1
@@ -511,7 +511,7 @@ BEGIN_PROVIDER [ integer, n_virt_orb_kpts, (kpt_num)]
 
   do k=1,kpt_num
     n_virt_orb_kpts(k) = 0
-    kshift = (1-k)*mo_num_per_kpt
+    kshift = (k-1)*mo_num_per_kpt
     do i = 1, mo_num_per_kpt
       if(mo_class(i+kshift) == 'Virtual')then
         n_virt_orb_kpts(k) += 1
@@ -532,7 +532,7 @@ BEGIN_PROVIDER [ integer, n_del_orb_kpts, (kpt_num)]
 
   do k=1,kpt_num
     n_del_orb_kpts(k) = 0
-    kshift = (1-k)*mo_num_per_kpt
+    kshift = (k-1)*mo_num_per_kpt
     do i = 1, mo_num_per_kpt
       if(mo_class(i+kshift) == 'Deleted')then
         n_del_orb_kpts(k) += 1
diff --git a/src/green/EZFIO.cfg b/src/green/EZFIO.cfg
new file mode 100644
index 00000000..0ad57888
--- /dev/null
+++ b/src/green/EZFIO.cfg
@@ -0,0 +1,101 @@
+[n_lanczos_complete]
+type: integer
+doc: number of lanczos iterations completed
+interface: ezfio,provider,ocaml
+default: 0
+
+[n_lanczos_iter]
+type: integer
+doc: number of lanczos iterations
+interface: ezfio,provider,ocaml
+default: 10
+
+[omega_min]
+type: double precision
+doc: lower limit of frequency for spectral density calculation
+interface: ezfio,provider,ocaml
+default: -2.e-1
+
+[omega_max]
+type: double precision
+doc: upper limit of frequency for spectral density calculation
+interface: ezfio,provider,ocaml
+default: 1.2e1
+
+[n_omega]
+type: integer
+doc: number of points for spectral density calculation
+interface: ezfio,provider,ocaml
+default: 1000
+
+[gf_epsilon]
+type: double precision
+doc: infinitesimal imaginary frequency term in Green's function
+interface: ezfio,provider,ocaml
+default: 1.e-2
+
+[n_green_vec]
+type: integer
+doc: number of holes/particles
+interface: ezfio
+default: 2
+
+[green_idx]
+interface: ezfio
+doc: homo/lumo indices
+type: integer
+size: (green.n_green_vec)
+
+[green_spin]
+interface: ezfio
+doc: homo/lumo spin
+type: integer
+size: (green.n_green_vec)
+
+[green_sign]
+interface: ezfio
+doc: homo/lumo sign
+type: double precision
+size: (green.n_green_vec)
+
+[alpha_lanczos]
+interface: ezfio
+doc: lanczos alpha values
+type: double precision
+size: (green.n_lanczos_iter,green.n_green_vec)
+
+[beta_lanczos]
+interface: ezfio
+doc: lanczos beta values
+type: double precision
+size: (green.n_lanczos_iter,green.n_green_vec)
+
+[un_lanczos]
+interface: ezfio
+doc: saved lanczos u vector
+type: double precision
+size: (2,determinants.n_det,green.n_green_vec)
+
+[vn_lanczos]
+interface: ezfio
+doc: saved lanczos v vector
+type: double precision
+size: (2,determinants.n_det,green.n_green_vec)
+
+[lanczos_eigvals]
+interface: ezfio
+doc: eigvals of tridiagonal form of H
+type: double precision
+size: (green.n_lanczos_iter,green.n_green_vec)
+
+[lanczos_debug_print]
+interface: ezfio,provider,ocaml
+type: logical
+doc: printing of lanczos vectors at every step
+default: False
+
+[n_lanczos_debug]
+interface: ezfio,provider,ocaml
+type: integer
+doc: number of elements to print
+default: 10
diff --git a/src/green/NEED b/src/green/NEED
new file mode 100644
index 00000000..4315d882
--- /dev/null
+++ b/src/green/NEED
@@ -0,0 +1 @@
+davidson fci
diff --git a/src/green/README.rst b/src/green/README.rst
new file mode 100644
index 00000000..6bdb2ca7
--- /dev/null
+++ b/src/green/README.rst
@@ -0,0 +1,6 @@
+=====
+dummy
+=====
+
+Module necessary to avoid the ``xxx  is a root module but does not contain a main file`` message.
+
diff --git a/src/green/green.main.irp.f b/src/green/green.main.irp.f
new file mode 100644
index 00000000..3fe26424
--- /dev/null
+++ b/src/green/green.main.irp.f
@@ -0,0 +1,51 @@
+program green
+  implicit none
+  BEGIN_DOC
+! TODO
+  END_DOC
+  read_wf = .True.
+  touch read_wf
+  provide n_green_vec
+  print*,'ref_bitmask_energy = ',ref_bitmask_energy
+!  call psicoefprinttest
+  call print_lanczos_eigvals
+  call print_spec
+end
+
+subroutine psicoefprinttest
+  implicit none
+  integer :: i
+  TOUCH psi_coef_complex
+  print *, 'printing ndet', N_det
+end
+subroutine print_lanczos_eigvals
+  implicit none
+  integer :: i, iunit, j
+  integer :: getunitandopen
+  character(5) :: jstr
+
+  do j=1,n_green_vec
+    write(jstr,'(I0.3)') j
+    iunit = getunitandopen('lanczos_eigval_alpha_beta.out.'//trim(jstr),'w')
+    print *, 'printing lanczos eigenvalues, alpha, beta to "lanczos_eigval_alpha_beta.out.'//trim(jstr)//'"'
+    do i=1,n_lanczos_iter
+      write(iunit,'(I6,3(E25.15))') i, lanczos_eigvals(i,j), alpha_lanczos(i,j), beta_lanczos(i,j)
+    enddo
+    close(iunit)
+  enddo
+end
+subroutine print_spec
+  implicit none
+  integer :: i, iunit, j
+  integer :: getunitandopen
+  character(5) :: jstr
+  do j=1,n_green_vec
+    write(jstr,'(I0.3)') j
+    iunit = getunitandopen('omega_A.out.'//trim(jstr),'w')
+    print *, 'printing frequency, spectral density to "omega_A.out.'//trim(jstr)//'"'
+    do i=1,n_omega
+      write(iunit,'(2(E25.15))') omega_list(i), spectral_lanczos(i,j)
+    enddo
+    close(iunit)
+  enddo
+end
diff --git a/src/green/hu0_hp.irp.f b/src/green/hu0_hp.irp.f
new file mode 100644
index 00000000..4fa7275f
--- /dev/null
+++ b/src/green/hu0_hp.irp.f
@@ -0,0 +1,847 @@
+! modified from H_S2_u_0_nstates_openmp in Davidson/u0Hu0.irp.f
+
+subroutine h_u_0_hp_openmp(v_0,u_0,N_hp,sze,spin_hp,sign_hp,idx_hp)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes v_0 = H|u_0> 
+  !
+  ! Assumes that the determinants are in psi_det
+  !
+  ! istart, iend, ishift, istep are used in ZMQ parallelization.
+  ! 
+  ! N_hp is number of holes and particles to be applied
+  ! each element of spin_hp is either 1(alpha) or 2(beta)
+  ! each element of sign_hp is either 1(particle) or -1(hole)
+  ! idx_hp contains orbital indices for holes and particles
+  END_DOC
+  integer, intent(in)            :: N_hp,sze
+  complex*16, intent(inout)  :: v_0(sze,N_hp), u_0(sze,N_hp)
+  integer :: k
+  complex*16, allocatable  :: u_t(:,:), v_t(:,:)
+  integer, intent(in) :: spin_hp(N_hp), idx_hp(N_hp)
+  double precision, intent(in) :: sign_hp(N_hp)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
+  allocate(u_t(N_hp,N_det),v_t(N_hp,N_det))
+  do k=1,N_hp
+    call cdset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
+  enddo
+  v_t = (0.d0,0.d0)
+  call cdtranspose(                                                   &
+      u_0,                                                           &
+      size(u_0, 1),                                                  &
+      u_t,                                                           &
+      size(u_t, 1),                                                  &
+      N_det, N_hp)
+
+  call h_u_0_hp_openmp_work(v_t,u_t,N_hp,sze,spin_hp,sign_hp,idx_hp,1,N_det,0,1)
+  deallocate(u_t)
+
+  call cdtranspose(                                                   &
+      v_t,                                                           &
+      size(v_t, 1),                                                  &
+      v_0,                                                           &
+      size(v_0, 1),                                                  &
+      N_hp, N_det)
+  deallocate(v_t)
+
+  do k=1,N_hp
+    call cdset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
+    call cdset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
+  enddo
+
+end
+
+
+subroutine h_u_0_hp_openmp_work(v_t,u_t,N_hp,sze,spin_hp,sign_hp,idx_hp,istart,iend,ishift,istep)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes v_t = H|u_t>
+  !
+  ! Default should be 1,N_det,0,1
+  END_DOC
+  integer, intent(in)            :: N_hp,sze,istart,iend,ishift,istep
+  complex*16, intent(in)   :: u_t(N_hp,N_det)
+  complex*16, intent(out)  :: v_t(N_hp,sze)
+  integer, intent(in) :: spin_hp(N_hp), idx_hp(N_hp)
+  double precision, intent(in) :: sign_hp(N_hp)
+
+  
+  PROVIDE ref_bitmask_energy N_int
+
+  select case (N_int)
+    case (1)
+      call H_u_0_hp_openmp_work_1(v_t,u_t,N_hp,sze,spin_hp,sign_hp,idx_hp,istart,iend,ishift,istep)
+    case (2)
+      call H_u_0_hp_openmp_work_2(v_t,u_t,N_hp,sze,spin_hp,sign_hp,idx_hp,istart,iend,ishift,istep)
+    case (3)
+      call H_u_0_hp_openmp_work_3(v_t,u_t,N_hp,sze,spin_hp,sign_hp,idx_hp,istart,iend,ishift,istep)
+    case (4)
+      call H_u_0_hp_openmp_work_4(v_t,u_t,N_hp,sze,spin_hp,sign_hp,idx_hp,istart,iend,ishift,istep)
+    case default
+      call H_u_0_hp_openmp_work_N_int(v_t,u_t,N_hp,sze,spin_hp,sign_hp,idx_hp,istart,iend,ishift,istep)
+  end select
+end
+BEGIN_TEMPLATE
+
+subroutine h_u_0_hp_openmp_work_$N_int(v_t,u_t,N_hp,sze,spin_hp,sign_hp,idx_hp,istart,iend,ishift,istep)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes v_t = H|u_t> and s_t = S^2 |u_t>
+  !
+  ! Default should be 1,N_det,0,1
+  END_DOC
+  integer, intent(in)            :: N_hp,sze,istart,iend,ishift,istep
+  complex*16, intent(in)   :: u_t(N_hp,N_det)
+  complex*16, intent(out)  :: v_t(N_hp,sze)
+  integer, intent(in) :: spin_hp(N_hp), idx_hp(N_hp)
+  double precision, intent(in) :: sign_hp(N_hp)
+
+  complex*16               :: hij
+  double precision :: hii 
+  integer                        :: i,j,k,l
+  integer                        :: k_a, k_b, l_a, l_b, m_a, m_b
+  integer                        :: istate
+  integer                        :: krow, kcol, krow_b, kcol_b
+  integer                        :: lrow, lcol
+  integer                        :: mrow, mcol
+  integer(bit_kind)              :: spindet($N_int)
+  integer(bit_kind)              :: tmp_det($N_int,2)
+  integer(bit_kind)              :: tmp_det2($N_int,2)
+  integer(bit_kind)              :: tmp_det3($N_int,2)
+  integer(bit_kind), allocatable :: buffer(:,:)
+  integer                        :: n_doubles
+  integer, allocatable           :: doubles(:)
+  integer, allocatable           :: singles_a(:)
+  integer, allocatable           :: singles_b(:)
+  integer, allocatable           :: idx(:), idx0(:)
+  integer                        :: maxab, n_singles_a, n_singles_b, kcol_prev
+  integer*8                      :: k8
+
+  logical, allocatable :: exc_is_banned_a1(:),exc_is_banned_b1(:),exc_is_banned_a2(:),exc_is_banned_b2(:)
+  logical, allocatable :: exc_is_banned_ab1(:),exc_is_banned_ab12(:),allowed_hp(:)
+  logical :: all_banned_a1,all_banned_b1,all_banned_a2,all_banned_b2
+  logical :: all_banned_ab12,all_banned_ab1
+  integer                        :: ii,na,nb
+  double precision, allocatable :: hii_hp(:)
+  complex*16, allocatable :: hij_hp(:)
+
+  maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
+  allocate(idx0(maxab))
+  
+  do i=1,maxab
+    idx0(i) = i
+  enddo
+
+  ! Prepare the array of all alpha single excitations
+  ! -------------------------------------------------
+
+  PROVIDE N_int nthreads_davidson elec_num
+  !$OMP PARALLEL DEFAULT(NONE) NUM_THREADS(nthreads_davidson)        &
+      !$OMP   SHARED(psi_bilinear_matrix_rows, N_det,                &
+      !$OMP          psi_bilinear_matrix_columns,                    &
+      !$OMP          psi_det_alpha_unique, psi_det_beta_unique,      &
+      !$OMP          n_det_alpha_unique, n_det_beta_unique, N_int,   &
+      !$OMP          psi_bilinear_matrix_transp_rows,                &
+      !$OMP          psi_bilinear_matrix_transp_columns,             &
+      !$OMP          psi_bilinear_matrix_transp_order, N_hp,         &
+      !$OMP          psi_bilinear_matrix_order_transp_reverse,       &
+      !$OMP          psi_bilinear_matrix_columns_loc,                &
+      !$OMP          psi_bilinear_matrix_transp_rows_loc,            &
+      !$OMP          istart, iend, istep, irp_here, v_t,        &
+      !$OMP          spin_hp,sign_hp,idx_hp,        &
+      !$OMP          elec_num_tab,nuclear_repulsion, &
+      !$OMP          ishift, idx0, u_t, maxab)                       &
+      !$OMP   PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i,     &
+      !$OMP          lcol, lrow, l_a, l_b,                           &
+      !$OMP          buffer, doubles, n_doubles,                     &
+      !$OMP          tmp_det2, hii, hij, idx, l, kcol_prev,     &
+      !$OMP          singles_a, n_singles_a, singles_b,              &
+      !$OMP          exc_is_banned_a1,exc_is_banned_b1,exc_is_banned_ab1, &
+      !$OMP          exc_is_banned_a2,exc_is_banned_b2,exc_is_banned_ab12, &
+      !$OMP          all_banned_a1,all_banned_b1,all_banned_ab1, &
+      !$OMP          all_banned_a2,all_banned_b2,all_banned_ab12, &
+      !$OMP          allowed_hp, &
+      !$OMP          ii, hij_hp, j, hii_hp,na,nb, &
+      !$OMP          n_singles_b, k8)
+  
+  ! Alpha/Beta double excitations
+  ! =============================
+    
+  allocate( buffer($N_int,maxab),                                     &
+      singles_a(maxab),                                              &
+      singles_b(maxab),                                              &
+      doubles(maxab),                                                &
+      idx(maxab), &
+      exc_is_banned_a1(N_hp), &
+      exc_is_banned_b1(N_hp), &
+      exc_is_banned_a2(N_hp), &
+      exc_is_banned_b2(N_hp), &
+      exc_is_banned_ab1(N_hp), &
+      exc_is_banned_ab12(N_hp), &
+      allowed_hp(N_hp), &
+      hij_hp(N_hp), &
+      hii_hp(N_hp))
+
+  kcol_prev=-1
+  all_banned_b1=.False.
+  ASSERT (iend <= N_det)
+  ASSERT (istart > 0)
+  ASSERT (istep  > 0)
+
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_a=istart+ishift,iend,istep
+  ! iterate over dets in psi
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    if (kcol /= kcol_prev) then !if we've moved to a new unique beta determinant
+      call get_list_hp_banned_spin(tmp_det,N_hp,exc_is_banned_b1,spin_hp,sign_hp,idx_hp,2,$N_int,all_banned_b1)
+      if (all_banned_b1) then
+        kcol_prev = kcol
+        cycle
+      else ! get all unique beta dets connected to this one by a single excitation
+        call get_all_spin_singles_$N_int(                              &
+            psi_det_beta_unique, idx0,                                 &
+            tmp_det(1,2), N_det_beta_unique,                           &
+            singles_b, n_singles_b)
+        kcol_prev = kcol
+      endif
+    else
+      if (all_banned_b1) cycle
+    endif
+
+    ! at least some beta allowed
+    ! check alpha
+    call get_list_hp_banned_spin(tmp_det,N_hp,exc_is_banned_a1,spin_hp,sign_hp,idx_hp,1,$N_int,all_banned_a1)
+    if (all_banned_a1) cycle
+
+    all_banned_ab1=.True.
+    do ii=1,N_hp
+      exc_is_banned_ab1(ii)=(exc_is_banned_a1(ii).or.exc_is_banned_b1(ii))
+      all_banned_ab1 = (all_banned_ab1.and.exc_is_banned_ab1(ii))
+    enddo
+    if (all_banned_ab1) cycle
+!    kcol_prev = kcol ! keep track of old col to see when we've moved to a new one
+
+    ! Loop over singly excited beta columns
+    ! -------------------------------------
+
+    do i=1,n_singles_b ! loop over other columns in this row
+      lcol = singles_b(i)
+
+      tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol)
+
+      call get_list_hp_banned_spin(tmp_det2,N_hp,exc_is_banned_b2,spin_hp,sign_hp,idx_hp,2,$N_int,all_banned_b2)
+      if (all_banned_b2) cycle
+
+      l_a = psi_bilinear_matrix_columns_loc(lcol) ! location of start of this column within psi_bilinear_mat
+      ASSERT (l_a <= N_det)
+
+      do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - l_a ! loop over rows in this column
+        lrow = psi_bilinear_matrix_rows(l_a) ! get row (index of unique alpha det)
+        ASSERT (lrow <= N_det_alpha_unique)
+
+        buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow) ! get alpha det
+
+        ASSERT (l_a <= N_det)
+        idx(j) = l_a ! indices of dets within psi_bilinear_mat
+        l_a = l_a+1
+      enddo
+      j = j-1
+      ! get all alpha dets in this column that are connected to ref alpha by a single exc.
+      call get_all_spin_singles_$N_int(                              &
+          buffer, idx, tmp_det(1,1), j,                              &
+          singles_a, n_singles_a )
+
+      ! Loop over alpha singles
+      ! -----------------------
+
+      do k = 1,n_singles_a
+        l_a = singles_a(k)
+        ASSERT (l_a <= N_det)
+
+        lrow = psi_bilinear_matrix_rows(l_a)
+        ASSERT (lrow <= N_det_alpha_unique)
+
+        tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+        call get_list_hp_banned_spin(tmp_det2,N_hp,exc_is_banned_a2,spin_hp,sign_hp,idx_hp,1,$N_int,all_banned_a2)
+        if (all_banned_a2) cycle
+        all_banned_ab12 = .True.
+        do ii=1,N_hp
+          exc_is_banned_ab12(ii)=((exc_is_banned_ab1(ii).or.exc_is_banned_b2(ii)).or.exc_is_banned_a2(ii))
+          allowed_hp(ii)=(.not.exc_is_banned_ab12(ii))
+          all_banned_ab12 = (all_banned_ab12.and.exc_is_banned_ab12(ii))
+        enddo
+        if (all_banned_ab12) cycle
+        call i_h_j_double_alpha_beta_hp(tmp_det,tmp_det2,$N_int,hij_hp,N_hp,spin_hp,sign_hp,idx_hp,allowed_hp)
+        do l=1,N_hp
+          v_t(l,k_a) = v_t(l,k_a) + hij_hp(l) * u_t(l,l_a)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO 
+
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_a=istart+ishift,iend,istep
+
+
+    ! Single and double alpha excitations
+    ! ===================================
+    
+    
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+    
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+    
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+    call get_list_hp_banned_ab(tmp_det,N_hp,exc_is_banned_ab1,spin_hp,sign_hp,idx_hp,$N_int,all_banned_ab1)
+    if (all_banned_ab1) cycle
+
+    
+    ! Initial determinant is at k_b in beta-major representation
+    ! ----------------------------------------------------------------------
+    
+    k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
+    ASSERT (k_b <= N_det)
+
+    spindet(1:$N_int) = tmp_det(1:$N_int,1)
+    
+    ! Loop inside the beta column to gather all the connected alphas
+    lcol = psi_bilinear_matrix_columns(k_a)
+    l_a = psi_bilinear_matrix_columns_loc(lcol)
+    do i=1,N_det_alpha_unique
+      if (l_a > N_det) exit
+      lcol = psi_bilinear_matrix_columns(l_a)
+      if (lcol /= kcol) exit
+      lrow = psi_bilinear_matrix_rows(l_a)
+      ASSERT (lrow <= N_det_alpha_unique)
+
+      buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow)
+      idx(i) = l_a
+      l_a = l_a+1
+    enddo
+    i = i-1
+    
+    !call get_all_spin_singles_and_doubles_$N_int(                    &
+    !    buffer, idx, spindet, i,                                     &
+    !    singles_a, doubles, n_singles_a, n_doubles )
+    call get_all_spin_singles_and_doubles(                           &
+        buffer, idx, spindet, $N_int, i,                             &
+        singles_a, doubles, n_singles_a, n_doubles )
+
+    ! Compute Hij for all alpha singles
+    ! ----------------------------------
+
+    tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+    do i=1,n_singles_a
+      l_a = singles_a(i)
+      ASSERT (l_a <= N_det)
+
+      lrow = psi_bilinear_matrix_rows(l_a)
+      ASSERT (lrow <= N_det_alpha_unique)
+
+      tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+      call get_list_hp_banned_spin(tmp_det2,N_hp,exc_is_banned_a2,spin_hp,sign_hp,idx_hp,1,$N_int,all_banned_a2)
+      if (all_banned_a2) cycle
+      all_banned_ab12 = .True.
+      do ii=1,N_hp
+        exc_is_banned_ab12(ii)=(exc_is_banned_ab1(ii).or.exc_is_banned_a2(ii))
+        allowed_hp(ii)=(.not.exc_is_banned_ab12(ii))
+        all_banned_ab12 = (all_banned_ab12.and.exc_is_banned_ab12(ii))
+      enddo
+      if (all_banned_ab12) cycle
+      call i_h_j_mono_spin_hp(tmp_det,tmp_det2,$N_int,1, hij_hp,N_hp,spin_hp,sign_hp,idx_hp,allowed_hp)
+
+      do l=1,N_hp
+        v_t(l,k_a) = v_t(l,k_a) + hij_hp(l) * u_t(l,l_a)
+        ! single => sij = 0 
+      enddo
+    enddo
+    
+    ! Compute Hij for all alpha doubles
+    ! ----------------------------------
+    
+    do i=1,n_doubles
+      l_a = doubles(i)
+      ASSERT (l_a <= N_det)
+
+      lrow = psi_bilinear_matrix_rows(l_a)
+      ASSERT (lrow <= N_det_alpha_unique)
+
+      call get_list_hp_banned_single_spin(psi_det_alpha_unique(1,lrow),N_hp,exc_is_banned_a2,spin_hp,sign_hp,idx_hp,1,$N_int,all_banned_a2)
+      if (all_banned_a2) cycle
+      all_banned_ab12 = .True.
+      do ii=1,N_hp
+        exc_is_banned_ab12(ii)=(exc_is_banned_ab1(ii).or.exc_is_banned_a2(ii))
+        allowed_hp(ii)=(.not.exc_is_banned_ab12(ii))
+        all_banned_ab12 = (all_banned_ab12.and.exc_is_banned_ab12(ii))
+      enddo
+      if (all_banned_ab12) cycle
+      call i_h_j_double_spin_hp( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int,1,hij_hp,N_hp,spin_hp,sign_hp,idx_hp,allowed_hp)
+      do l=1,N_hp
+        v_t(l,k_a) = v_t(l,k_a) + hij_hp(l) * u_t(l,l_a)
+        ! same spin => sij = 0
+      enddo
+    enddo
+    
+
+    ! Single and double beta excitations
+    ! ==================================
+
+    
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+    
+    krow = psi_bilinear_matrix_rows(k_a)
+    kcol = psi_bilinear_matrix_columns(k_a)
+    
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+    
+    !! should already be done from top of loop?
+    !call get_list_hp_banned_ab(tmp_det,N_hp,exc_is_banned_ab1,spin_hp,sign_hp,idx_hp,$N_int,all_banned_ab1)
+    !if (all_banned_ab1) cycle
+    
+    spindet(1:$N_int) = tmp_det(1:$N_int,2)
+    
+    ! Initial determinant is at k_b in beta-major representation
+    ! -----------------------------------------------------------------------
+
+    k_b = psi_bilinear_matrix_order_transp_reverse(k_a) 
+    ASSERT (k_b <= N_det)
+    
+    ! Loop inside the alpha row to gather all the connected betas
+    lrow = psi_bilinear_matrix_transp_rows(k_b)
+    l_b = psi_bilinear_matrix_transp_rows_loc(lrow)
+    do i=1,N_det_beta_unique
+      if (l_b > N_det) exit
+      lrow = psi_bilinear_matrix_transp_rows(l_b)
+      if (lrow /= krow) exit
+      lcol = psi_bilinear_matrix_transp_columns(l_b)
+      ASSERT (lcol <= N_det_beta_unique)
+
+      buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol)
+      idx(i) = l_b
+      l_b = l_b+1
+    enddo
+    i = i-1
+  
+    !call get_all_spin_singles_and_doubles_$N_int(                    &
+    !    buffer, idx, spindet, i,                                     &
+    !    singles_b, doubles, n_singles_b, n_doubles )
+    call get_all_spin_singles_and_doubles(                           &
+        buffer, idx, spindet, $N_int, i,                             &
+        singles_b, doubles, n_singles_b, n_doubles )
+    
+    ! Compute Hij for all beta singles
+    ! ----------------------------------
+    
+    tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    do i=1,n_singles_b
+      l_b = singles_b(i)
+      ASSERT (l_b <= N_det)
+
+      lcol = psi_bilinear_matrix_transp_columns(l_b)
+      ASSERT (lcol <= N_det_beta_unique)
+
+      tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
+      call get_list_hp_banned_spin(tmp_det2,N_hp,exc_is_banned_b2,spin_hp,sign_hp,idx_hp,2,$N_int,all_banned_b2)
+      if (all_banned_b2) cycle
+      all_banned_ab12 = .True.
+      do ii=1,N_hp
+        exc_is_banned_ab12(ii)=(exc_is_banned_ab1(ii).or.exc_is_banned_b2(ii))
+        allowed_hp(ii)=(.not.exc_is_banned_ab12(ii))
+        all_banned_ab12 = (all_banned_ab12.and.exc_is_banned_ab12(ii))
+      enddo
+      if (all_banned_ab12) cycle
+      call i_h_j_mono_spin_hp(tmp_det,tmp_det2,$N_int,2, hij_hp,N_hp,spin_hp,sign_hp,idx_hp,allowed_hp)
+      l_a = psi_bilinear_matrix_transp_order(l_b)
+      ASSERT (l_a <= N_det)
+      do l=1,N_hp
+        v_t(l,k_a) = v_t(l,k_a) + hij_hp(l) * u_t(l,l_a)
+        ! single => sij = 0 
+      enddo
+    enddo
+    
+    ! Compute Hij for all beta doubles
+    ! ----------------------------------
+    
+    do i=1,n_doubles
+      l_b = doubles(i)
+      ASSERT (l_b <= N_det)
+
+      lcol = psi_bilinear_matrix_transp_columns(l_b)
+      ASSERT (lcol <= N_det_beta_unique)
+
+      call get_list_hp_banned_single_spin(psi_det_beta_unique(1,lcol),N_hp,exc_is_banned_b2,spin_hp,sign_hp,idx_hp,2,$N_int,all_banned_b2)
+      if (all_banned_b2) cycle
+      all_banned_ab12 = .True.
+      do ii=1,N_hp
+        exc_is_banned_ab12(ii)=(exc_is_banned_ab1(ii).or.exc_is_banned_b2(ii))
+        allowed_hp(ii)=(.not.exc_is_banned_ab12(ii))
+        all_banned_ab12 = (all_banned_ab12.and.exc_is_banned_ab12(ii))
+      enddo
+      if (all_banned_ab12) cycle
+      call i_h_j_double_spin_hp( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int,2,hij_hp,N_hp,spin_hp,sign_hp,idx_hp,allowed_hp)
+      l_a = psi_bilinear_matrix_transp_order(l_b)
+      ASSERT (l_a <= N_det)
+
+      do l=1,N_hp
+        v_t(l,k_a) = v_t(l,k_a) + hij_hp(l) * u_t(l,l_a)
+        ! same spin => sij = 0 
+      enddo
+    enddo
+
+
+    ! Diagonal contribution
+    ! =====================
+
+    
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+    
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+    
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    call get_list_hp_banned_ab(tmp_det,N_hp,exc_is_banned_ab1,spin_hp,sign_hp,idx_hp,$N_int,all_banned_ab1)
+    if (all_banned_ab1) cycle
+    
+    double precision, external :: diag_H_mat_elem, diag_S_mat_elem
+    hii = diag_h_mat_elem(tmp_det,$N_int)
+
+    do ii=1,N_hp
+      if(exc_is_banned_ab1(ii)) then
+        hii_hp(ii)=0.d0
+      else
+        tmp_det2=tmp_det
+        na=elec_num_tab(spin_hp(ii))
+        nb=elec_num_tab(iand(spin_hp(ii),1)+1)
+        hii_hp(ii)=hii
+        if (sign_hp(ii)>0) then
+          call ac_operator(idx_hp(ii),spin_hp(ii),tmp_det2,hii_hp(ii),$N_int,na,nb)
+        else
+          call a_operator(idx_hp(ii),spin_hp(ii),tmp_det2,hii_hp(ii),$N_int,na,nb)
+        endif
+      endif
+      v_t(ii,k_a) = v_t(ii,k_a) + (nuclear_repulsion + hii_hp(ii)) * u_t(ii,k_a)
+    enddo
+    
+
+  end do
+  !$OMP END DO 
+  deallocate(buffer, singles_a, singles_b, doubles, idx, &
+              exc_is_banned_a1, &
+              exc_is_banned_b1, &
+              exc_is_banned_a2, &
+              exc_is_banned_b2, &
+              exc_is_banned_ab1, &
+              exc_is_banned_ab12, &
+              allowed_hp, &
+              hij_hp, hii_hp )
+  !$OMP END PARALLEL
+  deallocate(idx0)
+end
+
+SUBST [ N_int ]
+
+1;;
+2;;
+3;;
+4;;
+N_int;;
+
+END_TEMPLATE
+
+
+
+subroutine i_h_j_double_spin_hp(key_i,key_j,Nint,ispin,hij_hp,N_hp,spin_hp,sign_hp,idx_hp,allowed_hp)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! todo: maybe make new get_double_excitation_spin?
+  !       the 4 index ordering is already done in there, so we could avoid duplicating that work
+  ! Returns <i|H|j> where i and j are determinants differing by a same-spin double excitation
+  END_DOC
+  integer, intent(in)            :: Nint,ispin,N_hp
+  integer(bit_kind), intent(in)  :: key_i(Nint), key_j(Nint)
+  complex*16, intent(out)  :: hij_hp(N_hp)
+  integer, intent(in) :: spin_hp(N_hp), idx_hp(N_hp)
+  double precision, intent(in) :: sign_hp(N_hp)
+  logical, intent(in) :: allowed_hp(N_hp)
+  complex*16 :: hij0 
+  double precision :: phase_hp(N_hp)
+  integer                        :: exc(0:2,2)
+  double precision               :: phase
+  complex*16, external     :: mo_two_e_integral_complex
+  integer :: i1,i2,i3,i4,j2,j3,ii
+
+  PROVIDE big_array_exchange_integrals_complex mo_two_e_integrals_in_map
+
+  call get_double_excitation_spin(key_i,key_j,exc,phase,Nint)
+  hij0 = phase*(mo_two_e_integral_complex(                       &
+      exc(1,1),                                                      &
+      exc(2,1),                                                      &
+      exc(1,2),                                                      &
+      exc(2,2)) -                                  &
+      mo_two_e_integral_complex(                                    &
+      exc(1,1),                                                      &
+      exc(2,1),                                                      &
+      exc(2,2),                                                      &
+      exc(1,2)) )
+
+  ASSERT (exc(1,1) < exc(2,1))
+  ASSERT (exc(1,2) < exc(2,2))
+  i1=min(exc(1,1),exc(1,2))
+  j2=max(exc(1,1),exc(1,2))
+  j3=min(exc(2,1),exc(2,2))
+  i4=max(exc(2,1),exc(2,2))
+  i2=min(j2,j3)
+  i3=max(j2,j3)
+
+  do ii=1,N_hp
+    if (allowed_hp(ii)) then
+      if (ispin.eq.spin_hp(ii)) then
+        if ((idx_hp(ii).lt.i1).or.(idx_hp(ii).gt.i4)) then
+          phase_hp(ii)=1.d0
+        else if ((idx_hp(ii).lt.i2).or.(idx_hp(ii).gt.i3)) then
+          phase_hp(ii)=-1.d0
+        else
+          phase_hp(ii)=1.d0
+        endif
+      else
+        phase_hp(ii)=1.d0
+      endif
+    else
+      phase_hp(ii)=0.d0
+    endif
+    hij_hp(ii) = hij0 * phase_hp(ii)
+  enddo
+end
+
+subroutine i_h_j_mono_spin_hp(key_i,key_j,Nint,spin,hij_hp,N_hp,spin_hp,sign_hp,idx_hp,allowed_hp)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! todo: change this to use normal version of get_mono_excitation_from_fock
+  !       all info needed is in phase and hij, h/p part can happen after getting hij the normal way
+  ! Returns <i|H|j> where i and j are determinants differing by a single excitation
+  END_DOC
+  integer, intent(in)            :: Nint, spin, N_hp
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij_hp(N_hp)
+  integer, intent(in) :: spin_hp(N_hp), idx_hp(N_hp)
+  double precision, intent(in) :: sign_hp(N_hp)
+  logical, intent(in) :: allowed_hp(N_hp)
+  !double precision :: phase_hp(N_hp)
+  complex*16 :: hij0
+  
+  integer                        :: exc(0:2,2)
+  double precision               :: phase
+
+  PROVIDE big_array_exchange_integrals_complex mo_two_e_integrals_in_map
+ 
+  call get_single_excitation_spin(key_i(1,spin),key_j(1,spin),exc,phase,Nint)
+
+  call get_single_excitation_from_fock_hp(key_i,key_j,exc(1,1),exc(1,2),spin,phase,N_hp,hij_hp,spin_hp,sign_hp,idx_hp,allowed_hp)
+end
+
+subroutine get_single_excitation_from_fock_hp(det_1,det_2,h,p,spin,phase,N_hp,hij_hp,spin_hp,sign_hp,idx_hp,allowed_hp)
+  use bitmasks
+  implicit none
+  integer,intent(in) :: h,p,spin,N_hp
+  double precision, intent(in)  :: phase
+  integer(bit_kind), intent(in) :: det_1(N_int,2), det_2(N_int,2)
+  complex*16, intent(out) :: hij_hp(N_hp)
+  integer, intent(in) :: spin_hp(N_hp), idx_hp(N_hp)
+  double precision, intent(in) :: sign_hp(N_hp)
+  logical, intent(in) :: allowed_hp(N_hp)
+  double precision :: phase_hp(N_hp)
+  complex*16 :: hij0
+  integer :: low,high
+
+  integer(bit_kind) :: differences(N_int,2)
+  integer(bit_kind) :: hole(N_int,2)
+  integer(bit_kind) :: partcl(N_int,2)
+  integer :: occ_hole(N_int*bit_kind_size,2)
+  integer :: occ_partcl(N_int*bit_kind_size,2)
+  integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
+  integer :: i0,i,ii
+  do i = 1, N_int
+    differences(i,1) = xor(det_1(i,1),ref_closed_shell_bitmask(i,1))
+    differences(i,2) = xor(det_1(i,2),ref_closed_shell_bitmask(i,2))
+    hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1))
+    hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2))
+    partcl(i,1) = iand(differences(i,1),det_1(i,1))
+    partcl(i,2) = iand(differences(i,2),det_1(i,2))
+  enddo
+  call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int)
+  call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int)
+  hij0 = fock_op_cshell_ref_bitmask_cplx(h,p)
+  ! holes :: direct terms
+  do i0 = 1, n_occ_ab_hole(1)
+    i = occ_hole(i0,1)
+    hij0 -= big_array_coulomb_integrals_complex(i,h,p) ! get_mo_bielec_integral_schwartz(h,i,p,i,mo_integrals_map)
+  enddo
+  do i0 = 1, n_occ_ab_hole(2)
+    i = occ_hole(i0,2)
+    hij0 -= big_array_coulomb_integrals_complex(i,h,p) !get_mo_bielec_integral_schwartz(h,i,p,i,mo_integrals_map)
+  enddo
+ 
+  ! holes :: exchange terms
+  do i0 = 1, n_occ_ab_hole(spin)
+    i = occ_hole(i0,spin)
+    hij0 += big_array_exchange_integrals_complex(i,h,p) ! get_mo_bielec_integral_schwartz(h,i,i,p,mo_integrals_map)
+  enddo
+ 
+  ! particles :: direct terms
+  do i0 = 1, n_occ_ab_partcl(1)
+    i = occ_partcl(i0,1)
+    hij0 += big_array_coulomb_integrals_complex(i,h,p)!get_mo_bielec_integral_schwartz(h,i,p,i,mo_integrals_map)
+  enddo
+  do i0 = 1, n_occ_ab_partcl(2)
+    i = occ_partcl(i0,2)
+    hij0 += big_array_coulomb_integrals_complex(i,h,p) !get_mo_bielec_integral_schwartz(h,i,p,i,mo_integrals_map)
+  enddo
+ 
+  ! particles :: exchange terms
+  do i0 = 1, n_occ_ab_partcl(spin)
+    i = occ_partcl(i0,spin)
+    hij0 -= big_array_exchange_integrals_complex(i,h,p)!get_mo_bielec_integral_schwartz(h,i,i,p,mo_integrals_map)
+  enddo
+
+  low=min(h,p)
+  high=max(h,p)
+
+!!  do ii=1,N_hp
+!!    if (.not.allowed_hp(ii)) then
+!!      phase_hp(ii) = 0.d0
+!!      cycle
+!!    else if (spin_hp(ii).ne.spin) then
+!!      phase_hp(ii) = 1.d0
+!!    else
+!!      if ((low.lt.idx_hp(ii)).and.(high.gt.idx_hp(ii))) then
+!!        phase_hp(ii) = -1.d0
+!!      else
+!!        phase_hp(ii) = 1.d0
+!!      endif
+!!    endif
+!!  enddo
+!!
+!!  do ii=1,N_hp
+!!    if (allowed_hp(ii)) then
+!!      hij_hp(ii) = hij + sign_hp(ii) * big_array_coulomb_integrals(idx_hp(ii),h,p)
+!!      if (spin.eq.spin_hp(ii)) then
+!!        hij_hp(ii) = hij_hp(ii) - sign_hp(ii) * big_array_exchange_integrals(idx_hp(ii),h,p)
+!!      endif
+!!    else
+!!      hij_hp(ii) = 0.d0
+!!    endif
+!!  enddo
+!!
+!!  do ii=1,N_hp
+!!    hij_hp(ii) = hij_hp(ii) * phase_hp(ii) * phase
+!!  enddo
+
+  do ii=1,N_hp
+    if (.not.allowed_hp(ii)) then
+      phase_hp(ii) = 0.d0
+      hij_hp(ii) = 0.d0
+      cycle
+    else if (spin.eq.spin_hp(ii)) then
+      hij_hp(ii) = hij0 + sign_hp(ii) *(big_array_coulomb_integrals_complex(idx_hp(ii),h,p) - big_array_exchange_integrals_complex(idx_hp(ii),h,p))
+      if ((low.lt.idx_hp(ii)).and.(high.gt.idx_hp(ii))) then
+        phase_hp(ii) = -1.d0
+      else
+        phase_hp(ii) = 1.d0
+      endif
+    else
+      phase_hp(ii) = 1.d0
+      hij_hp(ii) = hij0 + sign_hp(ii) * big_array_coulomb_integrals_complex(idx_hp(ii),h,p)
+    endif
+    hij_hp(ii) = hij_hp(ii) * phase * phase_hp(ii)
+  enddo
+
+end
+
+
+subroutine i_H_j_double_alpha_beta_hp(key_i,key_j,Nint,hij_hp,N_hp,spin_hp,sign_hp,idx_hp,allowed_hp)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns <i|H|j> where i and j are determinants differing by an opposite-spin double excitation
+  END_DOC
+  integer, intent(in)            :: Nint,N_hp
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij_hp(N_hp)
+  complex*16 :: hij0
+  integer, intent(in) :: spin_hp(N_hp), idx_hp(N_hp)
+  double precision, intent(in) :: sign_hp(N_hp)
+  logical, intent(in) :: allowed_hp(N_hp)
+  double precision :: phase_hp(N_hp)
+  integer :: i
+
+  integer :: lowhigh(2,2)
+  integer                        :: exc(0:2,2,2)
+  double precision               :: phase, phase2
+  complex*16, external     :: mo_two_e_integral_complex
+
+  PROVIDE big_array_exchange_integrals_complex mo_two_e_integrals_in_map
+
+  call get_single_excitation_spin(key_i(1,1),key_j(1,1),exc(0,1,1),phase,Nint)
+  call get_single_excitation_spin(key_i(1,2),key_j(1,2),exc(0,1,2),phase2,Nint)
+  phase = phase*phase2
+
+  if (exc(1,1,1) == exc(1,2,2)) then
+    hij0 =  big_array_exchange_integrals_complex(exc(1,1,1),exc(1,1,2),exc(1,2,1))
+  else if (exc(1,2,1) == exc(1,1,2)) then
+    hij0 =  big_array_exchange_integrals_complex(exc(1,2,1),exc(1,1,1),exc(1,2,2))
+  else
+    hij0 = mo_two_e_integral_complex(                              &
+        exc(1,1,1),                                                  &
+        exc(1,1,2),                                                  &
+        exc(1,2,1),                                                  &
+        exc(1,2,2))
+  endif
+  
+  !todo: clean this up
+  ! if new particle/hole is between p/h of single exc of same spin, then parity changes, otherwise stays the same
+  ! value of Hij for double excitation is unchanged (new p/h is not one of the indices involved in the excitation)
+  
+  lowhigh(1,1)=min(exc(1,1,1),exc(1,2,1))
+  lowhigh(2,1)=max(exc(1,1,1),exc(1,2,1))
+  lowhigh(1,2)=min(exc(1,1,2),exc(1,2,2))
+  lowhigh(2,2)=max(exc(1,1,2),exc(1,2,2))
+  do i=1,N_hp
+    if (.not.allowed_hp(i)) then
+      phase_hp(i)=0.d0
+    else if ((idx_hp(i).gt.lowhigh(1,spin_hp(i))).and.(idx_hp(i).lt.lowhigh(2,spin_hp(i)))) then
+      phase_hp(i)=-1.d0
+    else
+      phase_hp(i)=1.d0
+    endif
+    hij_hp(i)=hij0*phase*phase_hp(i)
+  enddo
+end
diff --git a/src/green/hu0_lanczos.irp.f b/src/green/hu0_lanczos.irp.f
new file mode 100644
index 00000000..6f7ebf1d
--- /dev/null
+++ b/src/green/hu0_lanczos.irp.f
@@ -0,0 +1,405 @@
+! modified from H_S2_u_0_nstates_openmp in Davidson/u0Hu0.irp.f
+
+subroutine H_u_0_openmp(v_0,u_0,sze)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes v_0 = H|u_0>
+  !
+  ! Assumes that the determinants are in psi_det
+  !
+  ! istart, iend, ishift, istep are used in ZMQ parallelization.
+  END_DOC
+  integer :: N_st=1
+  integer, intent(in)            :: sze
+  complex*16, intent(inout)  :: v_0(sze), u_0(sze)
+  integer :: k
+  call cdset_order(u_0(1),psi_bilinear_matrix_order,N_det)
+  v_0 = (0.d0,0.d0)
+  
+  call h_u_0_openmp_work(v_0,u_0,sze,1,N_det,0,1)
+
+  call cdset_order(v_0(1),psi_bilinear_matrix_order_reverse,N_det)
+  call cdset_order(u_0(1),psi_bilinear_matrix_order_reverse,N_det)
+
+end
+
+
+subroutine H_u_0_openmp_work(v_t,u_t,sze,istart,iend,ishift,istep)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes v_t = H|u_t>
+  !
+  ! Default should be 1,N_det,0,1
+  END_DOC
+  integer :: N_st=1
+  integer, intent(in)            :: sze,istart,iend,ishift,istep
+  complex*16, intent(in)   :: u_t(N_det)
+  complex*16, intent(out)  :: v_t(sze)
+
+  
+  PROVIDE ref_bitmask_energy N_int
+
+  select case (N_int)
+    case (1)
+      call H_u_0_openmp_work_1(v_t,u_t,sze,istart,iend,ishift,istep)
+    case (2)
+      call H_u_0_openmp_work_2(v_t,u_t,sze,istart,iend,ishift,istep)
+    case (3)
+      call H_u_0_openmp_work_3(v_t,u_t,sze,istart,iend,ishift,istep)
+    case (4)
+      call H_u_0_openmp_work_4(v_t,u_t,sze,istart,iend,ishift,istep)
+    case default
+      call H_u_0_openmp_work_N_int(v_t,u_t,sze,istart,iend,ishift,istep)
+  end select
+end
+BEGIN_TEMPLATE
+
+subroutine H_u_0_openmp_work_$N_int(v_t,u_t,sze,istart,iend,ishift,istep)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes v_t = H|u_t>
+  !
+  ! Default should be 1,N_det,0,1
+  END_DOC
+  integer :: N_st=1
+  integer, intent(in)            :: sze,istart,iend,ishift,istep
+  complex*16, intent(in)   :: u_t(N_det)
+  complex*16, intent(out)  :: v_t(sze) 
+
+  complex*16               :: hij
+  double precision :: hii 
+  integer                        :: i,j,k,l
+  integer                        :: k_a, k_b, l_a, l_b, m_a, m_b
+  integer                        :: istate
+  integer                        :: krow, kcol, krow_b, kcol_b
+  integer                        :: lrow, lcol
+  integer                        :: mrow, mcol
+  integer(bit_kind)              :: spindet($N_int)
+  integer(bit_kind)              :: tmp_det($N_int,2)
+  integer(bit_kind)              :: tmp_det2($N_int,2)
+  integer(bit_kind)              :: tmp_det3($N_int,2)
+  integer(bit_kind), allocatable :: buffer(:,:)
+  integer                        :: n_doubles
+  integer, allocatable           :: doubles(:)
+  integer, allocatable           :: singles_a(:)
+  integer, allocatable           :: singles_b(:)
+  integer, allocatable           :: idx(:), idx0(:)
+  integer                        :: maxab, n_singles_a, n_singles_b, kcol_prev
+  integer*8                      :: k8
+
+  maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
+  allocate(idx0(maxab))
+  
+  do i=1,maxab
+    idx0(i) = i
+  enddo
+
+  ! Prepare the array of all alpha single excitations
+  ! -------------------------------------------------
+
+  PROVIDE N_int nthreads_davidson
+  !$OMP PARALLEL DEFAULT(NONE) NUM_THREADS(nthreads_davidson)        &
+      !$OMP   SHARED(psi_bilinear_matrix_rows, N_det,                &
+      !$OMP          psi_bilinear_matrix_columns,                    &
+      !$OMP          psi_det_alpha_unique, psi_det_beta_unique,      &
+      !$OMP          n_det_alpha_unique, n_det_beta_unique, N_int,   &
+      !$OMP          psi_bilinear_matrix_transp_rows,                &
+      !$OMP          psi_bilinear_matrix_transp_columns,             &
+      !$OMP          psi_bilinear_matrix_transp_order, N_st,         &
+      !$OMP          psi_bilinear_matrix_order_transp_reverse,       &
+      !$OMP          psi_bilinear_matrix_columns_loc,                &
+      !$OMP          psi_bilinear_matrix_transp_rows_loc,            &
+      !$OMP          istart, iend, istep, irp_here, v_t,             &
+      !$OMP          ishift, idx0, u_t, maxab)                       &
+      !$OMP   PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i,     &
+      !$OMP          lcol, lrow, l_a, l_b,                           &
+      !$OMP          buffer, doubles, n_doubles,                     &
+      !$OMP          tmp_det2, hii, hij, idx, l, kcol_prev,          &
+      !$OMP          singles_a, n_singles_a, singles_b,              &
+      !$OMP          n_singles_b, k8)
+  
+  ! Alpha/Beta double excitations
+  ! =============================
+    
+  allocate( buffer($N_int,maxab),                                     &
+      singles_a(maxab),                                              &
+      singles_b(maxab),                                              &
+      doubles(maxab),                                                &
+      idx(maxab))
+
+  kcol_prev=-1
+
+  ASSERT (iend <= N_det)
+  ASSERT (istart > 0)
+  ASSERT (istep  > 0)
+
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_a=istart+ishift,iend,istep
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    if (kcol /= kcol_prev) then
+      call get_all_spin_singles_$N_int(                              &
+          psi_det_beta_unique, idx0,                                 &
+          tmp_det(1,2), N_det_beta_unique,                           &
+          singles_b, n_singles_b)
+    endif
+    kcol_prev = kcol
+
+    ! Loop over singly excited beta columns
+    ! -------------------------------------
+
+    do i=1,n_singles_b
+      lcol = singles_b(i)
+
+      tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol)
+
+      l_a = psi_bilinear_matrix_columns_loc(lcol)
+      ASSERT (l_a <= N_det)
+
+      do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - l_a
+        lrow = psi_bilinear_matrix_rows(l_a)
+        ASSERT (lrow <= N_det_alpha_unique)
+
+        buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow)
+
+        ASSERT (l_a <= N_det)
+        idx(j) = l_a
+        l_a = l_a+1
+      enddo
+      j = j-1
+
+      call get_all_spin_singles_$N_int(                              &
+          buffer, idx, tmp_det(1,1), j,                              &
+          singles_a, n_singles_a )
+
+      ! Loop over alpha singles
+      ! -----------------------
+
+      do k = 1,n_singles_a
+        l_a = singles_a(k)
+        ASSERT (l_a <= N_det)
+
+        lrow = psi_bilinear_matrix_rows(l_a)
+        ASSERT (lrow <= N_det_alpha_unique)
+
+        tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+        call i_h_j_double_alpha_beta_complex(tmp_det,tmp_det2,$N_int,hij)
+        v_t(k_a) = v_t(k_a) + hij * u_t(l_a)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO 
+
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_a=istart+ishift,iend,istep
+
+
+    ! Single and double alpha excitations
+    ! ===================================
+    
+    
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+    
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+    
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+    
+    ! Initial determinant is at k_b in beta-major representation
+    ! ----------------------------------------------------------------------
+    
+    k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
+    ASSERT (k_b <= N_det)
+
+    spindet(1:$N_int) = tmp_det(1:$N_int,1)
+    
+    ! Loop inside the beta column to gather all the connected alphas
+    lcol = psi_bilinear_matrix_columns(k_a)
+    l_a = psi_bilinear_matrix_columns_loc(lcol)
+    do i=1,N_det_alpha_unique
+      if (l_a > N_det) exit
+      lcol = psi_bilinear_matrix_columns(l_a)
+      if (lcol /= kcol) exit
+      lrow = psi_bilinear_matrix_rows(l_a)
+      ASSERT (lrow <= N_det_alpha_unique)
+
+      buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow)
+      idx(i) = l_a
+      l_a = l_a+1
+    enddo
+    i = i-1
+    
+    !call get_all_spin_singles_and_doubles_$N_int(                    &
+    !    buffer, idx, spindet, i,                                     &
+    !    singles_a, doubles, n_singles_a, n_doubles )
+    call get_all_spin_singles_and_doubles(                           &
+        buffer, idx, spindet, $N_int, i,                             &
+        singles_a, doubles, n_singles_a, n_doubles )
+
+    ! Compute Hij for all alpha singles
+    ! ----------------------------------
+
+    tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+    do i=1,n_singles_a
+      l_a = singles_a(i)
+      ASSERT (l_a <= N_det)
+
+      lrow = psi_bilinear_matrix_rows(l_a)
+      ASSERT (lrow <= N_det_alpha_unique)
+
+      tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+      call i_h_j_single_spin_complex( tmp_det, tmp_det2, $N_int, 1, hij)
+
+      v_t(k_a) = v_t(k_a) + hij * u_t(l_a)
+    enddo
+
+    
+    ! Compute Hij for all alpha doubles
+    ! ----------------------------------
+    
+    do i=1,n_doubles
+      l_a = doubles(i)
+      ASSERT (l_a <= N_det)
+
+      lrow = psi_bilinear_matrix_rows(l_a)
+      ASSERT (lrow <= N_det_alpha_unique)
+
+      call i_h_j_double_spin_complex( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
+      v_t(k_a) = v_t(k_a) + hij * u_t(l_a)
+    enddo
+    
+
+    ! Single and double beta excitations
+    ! ==================================
+
+    
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+    
+    krow = psi_bilinear_matrix_rows(k_a)
+    kcol = psi_bilinear_matrix_columns(k_a)
+    
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+    
+    spindet(1:$N_int) = tmp_det(1:$N_int,2)
+    
+    ! Initial determinant is at k_b in beta-major representation
+    ! -----------------------------------------------------------------------
+
+    k_b = psi_bilinear_matrix_order_transp_reverse(k_a) 
+    ASSERT (k_b <= N_det)
+    
+    ! Loop inside the alpha row to gather all the connected betas
+    lrow = psi_bilinear_matrix_transp_rows(k_b)
+    l_b = psi_bilinear_matrix_transp_rows_loc(lrow)
+    do i=1,N_det_beta_unique
+      if (l_b > N_det) exit
+      lrow = psi_bilinear_matrix_transp_rows(l_b)
+      if (lrow /= krow) exit
+      lcol = psi_bilinear_matrix_transp_columns(l_b)
+      ASSERT (lcol <= N_det_beta_unique)
+
+      buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol)
+      idx(i) = l_b
+      l_b = l_b+1
+    enddo
+    i = i-1
+  
+    !call get_all_spin_singles_and_doubles_$N_int(                    &
+    !    buffer, idx, spindet, i,                                     &
+    !    singles_b, doubles, n_singles_b, n_doubles )
+    call get_all_spin_singles_and_doubles(                           &
+        buffer, idx, spindet, $N_int, i,                             &
+        singles_b, doubles, n_singles_b, n_doubles )
+    
+    ! Compute Hij for all beta singles
+    ! ----------------------------------
+    
+    tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    do i=1,n_singles_b
+      l_b = singles_b(i)
+      ASSERT (l_b <= N_det)
+
+      lcol = psi_bilinear_matrix_transp_columns(l_b)
+      ASSERT (lcol <= N_det_beta_unique)
+
+      tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
+      call i_h_j_single_spin_complex( tmp_det, tmp_det2, $N_int, 2, hij)
+      l_a = psi_bilinear_matrix_transp_order(l_b)
+      ASSERT (l_a <= N_det)
+      v_t(k_a) = v_t(k_a) + hij * u_t(l_a)
+    enddo
+    
+    ! Compute Hij for all beta doubles
+    ! ----------------------------------
+    
+    do i=1,n_doubles
+      l_b = doubles(i)
+      ASSERT (l_b <= N_det)
+
+      lcol = psi_bilinear_matrix_transp_columns(l_b)
+      ASSERT (lcol <= N_det_beta_unique)
+
+      call i_h_j_double_spin_complex( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij)
+      l_a = psi_bilinear_matrix_transp_order(l_b)
+      ASSERT (l_a <= N_det)
+
+      v_t(k_a) = v_t(k_a) + hij * u_t(l_a)
+    enddo
+
+
+    ! Diagonal contribution
+    ! =====================
+
+    
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+    
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+    
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+    
+    !double precision, external :: diag_H_mat_elem, diag_S_mat_elem
+    double precision, external :: diag_H_mat_elem
+    hii = diag_H_mat_elem(tmp_det,$N_int) 
+    v_t(k_a) = v_t(k_a) + hii * u_t(k_a)
+
+  end do
+  !$OMP END DO 
+  deallocate(buffer, singles_a, singles_b, doubles, idx)
+  !$OMP END PARALLEL
+
+end
+
+SUBST [ N_int ]
+
+1;;
+2;;
+3;;
+4;;
+N_int;;
+
+END_TEMPLATE
+
diff --git a/src/green/lanczos.irp.f b/src/green/lanczos.irp.f
new file mode 100644
index 00000000..baf66d80
--- /dev/null
+++ b/src/green/lanczos.irp.f
@@ -0,0 +1,883 @@
+
+
+BEGIN_PROVIDER [ integer, n_green_vec ]
+  implicit none
+  BEGIN_DOC
+  ! number of particles/holes to use for spectral density calc.
+  ! just set to 2 for now (homo and lumo)
+  END_DOC
+  n_green_vec = 2
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, green_idx, (n_green_vec) ]
+&BEGIN_PROVIDER [ integer, green_idx_int, (n_green_vec) ]
+&BEGIN_PROVIDER [ integer, green_idx_bit, (n_green_vec) ]
+&BEGIN_PROVIDER [ integer, green_spin, (n_green_vec) ]
+&BEGIN_PROVIDER [ double precision, green_sign, (n_green_vec) ]
+  implicit none
+  BEGIN_DOC
+  ! description of particles/holes to be used in spectral density calculation
+  ! green_idx: orbital index of particle/hole
+  ! green_idx_{int,bit}: location of idx within determinant bitstring
+  ! green_spin: 1(alpha) or 2(beta)
+  ! green_sign: 1(particle) or -1(hole)
+  END_DOC
+  integer :: s1,s2,i1,i2
+  integer :: i
+
+  integer :: idx_homo_lumo(2), spin_homo_lumo(2)
+  logical :: has_idx,has_spin,has_sign,has_lanc
+  integer :: nlanc
+  ! needs psi_det, mo_num, N_int, mo_bielec_integral_jj, mo_mono_elec_integral_diag
+  call ezfio_has_green_green_idx(has_idx)
+  call ezfio_has_green_green_spin(has_spin)
+  call ezfio_has_green_green_sign(has_sign)
+!  call ezfio_has_green_n_lanczos_complete(has_lanc)
+  call ezfio_get_green_n_lanczos_complete(nlanc)
+  if (has_idx.and.has_spin.and.has_sign) then
+    print*,'reading idx,spin,sign'
+    call ezfio_get_green_green_idx(green_idx)
+    call ezfio_get_green_green_spin(green_spin)
+    call ezfio_get_green_green_sign(green_sign)
+  else if (nlanc.gt.0) then
+    stop 'problem with lanczos restart; need idx, spin, sign'
+  else
+    print*,'new lanczos calculation, finding homo/lumo'
+    call get_homo_lumo(psi_det(1:N_int,1:2,1),N_int,mo_num,idx_homo_lumo,spin_homo_lumo)
+
+    ! homo
+    green_idx(1)=idx_homo_lumo(1)
+    green_spin(1)=spin_homo_lumo(1)
+    green_sign(1)=-1.d0
+  
+    ! lumo
+    green_idx(2)=idx_homo_lumo(2)
+    green_spin(2)=spin_homo_lumo(2)
+    green_sign(2)=1.d0
+
+    call ezfio_set_green_green_idx(green_idx)
+    call ezfio_set_green_green_spin(green_spin)
+    call ezfio_set_green_green_sign(green_sign)
+  endif
+
+
+
+!  if (nlanc.gt.0) then
+! !   call ezfio_get_green_n_lanczos_complete(nlanc)
+!    print*,'restarting from previous lanczos',nlanc
+!    if (has_idx.and.has_spin.and.has_sign) then
+!      print*,'reading idx,spin,sign'
+!      call ezfio_get_green_green_idx(green_idx)
+!      call ezfio_get_green_green_spin(green_spin)
+!      call ezfio_get_green_green_sign(green_sign)
+!    else
+!      stop 'problem with lanczos restart; need idx, spin, sign'
+!    endif
+!  else
+!    print*,'new lanczos calculation, finding homo/lumo'
+!    call get_homo_lumo(psi_det(1:N_int,1:2,1),N_int,mo_num,idx_homo_lumo,spin_homo_lumo)
+!
+!    ! homo
+!    green_idx(1)=idx_homo_lumo(1)
+!    green_spin(1)=spin_homo_lumo(1)
+!    green_sign(1)=-1.d0
+!  
+!    ! lumo
+!    green_idx(2)=idx_homo_lumo(2)
+!    green_spin(2)=spin_homo_lumo(2)
+!    green_sign(2)=1.d0
+!
+!    call ezfio_set_green_green_idx(green_idx)
+!    call ezfio_set_green_green_spin(green_spin)
+!    call ezfio_set_green_green_sign(green_sign)
+!  endif
+
+  do i=1,n_green_vec
+    call get_orb_int_bit(green_idx(i),green_idx_int(i),green_idx_bit(i))
+    print*,i,green_idx(i),green_idx_int(i),green_idx_bit(i),green_spin(i),green_sign(i)
+  enddo
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, green_det_phase, (N_det,n_green_vec) ]
+  implicit none
+  BEGIN_DOC
+  ! for each det in psi, compute phase for each particle/hole excitation
+  ! each element should be +/-1 or 0
+  END_DOC
+  integer :: i
+  double precision :: phase_tmp(n_green_vec)
+  PROVIDE psi_det green_idx
+  
+  do i=1,N_det
+    call get_phase_hp(green_idx_int,green_idx_bit,green_spin,green_sign,psi_det(1,1,i),phase_tmp,N_int,n_green_vec)
+    green_det_phase(i,1:n_green_vec) = phase_tmp(1:n_green_vec)
+  enddo
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, u1_lanczos, (N_det,n_green_vec) ]
+  implicit none
+  BEGIN_DOC
+  ! initial lanczos vectors
+  ! must be normalized
+  END_DOC
+
+  integer :: i,j
+  
+  do j=1,n_green_vec
+    do i=1,N_det
+      u1_lanczos(i,j)=green_det_phase(i,j)*psi_coef(i,1)
+    enddo
+    call normalize_complex(u1_lanczos(:,j),N_det)
+  enddo
+
+END_PROVIDER
+
+! BEGIN_PROVIDER [ double precision, alpha_lanczos, (n_green_vec,n_lanczos_iter) ]
+!&BEGIN_PROVIDER [ double precision, beta_lanczos, (n_green_vec,n_lanczos_iter) ]
+ BEGIN_PROVIDER [ double precision, alpha_lanczos, (n_lanczos_iter,n_green_vec) ]
+&BEGIN_PROVIDER [ double precision, beta_lanczos, (n_lanczos_iter,n_green_vec) ]
+&BEGIN_PROVIDER [ complex*16, un_lanczos, (N_det,n_green_vec) ]
+&BEGIN_PROVIDER [ complex*16, vn_lanczos, (N_det,n_green_vec) ]
+&BEGIN_PROVIDER [ double precision, lanczos_eigvals, (n_lanczos_iter,n_green_vec) ]
+  implicit none
+  BEGIN_DOC
+  ! for each particle/hole:
+  ! provide alpha and beta for tridiagonal form of H
+  ! un, vn lanczos vectors from latest iteration
+  ! lanczos_eigvals: eigenvalues of tridiagonal form of H
+  END_DOC
+  PROVIDE lanczos_debug_print n_lanczos_debug
+  complex*16, allocatable :: work(:,:)
+!  double precision :: alpha_tmp,beta_tmp
+  double precision, allocatable :: alpha_tmp(:),beta_tmp(:)
+  double precision, allocatable :: alpha_tmp_vec(:,:), beta_tmp_vec(:,:)
+  integer :: i,j
+  integer :: n_lanc_new_tmp, n_lanc_old_tmp
+  call ezfio_get_green_n_lanczos_iter(n_lanc_new_tmp)
+  call ezfio_get_green_n_lanczos_complete(n_lanc_old_tmp)
+ 
+  if ((n_lanczos_complete).gt.0) then
+!    allocate(alpha_tmp_vec(n_green_vec,n_lanczos_complete),beta_tmp_vec(n_green_vec,n_lanczos_complete))
+    allocate(alpha_tmp_vec(n_lanczos_complete,n_green_vec),beta_tmp_vec(n_lanczos_complete,n_green_vec))
+    logical :: has_un_lanczos, has_vn_lanczos
+    call ezfio_has_green_un_lanczos(has_un_lanczos)
+    call ezfio_has_green_vn_lanczos(has_vn_lanczos)
+    if (has_un_lanczos.and.has_vn_lanczos) then
+      call ezfio_get_green_un_lanczos(un_lanczos)
+      call ezfio_get_green_vn_lanczos(vn_lanczos)
+!      if (lanczos_debug_print) then
+!        print*,'uu,vv read from disk'
+!        do i=1,n_lanczos_debug
+!          write(6,'(4(E25.15))')un_lanczos(i),vn_lanczos(i)
+!        enddo
+!      endif
+    else
+      print*,'problem reading lanczos vectors for restart'
+      stop
+    endif
+    logical :: has_alpha_lanczos, has_beta_lanczos
+    call ezfio_has_green_alpha_lanczos(has_alpha_lanczos)
+    call ezfio_has_green_beta_lanczos(has_beta_lanczos)
+    if (has_alpha_lanczos.and.has_beta_lanczos) then
+      call ezfio_set_green_n_lanczos_iter(n_lanc_old_tmp)
+      call ezfio_get_green_alpha_lanczos(alpha_tmp_vec)
+      call ezfio_get_green_beta_lanczos(beta_tmp_vec)
+      call ezfio_set_green_n_lanczos_iter(n_lanc_new_tmp)
+      do j=1,n_green_vec
+        do i=1,n_lanczos_complete
+          alpha_lanczos(i,j)=alpha_tmp_vec(i,j)
+          beta_lanczos(i,j)=beta_tmp_vec(i,j)
+        enddo
+      enddo
+    else
+      print*,'problem reading lanczos alpha, beta for restart'
+      stop
+    endif
+    deallocate(alpha_tmp_vec,beta_tmp_vec)
+  else
+    call write_time(6)
+    print*,'no saved lanczos vectors. starting lanczos'
+    PROVIDE u1_lanczos
+    un_lanczos=u1_lanczos
+    allocate(work(N_det,n_green_vec),alpha_tmp(n_green_vec),beta_tmp(n_green_vec))
+    call lanczos_h_init_hp(un_lanczos,vn_lanczos,work,N_det,alpha_tmp,beta_tmp,&
+                           n_green_vec,green_spin,green_sign,green_idx)
+    do i=1,n_green_vec
+      alpha_lanczos(1,i)=alpha_tmp(i)
+      beta_lanczos(1,i)=beta_tmp(i)
+    enddo
+    n_lanczos_complete=1
+    deallocate(work,alpha_tmp,beta_tmp)
+  endif
+
+  allocate(work(N_det,n_green_vec),alpha_tmp(n_green_vec),beta_tmp(n_green_vec))
+  do i=n_lanczos_complete+1,n_lanczos_iter
+    call write_time(6)
+    print*,'starting lanczos iteration',i
+    call lanczos_h_step_hp(un_lanczos,vn_lanczos,work,N_det,alpha_tmp,beta_tmp,&
+                           n_green_vec,green_spin,green_sign,green_idx)
+    do j=1,n_green_vec
+      alpha_lanczos(i,j)=alpha_tmp(j)
+      beta_lanczos(i,j)=beta_tmp(j)
+    enddo
+    n_lanczos_complete=n_lanczos_complete+1
+  enddo
+  deallocate(work,alpha_tmp,beta_tmp)
+
+  call ezfio_set_green_alpha_lanczos(alpha_lanczos)
+  call ezfio_set_green_beta_lanczos(beta_lanczos)
+  call ezfio_set_green_un_lanczos(un_lanczos)
+  call ezfio_set_green_vn_lanczos(vn_lanczos)
+  call ezfio_set_green_n_lanczos_complete(n_lanczos_complete)
+
+  call diag_lanczos_vals_hp(alpha_lanczos, beta_lanczos, n_lanczos_complete, lanczos_eigvals,&
+                            n_lanczos_iter,n_green_vec)
+  call ezfio_set_green_lanczos_eigvals(lanczos_eigvals)
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, delta_omega ]
+  implicit none
+  BEGIN_DOC
+  ! step size between frequency points for spectral density calculation
+  ! calculated from min, max, and number of steps
+  END_DOC
+  delta_omega=(omega_max-omega_min)/n_omega
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, omega_list, (n_omega) ]
+  implicit none
+  BEGIN_DOC
+  ! list of frequencies at which to compute spectral density
+  END_DOC
+
+  integer :: i
+  double precision :: omega_i
+  PROVIDE delta_omega
+  do i=1,n_omega
+    omega_list(i) = omega_min + (i-1)*delta_omega
+  enddo
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, spectral_lanczos, (n_omega,n_green_vec) ]
+  implicit none
+  BEGIN_DOC
+  ! spectral density A(omega) calculated from lanczos alpha/beta
+  ! calculated for n_omega points between omega_min and omega_max
+  END_DOC
+
+  integer :: i,j
+  double precision :: omega_i
+  complex*16 :: z_i
+  !double precision :: spec_lanc_rev
+  double precision :: spec_lanc_rev_sign
+  logical :: has_ci_energy
+  double precision :: ref_energy_0
+  PROVIDE delta_omega alpha_lanczos beta_lanczos omega_list
+  call ezfio_has_fci_energy(has_ci_energy)
+  if (has_ci_energy) then
+    call ezfio_get_fci_energy(ref_energy_0)
+  else
+    print*,'no reference energy from full_ci_zmq, exiting'
+    stop
+  endif
+
+
+  do i=1,n_omega
+    omega_i = omega_list(i)
+    z_i = dcmplx(omega_i,gf_epsilon)
+    do j=1,n_green_vec
+!      spectral_lanczos(i,j) = spec_lanc_rev(n_lanczos_iter,alpha_lanczos(:,j),beta_lanczos(:,j),z_i)
+      spectral_lanczos(i,j) = spec_lanc_rev_sign(n_lanczos_iter,    &
+                                                alpha_lanczos(:,j), &
+                                                beta_lanczos(:,j),  &
+                                                z_i - green_sign(j)*ref_energy_0, &
+                                                green_sign(j))
+    enddo
+  enddo
+
+END_PROVIDER
+
+double precision function spec_lanc(n_lanc_iter,alpha,beta,z)
+  include 'constants.include.F'
+  implicit none
+  BEGIN_DOC
+  ! input:
+  !   alpha, beta: from tridiagonal form of H (obtain via lanczos)
+  !                beta and alpha same size (beta(1) is not used)
+  !   n_lanc_iter: size of alpha, beta
+  !             z: omega + i*epsilon
+  !                omega is frequency for which spectral density is to be computed
+  !                epsilon is magnitude of infinitesimal imaginary term
+  ! output:
+  !     spec_lanc: spectral density A(omega)
+  !
+  ! uses inv_pi=(1.d0/pi) from constants 
+  END_DOC
+  integer, intent(in) :: n_lanc_iter
+  double precision, intent(in) :: alpha(n_lanc_iter), beta(n_lanc_iter)
+  complex*16, intent(in) :: z
+
+  complex*16 bigAj2,bigAj1,bigAj0
+  complex*16 bigBj2,bigBj1,bigBj0
+  integer :: j
+  ! init for j=1
+  ! bigAj2 is A(j-2)
+  ! bigAj1 is A(j-1)
+  ! etc.
+
+  bigAj2=1.d0        ! A(-1)
+  bigAj1=0.d0        ! A(0)
+  bigAj0=1.d0        ! A(1)
+
+  bigBj2=0.d0        ! B(-1)
+  bigBj1=1.d0        ! B(0)
+  bigBj0=z-alpha(1)  ! B(1)
+
+  do j=2,n_lanc_iter
+    bigAj2=bigAj1
+    bigAj1=bigAj0
+    bigAj0=(z-alpha(j))*bigAj1 - beta(j)**2*bigAj2
+    
+    bigBj2=bigBj1
+    bigBj1=bigBj0
+    bigBj0=(z-alpha(j))*bigBj1 - beta(j)**2*bigBj2
+  enddo
+  spec_lanc=-imag(bigAj0/bigBj0)*inv_pi
+end
+
+double precision function spec_lanc_rev(n_lanc_iter,alpha,beta,z)
+  include 'constants.include.F'
+  implicit none
+  BEGIN_DOC
+  ! reverse iteration is more numerically stable
+  ! input:
+  !   alpha, beta: from tridiagonal form of H (obtain via lanczos)
+  !                beta and alpha same size (beta(1) is not used)
+  !   n_lanc_iter: size of alpha, beta
+  !             z: omega + i*epsilon
+  !                omega is frequency for which spectral density is to be computed
+  !                epsilon is magnitude of infinitesimal imaginary term
+  ! output:
+  !     spec_lanc: spectral density A(omega)
+  !
+  ! uses inv_pi=(1.d0/pi) from constants 
+  END_DOC
+  integer, intent(in) :: n_lanc_iter
+  double precision, intent(in) :: alpha(n_lanc_iter), beta(n_lanc_iter)
+  complex*16, intent(in) :: z
+
+  complex*16 :: tmp
+  integer :: j
+
+  tmp=(0.d0,0.d0)
+  do j=n_lanc_iter,2,-1
+    tmp=-beta(j)**2/(z-alpha(j)+tmp)
+  enddo
+  tmp=1.d0/(z-alpha(1)+tmp)
+  spec_lanc_rev=-imag(tmp)*inv_pi
+end
+
+double precision function spec_lanc_rev_sign(n_lanc_iter,alpha,beta,z,g_sign)
+  include 'constants.include.F'
+  implicit none
+  BEGIN_DOC
+  ! reverse iteration is more numerically stable
+  ! input:
+  !   alpha, beta: from tridiagonal form of H (obtain via lanczos)
+  !                beta and alpha same size (beta(1) is not used)
+  !   n_lanc_iter: size of alpha, beta
+  !             z: omega + i*epsilon
+  !                omega is frequency for which spectral density is to be computed
+  !                epsilon is magnitude of infinitesimal imaginary term
+  ! output:
+  !     spec_lanc: spectral density A(omega)
+  !
+  ! uses inv_pi=(1.d0/pi) from constants 
+  END_DOC
+  integer, intent(in) :: n_lanc_iter
+  double precision, intent(in) :: alpha(n_lanc_iter), beta(n_lanc_iter)
+  complex*16, intent(in) :: z
+  double precision, intent(in) :: g_sign
+
+  complex*16 :: tmp
+  integer :: j
+
+  tmp=(0.d0,0.d0)
+  do j=n_lanc_iter,2,-1
+    tmp=-beta(j)**2/(z+g_sign*alpha(j)+tmp)
+  enddo
+  tmp=1.d0/(z+g_sign*alpha(1)+tmp)
+  spec_lanc_rev_sign=-imag(tmp)*inv_pi
+end
+
+
+subroutine lanczos_h_init_hp(uu,vv,work,sze,alpha_i,beta_i,ng,spin_hp,sign_hp,idx_hp)
+  implicit none
+  integer, intent(in) :: sze,ng
+  complex*16, intent(in)    :: uu(sze,ng)
+  complex*16, intent(out)   :: vv(sze,ng)
+  complex*16 :: work(sze,ng)
+  double precision, intent(out) :: alpha_i(ng), beta_i(ng)
+  integer, intent(in) :: spin_hp(ng), idx_hp(ng)
+  double precision, intent(in) ::  sign_hp(ng)
+
+  double precision, external :: dznrm2
+  complex*16, external :: u_dot_v_complex
+  integer :: i,j
+
+  BEGIN_DOC
+  ! initial step for lanczos tridiagonalization of H for multiple holes/particles
+  ! uu is array of initial vectors u1 (creation/annihilation operator applied to psi)
+  ! output vv is array of lanczos v1 (one for each hole/particle)
+  END_DOC
+
+  print *,'starting lanczos'
+  print *,'sze = ',sze
+
+  ! |uu> is |u(1)>
+
+  ! |w(1)> = H|u(1)>
+  ! |work> is now |w(1)>
+  call compute_hu_hp(uu,work,ng,sze,spin_hp,sign_hp,idx_hp)
+
+  ! alpha(n+1) = <u(n+1)|w(n+1)>
+  do i=1,ng
+    alpha_i(i)=real(u_dot_v_complex(uu(1:sze,i),work(1:sze,i),sze))
+  enddo
+
+  do j=1,ng
+    do i=1,sze
+      vv(i,j)=work(i,j)-alpha_i(j)*uu(i,j)
+!      write(6,'(7(E25.15))')uu(i,j),vv(i,j),work(i,j),alpha_i(j)
+    enddo
+  enddo
+  
+  beta_i=0.d0
+  ! |vv> is |v(1)>
+  ! |uu> is |u(1)>
+end
+
+subroutine lanczos_h_step_hp(uu,vv,work,sze,alpha_i,beta_i,ng,spin_hp,sign_hp,idx_hp)
+  implicit none
+  integer, intent(in) :: sze,ng
+  complex*16, intent(inout) :: uu(sze,ng),vv(sze,ng)
+  complex*16, intent(out) :: work(sze,ng)
+  double precision, intent(out) :: alpha_i(ng), beta_i(ng)
+  integer, intent(in) :: spin_hp(ng), idx_hp(ng)
+  double precision, intent(in) :: sign_hp(ng)
+
+  double precision, external :: dznrm2
+  complex*16, external :: u_dot_v_complex
+  integer :: i,j
+  complex*16 :: tmp_c16
+  BEGIN_DOC
+  ! lanczos tridiagonalization of H
+  ! n_lanc_iter is number of lanczos iterations
+  ! u1 is initial lanczos vector
+  ! u1 should be normalized
+  END_DOC
+
+  ! |vv> is |v(n)>
+  ! |uu> is |u(n)>
+
+  ! compute beta(n+1)
+  do j=1,ng
+    beta_i(j)=dznrm2(sze,vv(:,j),1)
+  ! |vv> is now |u(n+1)>
+    call zdscal(sze,(1.d0/beta_i(j)),vv(:,j),1)
+  enddo
+
+  ! |w(n+1)> = H|u(n+1)>
+  ! |work> is now |w(n+1)>
+  call compute_hu_hp(vv,work,ng,sze,spin_hp,sign_hp,idx_hp)
+
+  ! alpha(n+1) = <u(n+1)|w(n+1)>
+  do i=1,ng
+    alpha_i(i)=real(u_dot_v_complex(vv(1:sze,i),work(1:sze,i),sze))
+  enddo
+
+  do j=1,ng
+    do i=1,sze
+      tmp_c16=work(i,j)-alpha_i(j)*vv(i,j)-beta_i(j)*uu(i,j)
+      uu(i,j)=vv(i,j)
+      vv(i,j)=tmp_c16
+    enddo
+  enddo
+  ! |vv> is |v(n+1)>
+  ! |uu> is |u(n+1)>
+end
+
+
+subroutine lanczos_h_init(uu,vv,work,sze,alpha_i,beta_i)
+  implicit none
+  integer, intent(in) :: sze
+  complex*16, intent(inout) :: uu(sze)
+  complex*16, intent(out)   :: vv(sze)
+  complex*16 :: work(sze)
+  double precision, intent(out) :: alpha_i, beta_i
+
+  double precision, external :: dznrm2
+  complex*16, external :: u_dot_v_complex
+  integer :: i
+
+  BEGIN_DOC
+  ! lanczos tridiagonalization of H
+  ! n_lanc_iter is number of lanczos iterations
+  ! u1 is initial lanczos vector
+  ! u1 should be normalized
+  END_DOC
+
+  print *,'starting lanczos'
+  print *,'sze = ',sze
+  ! exit if u1 is not normalized
+!  beta_norm = dznrm2(h_size,u1,1)
+!  if (dabs(beta_norm-1.d0) .gt. 1.d-6) then
+!    print *, 'Error: initial Lanczos vector is not normalized'
+!    stop -1
+!  endif
+
+  ! |uu> is |u(1)>
+
+  ! |w(1)> = H|u(1)>
+  ! |work> is now |w(1)>
+  call compute_hu(uu,work,sze)
+
+  ! alpha(n+1) = <u(n+1)|w(n+1)>
+  alpha_i=real(u_dot_v_complex(uu,work,sze))
+
+  do i=1,sze
+    vv(i)=work(i)-alpha_i*uu(i)
+  enddo
+  beta_i=0.d0
+  if (lanczos_debug_print) then
+    print*,'init uu,vv,work'
+    do i=1,n_lanczos_debug
+      write(6,'(6(E25.15))')uu(i),vv(i),work(i)
+    enddo
+  endif
+  ! |vv> is |v(1)>
+  ! |uu> is |u(1)>
+end
+
+subroutine lanczos_h_step(uu,vv,work,sze,alpha_i,beta_i)
+  implicit none
+  integer, intent(in) :: sze
+  complex*16, intent(inout) :: uu(sze),vv(sze)
+  complex*16, intent(out) :: work(sze)
+  double precision, intent(out) :: alpha_i, beta_i
+
+  double precision, external :: dznrm2
+  complex*16, external :: u_dot_v_complex
+  integer :: i
+  complex*16 :: tmp_c16
+  BEGIN_DOC
+  ! lanczos tridiagonalization of H
+  ! n_lanc_iter is number of lanczos iterations
+  ! u1 is initial lanczos vector
+  ! u1 should be normalized
+  END_DOC
+
+  ! exit if u1 is not normalized
+!  beta_norm = dznrm2(h_size,u1,1)
+!  if (dabs(beta_norm-1.d0) .gt. 1.d-6) then
+!    print *, 'Error: initial Lanczos vector is not normalized'
+!    stop -1
+!  endif
+
+  ! |vv> is |v(n)>
+  ! |uu> is |u(n)>
+
+  ! compute beta(n+1)
+  beta_i=dznrm2(sze,vv,1)
+  if (lanczos_debug_print) then
+    print*,'uu,vv in'
+    do i=1,n_lanczos_debug
+      write(6,'(4(E25.15))')uu(i),vv(i)
+    enddo
+  endif
+  ! |vv> is now |u(n+1)>
+  call zdscal(sze,(1.d0/beta_i),vv,1)
+
+  ! |w(n+1)> = H|u(n+1)>
+  ! |work> is now |w(n+1)>
+  call compute_hu(vv,work,sze)
+
+  if (lanczos_debug_print) then
+    print*,'vv,work'
+    do i=1,n_lanczos_debug
+      write(6,'(4(E25.15))')vv(i),work(i)
+    enddo
+  endif
+
+  ! alpha(n+1) = <u(n+1)|w(n+1)>
+  alpha_i=real(u_dot_v_complex(vv,work,sze))
+
+  do i=1,sze
+    tmp_c16=work(i)-alpha_i*vv(i)-beta_i*uu(i)
+    uu(i)=vv(i)
+    vv(i)=tmp_c16
+  enddo
+  ! |vv> is |v(n+1)>
+  ! |uu> is |u(n+1)>
+end
+
+
+
+subroutine lanczos_h(n_lanc_iter,alpha,beta,u1)
+  implicit none
+  integer, intent(in) :: n_lanc_iter
+  double precision, intent(out) :: alpha(n_lanc_iter), beta(n_lanc_iter)
+  complex*16, intent(in) :: u1(N_det)
+  integer :: h_size
+  double precision :: beta_norm, beta_norm_inv
+  complex*16, allocatable :: vec1(:), vec2(:), vec3(:)
+  complex*16 :: vec_tmp
+  double precision, external :: dznrm2
+  complex*16, external :: u_dot_v_complex
+
+  integer :: i,j,l
+  h_size=N_det
+  BEGIN_DOC
+  ! lanczos tridiagonalization of H
+  ! n_lanc_iter is number of lanczos iterations
+  ! u1 is initial lanczos vector
+  ! u1 should be normalized
+  END_DOC
+
+  print *,'starting lanczos'
+  print *,'h_size = ',h_size
+!  print *,'initial vector:'
+!  do i=1,h_size
+!    print *,u1(i)
+!  enddo
+  ! exit if u1 is not normalized
+  beta_norm = dznrm2(h_size,u1,1)
+  if (dabs(beta_norm-1.d0) .gt. 1.d-6) then
+    print *, 'Error: initial Lanczos vector is not normalized'
+    stop -1
+  endif
+
+  allocate(vec1(h_size),  &
+           vec2(h_size),  &
+           vec3(h_size))  
+ 
+  do i=1,h_size
+    vec1(i)=u1(i)
+  enddo
+
+  ! |w1> = H|u1>
+  ! |vec2> = H|vec1>
+  call compute_hu(vec1,vec2,h_size)!! TODO: not implemented
+
+  ! alpha(1) = <u1|H|u1> = <u1|w1>
+  !          = <vec1|vec2>
+  alpha(1)=real(u_dot_v_complex(vec1,vec2,h_size)) 
+
+  ! |v1> = |w1> - alpha(1)*|u1>
+  ! |vec3> = |vec2> - alpha(1)*|vec1>
+  do i=1,h_size
+    vec3(i)=vec2(i)-alpha(1)*vec1(i)
+  enddo
+  do j=2,n_lanc_iter
+    call write_time(6)
+    print *,'starting lanczos iteration:',j
+    !! vec1 is |u(j-1)>
+    !! vec3 is |v(j-1)>
+
+    ! beta(j) = sqrt(<v(j-1)|v(j-1)>)
+    beta_norm=dznrm2(h_size,vec3,1)
+
+    ! TODO: check for beta=0?
+    beta_norm_inv=1.d0/beta_norm
+
+    ! normalize |v(j-1)> to form |u(j)>
+    call zdscal(h_size,beta_norm_inv,vec3,1)
+    !! vec3 is |u(j)>
+
+    ! |w(j)> = H|u(j)>
+    call compute_hu(vec3,vec2,h_size)!! TODO: not implemented
+    !! vec2 is |w(j)>
+
+    alpha(j)=real(u_dot_v_complex(vec2,vec3,h_size)) 
+    beta(j)=beta_norm
+
+    ! |v(j)> = |w(j)> - alpha(j)*|u(j)> - beta(j)*|u(j-1)>
+    do l=1,h_size
+      vec_tmp=vec2(l)-alpha(j)*vec3(l)-beta(j)*vec1(l)
+      vec1(l)=vec3(l)
+      vec3(l)=vec_tmp
+    enddo
+    !! vec1 is |u(j)>
+    !! vec3 is |v(j)>
+  enddo
+
+end
+
+
+subroutine compute_hu_hp(vec1,vec2,n_hp,h_size,spin_hp,sign_hp,idx_hp)
+  implicit none
+  integer, intent(in)     :: h_size,n_hp
+  complex*16, intent(in)  :: vec1(h_size,n_hp)
+  complex*16, intent(out) :: vec2(h_size,n_hp)
+  integer, intent(in) :: spin_hp(n_hp), idx_hp(n_hp)
+  double precision, intent (in) :: sign_hp(n_hp)
+  complex*16 :: vec1_tmp(h_size,n_hp)
+  integer :: i,j
+  BEGIN_DOC
+  ! |vec2> = H|vec1>
+  !
+  ! TODO: implement
+  ! maybe reuse parts of H_S2_u_0_nstates_{openmp,zmq}?
+  END_DOC
+
+  vec1_tmp(1:h_size,1:n_hp) = vec1(1:h_size,1:n_hp)
+  call h_u_0_hp_openmp(vec2,vec1_tmp,n_hp,h_size,spin_hp,sign_hp,idx_hp)
+
+  do j=1,n_hp
+    do i=1,h_size
+      if (cdabs(vec1_tmp(i,j) - vec1(i,j)).gt.1.d-6) then
+        print*,'ERROR: vec1 was changed by h_u_0_openmp'
+      endif
+    enddo
+  enddo
+end
+
+subroutine compute_hu(vec1,vec2,h_size)
+  implicit none
+  integer, intent(in)     :: h_size
+  complex*16, intent(in)  :: vec1(h_size)
+  complex*16, intent(out) :: vec2(h_size)
+  complex*16 :: vec1_tmp(h_size)
+  integer :: i
+  BEGIN_DOC
+  ! |vec2> = H|vec1>
+  !
+  ! TODO: implement
+  ! maybe reuse parts of H_S2_u_0_nstates_{openmp,zmq}?
+  END_DOC
+
+  vec1_tmp(1:h_size) = vec1(1:h_size)
+  call h_u_0_openmp(vec2,vec1_tmp,h_size)
+
+  do i=1,h_size
+    if (cdabs(vec1_tmp(i) - vec1(i)).gt.1.d-6) then
+      print*,'ERROR: vec1 was changed by h_u_0_openmp'
+    endif
+  enddo
+end
+
+subroutine compute_hu2(vec1,vec2,h_size)
+  implicit none
+  integer, intent(in)     :: h_size
+  complex*16, intent(in)  :: vec1(h_size)
+  complex*16, intent(out) :: vec2(h_size)
+  complex*16, allocatable :: u_tmp(:,:), s_tmp(:,:),v_tmp(:,:)
+  integer :: i
+  BEGIN_DOC
+  ! |vec2> = H|vec1>
+  !
+  ! TODO: implement
+  ! maybe reuse parts of H_S2_u_0_nstates_{openmp,zmq}?
+  END_DOC
+
+  allocate(u_tmp(1,h_size),s_tmp(1,h_size),v_tmp(1,h_size))
+
+  u_tmp(1,1:h_size) = vec1(1:h_size)
+  call h_s2_u_0_nstates_openmp(v_tmp,s_tmp,u_tmp,1,h_size)
+
+  do i=1,h_size
+    if (cdabs(u_tmp(1,i) - vec1(i)).gt.1.d-6) then
+      print*,'ERROR: vec1 was changed by h_u_0_openmp'
+    endif
+  enddo
+  vec2(1:h_size)=v_tmp(1,1:h_size)
+  deallocate(u_tmp,v_tmp,s_tmp)
+end
+
+
+
+subroutine diag_lanczos_vals_vecs(alpha, beta, nlanc, vals, vecs, sze)
+  implicit none
+  BEGIN_DOC
+  ! diagonalization of tridiagonal form of H
+  ! this returns eigenvalues and eigenvectors in vals,vecs
+  END_DOC
+  integer, intent(in) :: nlanc,sze
+  double precision, intent(in) :: alpha(sze), beta(sze)
+  double precision, intent(out) :: vals(sze), vecs(sze,sze)
+  double precision :: work(2*nlanc-2), beta_tmp(nlanc-1)
+  integer :: i,info
+  
+  vals(1)=alpha(1)
+  do i=2,nlanc
+    vals(i)=alpha(i)
+    beta_tmp(i-1)=beta(i)
+  enddo
+
+  call dstev('V', nlanc, vals, beta_tmp, vecs, sze, work, info)
+  if (info.gt.0) then
+    print *,'WARNING: diagonalization of tridiagonal form of H did not converge'
+  else if (info.lt.0) then
+    print *,'WARNING: argument to dstev had illegal value'
+  endif
+end
+
+subroutine diag_lanczos_vals_hp(alpha, beta, nlanc, vals, sze,ng)
+  implicit none
+  BEGIN_DOC
+  ! diagonalization of tridiagonal form of H
+  ! this returns eigenvalues in vals
+  END_DOC
+  integer, intent(in) :: nlanc,sze,ng
+  !double precision, intent(in) :: alpha(ng,sze), beta(sze)
+  double precision, intent(in) :: alpha(sze,ng), beta(sze,ng)
+  double precision, intent(out) :: vals(sze,ng)
+  double precision :: work(1), beta_tmp(nlanc-1), vecs(1)
+  integer :: i,info,ig
+  
+  do ig=1,ng
+    vals(1,ig)=alpha(1,ig)
+    do i=2,nlanc
+      vals(i,ig)=alpha(i,ig)
+      beta_tmp(i-1)=beta(i,ig)
+    enddo
+  
+    call dstev('N', nlanc, vals(:,ig), beta_tmp, vecs, 1, work, info)
+    if (info.gt.0) then
+      print *,'WARNING: diagonalization of tridiagonal form of H did not converge'
+    else if (info.lt.0) then
+      print *,'WARNING: argument to dstev had illegal value'
+    endif
+  enddo
+end
+subroutine diag_lanczos_vals(alpha, beta, nlanc, vals, sze)
+  implicit none
+  BEGIN_DOC
+  ! diagonalization of tridiagonal form of H
+  ! this returns eigenvalues in vals
+  END_DOC
+  integer, intent(in) :: nlanc,sze
+  double precision, intent(in) :: alpha(sze), beta(sze)
+  double precision, intent(out) :: vals(sze)
+  double precision :: work(1), beta_tmp(nlanc-1), vecs(1)
+  integer :: i,info
+  
+  vals(1)=alpha(1)
+  do i=2,nlanc
+    vals(i)=alpha(i)
+    beta_tmp(i-1)=beta(i)
+  enddo
+
+  call dstev('N', nlanc, vals, beta_tmp, vecs, 1, work, info)
+  if (info.gt.0) then
+    print *,'WARNING: diagonalization of tridiagonal form of H did not converge'
+  else if (info.lt.0) then
+    print *,'WARNING: argument to dstev had illegal value'
+  endif
+end
diff --git a/src/green/plot-spec-dens.py b/src/green/plot-spec-dens.py
new file mode 100755
index 00000000..bf4f2294
--- /dev/null
+++ b/src/green/plot-spec-dens.py
@@ -0,0 +1,90 @@
+#!/bin/env python
+
+import gzip
+import sys
+from math import pi
+inv_pi = 1.0/pi
+
+def spec_dens(alpha,beta,z0,g_sign,e_shift):
+    sze=len(alpha)
+    sze_b=len(beta)
+    if (sze != sze_b):
+        print('Error: size mismatch',sze,sze_b)
+        sys.exit(1)
+    z=z0-g_sign*e_shift
+    tmp=0.0+0.0j
+    #for ai,bi in zip(reversed(a),reversed(b))
+    for i in range(sze-1,0,-1):
+        tmp=-(beta[i]**2)/(z+g_sign*alpha[i]+tmp)
+    tmp=1.0/(z+g_sign*alpha[0]+tmp)
+    return -1.0 * tmp.imag * inv_pi
+
+def printspec(ezdir,wmin,wmax,nw,eps):
+    gdir=ezdir+'/green/'
+    with open(gdir+'n_green_vec') as infile:
+        ngvec=int(infile.readline().strip())
+    with open(ezdir+'/fci/energy') as infile:
+        e0=float(infile.readline().strip())
+    with open(gdir+'n_lanczos_complete') as infile:
+        nlanc=int(infile.readline().strip())
+    
+    with gzip.open(gdir+'green_sign.gz') as infile:
+        gsign0=infile.read().split()
+    
+    with gzip.open(gdir+'alpha_lanczos.gz') as infile:
+        adata0=infile.read().split()
+    with gzip.open(gdir+'beta_lanczos.gz') as infile:
+        bdata0=infile.read().split()
+    
+    adim=int(adata0.pop(0))
+    bdim=int(bdata0.pop(0))
+    gsigndim=int(gsign0.pop(0))
+    assert adim==2, 'dimension of alpha_lanczos should be 2'
+    assert bdim==2, 'dimension of beta_lanczos should be 2'
+    assert gsigndim==1, 'dimension of green_sign should be 1'
+    
+    ngvec_2=int(gsign0.pop(0))
+    assert ngvec_2==ngvec, 'problem with size of green_sign.gz'
+    
+    ashape=tuple(map(int,adata0[:adim]))
+    bshape=tuple(map(int,bdata0[:bdim]))
+    assert ashape==(nlanc,ngvec), 'shape of alpha_lanczos should be (nlanc, ngvec)'
+    assert bshape==(nlanc,ngvec), 'shape of  beta_lanczos should be (nlanc, ngvec)'
+    
+    amat=[]
+    for xi in range(ngvec):
+        amat.append(list(map(float,adata0[adim+xi*nlanc:adim+(xi+1)*nlanc])))
+    
+    bmat=[]
+    b2mat=[]
+    for xi in range(ngvec):
+        #bmat.append(list(map(float,bdata0[bdim+xi*nlanc:bdim+(xi+1)*nlanc])))
+        b_tmp=list(map(float,bdata0[bdim+xi*nlanc:bdim+(xi+1)*nlanc]))
+        b2_tmp=[i*i for i in b_tmp]
+        bmat.append(b_tmp)
+        b2mat.append(b2_tmp)
+    
+    gsign=list(map(float,gsign0))
+    dw=(wmax-wmin)/(nw-1)
+    wlist = [wmin+iw*dw for iw in range(nw)]
+    densmat=[]
+    for ivec in range(ngvec):
+        densmat.append([spec_dens(amat[ivec],bmat[ivec],iw+1.j*eps,gsign[ivec],e0) for iw in wlist])
+
+    for i,dd in enumerate(zip(*densmat)):
+        print(('{:15.6E}'+ngvec*'{:25.15E}').format(wlist[i],*dd))
+
+if __name__ == '__main__':
+
+    if len(sys.argv) != 6:
+        print('bad args')
+        print('USAGE: plot-spec-dens.py ezfio omega_min omega_max n_omega epsilon')
+        sys.exit(1)
+    ezfio=sys.argv[1]
+    wmin=float(sys.argv[2])
+    wmax=float(sys.argv[3])
+    nw=int(sys.argv[4])
+    eps=float(sys.argv[5]) 
+    printspec(ezfio,wmin,wmax,nw,eps)
+
+
diff --git a/src/green/print_dets_test.irp.f b/src/green/print_dets_test.irp.f
new file mode 100644
index 00000000..6466141e
--- /dev/null
+++ b/src/green/print_dets_test.irp.f
@@ -0,0 +1,15 @@
+program print_dets_test
+ implicit none
+ read_wf = .True.
+ touch read_wf
+ call routine
+
+end
+
+subroutine routine
+ use bitmasks
+ implicit none
+ integer :: i
+ read*,i
+ print*,psi_det(:,:,i)
+end
diff --git a/src/green/print_e_mo_debug.irp.f b/src/green/print_e_mo_debug.irp.f
new file mode 100644
index 00000000..1fe41e34
--- /dev/null
+++ b/src/green/print_e_mo_debug.irp.f
@@ -0,0 +1,15 @@
+program print_e_mo_debug
+ implicit none
+ read_wf = .True.
+ touch read_wf
+ call routine
+
+end
+
+subroutine routine
+ use bitmasks
+ implicit none
+ integer :: i
+ read*,i
+ call print_mo_energies(psi_det(:,:,i),N_int,mo_num)
+end
diff --git a/src/green/print_h_debug.irp.f b/src/green/print_h_debug.irp.f
new file mode 100644
index 00000000..4dd394d7
--- /dev/null
+++ b/src/green/print_h_debug.irp.f
@@ -0,0 +1,178 @@
+program print_h_debug
+ implicit none
+ read_wf = .True.
+ touch read_wf
+ call routine
+
+end
+
+subroutine routine
+ use bitmasks
+ implicit none
+ integer :: i,j
+ integer, allocatable :: H_matrix_degree(:,:)
+ double precision, allocatable :: H_matrix_phase(:,:)
+ integer :: degree
+ integer(bit_kind), allocatable :: keys_tmp(:,:,:)
+ allocate(keys_tmp(N_int,2,N_det))
+ do i = 1, N_det
+  print*,''
+  call debug_det(psi_det(1,1,i),N_int)
+  do j = 1, N_int
+   keys_tmp(j,1,i) = psi_det(j,1,i)
+   keys_tmp(j,2,i) = psi_det(j,2,i)
+  enddo
+ enddo
+ if(N_det.gt.10000)then
+  print*,'Warning !!!'
+  print*,'Number of determinants is ',N_det
+  print*,'It means that the H matrix will be enormous !'
+  print*,'stoppping ..'
+  stop
+ endif
+ print*,''
+ print*,'Determinants '
+ do i = 1, N_det
+ enddo
+ allocate(H_matrix_degree(N_det,N_det),H_matrix_phase(N_det,N_det))
+ integer         :: exc(0:2,2,2)
+ double precision  :: phase
+ do i = 1, N_det
+  do j = i, N_det 
+   call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int)
+   H_matrix_degree(i,j) = degree
+   H_matrix_degree(j,i) = degree
+   phase = 0.d0
+   if(degree==1.or.degree==2)then
+    call get_excitation(psi_det(1,1,i),psi_det(1,1,j),exc,degree,phase,N_int)
+   endif
+   H_matrix_phase(i,j) = phase
+   H_matrix_phase(j,i) = phase
+  enddo
+ enddo
+ print*,'H matrix '
+ double precision :: s2
+ complex*16 :: ref_h_matrix
+ ref_h_matrix = h_matrix_all_dets_complex(1,1)
+ print*,'HF like determinant energy = ',ref_bitmask_energy+nuclear_repulsion
+ print*,'Ref element of H_matrix    = ',ref_h_matrix+nuclear_repulsion
+ print*,'Printing the H matrix ...'
+ print*,''
+ print*,''
+!do i = 1, N_det
+! H_matrix_all_dets(i,i) -= ref_h_matrix
+!enddo
+
+ do i = 1, N_det
+  H_matrix_all_dets_complex(i,i) += nuclear_repulsion
+ enddo
+
+!do i = 5,N_det
+! H_matrix_all_dets(i,3) = 0.d0
+! H_matrix_all_dets(3,i) = 0.d0
+! H_matrix_all_dets(i,4) = 0.d0
+! H_matrix_all_dets(4,i) = 0.d0
+!enddo
+
+
+
+
+! TODO: change for complex
+ do i = 1, N_det
+  write(*,'(I3,X,A3,2000(E24.15))')i,' | ',H_matrix_all_dets_complex(i,:)
+ enddo
+
+! print*,''
+! print*,''
+! print*,''
+! print*,'Printing the degree of excitations within the H matrix'
+! print*,''
+! print*,''
+! do i = 1, N_det
+!  write(*,'(I3,X,A3,X,1000(I1,X))')i,' | ',H_matrix_degree(i,:)
+! enddo
+!
+!
+! print*,''
+! print*,''
+! print*,'Printing the phase of the Hamiltonian matrix elements '
+! print*,''
+! print*,''
+! do i = 1, N_det
+!  write(*,'(I3,X,A3,X,1000(F3.0,X))')i,' | ',H_matrix_phase(i,:)
+! enddo
+! print*,''
+
+
+! double precision, allocatable  :: eigenvalues(:)
+! complex*16, allocatable  :: eigenvectors(:,:)
+! double precision, allocatable  :: s2_eigvalues(:)
+! allocate (eigenvectors(size(H_matrix_all_dets,1),N_det))
+! allocate (eigenvalues(N_det),s2_eigvalues(N_det))
+! call lapack_diag_complex(eigenvalues,eigenvectors,                       &
+!     H_matrix_all_dets,size(H_matrix_all_dets,1),N_det)
+! print*,'Two first eigenvectors '
+! call u_0_S2_u_0(s2_eigvalues,eigenvectors,n_det,keys_tmp,N_int,N_det,size(eigenvectors,1))
+! do j =1, N_states
+!   print*,'s2 = ',s2_eigvalues(j)
+!   print*,'e  = ',eigenvalues(j)
+!   print*,'coefs : '
+!   do i = 1, N_det
+!    print*,'i = ',i,eigenvectors(i,j)
+!   enddo
+!   if(j>1)then
+!    print*,'Delta E(H)  = ',eigenvalues(1) - eigenvalues(j)
+!    print*,'Delta E(eV) = ',(eigenvalues(1) - eigenvalues(j))*27.2114d0
+!   endif
+! enddo
+! complex*16               :: get_mo_bielec_integral,k_a_iv,k_b_iv
+! integer :: h1,p1,h2,p2
+! h1 = 10
+! p1 = 16 
+! h2 = 14 
+! p2 = 14
+!!h1 = 1
+!!p1 = 4
+!!h2 = 2
+!!p2 = 2
+! k_a_iv = get_mo_bielec_integral(h1,h2,p2,p1,mo_integrals_map)
+! h2 = 15 
+! p2 = 15
+! k_b_iv = get_mo_bielec_integral(h1,h2,p2,p1,mo_integrals_map)
+! print*,'k_a_iv = ',k_a_iv
+! print*,'k_b_iv = ',k_b_iv
+! complex*16 :: k_av,k_bv,k_ai,k_bi
+! h1 = 16
+! p1 = 14 
+! h2 = 14 
+! p2 = 16
+! k_av = get_mo_bielec_integral(h1,h2,p1,p2,mo_integrals_map)
+! h1 = 16
+! p1 = 15 
+! h2 = 15 
+! p2 = 16
+! k_bv = get_mo_bielec_integral(h1,h2,p1,p2,mo_integrals_map)
+! 
+! h1 = 10
+! p1 = 14 
+! h2 = 14 
+! p2 = 10
+! k_ai = get_mo_bielec_integral(h1,h2,p1,p2,mo_integrals_map)
+! 
+! h1 = 10
+! p1 = 15 
+! h2 = 15 
+! p2 = 10
+! k_bi = get_mo_bielec_integral(h1,h2,p1,p2,mo_integrals_map)
+! 
+! print*,'k_av, k_bv = ',k_av,k_bv
+! print*,'k_ai, k_bi = ',k_ai,k_bi
+! complex*16 :: k_iv
+!
+! h1 = 10
+! p1 = 16 
+! h2 = 16 
+! p2 = 10
+! k_iv = get_mo_bielec_integral(h1,h2,p1,p2,mo_integrals_map)
+! print*,'k_iv       = ',k_iv
+end
diff --git a/src/green/print_h_omp_debug.irp.f b/src/green/print_h_omp_debug.irp.f
new file mode 100644
index 00000000..0a9cd930
--- /dev/null
+++ b/src/green/print_h_omp_debug.irp.f
@@ -0,0 +1,41 @@
+program print_h_omp_debug
+ implicit none
+ read_wf = .True.
+ touch read_wf
+ call routine_omp
+
+end
+
+subroutine routine_omp
+ use bitmasks
+  implicit none
+  integer     :: h_size
+  complex*16, allocatable :: u_tmp(:,:), s_tmp(:,:),v_tmp(:,:)
+  integer :: i,n_st
+  h_size=N_det
+  BEGIN_DOC
+  ! |vec2> = H|vec1>
+  !
+  ! TODO: implement
+  ! maybe reuse parts of H_S2_u_0_nstates_{openmp,zmq}?
+  END_DOC
+  n_st=min(1000,h_size)
+  allocate(u_tmp(n_st,h_size),s_tmp(n_st,h_size),v_tmp(n_st,h_size))
+
+  u_tmp=(0.d0,0.d0)
+  v_tmp=(0.d0,0.d0)
+  s_tmp=(0.d0,0.d0)
+
+  do i=1,n_st
+    u_tmp(i,i)=(1.d0,0.d0)
+  enddo
+
+  call h_s2_u_0_nstates_openmp_complex(v_tmp,s_tmp,u_tmp,n_st,h_size)
+  do i = 1, n_st
+    v_tmp(i,i) += nuclear_repulsion
+  enddo
+ do i = 1, n_st
+  write(*,'(I3,X,A3,2000(E24.15))')i,' | ',v_tmp(i,:)
+ enddo
+  deallocate(u_tmp,v_tmp,s_tmp)
+end
diff --git a/src/green/print_spectral_dens.irp.f b/src/green/print_spectral_dens.irp.f
new file mode 100644
index 00000000..ca6826ce
--- /dev/null
+++ b/src/green/print_spectral_dens.irp.f
@@ -0,0 +1,43 @@
+program print_spectral_dens
+  implicit none
+  BEGIN_DOC
+! TODO
+  END_DOC
+  read_wf = .True.
+  touch read_wf
+  provide n_green_vec
+  call print_lanczos_eigvals
+  call print_spec
+end
+
+subroutine print_lanczos_eigvals
+  implicit none
+  integer :: i, iunit, j
+  integer :: getunitandopen
+  character(5) :: jstr
+
+  do j=1,n_green_vec
+    write(jstr,'(I0.3)') j
+    iunit = getunitandopen('lanczos_eigval_alpha_beta.out.'//trim(jstr),'w')
+    print *, 'printing lanczos eigenvalues, alpha, beta to "lanczos_eigval_alpha_beta.out.'//trim(jstr)//'"'
+    do i=1,n_lanczos_iter
+      write(iunit,'(I6,3(E25.15))') i, lanczos_eigvals(i,j), alpha_lanczos(i,j), beta_lanczos(i,j)
+    enddo
+    close(iunit)
+  enddo
+end
+subroutine print_spec
+  implicit none
+  integer :: i, iunit, j
+  integer :: getunitandopen
+  character(5) :: jstr
+  do j=1,n_green_vec
+    write(jstr,'(I0.3)') j
+    iunit = getunitandopen('omega_A.out.'//trim(jstr),'w')
+    print *, 'printing frequency, spectral density to "omega_A.out.'//trim(jstr)//'"'
+    do i=1,n_omega
+      write(iunit,'(2(E25.15))') omega_list(i), spectral_lanczos(i,j)
+    enddo
+    close(iunit)
+  enddo
+end
diff --git a/src/green/utils_hp.irp.f b/src/green/utils_hp.irp.f
new file mode 100644
index 00000000..264e3014
--- /dev/null
+++ b/src/green/utils_hp.irp.f
@@ -0,0 +1,614 @@
+subroutine print_mo_energies(key_ref,nint,nmo)
+  use bitmasks
+  BEGIN_DOC
+  ! get mo energies for one det
+  END_DOC
+  implicit none
+  integer, intent(in) :: nint, nmo
+  integer(bit_kind), intent(in) :: key_ref(nint,2)
+  double precision, allocatable :: e_mo(:,:)
+  integer, allocatable :: occ(:,:),virt(:,:) !(nint*bit_kind_size,2)
+  integer :: n_occ(2), n_virt(2)
+  integer, parameter :: int_spin2(1:2) = (/2,1/)
+  integer :: i,j,ispin,jspin,i0,j0,k
+  integer(bit_kind), allocatable :: key_virt(:,:)
+  integer, allocatable :: is_occ(:,:)
+
+
+  allocate(occ(nint*bit_kind_size,2),virt(nint*bit_kind_size,2),key_virt(nint,2),e_mo(nmo,2),is_occ(nmo,2))
+  is_occ=0
+
+  call bitstring_to_list_ab(key_ref,occ,n_occ,nint)
+  do i=1,nint
+    do ispin=1,2
+      key_virt(i,ispin)=xor(full_ijkl_bitmask(i),key_ref(i,ispin))
+    enddo
+  enddo
+  call bitstring_to_list_ab(key_virt,virt,n_virt,nint)
+
+  e_mo(1:nmo,1)=mo_one_e_integrals_diag(1:nmo)
+  e_mo(1:nmo,2)=mo_one_e_integrals_diag(1:nmo)
+
+  do ispin=1,2
+    jspin=int_spin2(ispin)
+    do i0=1,n_occ(ispin)
+      i=occ(i0,ispin)
+      is_occ(i,ispin)=1
+      do j0=i0+1,n_occ(ispin)
+        j=occ(j0,ispin)
+        e_mo(i,ispin) = e_mo(i,ispin) + mo_two_e_integrals_jj_anti(i,j)
+        e_mo(j,ispin) = e_mo(j,ispin) + mo_two_e_integrals_jj_anti(i,j)
+      enddo
+      do k=2,ispin
+        do j0=1,n_occ(jspin)
+          j=occ(j0,jspin)
+          e_mo(i,ispin) = e_mo(i,ispin) + mo_two_e_integrals_jj(i,j)
+          e_mo(j,jspin) = e_mo(j,jspin) + mo_two_e_integrals_jj(i,j) !can delete this and remove k level of loop
+        enddo
+      enddo
+      do j0=1,n_virt(ispin)
+        j=virt(j0,ispin)
+        e_mo(j,ispin) = e_mo(j,ispin) + mo_two_e_integrals_jj_anti(i,j)
+      enddo
+      do j0=1,n_virt(jspin)
+        j=virt(j0,jspin)
+        e_mo(j,jspin) = e_mo(j,jspin) + mo_two_e_integrals_jj(i,j)
+      enddo
+    enddo
+  enddo
+
+  do i=1,nmo
+    write(6,'(2(I5),2(E25.15))')is_occ(i,1),is_occ(i,2),e_mo(i,1),e_mo(i,2)
+  enddo
+  deallocate(occ,virt,key_virt,e_mo,is_occ)
+end
+
+subroutine get_mo_energies(key_ref,nint,nmo,e_mo)
+  use bitmasks
+  BEGIN_DOC
+  ! get mo energies for one det
+  END_DOC
+  implicit none
+  integer, intent(in) :: nint, nmo
+  integer(bit_kind), intent(in) :: key_ref(nint,2)
+  double precision, intent(out) :: e_mo(nmo,2)
+  integer, allocatable :: occ(:,:),virt(:,:) !(nint*bit_kind_size,2)
+  integer :: n_occ(2), n_virt(2)
+  integer, parameter :: int_spin2(1:2) = (/2,1/)
+  integer :: i,j,ispin,jspin,i0,j0,k
+  integer(bit_kind), allocatable :: key_virt(:,:)
+
+
+  allocate(occ(nint*bit_kind_size,2),virt(nint*bit_kind_size,2),key_virt(nint,2))
+
+  call bitstring_to_list_ab(key_ref,occ,n_occ,nint)
+  do i=1,nint
+    do ispin=1,2
+      key_virt(i,ispin)=xor(full_ijkl_bitmask(i),key_ref(i,ispin))
+    enddo
+  enddo
+  call bitstring_to_list_ab(key_virt,virt,n_virt,nint)
+
+  e_mo(1:nmo,1)=mo_one_e_integrals_diag(1:nmo)
+  e_mo(1:nmo,2)=mo_one_e_integrals_diag(1:nmo)
+
+  do ispin=1,2
+    jspin=int_spin2(ispin)
+    do i0=1,n_occ(ispin)
+      i=occ(i0,ispin)
+      do j0=i0+1,n_occ(ispin)
+        j=occ(j0,ispin)
+        e_mo(i,ispin) = e_mo(i,ispin) + mo_two_e_integrals_jj_anti(i,j)
+        e_mo(j,ispin) = e_mo(j,ispin) + mo_two_e_integrals_jj_anti(i,j)
+      enddo
+      do k=2,ispin
+        do j0=1,n_occ(jspin)
+          j=occ(j0,jspin)
+          e_mo(i,ispin) = e_mo(i,ispin) + mo_two_e_integrals_jj(i,j)
+          e_mo(j,jspin) = e_mo(j,jspin) + mo_two_e_integrals_jj(i,j) !can delete this and remove k level of loop
+        enddo
+      enddo
+      do j0=1,n_virt(ispin)
+        j=virt(j0,ispin)
+        e_mo(j,ispin) = e_mo(j,ispin) + mo_two_e_integrals_jj_anti(i,j)
+      enddo
+      do j0=1,n_virt(jspin)
+        j=virt(j0,jspin)
+        e_mo(j,jspin) = e_mo(j,jspin) + mo_two_e_integrals_jj(i,j)
+      enddo
+    enddo
+  enddo
+
+  deallocate(occ,virt,key_virt)
+end
+
+subroutine get_mask_phase_new(det1, pm, Nint)
+  use bitmasks
+  BEGIN_DOC
+  ! phasemask copied from qp2
+  ! return phasemask of det1 in pm
+  END_DOC
+  implicit none
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: det1(Nint,2)
+  integer(bit_kind), intent(out) :: pm(Nint,2)
+  integer(bit_kind) :: tmp1, tmp2
+  integer :: i
+  pm(1:Nint,1:2) = det1(1:Nint,1:2)
+  tmp1 = 0_8
+  tmp2 = 0_8
+  do i=1,Nint
+    pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 1))
+    pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 1))
+    pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 2))
+    pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 2))
+    pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 4))
+    pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 4))
+    pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 8))
+    pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 8))
+    pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 16))
+    pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 16))
+    pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 32))
+    pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 32))
+    pm(i,1) = ieor(pm(i,1), tmp1)
+    pm(i,2) = ieor(pm(i,2), tmp2)
+    if(iand(popcnt(det1(i,1)), 1) == 1) tmp1 = not(tmp1)
+    if(iand(popcnt(det1(i,2)), 1) == 1) tmp2 = not(tmp2)
+  end do
+end subroutine
+
+subroutine get_phase_hp(g_idx_int,g_idx_bit,g_spin,g_sign,det_in,g_det_phase,nint,n_g)
+  use bitmasks
+  implicit none
+  integer, intent(in) :: nint,n_g
+  integer, intent(in) :: g_idx_int(n_g), g_idx_bit(n_g),g_spin(n_g)
+  double precision, intent(in) :: g_sign(n_g)
+  integer(bit_kind), intent(in) :: det_in(nint,2)
+  double precision, intent(out) :: g_det_phase(n_g)
+
+  integer(bit_kind) :: tmp_spindet(nint), pm(nint,2)
+  double precision, parameter :: phase_dble(0:1) = (/1.d0,-1.d0/)
+
+  integer :: i
+  logical :: is_allowed(n_g), all_banned, is_filled
+
+  all_banned=.True.
+  do i=1,n_g
+    tmp_spindet(1:nint) = det_in(1:nint,g_spin(i))
+    call spinorb_is_filled_int_bit(tmp_spindet,g_idx_int(i),g_idx_bit(i),nint,is_filled)
+    is_allowed(i) = (.not.(((g_sign(i)<0).and.(.not.is_filled)).or.((g_sign(i)>0).and.(is_filled))))
+    all_banned=(all_banned.and.(.not.is_allowed(i)))
+  enddo
+
+  if (all_banned) then
+    g_det_phase(:)=0.d0
+  else
+    call get_mask_phase_new(det_in,pm,nint)
+    do i=1,n_g
+      if (is_allowed(i)) then
+        g_det_phase(i) = phase_dble(popcnt(iand(ibset(0_bit_kind,g_idx_bit(i)),pm(g_idx_int(i),g_spin(i)))))
+      else
+        g_det_phase(i)=0.d0
+      endif
+    enddo
+  endif
+end
+
+subroutine get_homo_lumo(key_ref,nint,nmo,idx_homo_lumo,spin_homo_lumo)
+  use bitmasks
+  implicit none
+  integer, intent(in) :: nint,nmo
+  integer(bit_kind), intent(in) :: key_ref(nint,2)
+  integer, intent(out) :: idx_homo_lumo(2), spin_homo_lumo(2)
+
+  double precision, allocatable :: e_mo(:,:)
+  integer, allocatable :: occ(:,:),virt(:,:) !(nint*bit_kind_size,2)
+  integer :: n_occ(2), n_virt(2)
+  integer :: i,i0,ispin
+  integer(bit_kind), allocatable :: key_virt(:,:)
+  double precision :: maxocc(2), minvirt(2)
+  integer :: imaxocc(2), iminvirt(2)
+
+  allocate(e_mo(nmo,2),key_virt(nint,2),occ(nint*bit_kind_size,2),virt(nint*bit_kind_size,2))
+
+  call get_mo_energies(key_ref,nint,nmo,e_mo)
+  
+  !allocate(occ(nint*bit_kind_size,2),virt(nint*bit_kind_size,2))
+
+  call bitstring_to_list_ab(key_ref,occ,n_occ,nint)
+  do i=1,nint
+    do ispin=1,2
+      key_virt(i,ispin)=xor(full_ijkl_bitmask(i),key_ref(i,ispin))
+    enddo
+  enddo
+  call bitstring_to_list_ab(key_virt,virt,n_virt,nint)
+  
+  maxocc=-1.d20 !maybe use -1.d0*huge(0.d0)?
+  minvirt=1.d20
+  imaxocc=-1
+  iminvirt=-1
+
+  do ispin=1,2
+    do i0=1,n_occ(ispin)
+      i=occ(i0,ispin)
+      if (e_mo(i,ispin).gt.maxocc(ispin)) then
+        maxocc(ispin)=e_mo(i,ispin)
+        imaxocc(ispin)=i
+      endif
+    enddo
+    do i0=1,n_virt(ispin)
+      i=virt(i0,ispin)
+      if (e_mo(i,ispin).lt.minvirt(ispin)) then
+        minvirt(ispin)=e_mo(i,ispin)
+        iminvirt(ispin)=i
+      endif
+    enddo
+  enddo
+  double precision :: e_mo_thresh
+  e_mo_thresh = 1.d-8
+  !these should both just be 2x2 arrays, but performance here doesn't really matter and this is more readable
+  !if (maxocc(1).ge.maxocc(2)) then
+  if ((maxocc(2)-maxocc(1)).le.e_mo_thresh) then
+    spin_homo_lumo(1)=1
+  else
+    spin_homo_lumo(1)=2
+  endif
+  if ((minvirt(1)-minvirt(2)).le.e_mo_thresh) then
+    spin_homo_lumo(2)=1
+  else
+    spin_homo_lumo(2)=2
+  endif
+
+  idx_homo_lumo(1)=imaxocc(spin_homo_lumo(1))
+  idx_homo_lumo(2)=iminvirt(spin_homo_lumo(2))
+
+  deallocate(e_mo,occ,virt,key_virt)
+
+end
+
+subroutine get_list_hp_banned_ab(tmp_det,N_hp,exc_is_banned,spin_hp,sign_hp,idx_hp,nint,all_banned)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! input determinant tmp_det and list of single holes/particles
+  ! for each hole/particle, determine whether it is filled/empty in tmp_det
+  ! return which are disallowed in exc_is_banned
+  ! if all are banned, set all_banned to true
+  END_DOC
+  integer, intent(in) :: N_hp,nint
+  integer, intent(in) :: spin_hp(N_hp), idx_hp(N_hp)
+  double precision, intent(in) :: sign_hp(N_hp)
+  integer(bit_kind), intent(in) :: tmp_det(nint,2)
+  logical, intent(out) :: exc_is_banned(N_hp)
+  logical, intent(out) :: all_banned
+  
+  integer :: i
+  logical :: is_filled
+
+  all_banned = .True.
+  do i=1,N_hp
+    call orb_is_filled(tmp_det,idx_hp(i),spin_hp(i),nint,is_filled)
+    if (sign_hp(i).gt.0) then ! particle creation, banned if already filled
+      exc_is_banned(i) = is_filled
+    else ! hole creation, banned if already empty
+      exc_is_banned(i) = (.not.is_filled)
+    endif
+    all_banned = (all_banned.and.exc_is_banned(i))
+  enddo
+end
+
+subroutine get_list_hp_banned_single_spin(tmp_spindet,N_hp,exc_is_banned,spin_hp,sign_hp,idx_hp,ispin,nint,all_banned)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! input spindeterminant tmp_spindet and list of single holes/particles
+  ! tmp_spindet is only one spin part of a full det, with spin==ispin
+  ! for each hole/particle, determine whether it is filled/empty in tmp_det
+  ! return which are disallowed in exc_is_banned
+  ! if all are banned, set all_banned to true
+  END_DOC
+  integer, intent(in) :: N_hp, ispin, nint
+  integer, intent(in) :: spin_hp(N_hp), idx_hp(N_hp)
+  double precision, intent(in) :: sign_hp(N_hp)
+  integer(bit_kind), intent(in) :: tmp_spindet(nint)
+  logical, intent(out) :: exc_is_banned(N_hp)
+  logical, intent(out) :: all_banned
+  
+  integer :: i
+  logical :: is_filled
+
+  all_banned = .True.
+  do i=1,N_hp
+    if (spin_hp(i).eq.ispin) then
+      call orb_is_filled_single_spin(tmp_spindet,idx_hp(i),nint,is_filled)
+      if (sign_hp(i).gt.0) then ! particle creation, banned if already filled
+        exc_is_banned(i) = is_filled
+      else ! hole creation, banned if already empty
+        exc_is_banned(i) = (.not.is_filled)
+      endif
+    else
+      exc_is_banned(i) = .False.
+    endif
+    all_banned = (all_banned.and.exc_is_banned(i))
+  enddo
+end
+
+subroutine get_list_hp_banned_spin(tmp_det,N_hp,exc_is_banned,spin_hp,sign_hp,idx_hp,ispin,nint,all_banned)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! input determinant tmp_det and list of single holes/particles
+  ! for each hole/particle, determine whether it is filled/empty in tmp_det
+  ! return which are disallowed in exc_is_banned
+  ! if all are banned, set all_banned to true
+  ! only consider tmp_det(1:N_int, ispin)
+  END_DOC
+  integer, intent(in) :: N_hp, ispin, nint
+  integer, intent(in) :: spin_hp(N_hp), idx_hp(N_hp)
+  double precision, intent(in) :: sign_hp(N_hp)
+  integer(bit_kind), intent(in) :: tmp_det(nint,2)
+  logical, intent(out) :: exc_is_banned(N_hp)
+  logical, intent(out) :: all_banned
+
+  integer(bit_kind) :: spindet(nint)
+  
+  integer :: i
+  logical :: is_filled
+  spindet(1:nint) = tmp_det(1:nint,ispin)
+
+  all_banned = .True.
+  do i=1,N_hp
+    if (spin_hp(i).eq.ispin) then
+      call orb_is_filled(tmp_det,idx_hp(i),ispin,nint,is_filled)
+      if (sign_hp(i).gt.0) then ! particle creation, banned if already filled
+        exc_is_banned(i) = is_filled
+      else ! hole creation, banned if already empty
+        exc_is_banned(i) = (.not.is_filled)
+      endif
+    else
+      exc_is_banned(i) = .False.
+    endif
+    all_banned = (all_banned.and.exc_is_banned(i))
+  enddo
+end
+
+  
+subroutine spinorb_is_filled_int_bit(key_ref,iorb_int,iorb_bit,Nint,is_filled)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! determine whether iorb is filled in key_ref
+  ! iorb is specified by int and bit locations within the determinant
+  END_DOC
+  integer, intent(in)            :: iorb_int, iorb_bit, Nint
+  integer(bit_kind), intent(in) :: key_ref(Nint)
+  logical, intent(out) :: is_filled
+  
+  ASSERT (iorb_int > 0)
+  ASSERT (iorb_bit >= 0)
+  ASSERT (Nint > 0)
+  is_filled = btest(key_ref(iorb_int),iorb_bit)
+end
+
+subroutine orb_is_filled_int_bit(key_ref,iorb_int,iorb_bit,ispin,Nint,is_filled)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  !  todo: not finished
+  ! determine whether iorb is filled in key_ref
+  ! iorb is specified by int and bit locations within the determinant
+  END_DOC
+  integer, intent(in)            :: iorb_int, iorb_bit, ispin, Nint
+  integer(bit_kind), intent(in) :: key_ref(Nint,2)
+  logical, intent(out) :: is_filled
+  
+  ASSERT (iorb_int > 0)
+  ASSERT (iorb_bit >= 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+  is_filled = btest(key_ref(iorb_int,ispin),iorb_bit)
+!  call spinorb_is_filled_int_bit(key_ref(1,ispin),iorb_int,iorb_bit,Nint,is_filled)
+end
+
+subroutine get_orb_int_bit(iorb,iorb_int,iorb_bit)
+  BEGIN_DOC
+  ! get int and bit corresponding to orbital index iorb 
+  END_DOC
+  use bitmasks
+  implicit none
+  integer, intent(in)            :: iorb
+  integer, intent(out)            :: iorb_int, iorb_bit
+  ASSERT (iorb > 0)
+  iorb_int = ishft(iorb-1,-bit_kind_shift)+1
+  ASSERT (iorb_int > 0)
+  iorb_bit = iorb - ishft(iorb_int-1,bit_kind_shift)-1
+  ASSERT (iorb_bit >= 0)
+end
+
+subroutine orb_is_filled_single_spin(key_ref,iorb,Nint,is_filled)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! determine whether iorb is filled in key_ref
+  ! key_ref is single alpha or beta determinant
+  END_DOC
+  integer, intent(in)            :: iorb, Nint
+  integer(bit_kind), intent(in) :: key_ref(Nint)
+  logical, intent(out) :: is_filled
+  
+  integer                        :: k,l
+  
+  ASSERT (iorb > 0)
+  ASSERT (Nint > 0)
+  
+  ! k is index of the int where iorb is found
+  ! l is index of the bit where iorb is found
+  k = ishft(iorb-1,-bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - ishft(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  is_filled = btest(key_ref(k),l)  
+end
+
+subroutine orb_is_filled(key_ref,iorb,ispin,Nint,is_filled)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! determine whether iorb, ispin is filled in key_ref
+  ! key_ref has alpha and beta parts
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer(bit_kind), intent(in) :: key_ref(Nint,2)
+  logical, intent(out) :: is_filled
+  
+  integer                        :: k,l
+  
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+  
+  ! k is index of the int where iorb is found
+  ! l is index of the bit where iorb is found
+  k = ishft(iorb-1,-bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - ishft(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  is_filled = btest(key_ref(k,ispin),l)  
+end
+
+subroutine ac_operator_phase(key_new,key_ref,iorb,ispin,Nint,phase)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! apply creation operator to key_ref
+  ! add electron with spin ispin to orbital with index iorb
+  ! output resulting det and phase in key_new and phase
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer(bit_kind), intent(in) :: key_ref(Nint,2)
+  integer(bit_kind), intent(out) :: key_new(Nint,2)
+  double precision, intent(out) :: phase
+  
+  integer                        :: k,l,i
+
+  double precision, parameter :: p(0:1) = (/ 1.d0, -1.d0 /)
+  
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+  
+  key_new=key_ref
+
+  ! alpha det is list of Nint 64-bit ints
+  ! k is index of the int where iorb is found
+  ! l is index of the bit where iorb is found
+  k = ishft(iorb-1,-bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - ishft(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  key_new(k,ispin) = ibset(key_new(k,ispin),l)
+  
+  integer(bit_kind) :: parity_filled
+
+  ! I assume here that the ordering is all alpha spinorbs and then all beta spinorbs
+  ! If we add an alpha electron, parity is not affected by beta part of determinant
+  !   (only need number of alpha occupied orbs below iorb)
+ 
+  ! If we add a beta electron, the parity is affected by alpha part
+  !    (need total number of occupied alpha orbs (all of which come before beta)
+  !      and total number of beta occupied orbs below iorb)
+
+  if (ispin==1) then
+    parity_filled=0_bit_kind
+  else
+    parity_filled=iand(int(elec_alpha_num,bit_kind),1_bit_kind)
+  endif
+
+  ! get parity due to orbs in other ints (with lower indices)
+  do i=1,k-1
+    parity_filled = iand(int(popcnt(key_ref(i,ispin)),bit_kind),parity_filled)
+  enddo
+  
+  ! get parity due to orbs in same int as iorb
+  ! ishft(1_bit_kind,l)-1 has its l rightmost bits set to 1, other bits set to 0
+  parity_filled = iand(int(popcnt(iand(ishft(1_bit_kind,l)-1,key_ref(k,ispin))),bit_kind),parity_filled)
+  phase = p(iand(1_bit_kind,parity_filled))
+
+end
+
+subroutine a_operator_phase(key_new,key_ref,iorb,ispin,Nint,phase)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! apply annihilation operator to key_ref
+  ! remove electron with spin ispin to orbital with index iorb
+  ! output resulting det and phase in key_new and phase
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer(bit_kind), intent(in) :: key_ref(Nint,2)
+  integer(bit_kind), intent(out) :: key_new(Nint,2)
+  double precision, intent(out) :: phase
+  
+  integer                        :: k,l,i
+
+  double precision, parameter :: p(0:1) = (/ 1.d0, -1.d0 /)
+  
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+  
+  key_new=key_ref
+
+  ! alpha det is list of Nint 64-bit ints
+  ! k is index of the int where iorb is found
+  ! l is index of the bit where iorb is found
+  k = ishft(iorb-1,-bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - ishft(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  key_new(k,ispin) = ibclr(key_new(k,ispin),l)
+  
+  integer(bit_kind) :: parity_filled
+
+  ! I assume here that the ordering is all alpha spinorbs and then all beta spinorbs
+  ! If we add an alpha electron, parity is not affected by beta part of determinant
+  !   (only need number of alpha occupied orbs below iorb)
+ 
+  ! If we add a beta electron, the parity is affected by alpha part
+  !    (need total number of occupied alpha orbs (all of which come before beta)
+  !      and total number of beta occupied orbs below iorb)
+
+  if (ispin==1) then
+    parity_filled=0_bit_kind
+  else
+    parity_filled=iand(int(elec_alpha_num,bit_kind),1_bit_kind)
+  endif
+
+  ! get parity due to orbs in other ints (with lower indices)
+  do i=1,k-1
+    parity_filled = iand(int(popcnt(key_ref(i,ispin)),bit_kind),parity_filled)
+  enddo
+  
+  ! get parity due to orbs in same int as iorb
+  ! ishft(1_bit_kind,l)-1 has its l rightmost bits set to 1, other bits set to 0
+  parity_filled = iand(int(popcnt(iand(ishft(1_bit_kind,l)-1,key_ref(k,ispin))),bit_kind),parity_filled)
+  phase = p(iand(1_bit_kind,parity_filled))
+
+end
+!BEGIN_PROVIDER [ double precision, mo_mono_elec_integral_diag,(mo_num)]
+!  implicit none
+!  integer                        :: i
+!  BEGIN_DOC
+!  ! diagonal elements of mo_mono_elec_integral array
+!  END_DOC
+!  print*,'Providing the mono electronic integrals (diagonal)'
+!
+!  do i = 1, mo_num
+!    mo_mono_elec_integral_diag(i) = real(mo_mono_elec_integral(i,i))
+!  enddo
+!
+!END_PROVIDER
diff --git a/src/mo_two_e_ints/df_mo_ints.irp.f b/src/mo_two_e_ints/df_mo_ints.irp.f
index 3a61911e..eba3b3da 100644
--- a/src/mo_two_e_ints/df_mo_ints.irp.f
+++ b/src/mo_two_e_ints/df_mo_ints.irp.f
@@ -48,7 +48,8 @@ subroutine mo_map_fill_from_df_dot
   logical :: use_map1
   integer(key_kind) :: idx_tmp
   double precision :: sign
-  complex*16, external :: zdotc
+  !complex*16, external :: zdotc
+  complex*16, external :: zdotu
 
   mo_num_kpt_2 = mo_num_per_kpt * mo_num_per_kpt
 
@@ -145,7 +146,8 @@ subroutine mo_map_fill_from_df_dot
                 if ((j==l) .and. (i>k)) exit
                 call idx2_tri_int(i,k,ik2)
                 if (ik2 > jl2) exit
-                integral = zdotc(df_num,ints_jl(1,ij,il),1,ints_ik(1,ii,ik),1)
+                !integral = zdotc(df_num,ints_jl(1,ij,il),1,ints_ik(1,ii,ik),1)
+                integral = zdotu(df_num,ints_jl(1,ij,il),1,ints_ik(1,ii,ik),1)
 !                print*,i,k,j,l,real(integral),imag(integral)
                 if (cdabs(integral) < mo_integrals_threshold) then
                   cycle
diff --git a/src/tools/fcidump.irp.f b/src/tools/fcidump.irp.f
index bf4d07fb..de878dc6 100644
--- a/src/tools/fcidump.irp.f
+++ b/src/tools/fcidump.irp.f
@@ -18,6 +18,97 @@ program fcidump
 !   electrons
 !
   END_DOC
+  if (is_complex) then
+    call fcidump_complex
+  else
+    call fcidump_real
+  endif
+end
+
+subroutine fcidump_complex
+  implicit none
+  character*(128) :: output
+  integer :: i_unit_output,getUnitAndOpen
+  output=trim(ezfio_filename)//'.FCIDUMP'
+  i_unit_output = getUnitAndOpen(output,'w')
+
+  integer :: i,j,k,l
+  integer :: i1,j1,k1,l1
+  integer :: i2,j2,k2,l2,ik2,jl2
+  integer :: ki,kj,kk,kl
+  integer :: ii,ij,ik,il
+  integer*8 :: m
+  character*(2), allocatable :: A(:)
+
+  write(i_unit_output,*) '&FCI NORB=', n_act_orb, ', NELEC=', elec_num-n_core_orb*2, &
+   ', MS2=', (elec_alpha_num-elec_beta_num), ','
+  allocate (A(n_act_orb))
+  A = '1,'
+  write(i_unit_output,*) 'ORBSYM=', (A(i), i=1,n_act_orb)
+  write(i_unit_output,*) 'ISYM=0,'
+  write(i_unit_output,*) '/'
+  deallocate(A)
+
+  integer(key_kind), allocatable :: keys(:)
+  double precision, allocatable  :: values(:)
+  integer(cache_map_size_kind)   :: n_elements, n_elements_max
+  PROVIDE mo_two_e_integrals_in_map
+
+  complex*16 :: get_two_e_integral_complex, integral
+
+  do kl=1,kpt_num
+    do kj=1,kl
+      do kk=1,kl
+        ki=kconserv(kl,kk,kj)
+        if (ki>kl) cycle
+        do l1=1,n_act_orb_kpts(kl)
+          il=list_act_kpts(l1,kl)
+          l = (kl-1)*mo_num_per_kpt + il
+          do j1=1,n_act_orb_kpts(kj)
+            ij=list_act_kpts(j1,kj)
+            j = (kj-1)*mo_num_per_kpt + ij
+            if (j>l) exit
+            call idx2_tri_int(j,l,jl2)
+            do k1=1,n_act_orb_kpts(kk)
+              ik=list_act_kpts(k1,kk)
+              k = (kk-1)*mo_num_per_kpt + ik
+              if (k>l) exit
+              do i1=1,n_act_orb_kpts(ki)
+                ii=list_act_kpts(i1,ki)
+                i = (ki-1)*mo_num_per_kpt + ii
+                if ((j==l) .and. (i>k)) exit
+                call idx2_tri_int(i,k,ik2)
+                if (ik2 > jl2) exit
+                integral = get_two_e_integral_complex(i,j,k,l,mo_integrals_map,mo_integrals_map_2)
+                if (cdabs(integral) > mo_integrals_threshold) then
+                  write(i_unit_output,'(2(E25.15,X),4(I6,X))') dble(integral), dimag(integral),i,k,j,l
+                endif
+              enddo
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  do kj=1,kpt_num
+    do j1=1,n_act_orb_kpts(kj)
+      ij = list_act_kpts(j1,kj)
+      j = (kj-1)*mo_num_per_kpt + ij
+      do i1=j1,n_act_orb_kpts(kj)
+        ii = list_act_kpts(i1,kj)
+        i = (kj-1)*mo_num_per_kpt + ii
+        integral = mo_one_e_integrals_kpts(ii,ij,kj) + core_fock_operator_complex(i,j)
+        if (cdabs(integral) > mo_integrals_threshold) then
+          write(i_unit_output,'(2(E25.15,X),4(I6,X))') dble(integral),dimag(integral), i,j,0,0
+        endif
+      enddo
+    enddo
+  enddo
+  write(i_unit_output,*) core_energy, 0, 0, 0, 0
+end
+subroutine fcidump_real
+  implicit none
   character*(128) :: output
   integer :: i_unit_output,getUnitAndOpen
   output=trim(ezfio_filename)//'.FCIDUMP'
diff --git a/src/utils/constants.include.F b/src/utils/constants.include.F
index 7399b4a6..bad68054 100644
--- a/src/utils/constants.include.F
+++ b/src/utils/constants.include.F
@@ -7,6 +7,7 @@ double precision, parameter :: sqpi =  dsqrt(dacos(-1.d0))
 double precision, parameter :: pi_5_2 =  34.9868366552d0
 double precision, parameter :: dfour_pi =  4.d0*dacos(-1.d0)
 double precision, parameter :: dtwo_pi =  2.d0*dacos(-1.d0)
+double precision, parameter :: inv_pi =  1.d0/dacos(-1.d0)
 double precision, parameter :: inv_sq_pi =  1.d0/dsqrt(dacos(-1.d0))
 double precision, parameter :: inv_sq_pi_2 = 0.5d0/dsqrt(dacos(-1.d0))
 double precision, parameter :: thresh = 1.d-15