diff --git a/REPLACE b/REPLACE
index 9485fb2a..97df8b76 100755
--- a/REPLACE
+++ b/REPLACE
@@ -126,7 +126,6 @@ qp_name H_S2_u_0_bielec_nstates_openmp_work_3 -r H_S2_u_0_two_e_nstates_openmp_w
 qp_name H_S2_u_0_bielec_nstates_openmp_work_4 -r H_S2_u_0_two_e_nstates_openmp_work_4
 qp_name H_S2_u_0_bielec_nstates_openmp_work_$N_int
 qp_name H_S2_u_0_bielec_nstates_openmp_work_$N_int -r "H_S2_u_0_two_e_nstates_openmp_work_$N_int"
-qp_name H_S2_u_0_bielec_nstates_openmp_work_$N_int #-r "H_S2_u_0_two_e_nstates_openmp_work_$N_int"
 qp_name H_S2_u_0_bielec_nstates_openmp_work -r H_S2_u_0_two_e_nstates_openmp_work
 qp_name H_S2_u_0_bielec_nstates_openmp_work_ -r H_S2_u_0_two_e_nstates_openmp_work_
 qp_name i_H_j_bielec -r i_H_j_two_e
@@ -227,6 +226,7 @@ qp_name ref_bitmask_e_n_energy -r ref_bitmask_n_e_energy
 qp_name read_ao_integrals_e_n -r read_ao_integrals_n_e
 qp_name write_ao_integrals_e_n -r write_ao_integrals_n_e
 qp_name psi_energy_bielec -r psi_energy_two_e
+qp_name read_ao_integrals_e_n -r read_ao_integrals_n_e
 qp_name read_ao_integrals --rename="read_ao_two_e_integrals"
 qp_name read_ao_integrals --rename=read_ao_two_e_integrals
 qp_name read_mo_integrals_erf -r read_mo_two_e_integrals_erf
@@ -253,3 +253,11 @@ qp_name ezfio_set_mo_one_e_ints_mo_integrals_e_n -r ezfio_set_mo_one_e_ints_mo_i
 qp_name ezfio_has_mo_one_e_ints_mo_integrals_e_n -r ezfio_has_mo_one_e_ints_mo_integrals_n_e
 qp_name ezfio_has_mo_one_e_ints_io_mo_integrals_e_n -r ezfio_has_mo_one_e_ints_io_mo_integrals_n_e
 qp_name ezfio_get_mo_one_e_ints_io_mo_integrals_e_n -r ezfio_get_mo_one_e_ints_io_mo_integrals_n_e
+qp_name ao_ortho_canonical_coef_inv_complex -r ao_ortho_cano_coef_inv_cplx
+qp_name fock_operator_closed_shell_ref_bitmask -r fock_op_cshell_ref_bitmask
+qp_name fock_operator_closed_shell_ref_bitmask_complex -r fock_op_cshell_ref_bitmask_cplx
+qp_name ao_ortho_canonical_coef_inv -r ao_ortho_cano_coef_inv
+qp_name ao_ortho_cano_to_ao_complex -r ao_ortho_cano_to_ao_cplx
+qp_name ao_ortho_lowdin_nucl_elec_integrals_complex -r ao_ortho_lowdin_n_e_ints_cplx
+qp_name ao_ortho_canonical_nucl_elec_integrals_complex -r ao_ortho_cano_n_e_ints_cplx
+qp_name ao_ortho_canonical_nucl_elec_integrals -r ao_ortho_cano_n_e_ints
diff --git a/bin/qp_convert_h5_to_ezfio b/bin/qp_convert_h5_to_ezfio
new file mode 100755
index 00000000..8b7c038d
--- /dev/null
+++ b/bin/qp_convert_h5_to_ezfio
@@ -0,0 +1,548 @@
+#!/usr/bin/env python3
+"""
+convert hdf5 output (e.g. from PySCF) to ezfio
+
+Usage:
+    qp_convert_h5_to_ezfio [-o EZFIO_DIR] FILE
+
+Options:
+    -o --output=EZFIO_DIR    Produced directory
+                             by default is FILE.ezfio
+
+"""
+from ezfio import ezfio
+import h5py
+import sys
+import numpy as np
+import os
+from docopt import docopt
+#fname = sys.argv[1]
+#qph5name = sys.argv[2]
+
+def get_full_path(file_path):
+    file_path = os.path.expanduser(file_path)
+    file_path = os.path.expandvars(file_path)
+#    file_path = os.path.abspath(file_path)
+    return file_path
+
+def convert_mol(filename,qph5path):
+    ezfio.set_file(filename)
+    ezfio.set_nuclei_is_complex(False)
+    
+    with h5py.File(qph5path,'r') as qph5:
+        nucl_num = qph5['nuclei'].attrs['nucl_num']
+        ao_num = qph5['ao_basis'].attrs['ao_num']
+        mo_num = qph5['mo_basis'].attrs['mo_num']
+        elec_alpha_num = qph5['electrons'].attrs['elec_alpha_num']
+        elec_beta_num = qph5['electrons'].attrs['elec_beta_num']
+    
+    ezfio.set_nuclei_nucl_num(nucl_num)
+    
+    ezfio.set_ao_basis_ao_num(ao_num)
+    ezfio.set_mo_basis_mo_num(mo_num)
+    ezfio.electrons_elec_alpha_num = elec_alpha_num
+    ezfio.electrons_elec_beta_num  = elec_beta_num
+    
+    
+    
+    ##ao_num = mo_num
+    ##Important !
+    #import math
+    #nelec_per_kpt = num_elec // n_kpts
+    #nelec_alpha_per_kpt = int(math.ceil(nelec_per_kpt / 2.))
+    #nelec_beta_per_kpt = int(math.floor(nelec_per_kpt / 2.))
+    #
+    #ezfio.electrons_elec_alpha_num = int(nelec_alpha_per_kpt * n_kpts)
+    #ezfio.electrons_elec_beta_num = int(nelec_beta_per_kpt * n_kpts)
+    
+    #ezfio.electrons_elec_alpha_num = int(math.ceil(num_elec / 2.))
+    #ezfio.electrons_elec_beta_num = int(math.floor(num_elec / 2.))
+    
+    #ezfio.set_utils_num_kpts(n_kpts)
+    #ezfio.set_integrals_bielec_df_num(n_aux)
+    
+    #(old)Important
+    #ezfio.set_nuclei_nucl_num(nucl_num)
+    #ezfio.set_nuclei_nucl_charge([0.]*nucl_num)
+    #ezfio.set_nuclei_nucl_coord( [ [0.], [0.], [0.] ]*nucl_num )
+    #ezfio.set_nuclei_nucl_label( ['He'] * nucl_num )
+    
+    
+    with h5py.File(qph5path,'r') as qph5:
+        nucl_charge=qph5['nuclei/nucl_charge'][()].tolist()
+        nucl_coord=qph5['nuclei/nucl_coord'][()].T.tolist()
+        nucl_label=qph5['nuclei/nucl_label'][()].tolist()
+        nuclear_repulsion = qph5['nuclei'].attrs['nuclear_repulsion']
+    
+    ezfio.set_nuclei_nucl_charge(nucl_charge)
+    ezfio.set_nuclei_nucl_coord(nucl_coord)
+    if isinstance(nucl_label[0],bytes):
+        nucl_label = list(map(lambda x:x.decode(),nucl_label))
+    ezfio.set_nuclei_nucl_label(nucl_label)
+    
+    ezfio.set_nuclei_io_nuclear_repulsion('Read')
+    ezfio.set_nuclei_nuclear_repulsion(nuclear_repulsion)
+    
+    
+    ##########################################
+    #                                        #
+    #                Basis                   #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        do_pseudo = qph5['pseudo'].attrs['do_pseudo']
+        ezfio.set_pseudo_do_pseudo(do_pseudo)
+        if (do_pseudo):
+            ezfio.set_pseudo_pseudo_lmax(qph5['pseudo'].attrs['pseudo_lmax'])
+            ezfio.set_pseudo_pseudo_klocmax(qph5['pseudo'].attrs['pseudo_klocmax'])
+            ezfio.set_pseudo_pseudo_kmax(qph5['pseudo'].attrs['pseudo_kmax'])
+            ezfio.set_pseudo_nucl_charge_remove(qph5['pseudo/nucl_charge_remove'][()].tolist())
+            ezfio.set_pseudo_pseudo_n_k(qph5['pseudo/pseudo_n_k'][()].tolist())
+            ezfio.set_pseudo_pseudo_n_kl(qph5['pseudo/pseudo_n_kl'][()].tolist())
+            ezfio.set_pseudo_pseudo_v_k(qph5['pseudo/pseudo_v_k'][()].tolist())
+            ezfio.set_pseudo_pseudo_v_kl(qph5['pseudo/pseudo_v_kl'][()].tolist())
+            ezfio.set_pseudo_pseudo_dz_k(qph5['pseudo/pseudo_dz_k'][()].tolist())
+            ezfio.set_pseudo_pseudo_dz_kl(qph5['pseudo/pseudo_dz_kl'][()].tolist())
+    
+    ##########################################
+    #                                        #
+    #                Basis                   #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        #coeftmp = qph5['ao_basis/ao_coef'][()]
+        #expotmp = qph5['ao_basis/ao_expo'][()]
+        ezfio.set_ao_basis_ao_basis(qph5['ao_basis'].attrs['ao_basis'])
+        ezfio.set_ao_basis_ao_nucl(qph5['ao_basis/ao_nucl'][()].tolist())
+        ezfio.set_ao_basis_ao_prim_num(qph5['ao_basis/ao_prim_num'][()].tolist())
+        ezfio.set_ao_basis_ao_power(qph5['ao_basis/ao_power'][()].tolist())
+        ezfio.set_ao_basis_ao_coef(qph5['ao_basis/ao_coef'][()].tolist())
+        ezfio.set_ao_basis_ao_expo(qph5['ao_basis/ao_expo'][()].tolist())
+    
+      
+    ##########################################
+    #                                        #
+    #               MO Coef                  #
+    #                                        #
+    ##########################################
+        
+    
+    with h5py.File(qph5path,'r') as qph5:
+        mo_coef = qph5['mo_basis/mo_coef'][()].tolist()
+    ezfio.set_mo_basis_mo_coef(mo_coef)
+    #maybe fix qp so we don't need this?
+    #ezfio.set_mo_basis_mo_coef([[i for i in range(mo_num)] * ao_num])
+    
+    return
+
+def convert_kpts(filename,qph5path):
+    ezfio.set_file(filename)
+    ezfio.set_nuclei_is_complex(True)
+    
+    with h5py.File(qph5path,'r') as qph5:
+        kpt_num = qph5['nuclei'].attrs['kpt_num']
+        nucl_num = qph5['nuclei'].attrs['nucl_num']
+        ao_num = qph5['ao_basis'].attrs['ao_num']
+        mo_num = qph5['mo_basis'].attrs['mo_num']
+        elec_alpha_num = qph5['electrons'].attrs['elec_alpha_num']
+        elec_beta_num = qph5['electrons'].attrs['elec_beta_num']
+    
+    ezfio.set_nuclei_kpt_num(kpt_num)
+    kpt_pair_num = (kpt_num*kpt_num + kpt_num)//2
+    ezfio.set_nuclei_kpt_pair_num(kpt_pair_num)
+    
+    # don't multiply nuclei by kpt_num
+    # work in k-space, not in equivalent supercell
+    nucl_num_per_kpt = nucl_num 
+    ezfio.set_nuclei_nucl_num(nucl_num_per_kpt)
+    
+    # these are totals (kpt_num * num_per_kpt)
+    # need to change if we want to truncate orbital space within pyscf
+    ezfio.set_ao_basis_ao_num(ao_num)
+    ezfio.set_mo_basis_mo_num(mo_num)
+    ezfio.set_ao_basis_ao_num_per_kpt(ao_num//kpt_num)
+    ezfio.set_mo_basis_mo_num_per_kpt(mo_num//kpt_num)
+    ezfio.electrons_elec_alpha_num = elec_alpha_num
+    ezfio.electrons_elec_beta_num  = elec_beta_num
+    
+    
+    
+    ##ao_num = mo_num
+    ##Important !
+    #import math
+    #nelec_per_kpt = num_elec // n_kpts
+    #nelec_alpha_per_kpt = int(math.ceil(nelec_per_kpt / 2.))
+    #nelec_beta_per_kpt = int(math.floor(nelec_per_kpt / 2.))
+    #
+    #ezfio.electrons_elec_alpha_num = int(nelec_alpha_per_kpt * n_kpts)
+    #ezfio.electrons_elec_beta_num = int(nelec_beta_per_kpt * n_kpts)
+    
+    #ezfio.electrons_elec_alpha_num = int(math.ceil(num_elec / 2.))
+    #ezfio.electrons_elec_beta_num = int(math.floor(num_elec / 2.))
+    
+    #ezfio.set_utils_num_kpts(n_kpts)
+    #ezfio.set_integrals_bielec_df_num(n_aux)
+    
+    #(old)Important
+    #ezfio.set_nuclei_nucl_num(nucl_num)
+    #ezfio.set_nuclei_nucl_charge([0.]*nucl_num)
+    #ezfio.set_nuclei_nucl_coord( [ [0.], [0.], [0.] ]*nucl_num )
+    #ezfio.set_nuclei_nucl_label( ['He'] * nucl_num )
+    
+    
+    with h5py.File(qph5path,'r') as qph5:
+        nucl_charge=qph5['nuclei/nucl_charge'][()].tolist()
+        nucl_coord=qph5['nuclei/nucl_coord'][()].T.tolist()
+        nucl_label=qph5['nuclei/nucl_label'][()].tolist()
+        nuclear_repulsion = qph5['nuclei'].attrs['nuclear_repulsion']
+    
+    ezfio.set_nuclei_nucl_charge(nucl_charge)
+    ezfio.set_nuclei_nucl_coord(nucl_coord)
+    if isinstance(nucl_label[0],bytes):
+        nucl_label = list(map(lambda x:x.decode(),nucl_label))
+    ezfio.set_nuclei_nucl_label(nucl_label)
+    
+    ezfio.set_nuclei_io_nuclear_repulsion('Read')
+    ezfio.set_nuclei_nuclear_repulsion(nuclear_repulsion)
+    
+    
+    ##########################################
+    #                                        #
+    #                Basis                   #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        ezfio.set_ao_basis_ao_basis(qph5['ao_basis'].attrs['ao_basis'])
+        ezfio.set_ao_basis_ao_nucl(qph5['ao_basis/ao_nucl'][()].tolist())
+    
+    
+    #Just need one (can clean this up later)
+    ao_prim_num_max  = 5
+    
+    d = [ [0] *ao_prim_num_max]*ao_num
+    ezfio.set_ao_basis_ao_prim_num([ao_prim_num_max]*ao_num)
+    ezfio.set_ao_basis_ao_power(d)
+    ezfio.set_ao_basis_ao_coef(d)
+    ezfio.set_ao_basis_ao_expo(d)
+    
+    
+    
+      
+    ##########################################
+    #                                        #
+    #               MO Coef                  #
+    #                                        #
+    ##########################################
+        
+    
+    with h5py.File(qph5path,'r') as qph5:
+        mo_coef_kpts = qph5['mo_basis/mo_coef_kpts'][()].tolist()
+        mo_coef_cplx = qph5['mo_basis/mo_coef_complex'][()].tolist()
+    ezfio.set_mo_basis_mo_coef_kpts(mo_coef_kpts)
+    ezfio.set_mo_basis_mo_coef_complex(mo_coef_cplx)
+    #maybe fix qp so we don't need this?
+    #ezfio.set_mo_basis_mo_coef([[i for i in range(mo_num)] * ao_num])
+    
+    
+    ##########################################
+    #                                        #
+    #            Integrals Mono              #
+    #                                        #
+    ##########################################
+        
+    with h5py.File(qph5path,'r') as qph5:
+        if 'ao_one_e_ints' in qph5.keys():
+            kin_ao_reim=qph5['ao_one_e_ints/ao_integrals_kinetic_kpts'][()].tolist()
+            ovlp_ao_reim=qph5['ao_one_e_ints/ao_integrals_overlap_kpts'][()].tolist()
+            ne_ao_reim=qph5['ao_one_e_ints/ao_integrals_n_e_kpts'][()].tolist()
+    
+            ezfio.set_ao_one_e_ints_ao_integrals_kinetic_kpts(kin_ao_reim)
+            ezfio.set_ao_one_e_ints_ao_integrals_overlap_kpts(ovlp_ao_reim)
+            ezfio.set_ao_one_e_ints_ao_integrals_n_e_kpts(ne_ao_reim)
+            
+            ezfio.set_ao_one_e_ints_io_ao_integrals_kinetic('Read')
+            ezfio.set_ao_one_e_ints_io_ao_integrals_overlap('Read')
+            ezfio.set_ao_one_e_ints_io_ao_integrals_n_e('Read')
+      
+        
+    with h5py.File(qph5path,'r') as qph5:
+        if 'mo_one_e_ints' in qph5.keys():
+            kin_mo_reim=qph5['mo_one_e_ints/mo_integrals_kinetic_kpts'][()].tolist()
+            ovlp_mo_reim=qph5['mo_one_e_ints/mo_integrals_overlap_kpts'][()].tolist()
+            ne_mo_reim=qph5['mo_one_e_ints/mo_integrals_n_e_kpts'][()].tolist()
+    
+            ezfio.set_mo_one_e_ints_mo_integrals_kinetic_kpts(kin_mo_reim)
+            ezfio.set_mo_one_e_ints_mo_integrals_overlap_kpts(ovlp_mo_reim)
+            #ezfio.set_mo_one_e_ints_mo_integrals_n_e_complex(ne_mo_reim)
+            ezfio.set_mo_one_e_ints_mo_integrals_n_e_kpts(ne_mo_reim)
+            
+            ezfio.set_mo_one_e_ints_io_mo_integrals_kinetic('Read')
+            ezfio.set_mo_one_e_ints_io_mo_integrals_overlap('Read')
+            #ezfio.set_mo_one_e_ints_io_mo_integrals_n_e('Read')
+            ezfio.set_mo_one_e_ints_io_mo_integrals_n_e('Read')
+      
+    ##########################################
+    #                                        #
+    #               k-points                 #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        kconserv = qph5['nuclei/kconserv'][()].tolist()
+    
+    ezfio.set_nuclei_kconserv(kconserv)
+    ezfio.set_nuclei_io_kconserv('Read')
+      
+    ##########################################
+    #                                        #
+    #             Integrals Bi               #
+    #                                        #
+    ##########################################
+    
+    # should this be in ao_basis? ao_two_e_ints?
+    with h5py.File(qph5path,'r') as qph5:
+        if 'ao_two_e_ints' in qph5.keys():
+            df_num = qph5['ao_two_e_ints'].attrs['df_num']
+            ezfio.set_ao_two_e_ints_df_num(df_num)
+            if 'df_ao_integrals' in qph5['ao_two_e_ints'].keys():
+    #            dfao_re0=qph5['ao_two_e_ints/df_ao_integrals_real'][()].transpose((3,2,1,0))
+    #            dfao_im0=qph5['ao_two_e_ints/df_ao_integrals_imag'][()].transpose((3,2,1,0))
+    #            dfao_cmplx0 = np.stack((dfao_re0,dfao_im0),axis=-1).tolist()
+    #            ezfio.set_ao_two_e_ints_df_ao_integrals_complex(dfao_cmplx0)
+                dfao_reim=qph5['ao_two_e_ints/df_ao_integrals'][()].tolist()
+                ezfio.set_ao_two_e_ints_df_ao_integrals_complex(dfao_reim)
+                ezfio.set_ao_two_e_ints_io_df_ao_integrals('Read')
+    
+        if 'mo_two_e_ints' in qph5.keys():
+            df_num = qph5['ao_two_e_ints'].attrs['df_num']
+            ezfio.set_ao_two_e_ints_df_num(df_num)
+    #        dfmo_re0=qph5['mo_two_e_ints/df_mo_integrals_real'][()].transpose((3,2,1,0))
+    #        dfmo_im0=qph5['mo_two_e_ints/df_mo_integrals_imag'][()].transpose((3,2,1,0))
+    #        dfmo_cmplx0 = np.stack((dfmo_re0,dfmo_im0),axis=-1).tolist()
+    #        ezfio.set_mo_two_e_ints_df_mo_integrals_complex(dfmo_cmplx0)
+            dfmo_reim=qph5['mo_two_e_ints/df_mo_integrals'][()].tolist()
+            ezfio.set_mo_two_e_ints_df_mo_integrals_complex(dfmo_reim)
+            ezfio.set_mo_two_e_ints_io_df_mo_integrals('Read')
+   
+    return
+
+def convert_cplx(filename,qph5path):
+    ezfio.set_file(filename)
+    ezfio.set_nuclei_is_complex(True)
+    
+    with h5py.File(qph5path,'r') as qph5:
+        kpt_num = qph5['nuclei'].attrs['kpt_num']
+        nucl_num = qph5['nuclei'].attrs['nucl_num']
+        ao_num = qph5['ao_basis'].attrs['ao_num']
+        mo_num = qph5['mo_basis'].attrs['mo_num']
+        elec_alpha_num = qph5['electrons'].attrs['elec_alpha_num']
+        elec_beta_num = qph5['electrons'].attrs['elec_beta_num']
+    
+    ezfio.set_nuclei_kpt_num(kpt_num)
+    kpt_pair_num = (kpt_num*kpt_num + kpt_num)//2
+    ezfio.set_nuclei_kpt_pair_num(kpt_pair_num)
+    
+    # don't multiply nuclei by kpt_num
+    # work in k-space, not in equivalent supercell
+    nucl_num_per_kpt = nucl_num 
+    ezfio.set_nuclei_nucl_num(nucl_num_per_kpt)
+    
+    # these are totals (kpt_num * num_per_kpt)
+    # need to change if we want to truncate orbital space within pyscf
+    ezfio.set_ao_basis_ao_num(ao_num)
+    ezfio.set_mo_basis_mo_num(mo_num)
+    ezfio.electrons_elec_alpha_num = elec_alpha_num
+    ezfio.electrons_elec_beta_num  = elec_beta_num
+    
+    
+    
+    ##ao_num = mo_num
+    ##Important !
+    #import math
+    #nelec_per_kpt = num_elec // n_kpts
+    #nelec_alpha_per_kpt = int(math.ceil(nelec_per_kpt / 2.))
+    #nelec_beta_per_kpt = int(math.floor(nelec_per_kpt / 2.))
+    #
+    #ezfio.electrons_elec_alpha_num = int(nelec_alpha_per_kpt * n_kpts)
+    #ezfio.electrons_elec_beta_num = int(nelec_beta_per_kpt * n_kpts)
+    
+    #ezfio.electrons_elec_alpha_num = int(math.ceil(num_elec / 2.))
+    #ezfio.electrons_elec_beta_num = int(math.floor(num_elec / 2.))
+    
+    #ezfio.set_utils_num_kpts(n_kpts)
+    #ezfio.set_integrals_bielec_df_num(n_aux)
+    
+    #(old)Important
+    #ezfio.set_nuclei_nucl_num(nucl_num)
+    #ezfio.set_nuclei_nucl_charge([0.]*nucl_num)
+    #ezfio.set_nuclei_nucl_coord( [ [0.], [0.], [0.] ]*nucl_num )
+    #ezfio.set_nuclei_nucl_label( ['He'] * nucl_num )
+    
+    
+    with h5py.File(qph5path,'r') as qph5:
+        nucl_charge=qph5['nuclei/nucl_charge'][()].tolist()
+        nucl_coord=qph5['nuclei/nucl_coord'][()].T.tolist()
+        nucl_label=qph5['nuclei/nucl_label'][()].tolist()
+        nuclear_repulsion = qph5['nuclei'].attrs['nuclear_repulsion']
+    
+    ezfio.set_nuclei_nucl_charge(nucl_charge)
+    ezfio.set_nuclei_nucl_coord(nucl_coord)
+    if isinstance(nucl_label[0],bytes):
+        nucl_label = list(map(lambda x:x.decode(),nucl_label))
+    ezfio.set_nuclei_nucl_label(nucl_label)
+    
+    ezfio.set_nuclei_io_nuclear_repulsion('Read')
+    ezfio.set_nuclei_nuclear_repulsion(nuclear_repulsion)
+    
+    
+    ##########################################
+    #                                        #
+    #                Basis                   #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        ezfio.set_ao_basis_ao_basis(qph5['ao_basis'].attrs['ao_basis'])
+        ezfio.set_ao_basis_ao_nucl(qph5['ao_basis/ao_nucl'][()].tolist())
+    
+    
+    #Just need one (can clean this up later)
+    ao_prim_num_max  = 5
+    
+    d = [ [0] *ao_prim_num_max]*ao_num
+    ezfio.set_ao_basis_ao_prim_num([ao_prim_num_max]*ao_num)
+    ezfio.set_ao_basis_ao_power(d)
+    ezfio.set_ao_basis_ao_coef(d)
+    ezfio.set_ao_basis_ao_expo(d)
+    
+    
+    
+      
+    ##########################################
+    #                                        #
+    #               MO Coef                  #
+    #                                        #
+    ##########################################
+        
+    
+    with h5py.File(qph5path,'r') as qph5:
+        mo_coef_reim = qph5['mo_basis/mo_coef_complex'][()].tolist()
+    ezfio.set_mo_basis_mo_coef_complex(mo_coef_reim)
+    #maybe fix qp so we don't need this?
+    #ezfio.set_mo_basis_mo_coef([[i for i in range(mo_num)] * ao_num])
+    
+    
+    ##########################################
+    #                                        #
+    #            Integrals Mono              #
+    #                                        #
+    ##########################################
+        
+    with h5py.File(qph5path,'r') as qph5:
+        if 'ao_one_e_ints' in qph5.keys():
+            kin_ao_reim=qph5['ao_one_e_ints/ao_integrals_kinetic'][()].tolist()
+            ovlp_ao_reim=qph5['ao_one_e_ints/ao_integrals_overlap'][()].tolist()
+            ne_ao_reim=qph5['ao_one_e_ints/ao_integrals_n_e'][()].tolist()
+    
+            ezfio.set_ao_one_e_ints_ao_integrals_kinetic_complex(kin_ao_reim)
+            ezfio.set_ao_one_e_ints_ao_integrals_overlap_complex(ovlp_ao_reim)
+            ezfio.set_ao_one_e_ints_ao_integrals_n_e_complex(ne_ao_reim)
+            
+            ezfio.set_ao_one_e_ints_io_ao_integrals_kinetic('Read')
+            ezfio.set_ao_one_e_ints_io_ao_integrals_overlap('Read')
+            ezfio.set_ao_one_e_ints_io_ao_integrals_n_e('Read')
+      
+        
+    with h5py.File(qph5path,'r') as qph5:
+        if 'mo_one_e_ints' in qph5.keys():
+            kin_mo_reim=qph5['mo_one_e_ints/mo_integrals_kinetic'][()].tolist()
+            #ovlp_mo_reim=qph5['mo_one_e_ints/mo_integrals_overlap'][()].tolist()
+            ne_mo_reim=qph5['mo_one_e_ints/mo_integrals_n_e'][()].tolist()
+    
+            ezfio.set_mo_one_e_ints_mo_integrals_kinetic_complex(kin_mo_reim)
+            #ezfio.set_mo_one_e_ints_mo_integrals_overlap_complex(ovlp_mo_reim)
+            #ezfio.set_mo_one_e_ints_mo_integrals_n_e_complex(ne_mo_reim)
+            ezfio.set_mo_one_e_ints_mo_integrals_n_e_complex(ne_mo_reim)
+            
+            ezfio.set_mo_one_e_ints_io_mo_integrals_kinetic('Read')
+            #ezfio.set_mo_one_e_ints_io_mo_integrals_overlap('Read')
+            #ezfio.set_mo_one_e_ints_io_mo_integrals_n_e('Read')
+            ezfio.set_mo_one_e_ints_io_mo_integrals_n_e('Read')
+      
+    ##########################################
+    #                                        #
+    #               k-points                 #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        kconserv = qph5['nuclei/kconserv'][()].tolist()
+    
+    ezfio.set_nuclei_kconserv(kconserv)
+    ezfio.set_nuclei_io_kconserv('Read')
+      
+    ##########################################
+    #                                        #
+    #             Integrals Bi               #
+    #                                        #
+    ##########################################
+    
+    # should this be in ao_basis? ao_two_e_ints?
+    with h5py.File(qph5path,'r') as qph5:
+        if 'ao_two_e_ints' in qph5.keys():
+            df_num = qph5['ao_two_e_ints'].attrs['df_num']
+            ezfio.set_ao_two_e_ints_df_num(df_num)
+            if 'df_ao_integrals' in qph5['ao_two_e_ints'].keys():
+    #            dfao_re0=qph5['ao_two_e_ints/df_ao_integrals_real'][()].transpose((3,2,1,0))
+    #            dfao_im0=qph5['ao_two_e_ints/df_ao_integrals_imag'][()].transpose((3,2,1,0))
+    #            dfao_cmplx0 = np.stack((dfao_re0,dfao_im0),axis=-1).tolist()
+    #            ezfio.set_ao_two_e_ints_df_ao_integrals_complex(dfao_cmplx0)
+                dfao_reim=qph5['ao_two_e_ints/df_ao_integrals'][()].tolist()
+                ezfio.set_ao_two_e_ints_df_ao_integrals_complex(dfao_reim)
+                ezfio.set_ao_two_e_ints_io_df_ao_integrals('Read')
+    
+        if 'mo_two_e_ints' in qph5.keys():
+            df_num = qph5['ao_two_e_ints'].attrs['df_num']
+            ezfio.set_ao_two_e_ints_df_num(df_num)
+    #        dfmo_re0=qph5['mo_two_e_ints/df_mo_integrals_real'][()].transpose((3,2,1,0))
+    #        dfmo_im0=qph5['mo_two_e_ints/df_mo_integrals_imag'][()].transpose((3,2,1,0))
+    #        dfmo_cmplx0 = np.stack((dfmo_re0,dfmo_im0),axis=-1).tolist()
+    #        ezfio.set_mo_two_e_ints_df_mo_integrals_complex(dfmo_cmplx0)
+            dfmo_reim=qph5['mo_two_e_ints/df_mo_integrals'][()].tolist()
+            ezfio.set_mo_two_e_ints_df_mo_integrals_complex(dfmo_reim)
+            ezfio.set_mo_two_e_ints_io_df_mo_integrals('Read')
+   
+    return
+    
+
+if __name__ == '__main__':
+    ARGUMENTS = docopt(__doc__)
+
+    FILE = get_full_path(ARGUMENTS['FILE'])
+
+    if ARGUMENTS["--output"]:
+        EZFIO_FILE = get_full_path(ARGUMENTS["--output"])
+    else:
+        EZFIO_FILE = "{0}.ezfio".format(FILE)
+
+    with h5py.File(FILE,'r') as qph5:
+        do_kpts = ('kconserv' in qph5['nuclei'].keys())
+    if (do_kpts):
+        print("converting HDF5 to EZFIO for periodic system")
+        convert_kpts(EZFIO_FILE,FILE)
+    else:
+        print("converting HDF5 to EZFIO for molecular system")
+        convert_mol(EZFIO_FILE,FILE)
+
+#    sys.stdout.flush()
+#    if os.system("qp_run save_ortho_mos "+EZFIO_FILE) != 0:
+#        print("""Warning: You need to run
+#
+#         qp run save_ortho_mos
+#
+#to be sure your MOs will be orthogonal, which is not the case when
+#the MOs are read from output files (not enough precision in output).""")
+
diff --git a/bin/qp_convert_output_to_ezfio b/bin/qp_convert_output_to_ezfio
index cbc81032..e050e9b9 100755
--- a/bin/qp_convert_output_to_ezfio
+++ b/bin/qp_convert_output_to_ezfio
@@ -89,6 +89,7 @@ def write_ezfio(res, filename):
     # W r i t e #
     # ~#~#~#~#~ #
 
+    ezfio.set_nuclei_is_complex(False)
     ezfio.set_nuclei_nucl_num(len(res.geometry))
     ezfio.set_nuclei_nucl_charge(charge)
 
diff --git a/configure b/configure
index 4f930a34..4ad726f9 100755
--- a/configure
+++ b/configure
@@ -365,7 +365,6 @@ EOF
               cd "\${QP_ROOT}"/external
               tar --gunzip --extract --file bse.tar.gz
               pip install -e basis_set_exchange-*
-EOF
     elif [[ ${PACKAGE} = zlib ]] ; then
 
             download ${ZLIB_URL} "${QP_ROOT}"/external/zlib.tar.gz
diff --git a/ocaml/Input_determinants_by_hand.ml b/ocaml/Input_determinants_by_hand.ml
index fb0aef7f..e79639ca 100644
--- a/ocaml/Input_determinants_by_hand.ml
+++ b/ocaml/Input_determinants_by_hand.ml
@@ -37,7 +37,9 @@ end = struct
     } [@@deriving sexp]
   ;;
 
-  let get_default = Qpackage.get_ezfio_default "determinants";;
+  let get_default = Qpackage.get_ezfio_default "determinants"
+
+  let is_complex = lazy (Ezfio.get_nuclei_is_complex () )
 
   let read_n_int () =
     if not (Ezfio.has_determinants_n_int()) then
@@ -48,12 +50,12 @@ end = struct
     ;
     Ezfio.get_determinants_n_int ()
     |> N_int_number.of_int
-  ;;
+
 
   let write_n_int n =
     N_int_number.to_int n
     |> Ezfio.set_determinants_n_int
-  ;;
+
 
 
   let read_bit_kind () =
@@ -64,12 +66,12 @@ end = struct
     ;
     Ezfio.get_determinants_bit_kind ()
     |> Bit_kind.of_int
-  ;;
+
 
   let write_bit_kind b =
     Bit_kind.to_int b
     |> Ezfio.set_determinants_bit_kind
-  ;;
+
 
   let read_n_det () =
     if not (Ezfio.has_determinants_n_det ()) then
@@ -77,7 +79,7 @@ end = struct
     ;
     Ezfio.get_determinants_n_det ()
     |> Det_number.of_int
-  ;;
+
 
   let read_n_det_qp_edit () =
     if not (Ezfio.has_determinants_n_det_qp_edit ()) then
@@ -87,18 +89,18 @@ end = struct
       end;
     Ezfio.get_determinants_n_det_qp_edit ()
     |> Det_number.of_int
-  ;;
+
 
   let write_n_det n =
     Det_number.to_int n
     |> Ezfio.set_determinants_n_det
-  ;;
+
 
   let write_n_det_qp_edit n =
     let n_det = read_n_det () |> Det_number.to_int in
     min n_det (Det_number.to_int n)
     |> Ezfio.set_determinants_n_det_qp_edit
-  ;;
+
 
   let read_n_states () =
     if not (Ezfio.has_determinants_n_states ()) then
@@ -106,7 +108,7 @@ end = struct
     ;
     Ezfio.get_determinants_n_states ()
     |> States_number.of_int
-  ;;
+
 
   let write_n_states n =
     let n_states =
@@ -130,7 +132,7 @@ end = struct
         Ezfio.ezfio_array_of_list ~rank:1 ~dim:[| n_states |] ~data
         |> Ezfio.set_determinants_state_average_weight
       end
-  ;;
+
 
   let write_state_average_weight data =
       let n_states =
@@ -143,7 +145,7 @@ end = struct
       in
       Ezfio.ezfio_array_of_list ~rank:1 ~dim:[| n_states |] ~data
       |> Ezfio.set_determinants_state_average_weight
-  ;;
+
 
   let read_state_average_weight () =
     let n_states =
@@ -171,7 +173,7 @@ end = struct
         |> Array.map Positive_float.of_float
       in
       (write_state_average_weight data; data)
-  ;;
+
 
   let read_expected_s2 () =
     if not (Ezfio.has_determinants_expected_s2 ()) then
@@ -186,12 +188,12 @@ end = struct
     ;
     Ezfio.get_determinants_expected_s2 ()
     |> Positive_float.of_float
-  ;;
+
 
   let write_expected_s2 s2 =
     Positive_float.to_float s2
     |> Ezfio.set_determinants_expected_s2
-  ;;
+
 
   let read_psi_coef ~read_only () =
     if not (Ezfio.has_determinants_psi_coef ()) then
@@ -200,19 +202,36 @@ end = struct
           read_n_states ()
           |> States_number.to_int
         in
-        Ezfio.ezfio_array_of_list ~rank:2 ~dim:[| 1 ; n_states |]
-          ~data:(List.init n_states (fun i -> if (i=0) then 1. else 0. ))
+        (
+        if Lazy.force is_complex then
+          Ezfio.ezfio_array_of_list ~rank:2 ~dim:[| 1 ; n_states |]
+            ~data:(List.init (2*n_states) (fun i -> if (i=0) then 1. else 0. ))
           |> Ezfio.set_determinants_psi_coef
+        else
+          Ezfio.ezfio_array_of_list ~rank:3 ~dim:[| 2 ; 1 ; n_states |]
+            ~data:(List.init n_states (fun i -> if (i=0) then 1. else 0. ))
+          |> Ezfio.set_determinants_psi_coef_complex
+        )
       end;
     begin
       if read_only then
-        Ezfio.get_determinants_psi_coef_qp_edit ()
+        begin
+          if Lazy.force is_complex then
+            Ezfio.get_determinants_psi_coef_complex_qp_edit ()
+          else
+            Ezfio.get_determinants_psi_coef_qp_edit ()
+        end
       else
-        Ezfio.get_determinants_psi_coef ()
+        begin
+          if Lazy.force is_complex then
+            Ezfio.get_determinants_psi_coef_complex ()
+          else
+            Ezfio.get_determinants_psi_coef ()
+        end
     end
     |> Ezfio.flattened_ezfio
     |> Array.map Det_coef.of_float
-  ;;
+
 
   let write_psi_coef ~n_det ~n_states c =
     let n_det = Det_number.to_int n_det
@@ -222,12 +241,23 @@ end = struct
     and n_states =
       States_number.to_int n_states
     in
-    let r = 
-      Ezfio.ezfio_array_of_list ~rank:2 ~dim:[| n_det ; n_states |] ~data:c
-    in
-    Ezfio.set_determinants_psi_coef r;
-    Ezfio.set_determinants_psi_coef_qp_edit r
-  ;;
+    if Lazy.force is_complex then
+      begin
+        let r = 
+          Ezfio.ezfio_array_of_list ~rank:3 ~dim:[| 2 ; n_det ; n_states |] ~data:c
+        in
+        Ezfio.set_determinants_psi_coef_complex r;
+        Ezfio.set_determinants_psi_coef_complex_qp_edit r
+      end
+    else
+      begin
+        let r = 
+          Ezfio.ezfio_array_of_list ~rank:2 ~dim:[| n_det ; n_states |] ~data:c
+        in
+        Ezfio.set_determinants_psi_coef r;
+        Ezfio.set_determinants_psi_coef_qp_edit r
+      end
+
 
 
   let read_psi_det ~read_only () =
@@ -276,7 +306,7 @@ end = struct
     |> Array.map (Determinant.of_int64_array
       ~n_int:(N_int_number.of_int n_int)
       ~alpha:n_alpha ~beta:n_beta )
-  ;;
+
 
   let write_psi_det ~n_int ~n_det d =
     let data = Array.to_list d
@@ -288,7 +318,7 @@ end = struct
     in
     Ezfio.set_determinants_psi_det r;
     Ezfio.set_determinants_psi_det_qp_edit r
-  ;;
+
 
 
   let read ?(full=true) () =
@@ -316,7 +346,7 @@ end = struct
     else
       (* No molecular orbitals, so no determinants *)
       None
-  ;;
+
 
   let write ?(force=false)
     { n_int                ;
@@ -341,7 +371,7 @@ end = struct
           write_psi_det ~n_int:n_int ~n_det:n_det psi_det
         end;
      write_state_average_weight state_average_weight
-  ;;
+
 
 
   let to_rst b =
@@ -557,10 +587,8 @@ psi_det                = %s
     in
 
 
-
-
     Generic_input_of_rst.evaluate_sexp t_of_sexp s
-  ;;
+
 
   let update_ndet n_det_new =
     Printf.printf "Reducing n_det to %d\n" (Det_number.to_int n_det_new);
@@ -596,7 +624,7 @@ psi_det                = %s
       { det with n_det = (Det_number.of_int n_det_new) }
     in
     write ~force:true new_det
-  ;;
+
 
   let extract_state istate =
     Printf.printf "Extracting state %d\n" (States_number.to_int istate);
@@ -628,7 +656,7 @@ psi_det                = %s
       { det with n_states = (States_number.of_int 1) }
     in
     write ~force:true new_det
-  ;;
+
 
   let extract_states range =
     Printf.printf "Extracting states %s\n" (Range.to_string range);
@@ -673,7 +701,7 @@ psi_det                = %s
       { det with n_states = (States_number.of_int @@ List.length sorted_list) }
     in
     write ~force:true new_det
-  ;;
+
 
 end
 
diff --git a/ocaml/Input_mo_basis.ml b/ocaml/Input_mo_basis.ml
index a4e6176a..80969137 100644
--- a/ocaml/Input_mo_basis.ml
+++ b/ocaml/Input_mo_basis.ml
@@ -2,7 +2,6 @@ open Qptypes
 open Qputils
 open Sexplib.Std
 
-
 module Mo_basis : sig
   type t =
       { mo_num          : MO_number.t ;
@@ -10,7 +9,6 @@ module Mo_basis : sig
         mo_class        : MO_class.t array;
         mo_occ          : MO_occ.t array;
         mo_coef         : (MO_coef.t array) array;
-        mo_coef_imag    : (MO_coef.t array) array option;
         ao_md5          : MD5.t;
       } [@@deriving sexp]
   val read : unit -> t option
@@ -25,11 +23,13 @@ end = struct
         mo_class        : MO_class.t array;
         mo_occ          : MO_occ.t array;
         mo_coef         : (MO_coef.t array) array;
-        mo_coef_imag    : (MO_coef.t array) array option;
         ao_md5          : MD5.t;
       } [@@deriving sexp]
+
   let get_default = Qpackage.get_ezfio_default "mo_basis"
 
+  let is_complex = lazy (Ezfio.get_nuclei_is_complex () )
+
   let read_mo_label () =
     if not (Ezfio.has_mo_basis_mo_label ()) then
       Ezfio.set_mo_basis_mo_label "None"
@@ -43,14 +43,7 @@ end = struct
       mo_coef = Array.map (fun mo ->
           Array.init (Array.length mo)
             (fun i -> mo.(ordering.(i)))
-        ) b.mo_coef  ;
-      mo_coef_imag =
-        match b.mo_coef_imag with
-        | None -> None
-        | Some x -> Some ( Array.map (fun mo ->
-              Array.init (Array.length mo)
-                (fun i -> mo.(ordering.(i)))
-            ) x )
+        ) b.mo_coef
     }
 
   let read_ao_md5 () =
@@ -69,7 +62,10 @@ end = struct
       |> MD5.of_string
     in
     if (ao_md5 <> result) then
-      failwith "The current MOs don't correspond to the current AOs.";
+      begin
+        Printf.eprintf ":%s:\n:%s:\n%!" (MD5.to_string ao_md5) (MD5.to_string result);
+        failwith "The current MOs don't correspond to the current AOs."
+      end;
     result
 
 
@@ -77,7 +73,7 @@ end = struct
     let elec_alpha_num =
       Ezfio.get_electrons_elec_alpha_num ()
     in
-    let result = 
+    let result =
       Ezfio.get_mo_basis_mo_num ()
     in
     if result < elec_alpha_num then
@@ -120,29 +116,21 @@ end = struct
 
 
   let read_mo_coef () =
-    let a = Ezfio.get_mo_basis_mo_coef ()
-            |> Ezfio.flattened_ezfio
-            |> Array.map MO_coef.of_float
+    let a =
+      (
+        if Lazy.force is_complex then
+           Ezfio.get_mo_basis_mo_coef_complex  ()
+        else
+           Ezfio.get_mo_basis_mo_coef ()
+      )
+      |> Ezfio.flattened_ezfio
+      |> Array.map MO_coef.of_float
     in
     let mo_num = read_mo_num () |> MO_number.to_int in
     let ao_num = (Array.length a)/mo_num in
-    Array.init mo_num (fun j ->
-        Array.sub a (j*ao_num) (ao_num) 
-      )
-
-  let read_mo_coef_imag () =
-    if Ezfio.has_mo_basis_mo_coef_imag () then
-      let a =
-          Ezfio.get_mo_basis_mo_coef_imag ()
-                  |> Ezfio.flattened_ezfio
-                  |> Array.map MO_coef.of_float 
-      in
-      let mo_num = read_mo_num () |> MO_number.to_int in
-      let ao_num = (Array.length a)/mo_num in
-      Some (Array.init mo_num (fun j ->
-          Array.sub a (j*ao_num) (ao_num) 
-        ) )
-    else None
+      Array.init mo_num (fun j ->
+          Array.sub a (j*ao_num) (ao_num)
+        )
 
 
   let read () =
@@ -153,7 +141,6 @@ end = struct
           mo_class        = read_mo_class ();
           mo_occ          = read_mo_occ ();
           mo_coef         = read_mo_coef ();
-          mo_coef_imag    = read_mo_coef_imag ();
           ao_md5          = read_ao_md5 ();
         }
     else
@@ -161,7 +148,6 @@ end = struct
 
 
   let mo_coef_to_string mo_coef =
-  (*TODO : add imaginary part here *)
     let ao_num = Array.length mo_coef.(0)
     and mo_num = Array.length mo_coef in
     let rec print_five imin imax =
@@ -247,7 +233,6 @@ MO coefficients ::
 
 
   let to_string b =
-  (*TODO : add imaginary part here *)
     Printf.sprintf "
 mo_label        = \"%s\"
 mo_num          = %s
@@ -262,7 +247,7 @@ mo_coef         = %s
       (b.mo_occ |> Array.to_list |> list_map
          (MO_occ.to_string) |> String.concat ", " )
       (b.mo_coef |> Array.map
-         (fun x-> Array.map MO_coef.to_string x |> 
+         (fun x-> Array.map MO_coef.to_string x |>
            Array.to_list |> String.concat "," ) |>
        Array.to_list |> String.concat "\n" )
 
@@ -300,40 +285,30 @@ mo_coef         = %s
 
   let write_mo_coef a =
     let mo_num = Array.length a in
-    let ao_num = Array.length a.(0) in
+    let ao_num =
+      let x = Array.length a.(0) in
+      if Lazy.force is_complex then x/2 else x
+    in
     let data =
       Array.map (fun mo -> Array.map MO_coef.to_float mo
       |> Array.to_list) a
     |> Array.to_list
     |> List.concat
-    in Ezfio.ezfio_array_of_list ~rank:2 ~dim:[| ao_num ; mo_num |] ~data
-    |> Ezfio.set_mo_basis_mo_coef
+    in
+    if Lazy.force is_complex then
+      (Ezfio.ezfio_array_of_list ~rank:3 ~dim:[| 2 ; ao_num ; mo_num |] ~data
+        |> Ezfio.set_mo_basis_mo_coef_complex )
+    else
+       (Ezfio.ezfio_array_of_list ~rank:2 ~dim:[| ao_num ; mo_num |] ~data
+        |> Ezfio.set_mo_basis_mo_coef )
 
 
-  let write_mo_coef_imag a =
-    match a with
-    | None -> ()
-    | Some a -> 
-      begin
-        let mo_num = Array.length a in
-        let ao_num = Array.length a.(0) in
-        let data =
-          Array.map (fun mo -> Array.map MO_coef.to_float mo
-          |> Array.to_list) a
-        |> Array.to_list
-        |> List.concat
-        in Ezfio.ezfio_array_of_list ~rank:2 ~dim:[| ao_num ; mo_num |] ~data
-        |> Ezfio.set_mo_basis_mo_coef_imag
-      end
-
-
-  let write 
+  let write
       { mo_num          : MO_number.t ;
         mo_label        : MO_label.t;
         mo_class        : MO_class.t array;
         mo_occ          : MO_occ.t array;
         mo_coef         : (MO_coef.t array) array;
-        mo_coef_imag    : (MO_coef.t array) array option;
         ao_md5          : MD5.t;
       } =
       write_mo_num mo_num;
@@ -341,7 +316,6 @@ mo_coef         = %s
       write_mo_class mo_class;
       write_mo_occ mo_occ;
       write_mo_coef mo_coef;
-      write_mo_coef_imag mo_coef_imag;
       write_md5 ao_md5
 
 
diff --git a/ocaml/qptypes_generator.ml b/ocaml/qptypes_generator.ml
index ce99fc78..4431c790 100644
--- a/ocaml/qptypes_generator.ml
+++ b/ocaml/qptypes_generator.ml
@@ -166,6 +166,7 @@ let input_ezfio = "
 
 
 let untouched = "
+
 module MO_guess : sig
   type t [@@deriving sexp]
   val to_string : t -> string
diff --git a/scripts/generate_h_apply.py b/scripts/generate_h_apply.py
index dc7d340e..abfea976 100644
--- a/scripts/generate_h_apply.py
+++ b/scripts/generate_h_apply.py
@@ -130,9 +130,15 @@ class H_apply(object):
   !$OMP END PARALLEL
 
   call dsort(H_jj,iorder,N_det)
-  do k=1,N_states
-    psi_coef(iorder(k),k) = 1.d0
-  enddo
+  if (is_complex) then
+    do k=1,N_states
+      psi_coef_complex(iorder(k),k) = (1.d0,0.d0)
+    enddo
+  else
+    do k=1,N_states
+      psi_coef(iorder(k),k) = 1.d0
+    enddo
+  endif
   deallocate(H_jj,iorder)
     """
 
@@ -141,7 +147,11 @@ class H_apply(object):
   if (s2_eig) then
     call make_s2_eigenfunction
   endif
-  SOFT_TOUCH psi_det psi_coef N_det
+  if (is_complex) then
+    SOFT_TOUCH psi_det psi_coef_complex N_det
+  else
+    SOFT_TOUCH psi_det psi_coef N_det
+  endif
 """
     s["printout_now"]   = """write(6,*)  &
        100.*float(i_generator)/float(N_det_generators), '% in ', wall_1-wall_0, 's'"""
diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg
index 51d726da..2d9dd2fb 100644
--- a/src/ao_basis/EZFIO.cfg
+++ b/src/ao_basis/EZFIO.cfg
@@ -67,3 +67,8 @@ doc: Use normalized primitive functions
 interface: ezfio, provider
 default: true
 
+[ao_num_per_kpt]
+type: integer
+doc: Max number of |AOs| per kpt
+interface: ezfio, provider
+
diff --git a/src/ao_basis/aos_cplx.irp.f b/src/ao_basis/aos_cplx.irp.f
new file mode 100644
index 00000000..da1adb94
--- /dev/null
+++ b/src/ao_basis/aos_cplx.irp.f
@@ -0,0 +1,23 @@
+!BEGIN_PROVIDER [ integer, ao_num_per_kpt ]
+! implicit none
+! BEGIN_DOC
+! ! number of aos per kpt.
+! END_DOC
+! ao_num_per_kpt = ao_num/kpt_num
+!END_PROVIDER
+
+subroutine get_kpt_idx_ao(idx_full,k,i)
+  implicit none
+  BEGIN_DOC
+  ! idx_full is ao index in full range (up to ao_num)
+  ! k is index of the k-point for this ao
+  ! i is index of this ao within k-point k
+  ! this assumes that all kpts have the same number of aos
+  END_DOC
+
+  integer, intent(in) :: idx_full
+  integer, intent(out) :: i,k
+  i = mod(idx_full-1,ao_num_per_kpt)+1
+  k = (idx_full-1)/ao_num_per_kpt+1
+  ASSERT (k <= kpt_num)
+end
diff --git a/src/ao_one_e_ints/EZFIO.cfg b/src/ao_one_e_ints/EZFIO.cfg
index ed9cdc35..746b0357 100644
--- a/src/ao_one_e_ints/EZFIO.cfg
+++ b/src/ao_one_e_ints/EZFIO.cfg
@@ -4,10 +4,16 @@ doc: Nucleus-electron integrals in |AO| basis set
 size: (ao_basis.ao_num,ao_basis.ao_num)
 interface: ezfio
 
-[ao_integrals_n_e_imag]
+[ao_integrals_n_e_complex]
 type: double precision
-doc: Imaginary part of the nucleus-electron integrals in |AO| basis set
-size: (ao_basis.ao_num,ao_basis.ao_num)
+doc: Complex nucleus-electron integrals in |AO| basis set
+size: (2,ao_basis.ao_num,ao_basis.ao_num)
+interface: ezfio
+
+[ao_integrals_n_e_kpts]
+type: double precision
+doc: Complex nucleus-electron integrals in |AO| basis set
+size: (2,ao_basis.ao_num_per_kpt,ao_basis.ao_num_per_kpt,nuclei.kpt_num)
 interface: ezfio
 
 [io_ao_integrals_n_e]
@@ -23,10 +29,16 @@ doc: Kinetic energy integrals in |AO| basis set
 size: (ao_basis.ao_num,ao_basis.ao_num)
 interface: ezfio
 
-[ao_integrals_kinetic_imag]
+[ao_integrals_kinetic_complex]
 type: double precision
-doc: Imaginary part of the kinetic energy integrals in |AO| basis set
-size: (ao_basis.ao_num,ao_basis.ao_num)
+doc: Complex kinetic energy integrals in |AO| basis set
+size: (2,ao_basis.ao_num,ao_basis.ao_num)
+interface: ezfio
+
+[ao_integrals_kinetic_kpts]
+type: double precision
+doc: Complex kinetic energy integrals in |AO| basis set
+size: (2,ao_basis.ao_num_per_kpt,ao_basis.ao_num_per_kpt,nuclei.kpt_num)
 interface: ezfio
 
 [io_ao_integrals_kinetic]
@@ -42,10 +54,16 @@ doc: Pseudopotential integrals in |AO| basis set
 size: (ao_basis.ao_num,ao_basis.ao_num)
 interface: ezfio
 
-[ao_integrals_pseudo_imag]
+[ao_integrals_pseudo_complex]
 type: double precision
-doc: Imaginary part of the pseudopotential integrals in |AO| basis set
-size: (ao_basis.ao_num,ao_basis.ao_num)
+doc: Complex pseudopotential integrals in |AO| basis set
+size: (2,ao_basis.ao_num,ao_basis.ao_num)
+interface: ezfio
+
+[ao_integrals_pseudo_kpts]
+type: double precision
+doc: Complex pseudopotential integrals in |AO| basis set
+size: (2,ao_basis.ao_num_per_kpt,ao_basis.ao_num_per_kpt,nuclei.kpt_num)
 interface: ezfio
 
 [io_ao_integrals_pseudo]
@@ -61,10 +79,16 @@ doc: Overlap integrals in |AO| basis set
 size: (ao_basis.ao_num,ao_basis.ao_num)
 interface: ezfio
 
-[ao_integrals_overlap_imag]
+[ao_integrals_overlap_complex]
 type: double precision
-doc: Imaginary part of the overlap integrals in |AO| basis set
-size: (ao_basis.ao_num,ao_basis.ao_num)
+doc: Complex overlap integrals in |AO| basis set
+size: (2,ao_basis.ao_num,ao_basis.ao_num)
+interface: ezfio
+
+[ao_integrals_overlap_kpts]
+type: double precision
+doc: Complex overlap integrals in |AO| basis set
+size: (2,ao_basis.ao_num_per_kpt,ao_basis.ao_num_per_kpt,nuclei.kpt_num)
 interface: ezfio
 
 [io_ao_integrals_overlap]
@@ -80,10 +104,16 @@ doc: Combined integrals in |AO| basis set
 size: (ao_basis.ao_num,ao_basis.ao_num) 
 interface: ezfio
 
-[ao_one_e_integrals_imag]
+[ao_one_e_integrals_complex]
 type: double precision
-doc: Imaginary part of the combined integrals in |AO| basis set
-size: (ao_basis.ao_num,ao_basis.ao_num) 
+doc: Complex combined integrals in |AO| basis set
+size: (2,ao_basis.ao_num,ao_basis.ao_num) 
+interface: ezfio
+
+[ao_one_e_integrals_kpts]
+type: double precision
+doc: Complex combined integrals in |AO| basis set
+size: (2,ao_basis.ao_num_per_kpt,ao_basis.ao_num_per_kpt,nuclei.kpt_num) 
 interface: ezfio
 
 [io_ao_one_e_integrals]
diff --git a/src/ao_one_e_ints/ao_one_e_ints.irp.f b/src/ao_one_e_ints/ao_one_e_ints.irp.f
index 65981dc9..01daf045 100644
--- a/src/ao_one_e_ints/ao_one_e_ints.irp.f
+++ b/src/ao_one_e_ints/ao_one_e_ints.irp.f
@@ -5,7 +5,10 @@
   BEGIN_DOC
  ! One-electron Hamiltonian in the |AO| basis.
   END_DOC
-
+  if (is_complex) then
+    print*,"you shouldn't be here for complex",irp_here
+    stop -1
+  endif
   IF (read_ao_one_e_integrals) THEN
      call ezfio_get_ao_one_e_ints_ao_one_e_integrals(ao_one_e_integrals)
   ELSE
@@ -24,24 +27,85 @@
 
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, ao_one_e_integrals_imag,(ao_num,ao_num)]
+!BEGIN_PROVIDER [ double precision, ao_one_e_integrals_imag,(ao_num,ao_num)]
+!  implicit none
+!  integer :: i,j,n,l
+!  BEGIN_DOC
+! ! One-electron Hamiltonian in the |AO| basis.
+!  END_DOC
+!
+!  IF (read_ao_one_e_integrals) THEN
+!     call ezfio_get_ao_one_e_ints_ao_one_e_integrals_imag(ao_one_e_integrals_imag)
+!  ELSE
+!        ao_one_e_integrals_imag = ao_integrals_n_e_imag + ao_kinetic_integrals_imag
+!
+!        IF (DO_PSEUDO) THEN
+!              ao_one_e_integrals_imag += ao_pseudo_integrals_imag
+!        ENDIF
+!  ENDIF
+!
+!  IF (write_ao_one_e_integrals) THEN
+!       call ezfio_set_ao_one_e_ints_ao_one_e_integrals_imag(ao_one_e_integrals_imag)
+!       print *,  'AO one-e integrals written to disk'
+!  ENDIF
+!
+!END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, ao_one_e_integrals_complex,(ao_num,ao_num)]
+&BEGIN_PROVIDER [ double precision, ao_one_e_integrals_diag_complex,(ao_num)]
   implicit none
   integer :: i,j,n,l
   BEGIN_DOC
  ! One-electron Hamiltonian in the |AO| basis.
   END_DOC
-
+  
   IF (read_ao_one_e_integrals) THEN
-     call ezfio_get_ao_one_e_ints_ao_one_e_integrals(ao_one_e_integrals_imag)
+     call ezfio_get_ao_one_e_ints_ao_one_e_integrals_complex(ao_one_e_integrals_complex)
   ELSE
-     print *,  irp_here, ': Not yet implemented'
-     stop -1
+        ao_one_e_integrals_complex = ao_integrals_n_e_complex + ao_kinetic_integrals_complex
+
+        IF (DO_PSEUDO) THEN
+              ao_one_e_integrals_complex += ao_pseudo_integrals_complex
+        ENDIF
   ENDIF
 
+  DO j = 1, ao_num
+    ao_one_e_integrals_diag_complex(j) = dble(ao_one_e_integrals_complex(j,j))
+  ENDDO
+
   IF (write_ao_one_e_integrals) THEN
-       call ezfio_set_ao_one_e_ints_ao_one_e_integrals(ao_one_e_integrals_imag)
+       call ezfio_set_ao_one_e_ints_ao_one_e_integrals_complex(ao_one_e_integrals_complex)
        print *,  'AO one-e integrals written to disk'
   ENDIF
-
+END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, ao_one_e_integrals_kpts,(ao_num_per_kpt,ao_num_per_kpt,kpt_num)]
+&BEGIN_PROVIDER [ double precision, ao_one_e_integrals_diag_kpts,(ao_num_per_kpt,kpt_num)]
+  implicit none
+  integer :: j,k
+  BEGIN_DOC
+ ! One-electron Hamiltonian in the |AO| basis.
+  END_DOC
+  
+  if (read_ao_one_e_integrals) then
+     call ezfio_get_ao_one_e_ints_ao_one_e_integrals_kpts(ao_one_e_integrals_kpts)
+  else
+        ao_one_e_integrals_kpts = ao_integrals_n_e_kpts + ao_kinetic_integrals_kpts
+
+        if (do_pseudo) then
+              ao_one_e_integrals_kpts += ao_pseudo_integrals_kpts
+        endif
+  endif
+
+  do k = 1, kpt_num
+    do j = 1, ao_num_per_kpt
+      ao_one_e_integrals_diag_kpts(j,k) = dble(ao_one_e_integrals_kpts(j,j,k))
+    enddo
+  enddo
+
+  if (write_ao_one_e_integrals) then
+       call ezfio_set_ao_one_e_ints_ao_one_e_integrals_kpts(ao_one_e_integrals_kpts)
+       print *,  'AO one-e integrals written to disk'
+  endif
 END_PROVIDER
 
diff --git a/src/ao_one_e_ints/ao_ortho_canonical.irp.f b/src/ao_one_e_ints/ao_ortho_cano.irp.f
similarity index 96%
rename from src/ao_one_e_ints/ao_ortho_canonical.irp.f
rename to src/ao_one_e_ints/ao_ortho_cano.irp.f
index 45275a06..2c1d00c2 100644
--- a/src/ao_one_e_ints/ao_ortho_canonical.irp.f
+++ b/src/ao_one_e_ints/ao_ortho_cano.irp.f
@@ -84,13 +84,13 @@ END_PROVIDER
 
 
 
-BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef_inv, (ao_num,ao_num)]
+BEGIN_PROVIDER [ double precision, ao_ortho_cano_coef_inv, (ao_num,ao_num)]
  implicit none
  BEGIN_DOC
 ! ao_ortho_canonical_coef^(-1)
  END_DOC
  call get_inverse(ao_ortho_canonical_coef,size(ao_ortho_canonical_coef,1),&
-     ao_num, ao_ortho_canonical_coef_inv, size(ao_ortho_canonical_coef_inv,1))
+     ao_num, ao_ortho_cano_coef_inv, size(ao_ortho_cano_coef_inv,1))
 END_PROVIDER
 
  BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef, (ao_num,ao_num)]
diff --git a/src/ao_one_e_ints/ao_ortho_cano_cplx.irp.f b/src/ao_one_e_ints/ao_ortho_cano_cplx.irp.f
new file mode 100644
index 00000000..1245ae6e
--- /dev/null
+++ b/src/ao_one_e_ints/ao_ortho_cano_cplx.irp.f
@@ -0,0 +1,121 @@
+!todo: add kpts
+BEGIN_PROVIDER [ complex*16, ao_cart_to_sphe_coef_complex, (ao_num,ao_cart_to_sphe_num) ]
+ implicit none
+ BEGIN_DOC
+ ! complex version of ao_cart_to_sphe_coef
+ END_DOC
+ call zlacp2('A',ao_num,ao_cart_to_sphe_num, &
+        ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1), &
+        ao_cart_to_sphe_coef_complex,size(ao_cart_to_sphe_coef_complex,1))
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, ao_cart_to_sphe_overlap_complex, (ao_cart_to_sphe_num,ao_cart_to_sphe_num) ]
+ implicit none
+ BEGIN_DOC
+ ! AO overlap matrix in the spherical basis set
+ END_DOC
+ complex*16, allocatable :: S(:,:)
+ allocate (S(ao_cart_to_sphe_num,ao_num))
+
+ call zgemm('T','N',ao_cart_to_sphe_num,ao_num,ao_num, (1.d0,0.d0), &
+   ao_cart_to_sphe_coef_complex,size(ao_cart_to_sphe_coef_complex,1), &
+   ao_overlap_complex,size(ao_overlap_complex,1), (0.d0,0.d0), &
+   S, size(S,1))
+
+ call zgemm('N','N',ao_cart_to_sphe_num,ao_cart_to_sphe_num,ao_num, (1.d0,0.d0), &
+   S, size(S,1), &
+   ao_cart_to_sphe_coef_complex,size(ao_cart_to_sphe_coef_complex,1), (0.d0,0.d0), &
+   ao_cart_to_sphe_overlap_complex,size(ao_cart_to_sphe_overlap_complex,1))
+
+ deallocate(S)
+
+END_PROVIDER
+
+
+
+
+BEGIN_PROVIDER [ complex*16, ao_ortho_cano_coef_inv_cplx, (ao_num,ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_ortho_canonical_coef_complex^(-1)
+ END_DOC
+ call get_inverse_complex(ao_ortho_canonical_coef_complex,size(ao_ortho_canonical_coef_complex,1),&
+     ao_num, ao_ortho_cano_coef_inv_cplx, size(ao_ortho_cano_coef_inv_cplx,1))
+END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, ao_ortho_canonical_coef_complex, (ao_num,ao_num)]
+&BEGIN_PROVIDER [ integer, ao_ortho_canonical_num_complex ]
+  implicit none
+  BEGIN_DOC
+! TODO: ao_ortho_canonical_num_complex should be the same as the real version
+!       maybe if the providers weren't linked we could avoid making a complex one?
+! matrix of the coefficients of the mos generated by the
+! orthonormalization by the S^{-1/2} canonical transformation of the aos
+! ao_ortho_canonical_coef(i,j) = coefficient of the ith ao on the jth ao_ortho_canonical orbital
+  END_DOC
+  integer :: i
+  ao_ortho_canonical_coef_complex = (0.d0,0.d0)
+  do i=1,ao_num
+    ao_ortho_canonical_coef_complex(i,i) = (1.d0,0.d0)
+  enddo
+
+!call ortho_lowdin(ao_overlap,size(ao_overlap,1),ao_num,ao_ortho_canonical_coef,size(ao_ortho_canonical_coef,1),ao_num)
+!ao_ortho_canonical_num=ao_num
+!return
+
+  if (ao_cartesian) then
+
+    ao_ortho_canonical_num_complex = ao_num
+    call ortho_canonical_complex(ao_overlap,size(ao_overlap,1), &
+      ao_num,ao_ortho_canonical_coef_complex,size(ao_ortho_canonical_coef_complex,1), &
+      ao_ortho_canonical_num_complex,lin_dep_cutoff)
+
+
+  else
+
+    complex*16, allocatable :: S(:,:)
+
+    allocate(S(ao_cart_to_sphe_num,ao_cart_to_sphe_num))
+    S = (0.d0,0.d0)
+    do i=1,ao_cart_to_sphe_num
+      S(i,i) = (1.d0,0.d0)
+    enddo
+
+    ao_ortho_canonical_num_complex = ao_cart_to_sphe_num
+    call ortho_canonical_complex(ao_cart_to_sphe_overlap_complex, size(ao_cart_to_sphe_overlap_complex,1), &
+      ao_cart_to_sphe_num, S, size(S,1), ao_ortho_canonical_num_complex,lin_dep_cutoff)
+
+    call zgemm('N','N', ao_num, ao_ortho_canonical_num_complex, ao_cart_to_sphe_num, (1.d0,0.d0), &
+      ao_cart_to_sphe_coef_complex, size(ao_cart_to_sphe_coef_complex,1), &
+      S, size(S,1), &
+      (0.d0,0.d0), ao_ortho_canonical_coef_complex, size(ao_ortho_canonical_coef_complex,1))
+
+    deallocate(S)
+  endif
+END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, ao_ortho_canonical_overlap_complex, (ao_ortho_canonical_num_complex,ao_ortho_canonical_num_complex)]
+  implicit none
+  BEGIN_DOC
+! overlap matrix of the ao_ortho_canonical.
+! Expected to be the Identity
+  END_DOC
+  integer                        :: i,j,k,l
+  complex*16               :: c
+  do j=1, ao_ortho_canonical_num_complex
+    do i=1, ao_ortho_canonical_num_complex
+      ao_ortho_canonical_overlap_complex(i,j) = (0.d0,0.d0)
+    enddo
+  enddo
+  do j=1, ao_ortho_canonical_num_complex
+    do k=1, ao_num
+      c = (0.d0,0.d0)
+      do l=1, ao_num
+        c +=  conjg(ao_ortho_canonical_coef_complex(l,j)) * ao_overlap_complex(l,k)
+      enddo
+      do i=1, ao_ortho_canonical_num_complex
+        ao_ortho_canonical_overlap_complex(i,j) += ao_ortho_canonical_coef_complex(k,i) * c
+      enddo
+    enddo
+  enddo
+END_PROVIDER
diff --git a/src/ao_one_e_ints/ao_ortho_cano_kpts.irp.f b/src/ao_one_e_ints/ao_ortho_cano_kpts.irp.f
new file mode 100644
index 00000000..acfae4f8
--- /dev/null
+++ b/src/ao_one_e_ints/ao_ortho_cano_kpts.irp.f
@@ -0,0 +1,196 @@
+!todo: add kpts
+
+ BEGIN_PROVIDER [ complex*16, ao_cart_to_sphe_coef_kpts, (ao_num_per_kpt,ao_num_per_kpt)]
+&BEGIN_PROVIDER [ integer, ao_cart_to_sphe_num_per_kpt ]
+  implicit none
+  BEGIN_DOC
+! Coefficients to go from cartesian to spherical coordinates in the current
+! basis set
+  END_DOC
+  integer :: i
+  integer, external              :: ao_power_index
+  integer                        :: ibegin,j,k
+  integer                        :: prev
+  prev = 0
+  ao_cart_to_sphe_coef_kpts(:,:) = (0.d0,0.d0)
+  ! Assume order provided by ao_power_index
+  i = 1
+  ao_cart_to_sphe_num_per_kpt = 0
+  do while (i <= ao_num_per_kpt)
+    select case ( ao_l(i) )
+      case (0)
+        ao_cart_to_sphe_num_per_kpt += 1
+        ao_cart_to_sphe_coef_kpts(i,ao_cart_to_sphe_num_per_kpt) = (1.d0,0.d0)
+        i += 1
+      BEGIN_TEMPLATE
+      case ($SHELL)
+        if (ao_power(i,1) == $SHELL) then
+          do k=1,size(cart_to_sphe_$SHELL,2)
+            do j=1,size(cart_to_sphe_$SHELL,1)
+              ao_cart_to_sphe_coef_kpts(i+j-1,ao_cart_to_sphe_num_per_kpt+k) = dcmplx(cart_to_sphe_$SHELL(j,k),0.d0)
+            enddo
+          enddo
+          i += size(cart_to_sphe_$SHELL,1)
+          ao_cart_to_sphe_num_per_kpt += size(cart_to_sphe_$SHELL,2)
+        endif
+      SUBST [ SHELL ]
+        1;;
+        2;;
+        3;;
+        4;;
+        5;;
+        6;;
+        7;;
+        8;;
+        9;;
+      END_TEMPLATE
+      case default
+        stop 'Error in ao_cart_to_sphe_kpts : angular momentum too high'
+    end select
+  enddo
+
+END_PROVIDER
+!BEGIN_PROVIDER [ integer, ao_cart_to_sphe_num_per_kpt ]
+!  implicit none
+!  ao_cart_to_sphe_num_per_kpt = ao_cart_to_sphe_num / kpt_num
+!END_PROVIDER
+!
+!BEGIN_PROVIDER [ complex*16, ao_cart_to_sphe_coef_kpts, (ao_num_per_kpt,ao_cart_to_sphe_num_per_kpt) ]
+! implicit none
+! BEGIN_DOC
+! ! complex version of ao_cart_to_sphe_coef for one k-point
+! END_DOC
+! call zlacp2('A',ao_num_per_kpt,ao_cart_to_sphe_num_per_kpt, &
+!        ao_cart_to_sphe_coef,size(ao_cart_to_sphe_coef,1), &
+!        ao_cart_to_sphe_coef_kpts,size(ao_cart_to_sphe_coef_kpts,1))
+!END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, ao_cart_to_sphe_overlap_kpts, (ao_cart_to_sphe_num_per_kpt,ao_cart_to_sphe_num_per_kpt,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! AO overlap matrix in the spherical basis set
+  END_DOC
+  integer :: k
+  complex*16, allocatable :: S(:,:)
+  allocate (S(ao_cart_to_sphe_num_per_kpt,ao_num_per_kpt))
+
+  !todo: call with (:,:,k) vs (1,1,k)? is there a difference? does one create a temporary array?
+  do k=1, kpt_num
+   
+    call zgemm('T','N',ao_cart_to_sphe_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt, (1.d0,0.d0), &
+      ao_cart_to_sphe_coef_kpts,size(ao_cart_to_sphe_coef_kpts,1), &
+      ao_overlap_kpts(:,:,k),size(ao_overlap_kpts,1), (0.d0,0.d0), &
+      S, size(S,1))
+   
+    call zgemm('N','N',ao_cart_to_sphe_num_per_kpt,ao_cart_to_sphe_num_per_kpt,ao_num_per_kpt, (1.d0,0.d0), &
+      S, size(S,1), &
+      ao_cart_to_sphe_coef_kpts,size(ao_cart_to_sphe_coef_kpts,1), (0.d0,0.d0), &
+      ao_cart_to_sphe_overlap_kpts(:,:,k),size(ao_cart_to_sphe_overlap_kpts,1))
+  enddo 
+  deallocate(S)
+
+END_PROVIDER
+
+
+
+
+BEGIN_PROVIDER [ complex*16, ao_ortho_cano_coef_inv_kpts, (ao_num_per_kpt,ao_num_per_kpt, kpt_num)]
+ implicit none
+ BEGIN_DOC
+! ao_ortho_canonical_coef_complex^(-1)
+ END_DOC
+ integer :: k
+ do k=1, kpt_num
+   call get_inverse_complex(ao_ortho_canonical_coef_kpts,size(ao_ortho_canonical_coef_kpts,1),&
+     ao_num_per_kpt, ao_ortho_cano_coef_inv_kpts, size(ao_ortho_cano_coef_inv_kpts,1))
+ enddo
+END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, ao_ortho_canonical_coef_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num)]
+&BEGIN_PROVIDER [ integer, ao_ortho_canonical_num_per_kpt, (kpt_num) ]
+&BEGIN_PROVIDER [ integer, ao_ortho_canonical_num_per_kpt_max ]
+  implicit none
+  BEGIN_DOC
+! TODO: ao_ortho_canonical_num_complex should be the same as the real version
+!       maybe if the providers weren't linked we could avoid making a complex one?
+! matrix of the coefficients of the mos generated by the
+! orthonormalization by the S^{-1/2} canonical transformation of the aos
+! ao_ortho_canonical_coef(i,j) = coefficient of the ith ao on the jth ao_ortho_canonical orbital
+  END_DOC
+  integer :: i,k
+  ao_ortho_canonical_coef_kpts = (0.d0,0.d0)
+  do k=1,kpt_num
+    do i=1,ao_num
+      ao_ortho_canonical_coef_kpts(i,i,k) = (1.d0,0.d0)
+    enddo
+  enddo
+
+!call ortho_lowdin(ao_overlap,size(ao_overlap,1),ao_num,ao_ortho_canonical_coef,size(ao_ortho_canonical_coef,1),ao_num)
+!ao_ortho_canonical_num=ao_num
+!return
+
+  if (ao_cartesian) then
+
+    ao_ortho_canonical_num_per_kpt = ao_num_per_kpt
+    do k=1,kpt_num
+      call ortho_canonical_complex(ao_overlap_kpts(:,:,k),size(ao_overlap_kpts,1), &
+        ao_num_per_kpt,ao_ortho_canonical_coef_kpts(:,:,k),size(ao_ortho_canonical_coef_kpts,1), &
+        ao_ortho_canonical_num_per_kpt(k),lin_dep_cutoff)
+    enddo
+
+
+  else
+
+    complex*16, allocatable :: S(:,:)
+
+    allocate(S(ao_cart_to_sphe_num_per_kpt,ao_cart_to_sphe_num_per_kpt))
+    do k=1,kpt_num
+      S = (0.d0,0.d0)
+      do i=1,ao_cart_to_sphe_num_per_kpt
+        S(i,i) = (1.d0,0.d0)
+      enddo
+
+      ao_ortho_canonical_num_per_kpt(k) = ao_cart_to_sphe_num_per_kpt
+      call ortho_canonical_complex(ao_cart_to_sphe_overlap_kpts, size(ao_cart_to_sphe_overlap_kpts,1), &
+        ao_cart_to_sphe_num_per_kpt, S, size(S,1), ao_ortho_canonical_num_per_kpt(k),lin_dep_cutoff)
+
+      call zgemm('N','N', ao_num_per_kpt, ao_ortho_canonical_num_per_kpt(k), ao_cart_to_sphe_num_per_kpt, (1.d0,0.d0), &
+        ao_cart_to_sphe_coef_kpts, size(ao_cart_to_sphe_coef_kpts,1), &
+        S, size(S,1), &
+        (0.d0,0.d0), ao_ortho_canonical_coef_kpts(:,:,k), size(ao_ortho_canonical_coef_kpts,1))
+    enddo
+
+    deallocate(S)
+  endif
+  ao_ortho_canonical_num_per_kpt_max = maxval(ao_ortho_canonical_num_per_kpt)
+END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, ao_ortho_canonical_overlap_kpts, (ao_ortho_canonical_num_per_kpt_max,ao_ortho_canonical_num_per_kpt_max,kpt_num)]
+  implicit none
+  BEGIN_DOC
+! overlap matrix of the ao_ortho_canonical.
+! Expected to be the Identity
+  END_DOC
+  integer                        :: i,j,k,l,kk
+  complex*16               :: c
+  do k=1,kpt_num
+    do j=1, ao_ortho_canonical_num_per_kpt_max
+      do i=1, ao_ortho_canonical_num_per_kpt_max
+        ao_ortho_canonical_overlap_kpts(i,j,k) = (0.d0,0.d0)
+      enddo
+    enddo
+  enddo
+  do kk=1,kpt_num
+    do j=1, ao_ortho_canonical_num_per_kpt(kk)
+      do k=1, ao_num_per_kpt
+        c = (0.d0,0.d0)
+        do l=1, ao_num_per_kpt
+          c +=  conjg(ao_ortho_canonical_coef_kpts(l,j,kk)) * ao_overlap_kpts(l,k,kk)
+        enddo
+        do i=1, ao_ortho_canonical_num_per_kpt(kk)
+          ao_ortho_canonical_overlap_kpts(i,j,kk) += ao_ortho_canonical_coef_kpts(k,i,kk) * c
+        enddo
+      enddo
+    enddo
+  enddo
+END_PROVIDER
diff --git a/src/ao_one_e_ints/ao_overlap.irp.f b/src/ao_one_e_ints/ao_overlap.irp.f
index 11c95e42..7b51fb54 100644
--- a/src/ao_one_e_ints/ao_overlap.irp.f
+++ b/src/ao_one_e_ints/ao_overlap.irp.f
@@ -70,23 +70,69 @@
 
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, ao_overlap_imag, (ao_num, ao_num) ]
- implicit none
- BEGIN_DOC
- ! Imaginary part of the overlap
- END_DOC
- ao_overlap_imag = 0.d0
-END_PROVIDER
+!BEGIN_PROVIDER [ double precision, ao_overlap_imag, (ao_num, ao_num) ]
+! implicit none
+! BEGIN_DOC
+! ! Imaginary part of the overlap
+! END_DOC
+!  if (read_ao_integrals_overlap) then
+!     call ezfio_get_ao_one_e_ints_ao_integrals_overlap_imag(ao_overlap_imag(1:ao_num, 1:ao_num))
+!     print *,  'AO overlap integrals read from disk'
+!  else
+!    ao_overlap_imag = 0.d0
+!  endif
+!  if (write_ao_integrals_overlap) then
+!     call ezfio_set_ao_one_e_ints_ao_integrals_overlap_imag(ao_overlap_imag(1:ao_num, 1:ao_num))
+!     print *,  'AO overlap integrals written to disk'
+!  endif
+!END_PROVIDER
 
 BEGIN_PROVIDER [ complex*16, ao_overlap_complex, (ao_num, ao_num) ]
   implicit none
   BEGIN_DOC
   ! Overlap for complex AOs
   END_DOC
-  integer                        :: i,j
-  do j=1,ao_num
-    do i=1,ao_num
-      ao_overlap_complex(i,j) = dcmplx( ao_overlap(i,j), ao_overlap_imag(i,j) )
+  if (read_ao_integrals_overlap) then
+    call ezfio_get_ao_one_e_ints_ao_integrals_overlap_complex(ao_overlap_complex)
+    print *,  'AO overlap integrals read from disk'
+  else
+    print*,'complex AO overlap ints must be provided',irp_here
+  endif
+  if (write_ao_integrals_overlap) then
+     call ezfio_set_ao_one_e_ints_ao_integrals_overlap_complex(ao_overlap_complex)
+     print *,  'AO overlap integrals written to disk'
+  endif
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, ao_overlap_kpts, (ao_num_per_kpt, ao_num_per_kpt, kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Overlap for complex AOs
+  END_DOC
+  if (read_ao_integrals_overlap) then
+    call ezfio_get_ao_one_e_ints_ao_integrals_overlap_kpts(ao_overlap_kpts)
+    print *,  'AO overlap integrals read from disk'
+  else
+    print*,'complex AO overlap ints must be provided',irp_here
+  endif
+  if (write_ao_integrals_overlap) then
+     call ezfio_set_ao_one_e_ints_ao_integrals_overlap_kpts(ao_overlap_kpts)
+     print *,  'AO overlap integrals written to disk'
+  endif
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, ao_overlap_kpts_real, (ao_num_per_kpt, ao_num_per_kpt, kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Overlap for complex AOs
+  END_DOC
+  integer :: i,j,k
+  do k=1,kpt_num
+    do j=1,ao_num_per_kpt
+      do i=1,ao_num_per_kpt
+        ao_overlap_kpts_real(i,j,k) = dble(ao_overlap_kpts(i,j,k))
+      enddo
     enddo
   enddo
 END_PROVIDER
@@ -109,10 +155,15 @@ BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ]
   double precision :: A_center(3), B_center(3)
   integer :: power_A(3), power_B(3)
   double precision :: lower_exp_val, dx
-  if (is_periodic) then
-    do j=1,ao_num
-      do i= 1,ao_num
-        ao_overlap_abs(i,j)= cdabs(ao_overlap_complex(i,j))
+  if (is_complex) then
+    ao_overlap_abs = 0.d0
+    integer :: k, ishift
+    do k=1,kpt_num
+      ishift = (k-1)*ao_num_per_kpt
+      do j=1,ao_num_per_kpt
+        do i= 1,ao_num_per_kpt
+          ao_overlap_abs(ishift+i,ishift+j)= cdabs(ao_overlap_kpts(i,j,k))
+        enddo
       enddo
     enddo
   else
@@ -175,6 +226,18 @@ BEGIN_PROVIDER [ complex*16, S_inv_complex,(ao_num,ao_num) ]
     ao_num,ao_num,S_inv_complex,size(S_inv_complex,1),lin_dep_cutoff)
 END_PROVIDER
 
+BEGIN_PROVIDER [ complex*16, S_inv_kpts,(ao_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+ implicit none
+ BEGIN_DOC
+! Inverse of the overlap matrix
+ END_DOC
+ integer :: k
+ do k=1,kpt_num
+  call get_pseudo_inverse_complex(ao_overlap_kpts(1,1,k), &
+     size(ao_overlap_kpts,1),ao_num_per_kpt,ao_num_per_kpt,S_inv_kpts(1,1,k),size(S_inv_kpts,1),lin_dep_cutoff)
+ enddo
+END_PROVIDER
+
 BEGIN_PROVIDER [ double precision, S_half_inv, (AO_num,AO_num) ]
 
   BEGIN_DOC
@@ -233,6 +296,125 @@ BEGIN_PROVIDER [ double precision, S_half_inv, (AO_num,AO_num) ]
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ complex*16, S_half_inv_complex, (AO_num,AO_num) ]
+
+  BEGIN_DOC
+!   :math:`X = S^{-1/2}` obtained by SVD
+  END_DOC
+
+  implicit none
+
+  integer                         :: num_linear_dependencies
+  integer                         :: LDA, LDC
+  double precision, allocatable   :: D(:)
+  complex*16, allocatable   :: U(:,:),Vt(:,:)
+  integer                         :: info, i, j, k
+  double precision, parameter     :: threshold_overlap_AO_eigenvalues = 1.d-6
+
+  LDA = size(AO_overlap_complex,1)
+  LDC = size(S_half_inv_complex,1)
+
+  allocate(         &
+    U(LDC,AO_num),  &
+    Vt(LDA,AO_num), &
+    D(AO_num))
+
+  call svd_complex(    &
+       ao_overlap_complex,LDA, &
+       U,LDC,          &
+       D,              &
+       Vt,LDA,         &
+       AO_num,AO_num)
+
+  num_linear_dependencies = 0
+  do i=1,AO_num
+    print*,D(i)
+    if(abs(D(i)) <= threshold_overlap_AO_eigenvalues) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0/sqrt(D(i))
+    endif
+    do j=1,AO_num
+      S_half_inv_complex(j,i) = 0.d0
+    enddo
+  enddo
+  write(*,*) 'linear dependencies',num_linear_dependencies
+
+  do k=1,AO_num
+    if(D(k) /= 0.d0) then
+      do j=1,AO_num
+        do i=1,AO_num
+          S_half_inv_complex(i,j) = S_half_inv_complex(i,j) + U(i,k)*D(k)*Vt(k,j)
+        enddo
+      enddo
+    endif
+  enddo
+
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, S_half_inv_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+
+  BEGIN_DOC
+!   :math:`X = S^{-1/2}` obtained by SVD
+  END_DOC
+
+  implicit none
+
+  integer                         :: num_linear_dependencies
+  integer                         :: LDA, LDC
+  double precision, allocatable   :: D(:)
+  complex*16, allocatable   :: U(:,:),Vt(:,:)
+  integer                         :: info, i, j, k,kk
+  double precision, parameter     :: threshold_overlap_AO_eigenvalues = 1.d-6
+
+  LDA = size(ao_overlap_kpts,1)
+  LDC = size(s_half_inv_kpts,1)
+
+  allocate(         &
+    U(LDC,ao_num_per_kpt),  &
+    Vt(LDA,ao_num_per_kpt), &
+    D(ao_num_per_kpt))
+  
+  do kk=1,kpt_num
+    call svd_complex(    &
+         ao_overlap_kpts(1,1,kk),LDA, &
+         U,LDC,          &
+         D,              &
+         Vt,LDA,         &
+         ao_num_per_kpt,ao_num_per_kpt)
+  
+    num_linear_dependencies = 0
+    do i=1,ao_num_per_kpt
+      print*,D(i)
+      if(abs(D(i)) <= threshold_overlap_AO_eigenvalues) then
+        D(i) = 0.d0
+        num_linear_dependencies += 1
+      else
+        ASSERT (D(i) > 0.d0)
+        D(i) = 1.d0/sqrt(D(i))
+      endif
+      do j=1,ao_num_per_kpt
+        S_half_inv_kpts(j,i,kk) = 0.d0
+      enddo
+    enddo
+    write(*,*) 'linear dependencies, k: ',num_linear_dependencies,', ',kk 
+  
+    do k=1,ao_num_per_kpt
+      if(D(k) /= 0.d0) then
+        do j=1,ao_num_per_kpt
+          do i=1,ao_num_per_kpt
+            S_half_inv_kpts(i,j,kk) = S_half_inv_kpts(i,j,kk) + U(i,k)*D(k)*Vt(k,j)
+          enddo
+        enddo
+      endif
+    enddo
+  enddo
+
+END_PROVIDER
+
 
 BEGIN_PROVIDER [ double precision, S_half, (ao_num,ao_num)  ]
  implicit none
@@ -268,3 +450,73 @@ BEGIN_PROVIDER [ double precision, S_half, (ao_num,ao_num)  ]
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ complex*16, S_half_complex, (ao_num,ao_num)  ]
+ implicit none
+ BEGIN_DOC
+ ! :math:`S^{1/2}`
+ END_DOC
+
+  integer :: i,j,k
+  complex*16, allocatable  :: U(:,:)
+  complex*16, allocatable  :: Vt(:,:)
+  double precision, allocatable  :: D(:)
+
+  allocate(U(ao_num,ao_num),Vt(ao_num,ao_num),D(ao_num))
+
+  call svd_complex(ao_overlap_complex,size(ao_overlap_complex,1),U,size(U,1),D,Vt,size(Vt,1),ao_num,ao_num)
+
+  do i=1,ao_num
+    D(i) = dsqrt(D(i))
+    do j=1,ao_num
+      S_half_complex(j,i) = (0.d0,0.d0)
+    enddo
+  enddo
+
+  do k=1,ao_num
+      do j=1,ao_num
+        do i=1,ao_num
+          S_half_complex(i,j) = S_half_complex(i,j) + U(i,k)*D(k)*Vt(k,j)
+        enddo
+      enddo
+  enddo
+
+  deallocate(U,Vt,D)
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, S_half_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num)  ]
+ implicit none
+ BEGIN_DOC
+ ! :math:`S^{1/2}`
+ END_DOC
+
+  integer :: i,j,k,kk
+  complex*16, allocatable  :: U(:,:)
+  complex*16, allocatable  :: Vt(:,:)
+  double precision, allocatable  :: D(:)
+
+  allocate(U(ao_num_per_kpt,ao_num_per_kpt),Vt(ao_num_per_kpt,ao_num_per_kpt),D(ao_num_per_kpt))
+
+  do kk=1,kpt_num
+    call svd_complex(ao_overlap_kpts(1,1,kk),size(ao_overlap_kpts,1),U,size(U,1),D,Vt,size(Vt,1),ao_num_per_kpt,ao_num_per_kpt)
+  
+    do i=1,ao_num_per_kpt
+      D(i) = dsqrt(D(i))
+      do j=1,ao_num_per_kpt
+        S_half_kpts(j,i,kk) = (0.d0,0.d0)
+      enddo
+    enddo
+  
+    do k=1,ao_num_per_kpt
+        do j=1,ao_num_per_kpt
+          do i=1,ao_num_per_kpt
+            S_half_kpts(i,j,kk) = S_half_kpts(i,j,kk) + U(i,k)*D(k)*Vt(k,j)
+          enddo
+        enddo
+    enddo
+  enddo
+
+  deallocate(U,Vt,D)
+
+END_PROVIDER
+
diff --git a/src/ao_one_e_ints/kin_ao_ints.irp.f b/src/ao_one_e_ints/kin_ao_ints.irp.f
index 4f117deb..18f866d2 100644
--- a/src/ao_one_e_ints/kin_ao_ints.irp.f
+++ b/src/ao_one_e_ints/kin_ao_ints.irp.f
@@ -149,7 +149,29 @@ BEGIN_PROVIDER [double precision, ao_kinetic_integrals, (ao_num,ao_num)]
   endif
 END_PROVIDER
 
-BEGIN_PROVIDER [double precision, ao_kinetic_integrals_imag, (ao_num,ao_num)]
+!BEGIN_PROVIDER [double precision, ao_kinetic_integrals_imag, (ao_num,ao_num)]
+!  implicit none
+!  BEGIN_DOC
+!  ! Kinetic energy integrals in the |AO| basis.
+!  !
+!  ! $\langle \chi_i |\hat{T}| \chi_j \rangle$
+!  !
+!  END_DOC
+!  integer                        :: i,j,k,l
+!
+!  if (read_ao_integrals_kinetic) then
+!    call ezfio_get_ao_one_e_ints_ao_integrals_kinetic_imag(ao_kinetic_integrals_imag)
+!    print *,  'AO kinetic integrals read from disk'
+!  else
+!    print *,  irp_here, ': Not yet implemented'
+!  endif
+!  if (write_ao_integrals_kinetic) then
+!    call ezfio_set_ao_one_e_ints_ao_integrals_kinetic_imag(ao_kinetic_integrals_imag)
+!    print *,  'AO kinetic integrals written to disk'
+!  endif
+!END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, ao_kinetic_integrals_complex, (ao_num,ao_num)]
   implicit none
   BEGIN_DOC
   ! Kinetic energy integrals in the |AO| basis.
@@ -157,17 +179,36 @@ BEGIN_PROVIDER [double precision, ao_kinetic_integrals_imag, (ao_num,ao_num)]
   ! $\langle \chi_i |\hat{T}| \chi_j \rangle$
   !
   END_DOC
-  integer                        :: i,j,k,l
-
   if (read_ao_integrals_kinetic) then
-    call ezfio_get_ao_one_e_ints_ao_integrals_kinetic(ao_kinetic_integrals_imag)
+    call ezfio_get_ao_one_e_ints_ao_integrals_kinetic_complex(ao_kinetic_integrals_complex)
     print *,  'AO kinetic integrals read from disk'
   else
     print *,  irp_here, ': Not yet implemented'
+    stop -1
   endif
   if (write_ao_integrals_kinetic) then
-    call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(ao_kinetic_integrals_imag)
+    call ezfio_set_ao_one_e_ints_ao_integrals_kinetic_complex(ao_kinetic_integrals_complex)
     print *,  'AO kinetic integrals written to disk'
   endif
 END_PROVIDER
 
+BEGIN_PROVIDER [complex*16, ao_kinetic_integrals_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Kinetic energy integrals in the |AO| basis.
+  !
+  ! $\langle \chi_i |\hat{T}| \chi_j \rangle$
+  !
+  END_DOC
+  if (read_ao_integrals_kinetic) then
+    call ezfio_get_ao_one_e_ints_ao_integrals_kinetic_kpts(ao_kinetic_integrals_kpts)
+    print *,  'AO kinetic integrals read from disk'
+  else
+    print *,  irp_here, ': Not yet implemented'
+    stop -1
+  endif
+  if (write_ao_integrals_kinetic) then
+    call ezfio_set_ao_one_e_ints_ao_integrals_kinetic_kpts(ao_kinetic_integrals_kpts)
+    print *,  'AO kinetic integrals written to disk'
+  endif
+END_PROVIDER
diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f
index 4108ce71..16697739 100644
--- a/src/ao_one_e_ints/pot_ao_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_ints.irp.f
@@ -91,28 +91,61 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)]
 
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, ao_integrals_n_e_imag, (ao_num,ao_num)]
+!BEGIN_PROVIDER [ double precision, ao_integrals_n_e_imag, (ao_num,ao_num)]
+!  BEGIN_DOC
+!  !  Nucleus-electron interaction, in the |AO| basis set.
+!  !
+!  !  :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle`
+!  END_DOC
+!  implicit none
+!  double precision               :: alpha, beta, gama, delta
+!  integer                        :: num_A,num_B
+!  double precision               :: A_center(3),B_center(3),C_center(3)
+!  integer                        :: power_A(3),power_B(3)
+!  integer                        :: i,j,k,l,n_pt_in,m
+!  double precision               :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
+!
+!  if (read_ao_integrals_n_e) then
+!    call ezfio_get_ao_one_e_ints_ao_integrals_n_e_imag(ao_integrals_n_e_imag)
+!    print *,  'AO N-e integrals read from disk'
+!  else
+!   print *,  irp_here, ': Not yet implemented'
+!  endif
+!END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, ao_integrals_n_e_complex, (ao_num,ao_num)]
+  implicit none
   BEGIN_DOC
   !  Nucleus-electron interaction, in the |AO| basis set.
   !
   !  :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle`
   END_DOC
-  implicit none
-  double precision               :: alpha, beta, gama, delta
-  integer                        :: num_A,num_B
-  double precision               :: A_center(3),B_center(3),C_center(3)
-  integer                        :: power_A(3),power_B(3)
-  integer                        :: i,j,k,l,n_pt_in,m
-  double precision               :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
-
+  print*,'error: ',irp_here
+  write(*,*) "test"
+  ao_integrals_n_e_complex(999,999) = 0.d0
+  call abort()
   if (read_ao_integrals_n_e) then
-    call ezfio_get_ao_one_e_ints_ao_integrals_n_e_imag(ao_integrals_n_e_imag)
+    call ezfio_get_ao_one_e_ints_ao_integrals_n_e_complex(ao_integrals_n_e_complex)
     print *,  'AO N-e integrals read from disk'
   else
    print *,  irp_here, ': Not yet implemented'
   endif
 END_PROVIDER
 
+BEGIN_PROVIDER [complex*16, ao_integrals_n_e_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  !  Nucleus-electron interaction, in the |AO| basis set.
+  !
+  !  :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle`
+  END_DOC
+  if (read_ao_integrals_n_e) then
+    call ezfio_get_ao_one_e_ints_ao_integrals_n_e_kpts(ao_integrals_n_e_kpts)
+    print *,  'AO N-e integrals read from disk'
+  else
+   print *,  irp_here, ': Not yet implemented'
+  endif
+END_PROVIDER
 
 BEGIN_PROVIDER [ double precision, ao_integrals_n_e_per_atom, (ao_num,ao_num,nucl_num)]
   BEGIN_DOC
diff --git a/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f b/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f
index 988bbe0a..1bf44d21 100644
--- a/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f
@@ -27,6 +27,59 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integrals, (ao_num,ao_num)]
 
 END_PROVIDER
 
+!BEGIN_PROVIDER [ double precision, ao_pseudo_integrals_imag, (ao_num, ao_num) ]
+! implicit none
+! BEGIN_DOC
+! ! Imaginary part of the pseudo_integrals
+! END_DOC
+!  if (read_ao_integrals_pseudo) then
+!     call ezfio_get_ao_one_e_ints_ao_integrals_pseudo_imag(ao_pseudo_integrals_imag(1:ao_num, 1:ao_num))
+!     print *,  'AO pseudo_integrals integrals read from disk'
+!  else
+!    ao_pseudo_integrals_imag = 0.d0
+!  endif
+!  if (write_ao_integrals_pseudo) then
+!     call ezfio_set_ao_one_e_ints_ao_integrals_pseudo_imag(ao_pseudo_integrals_imag(1:ao_num, 1:ao_num))
+!     print *,  'AO pseudo_integrals integrals written to disk'
+!  endif
+!END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, ao_pseudo_integrals_complex, (ao_num, ao_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Overlap for complex AOs
+  END_DOC
+  if (read_ao_integrals_pseudo) then
+     call ezfio_get_ao_one_e_ints_ao_integrals_pseudo_complex(ao_pseudo_integrals_complex)
+     print *,  'AO pseudo_integrals integrals read from disk'
+  else
+    print*,irp_here,'not implemented'
+    stop -1
+  endif
+  if (write_ao_integrals_pseudo) then
+     call ezfio_set_ao_one_e_ints_ao_integrals_pseudo_complex(ao_pseudo_integrals_complex)
+     print *,  'AO pseudo_integrals integrals written to disk'
+  endif
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, ao_pseudo_integrals_kpts, (ao_num_per_kpt, ao_num_per_kpt, kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Overlap for complex AOs
+  END_DOC
+  if (read_ao_integrals_pseudo) then
+     call ezfio_get_ao_one_e_ints_ao_integrals_pseudo_kpts(ao_pseudo_integrals_kpts)
+     print *,  'AO pseudo_integrals integrals read from disk'
+  else
+    print*,irp_here,'not implemented'
+    stop -1
+  endif
+  if (write_ao_integrals_pseudo) then
+     call ezfio_set_ao_one_e_ints_ao_integrals_pseudo_kpts(ao_pseudo_integrals_kpts)
+     print *,  'AO pseudo_integrals integrals written to disk'
+  endif
+END_PROVIDER
+
 BEGIN_PROVIDER [ double precision, ao_pseudo_integrals_local, (ao_num,ao_num)]
   implicit none
   BEGIN_DOC
diff --git a/src/ao_one_e_ints/screening.irp.f b/src/ao_one_e_ints/screening.irp.f
index 1bbe3c73..bc95ea86 100644
--- a/src/ao_one_e_ints/screening.irp.f
+++ b/src/ao_one_e_ints/screening.irp.f
@@ -3,7 +3,7 @@ logical function ao_one_e_integral_zero(i,k)
   integer, intent(in) :: i,k
 
   ao_one_e_integral_zero = .False.
-  if (.not.((io_ao_integrals_overlap/='None').or.is_periodic)) then
+  if (.not.((io_ao_integrals_overlap/='None').or.is_complex)) then
     if (ao_overlap_abs(i,k) < ao_integrals_threshold) then
         ao_one_e_integral_zero = .True.
         return
diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg
index b18c65d1..5b50f718 100644
--- a/src/ao_two_e_ints/EZFIO.cfg
+++ b/src/ao_two_e_ints/EZFIO.cfg
@@ -18,3 +18,20 @@ interface: ezfio,provider,ocaml
 default: False
 ezfio_name: direct
 
+[df_num]
+type: integer
+doc: Size of df basis
+interface: ezfio, provider
+
+[io_df_ao_integrals]
+type: Disk_access
+doc: Read/Write df |AO| integrals from/to disk [ Write | Read | None ] 
+interface: ezfio,provider,ocaml
+default: None
+
+[df_ao_integrals_complex]
+type: double precision
+doc: Real part of the df integrals over AOs
+size: (2,ao_basis.ao_num_per_kpt,ao_basis.ao_num_per_kpt,ao_two_e_ints.df_num,nuclei.kpt_pair_num)
+interface: ezfio
+
diff --git a/src/ao_two_e_ints/df_ao_ints.irp.f b/src/ao_two_e_ints/df_ao_ints.irp.f
new file mode 100644
index 00000000..870f81be
--- /dev/null
+++ b/src/ao_two_e_ints/df_ao_ints.irp.f
@@ -0,0 +1,233 @@
+BEGIN_PROVIDER [complex*16, df_ao_integrals_complex, (ao_num_per_kpt,ao_num_per_kpt,df_num,kpt_pair_num)]
+  implicit none
+  BEGIN_DOC
+  !  df AO integrals
+  END_DOC
+  integer :: i,j,k,l
+
+  if (read_df_ao_integrals) then
+    call ezfio_get_ao_two_e_ints_df_ao_integrals_complex(df_ao_integrals_complex)
+    print *,  'df AO integrals read from disk'
+  else
+    print*,'df ao integrals must be provided',irp_here
+    stop -1
+  endif
+
+  if (write_df_ao_integrals) then
+    call ezfio_set_ao_two_e_ints_df_ao_integrals_complex(df_ao_integrals_complex)
+    print *,  'df AO integrals written to disk'
+  endif
+
+END_PROVIDER
+
+
+subroutine ao_map_fill_from_df
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! fill ao bielec integral map using 3-index df integrals
+  END_DOC
+
+  integer :: i,k,j,l
+  integer :: ki,kk,kj,kl
+  integer :: ii,ik,ij,il
+  integer :: kikk2,kjkl2,jl2,ik2
+  integer :: i_ao,j_ao,i_df
+
+  complex*16,allocatable :: ints_ik(:,:,:), ints_jl(:,:,:), ints_ikjl(:,:,:,:)
+
+  complex*16 :: integral
+  integer                        :: n_integrals_1, n_integrals_2
+  integer                        :: size_buffer
+  integer(key_kind),allocatable  :: buffer_i_1(:), buffer_i_2(:)
+  real(integral_kind),allocatable :: buffer_values_1(:), buffer_values_2(:)
+  double precision               :: tmp_re,tmp_im
+  integer                        :: ao_num_kpt_2
+
+  double precision               :: cpu_1, cpu_2, wall_1, wall_2, wall_0
+  double precision               :: map_mb
+
+  logical :: use_map1
+  integer(keY_kind) :: idx_tmp
+  double precision :: sign
+
+  ao_num_kpt_2 = ao_num_per_kpt * ao_num_per_kpt
+
+  size_buffer = min(ao_num_per_kpt*ao_num_per_kpt*ao_num_per_kpt,16000000)
+  print*, 'Providing the ao_bielec integrals from 3-index df integrals'
+  call write_time(6)
+!  call ezfio_set_integrals_bielec_disk_access_mo_integrals('Write')
+!  TOUCH read_mo_integrals read_ao_integrals write_mo_integrals write_ao_integrals 
+
+  call wall_time(wall_1)
+  call cpu_time(cpu_1)
+
+  allocate( ints_jl(ao_num_per_kpt,ao_num_per_kpt,df_num))
+
+  wall_0 = wall_1
+  do kl=1, kpt_num
+    do kj=1, kl
+      call idx2_tri_int(kj,kl,kjkl2)
+      if (kj < kl) then
+        do i_ao=1,ao_num_per_kpt
+          do j_ao=1,ao_num_per_kpt
+            do i_df=1,df_num
+              ints_jl(i_ao,j_ao,i_df) = dconjg(df_ao_integrals_complex(j_ao,i_ao,i_df,kjkl2))
+            enddo
+          enddo
+        enddo
+      else
+        ints_jl = df_ao_integrals_complex(:,:,:,kjkl2)
+      endif
+
+  !$OMP PARALLEL PRIVATE(i,k,j,l,ki,kk,ii,ik,ij,il,kikk2,jl2,ik2, &
+      !$OMP  ints_ik, ints_ikjl, i_ao, j_ao, i_df, &
+      !$OMP  n_integrals_1, buffer_i_1, buffer_values_1, &
+      !$OMP  n_integrals_2, buffer_i_2, buffer_values_2, &
+      !$OMP  idx_tmp, tmp_re, tmp_im, integral,sign,use_map1) &
+      !$OMP  DEFAULT(NONE)  &
+      !$OMP  SHARED(size_buffer, kpt_num, df_num, ao_num_per_kpt, ao_num_kpt_2, &
+      !$OMP  kl,kj,kjkl2,ints_jl, & 
+      !$OMP  kconserv, df_ao_integrals_complex, ao_integrals_threshold, ao_integrals_map, ao_integrals_map_2)
+  
+  allocate( &
+    ints_ik(ao_num_per_kpt,ao_num_per_kpt,df_num), &
+    ints_ikjl(ao_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt), &
+    buffer_i_1(size_buffer), &
+    buffer_i_2(size_buffer), &
+    buffer_values_1(size_buffer), &
+    buffer_values_2(size_buffer) &
+  )
+
+  !$OMP DO SCHEDULE(guided)
+      do kk=1,kl
+        ki=kconserv(kl,kk,kj)
+        if (ki>kl) cycle
+      !  if ((kl == kj) .and. (ki > kk)) cycle
+        call idx2_tri_int(ki,kk,kikk2)
+      !  if (kikk2 > kjkl2) cycle
+        if (ki < kk) then
+          do i_ao=1,ao_num_per_kpt
+            do j_ao=1,ao_num_per_kpt
+              do i_df=1,df_num
+                ints_ik(i_ao,j_ao,i_df) = dconjg(df_ao_integrals_complex(j_ao,i_ao,i_df,kikk2))
+              enddo
+            enddo
+          enddo
+!          ints_ik = conjg(reshape(df_mo_integral_array(:,:,:,kikk2),(/mo_num_per_kpt,mo_num_per_kpt,df_num/),order=(/2,1,3/)))
+        else
+          ints_ik = df_ao_integrals_complex(:,:,:,kikk2)
+        endif
+
+        call zgemm('N','T', ao_num_kpt_2, ao_num_kpt_2, df_num, &
+               (1.d0,0.d0), ints_ik, ao_num_kpt_2, &
+               ints_jl, ao_num_kpt_2, &
+               (0.d0,0.d0), ints_ikjl, ao_num_kpt_2)
+
+        n_integrals_1=0
+        n_integrals_2=0
+        do il=1,ao_num_per_kpt
+          l=il+(kl-1)*ao_num_per_kpt
+          do ij=1,ao_num_per_kpt
+            j=ij+(kj-1)*ao_num_per_kpt
+            if (j>l) exit
+            call idx2_tri_int(j,l,jl2)
+            do ik=1,ao_num_per_kpt
+              k=ik+(kk-1)*ao_num_per_kpt
+              if (k>l) exit
+              do ii=1,ao_num_per_kpt
+                i=ii+(ki-1)*ao_num_per_kpt
+                if ((j==l) .and. (i>k)) exit
+                call idx2_tri_int(i,k,ik2)
+                if (ik2 > jl2) exit
+                integral = ints_ikjl(ii,ik,ij,il)
+!                print*,i,k,j,l,real(integral),imag(integral)
+                if (cdabs(integral) < ao_integrals_threshold) then
+                  cycle
+                endif
+                call ao_two_e_integral_complex_map_idx_sign(i,j,k,l,use_map1,idx_tmp,sign)
+                tmp_re = dble(integral)
+                tmp_im = dimag(integral)
+                if (use_map1) then
+                  n_integrals_1 += 1
+                  buffer_i_1(n_integrals_1)=idx_tmp
+                  buffer_values_1(n_integrals_1)=tmp_re
+                  if (sign.ne.0.d0) then 
+                    n_integrals_1 += 1
+                    buffer_i_1(n_integrals_1)=idx_tmp+1
+                    buffer_values_1(n_integrals_1)=tmp_im*sign
+                  endif 
+                  if (n_integrals_1 >= size(buffer_i_1)-1) then
+                    call insert_into_ao_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1)
+                    n_integrals_1 = 0
+                  endif
+                else
+                  n_integrals_2 += 1
+                  buffer_i_2(n_integrals_2)=idx_tmp
+                  buffer_values_2(n_integrals_2)=tmp_re
+                  if (sign.ne.0.d0) then
+                    n_integrals_2 += 1
+                    buffer_i_2(n_integrals_2)=idx_tmp+1
+                    buffer_values_2(n_integrals_2)=tmp_im*sign
+                  endif 
+                  if (n_integrals_2 >= size(buffer_i_2)-1) then
+                    call insert_into_ao_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2)
+                    n_integrals_2 = 0
+                  endif
+                endif
+
+              enddo !ii
+            enddo !ik
+          enddo !ij
+        enddo !il
+
+        if (n_integrals_1 > 0) then
+          call insert_into_ao_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1)
+        endif
+        if (n_integrals_2 > 0) then
+          call insert_into_ao_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2)
+        endif
+      enddo !kk
+  !$OMP END DO NOWAIT
+  deallocate( &
+    ints_ik, &
+    ints_ikjl, &
+    buffer_i_1, &
+    buffer_i_2, &
+    buffer_values_1, &
+    buffer_values_2 &
+    )
+  !$OMP END PARALLEL
+    enddo !kj
+    call wall_time(wall_2)
+    if (wall_2 - wall_0 > 1.d0) then
+      wall_0 = wall_2
+      print*, 100.*float(kl)/float(kpt_num), '% in ',               &
+          wall_2-wall_1,'s',map_mb(ao_integrals_map),'+',map_mb(ao_integrals_map_2),'MB'
+    endif
+
+  enddo !kl
+  deallocate( ints_jl ) 
+
+  call map_sort(ao_integrals_map)
+  call map_unique(ao_integrals_map)
+  call map_sort(ao_integrals_map_2)
+  call map_unique(ao_integrals_map_2)
+  !call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_complex_1',ao_integrals_map)
+  !call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_complex_2',ao_integrals_map_2)
+  !call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read')
+  
+  call wall_time(wall_2)
+  call cpu_time(cpu_2)
+
+  integer*8                      :: get_ao_map_size, ao_map_size
+  ao_map_size = get_ao_map_size()
+  
+  print*,'AO integrals provided:'
+  print*,' Size of AO map           ', map_mb(ao_integrals_map),'+',map_mb(ao_integrals_map_2),'MB'
+  print*,' Number of AO integrals: ',  ao_map_size
+  print*,' cpu  time :',cpu_2 - cpu_1, 's'
+  print*,' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')'
+  
+end subroutine ao_map_fill_from_df
+
diff --git a/src/ao_two_e_ints/map_integrals.irp.f b/src/ao_two_e_ints/map_integrals.irp.f
index c0ec9695..3a0a2659 100644
--- a/src/ao_two_e_ints/map_integrals.irp.f
+++ b/src/ao_two_e_ints/map_integrals.irp.f
@@ -4,6 +4,7 @@ use map_module
 !! ======
 
 BEGIN_PROVIDER [ type(map_type), ao_integrals_map ]
+&BEGIN_PROVIDER [ type(map_type), ao_integrals_map_2 ]
   implicit none
   BEGIN_DOC
   ! AO integrals
@@ -11,9 +12,17 @@ BEGIN_PROVIDER [ type(map_type), ao_integrals_map ]
   integer(key_kind)              :: key_max
   integer(map_size_kind)         :: sze
   call two_e_integrals_index(ao_num,ao_num,ao_num,ao_num,key_max)
-  sze = key_max
-  call map_init(ao_integrals_map,sze)
-  print*,  'AO map initialized : ', sze
+  if (is_complex) then
+    sze = key_max*2
+    call map_init(ao_integrals_map,sze)
+    call map_init(ao_integrals_map_2,sze) 
+    print*,  'AO maps initialized (complex): ', 2*sze
+  else
+    sze = key_max
+    call map_init(ao_integrals_map,sze)
+    call map_init(ao_integrals_map_2,1_map_size_kind) 
+    print*,  'AO map initialized : ', sze
+  endif
 END_PROVIDER
 
 subroutine two_e_integrals_index(i,j,k,l,i1)
@@ -21,7 +30,7 @@ subroutine two_e_integrals_index(i,j,k,l,i1)
   implicit none
   BEGIN_DOC
 ! Gives a unique index for i,j,k,l using permtuation symmetry.
-! i <-> k, j <-> l, and (i,k) <-> (j,l) for non-periodic systems
+! i <-> k, j <-> l, and (i,k) <-> (j,l) 
   END_DOC
   integer, intent(in)            :: i,j,k,l
   integer(key_kind), intent(out) :: i1
@@ -37,8 +46,6 @@ subroutine two_e_integrals_index(i,j,k,l,i1)
   i1 = i1+shiftr(i2*i2-i2,1)
 end
 
-
-
 subroutine two_e_integrals_index_reverse(i,j,k,l,i1)
   use map_module
   implicit none
@@ -128,155 +135,6 @@ subroutine two_e_integrals_index_reverse(i,j,k,l,i1)
 
 end
 
-
-
-subroutine ao_idx2_sq(i,j,ij)
-  implicit none
-  integer, intent(in)  :: i,j
-  integer, intent(out) :: ij
-  if (i<j) then
-    ij=(j-1)*(j-1)+2*i-mod(j+1,2)
-  else if (i>j) then
-    ij=(i-1)*(i-1)+2*j-mod(i,2)
-  else
-    ij=i*i
-  endif
-end
-  
-subroutine idx2_tri_int(i,j,ij)
-  implicit none
-  integer, intent(in)  :: i,j
-  integer, intent(out) :: ij
-  integer :: p,q
-  p = max(i,j)
-  q = min(i,j)
-  ij = q+ishft(p*p-p,-1)
-end
-  
-subroutine ao_idx2_tri_key(i,j,ij)
-  use map_module
-  implicit none
-  integer, intent(in)  :: i,j
-  integer(key_kind), intent(out) :: ij
-  integer(key_kind) :: p,q
-  p = max(i,j)
-  q = min(i,j)
-  ij = q+ishft(p*p-p,-1)
-end
-  
-subroutine two_e_integrals_index_2fold(i,j,k,l,i1)                                           
-  use map_module
-  implicit none
-  integer, intent(in)            :: i,j,k,l
-  integer(key_kind), intent(out) :: i1
-  integer                        :: ik,jl
-
-  call ao_idx2_sq(i,k,ik)
-  call ao_idx2_sq(j,l,jl)
-  call ao_idx2_tri_key(ik,jl,i1)
-end
-
-subroutine ao_idx2_sq_rev(i,k,ik)                                                                
-  BEGIN_DOC
-  ! reverse square compound index
-  END_DOC
-!  p = ceiling(dsqrt(dble(ik)))
-!  q = ceiling(0.5d0*(dble(ik)-dble((p-1)*(p-1))))
-!  if (mod(ik,2)==0) then
-!    k=p
-!    i=q
-!  else
-!    i=p
-!    k=q
-!  endif
-  integer, intent(in)           :: ik
-  integer, intent(out)          :: i,k
-  integer                       :: pq(0:1),i1,i2
-  pq(0) = ceiling(dsqrt(dble(ik)))
-  pq(1) = ceiling(0.5d0*(dble(ik)-dble((pq(0)-1)*(pq(0)-1))))
-  i1=mod(ik,2)
-  i2=mod(ik+1,2)
-
-  k=pq(i1)
-  i=pq(i2)
-end
-
-subroutine ao_idx2_tri_rev_key(i,k,ik)
-  use map_module
-  BEGIN_DOC
-  !return i<=k
-  END_DOC
-  integer(key_kind), intent(in) :: ik
-  integer, intent(out)          :: i,k
-  integer(key_kind) :: tmp_k
-  k = ceiling(0.5d0*(dsqrt(8.d0*dble(ik)+1.d0)-1.d0))
-  tmp_k = k
-  i = int(ik - ishft(tmp_k*tmp_k-tmp_k,-1))
-end
-
-subroutine idx2_tri_rev_int(i,k,ik)
-  BEGIN_DOC
-  !return i<=k
-  END_DOC
-  integer, intent(in)           :: ik
-  integer, intent(out)          :: i,k
-  k = ceiling(0.5d0*(dsqrt(8.d0*dble(ik)+1.d0)-1.d0))
-  i = int(ik - ishft(k*k-k,-1))
-end
-
-subroutine two_e_integrals_index_reverse_2fold(i,j,k,l,i1)
-  use map_module
-  implicit none
-  integer, intent(out)           :: i(2),j(2),k(2),l(2)
-  integer(key_kind), intent(in)  :: i1
-  integer(key_kind)              :: i0
-  integer                        :: i2,i3
-  i = 0
-  call ao_idx2_tri_rev_key(i3,i2,i1)
-
-  call ao_idx2_sq_rev(j(1),l(1),i2)
-  call ao_idx2_sq_rev(i(1),k(1),i3)
-
-              !ijkl
-  i(2) = j(1) !jilk
-  j(2) = i(1)
-  k(2) = l(1)
-  l(2) = k(1)
-
-!  i(3) = k(1) !klij   complex conjugate
-!  j(3) = l(1)
-!  k(3) = i(1)
-!  l(3) = j(1)
-!
-!  i(4) = l(1) !lkji   complex conjugate
-!  j(4) = k(1)
-!  k(4) = j(1)
-!  l(4) = i(1)
-
-  integer :: ii
-  if ( (i(1)==i(2)).and. &
-       (j(1)==j(2)).and. &
-       (k(1)==k(2)).and. &
-       (l(1)==l(2)) ) then
-    i(2) = 0
-  endif
-! This has been tested with up to 1000 AOs, and all the reverse indices are
-! correct ! We can remove the test
-!  do ii=1,2
-!    if (i(ii) /= 0) then
-!      call two_e_integrals_index_2fold(i(ii),j(ii),k(ii),l(ii),i0)
-!      if (i1 /= i0) then
-!        print *,  i1, i0
-!        print *,  i(ii), j(ii), k(ii), l(ii)
-!        stop 'two_e_integrals_index_reverse_2fold failed'
-!      endif
-!    endif
-!  enddo
-end
-
-
-
-
  BEGIN_PROVIDER [ integer, ao_integrals_cache_min ]
 &BEGIN_PROVIDER [ integer, ao_integrals_cache_max ]
  implicit none
@@ -359,111 +217,111 @@ double precision function get_ao_two_e_integral(i,j,k,l,map) result(result)
   result = tmp
 end
 
-BEGIN_PROVIDER [ complex*16, ao_integrals_cache_periodic, (0:64*64*64*64) ]
- implicit none
- BEGIN_DOC
- ! Cache of AO integrals for fast access
- END_DOC
- PROVIDE ao_two_e_integrals_in_map
- integer                        :: i,j,k,l,ii
- integer(key_kind)              :: idx1, idx2
- real(integral_kind)            :: tmp_re, tmp_im
- integer(key_kind)              :: idx_re,idx_im
- complex(integral_kind)         :: integral
+!BEGIN_PROVIDER [ complex*16, ao_integrals_cache_periodic, (0:64*64*64*64) ]
+! implicit none
+! BEGIN_DOC
+! ! Cache of AO integrals for fast access
+! END_DOC
+! PROVIDE ao_two_e_integrals_in_map
+! integer                        :: i,j,k,l,ii
+! integer(key_kind)              :: idx1, idx2
+! real(integral_kind)            :: tmp_re, tmp_im
+! integer(key_kind)              :: idx_re,idx_im
+! complex(integral_kind)         :: integral
+!
+!
+! !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx1,idx2,tmp_re,tmp_im,idx_re,idx_im,ii,integral)
+! do l=ao_integrals_cache_min,ao_integrals_cache_max
+!   do k=ao_integrals_cache_min,ao_integrals_cache_max
+!     do j=ao_integrals_cache_min,ao_integrals_cache_max
+!       do i=ao_integrals_cache_min,ao_integrals_cache_max
+!         !DIR$ FORCEINLINE
+!         call two_e_integrals_index_2fold(i,j,k,l,idx1)
+!         !DIR$ FORCEINLINE
+!         call two_e_integrals_index_2fold(k,l,i,j,idx2)
+!         idx_re = min(idx1,idx2)
+!         idx_im = max(idx1,idx2)
+!         !DIR$ FORCEINLINE
+!         call map_get(ao_integrals_map,idx_re,tmp_re)
+!         if (idx_re /= idx_im) then
+!           call map_get(ao_integrals_map,idx_im,tmp_im)
+!           if (idx1 < idx2) then
+!             integral = dcmplx(tmp_re,tmp_im)
+!           else
+!             integral = dcmplx(tmp_re,-tmp_im)
+!           endif
+!         else
+!           tmp_im = 0.d0
+!           integral = dcmplx(tmp_re,tmp_im)
+!         endif
+!         
+!         ii = l-ao_integrals_cache_min
+!         ii = ior( shiftl(ii,6), k-ao_integrals_cache_min)
+!         ii = ior( shiftl(ii,6), j-ao_integrals_cache_min)
+!         ii = ior( shiftl(ii,6), i-ao_integrals_cache_min)
+!         ao_integrals_cache_periodic(ii) = integral
+!       enddo
+!     enddo
+!   enddo
+! enddo
+! !$OMP END PARALLEL DO
+!
+!END_PROVIDER
 
 
- !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx1,idx2,tmp_re,tmp_im,idx_re,idx_im,ii,integral)
- do l=ao_integrals_cache_min,ao_integrals_cache_max
-   do k=ao_integrals_cache_min,ao_integrals_cache_max
-     do j=ao_integrals_cache_min,ao_integrals_cache_max
-       do i=ao_integrals_cache_min,ao_integrals_cache_max
-         !DIR$ FORCEINLINE
-         call two_e_integrals_index_2fold(i,j,k,l,idx1)
-         !DIR$ FORCEINLINE
-         call two_e_integrals_index_2fold(k,l,i,j,idx2)
-         idx_re = min(idx1,idx2)
-         idx_im = max(idx1,idx2)
-         !DIR$ FORCEINLINE
-         call map_get(ao_integrals_map,idx_re,tmp_re)
-         if (idx_re /= idx_im) then
-           call map_get(ao_integrals_map,idx_im,tmp_im)
-           if (idx1 < idx2) then
-             integral = dcmplx(tmp_re,tmp_im)
-           else
-             integral = dcmplx(tmp_re,-tmp_im)
-           endif
-         else
-           tmp_im = 0.d0
-           integral = dcmplx(tmp_re,tmp_im)
-         endif
-         
-         ii = l-ao_integrals_cache_min
-         ii = ior( shiftl(ii,6), k-ao_integrals_cache_min)
-         ii = ior( shiftl(ii,6), j-ao_integrals_cache_min)
-         ii = ior( shiftl(ii,6), i-ao_integrals_cache_min)
-         ao_integrals_cache_periodic(ii) = integral
-       enddo
-     enddo
-   enddo
- enddo
- !$OMP END PARALLEL DO
-
-END_PROVIDER
-
-
-complex*16 function get_ao_two_e_integral_periodic(i,j,k,l,map) result(result)
-  use map_module
-  implicit none
-  BEGIN_DOC
-  ! Gets one AO bi-electronic integral from the AO map
-  END_DOC
-  integer, intent(in)            :: i,j,k,l
-  integer(key_kind)              :: idx1,idx2
-  real(integral_kind)            :: tmp_re, tmp_im
-  integer(key_kind)              :: idx_re,idx_im
-  type(map_type), intent(inout)  :: map
-  integer                        :: ii
-  complex(integral_kind)         :: tmp
-  PROVIDE ao_two_e_integrals_in_map ao_integrals_cache_periodic ao_integrals_cache_min
-  !DIR$ FORCEINLINE
-  logical, external              :: ao_two_e_integral_zero
-  if (ao_two_e_integral_zero(i,j,k,l)) then
-    tmp = (0.d0,0.d0)
-  else
-    ii = l-ao_integrals_cache_min
-    ii = ior(ii, k-ao_integrals_cache_min)
-    ii = ior(ii, j-ao_integrals_cache_min)
-    ii = ior(ii, i-ao_integrals_cache_min)
-    if (iand(ii, -64) /= 0) then
-         !DIR$ FORCEINLINE
-         call two_e_integrals_index_2fold(i,j,k,l,idx1)
-         !DIR$ FORCEINLINE
-         call two_e_integrals_index_2fold(k,l,i,j,idx2)
-         idx_re = min(idx1,idx2)
-         idx_im = max(idx1,idx2)
-         !DIR$ FORCEINLINE
-         call map_get(ao_integrals_map,idx_re,tmp_re)
-         if (idx_re /= idx_im) then
-           call map_get(ao_integrals_map,idx_im,tmp_im)
-           if (idx1 < idx2) then
-             tmp = dcmplx(tmp_re,tmp_im)
-           else
-             tmp = dcmplx(tmp_re,-tmp_im)
-           endif
-         else
-           tmp_im = 0.d0
-           tmp = dcmplx(tmp_re,tmp_im)
-         endif
-    else
-      ii = l-ao_integrals_cache_min
-      ii = ior( shiftl(ii,6), k-ao_integrals_cache_min)
-      ii = ior( shiftl(ii,6), j-ao_integrals_cache_min)
-      ii = ior( shiftl(ii,6), i-ao_integrals_cache_min)
-      tmp = ao_integrals_cache_periodic(ii)
-    endif
-    result = tmp
-  endif
-end
+!complex*16 function get_ao_two_e_integral_periodic(i,j,k,l,map) result(result)
+!  use map_module
+!  implicit none
+!  BEGIN_DOC
+!  ! Gets one AO bi-electronic integral from the AO map
+!  END_DOC
+!  integer, intent(in)            :: i,j,k,l
+!  integer(key_kind)              :: idx1,idx2
+!  real(integral_kind)            :: tmp_re, tmp_im
+!  integer(key_kind)              :: idx_re,idx_im
+!  type(map_type), intent(inout)  :: map
+!  integer                        :: ii
+!  complex(integral_kind)         :: tmp
+!  PROVIDE ao_two_e_integrals_in_map ao_integrals_cache_periodic ao_integrals_cache_min
+!  !DIR$ FORCEINLINE
+!  logical, external              :: ao_two_e_integral_zero
+!  if (ao_two_e_integral_zero(i,j,k,l)) then
+!    tmp = (0.d0,0.d0)
+!  else
+!    ii = l-ao_integrals_cache_min
+!    ii = ior(ii, k-ao_integrals_cache_min)
+!    ii = ior(ii, j-ao_integrals_cache_min)
+!    ii = ior(ii, i-ao_integrals_cache_min)
+!    if (iand(ii, -64) /= 0) then
+!         !DIR$ FORCEINLINE
+!         call two_e_integrals_index_2fold(i,j,k,l,idx1)
+!         !DIR$ FORCEINLINE
+!         call two_e_integrals_index_2fold(k,l,i,j,idx2)
+!         idx_re = min(idx1,idx2)
+!         idx_im = max(idx1,idx2)
+!         !DIR$ FORCEINLINE
+!         call map_get(ao_integrals_map,idx_re,tmp_re)
+!         if (idx_re /= idx_im) then
+!           call map_get(ao_integrals_map,idx_im,tmp_im)
+!           if (idx1 < idx2) then
+!             tmp = dcmplx(tmp_re,tmp_im)
+!           else
+!             tmp = dcmplx(tmp_re,-tmp_im)
+!           endif
+!         else
+!           tmp_im = 0.d0
+!           tmp = dcmplx(tmp_re,tmp_im)
+!         endif
+!    else
+!      ii = l-ao_integrals_cache_min
+!      ii = ior( shiftl(ii,6), k-ao_integrals_cache_min)
+!      ii = ior( shiftl(ii,6), j-ao_integrals_cache_min)
+!      ii = ior( shiftl(ii,6), i-ao_integrals_cache_min)
+!      tmp = ao_integrals_cache_periodic(ii)
+!    endif
+!    result = tmp
+!  endif
+!end
 
 
 subroutine get_ao_two_e_integrals(j,k,l,sze,out_val)
@@ -495,33 +353,33 @@ subroutine get_ao_two_e_integrals(j,k,l,sze,out_val)
 end
 
 
-subroutine get_ao_two_e_integrals_periodic(j,k,l,sze,out_val)
-  use map_module
-  BEGIN_DOC
-  ! Gets multiple AO bi-electronic integral from the AO map .
-  ! All i are retrieved for j,k,l fixed.
-  ! physicist convention : <ij|kl> 
-  END_DOC
-  implicit none
-  integer, intent(in)            :: j,k,l, sze
-  complex(integral_kind), intent(out) :: out_val(sze)
-
-  integer                        :: i
-  integer(key_kind)              :: hash
-  logical, external              :: ao_one_e_integral_zero
-  PROVIDE ao_two_e_integrals_in_map ao_integrals_map
-
-  if (ao_one_e_integral_zero(j,l)) then
-    out_val = 0.d0
-    return
-  endif
-
-  double precision :: get_ao_two_e_integral
-  do i=1,sze
-    out_val(i) = get_ao_two_e_integral(i,j,k,l,ao_integrals_map)
-  enddo
-
-end
+!subroutine get_ao_two_e_integrals_periodic(j,k,l,sze,out_val)
+!  use map_module
+!  BEGIN_DOC
+!  ! Gets multiple AO bi-electronic integral from the AO map .
+!  ! All i are retrieved for j,k,l fixed.
+!  ! physicist convention : <ij|kl> 
+!  END_DOC
+!  implicit none
+!  integer, intent(in)            :: j,k,l, sze
+!  complex(integral_kind), intent(out) :: out_val(sze)
+!
+!  integer                        :: i
+!  integer(key_kind)              :: hash
+!  logical, external              :: ao_one_e_integral_zero
+!  PROVIDE ao_two_e_integrals_in_map ao_integrals_map
+!
+!  if (ao_one_e_integral_zero(j,l)) then
+!    out_val = 0.d0
+!    return
+!  endif
+!
+!  double precision :: get_ao_two_e_integral
+!  do i=1,sze
+!    out_val(i) = get_ao_two_e_integral(i,j,k,l,ao_integrals_map)
+!  enddo
+!
+!end
 
 subroutine get_ao_two_e_integrals_non_zero(j,k,l,sze,out_val,out_val_index,non_zero_int)
   use map_module
@@ -539,6 +397,10 @@ subroutine get_ao_two_e_integrals_non_zero(j,k,l,sze,out_val,out_val_index,non_z
   double precision               :: tmp
   logical, external              :: ao_one_e_integral_zero
   logical, external              :: ao_two_e_integral_zero
+  if(is_complex) then
+    print*,'not implemented for periodic:',irp_here
+    stop -1
+  endif
   PROVIDE ao_two_e_integrals_in_map
 
   non_zero_int = 0
@@ -584,6 +446,10 @@ subroutine get_ao_two_e_integrals_non_zero_jl(j,l,thresh,sze_max,sze,out_val,out
   logical, external              :: ao_one_e_integral_zero
   logical, external              :: ao_two_e_integral_zero
 
+  if(is_complex) then
+    print*,'not implemented for periodic:',irp_here
+    stop -1
+  endif
   PROVIDE ao_two_e_integrals_in_map
   non_zero_int = 0
   if (ao_one_e_integral_zero(j,l)) then
@@ -632,6 +498,10 @@ subroutine get_ao_two_e_integrals_non_zero_jl_from_list(j,l,thresh,list,n_list,s
   logical, external              :: ao_one_e_integral_zero
   logical, external              :: ao_two_e_integral_zero
 
+  if(is_complex) then
+    print*,'not implemented for periodic:',irp_here
+    stop -1
+  endif
   PROVIDE ao_two_e_integrals_in_map
   non_zero_int = 0
   if (ao_one_e_integral_zero(j,l)) then
@@ -670,7 +540,7 @@ function get_ao_map_size()
   BEGIN_DOC
   ! Returns the number of elements in the AO map
   END_DOC
-  get_ao_map_size = ao_integrals_map % n_elements
+  get_ao_map_size = ao_integrals_map % n_elements + ao_integrals_map_2 % n_elements
 end
 
 subroutine clear_ao_map
@@ -680,6 +550,9 @@ subroutine clear_ao_map
   END_DOC
   call map_deinit(ao_integrals_map)
   FREE ao_integrals_map
+  call map_deinit(ao_integrals_map_2)
+  FREE ao_integrals_map_2
+  
 end
 
 
@@ -698,81 +571,3 @@ subroutine insert_into_ao_integrals_map(n_integrals,buffer_i, buffer_values)
 end
 
 
-!subroutine dump_ao_integrals(filename)
-!  use map_module
-!  implicit none
-!  BEGIN_DOC
-!  ! Save to disk the |AO| integrals
-!  END_DOC
-!  character*(*), intent(in)      :: filename
-!  integer(cache_key_kind), pointer :: key(:)
-!  real(integral_kind), pointer   :: val(:)
-!  integer*8                      :: i,j, n
-!  if (.not.mpi_master) then
-!    return
-!  endif
-!  call ezfio_set_work_empty(.False.)
-!  open(unit=66,file=filename,FORM='unformatted')
-!  write(66) integral_kind, key_kind
-!  write(66) ao_integrals_map%sorted, ao_integrals_map%map_size,    &
-!      ao_integrals_map%n_elements
-!  do i=0_8,ao_integrals_map%map_size
-!    write(66) ao_integrals_map%map(i)%sorted, ao_integrals_map%map(i)%map_size,&
-!        ao_integrals_map%map(i)%n_elements
-!  enddo
-!  do i=0_8,ao_integrals_map%map_size
-!    key => ao_integrals_map%map(i)%key
-!    val => ao_integrals_map%map(i)%value
-!    n = ao_integrals_map%map(i)%n_elements
-!    write(66) (key(j), j=1,n), (val(j), j=1,n)
-!  enddo
-!  close(66)
-!
-!end
-
-
-!integer function load_ao_integrals(filename)
-!  implicit none
-!  BEGIN_DOC
-!  ! Read from disk the |AO| integrals
-!  END_DOC
-!  character*(*), intent(in)      :: filename
-!  integer*8                      :: i
-!  integer(cache_key_kind), pointer :: key(:)
-!  real(integral_kind), pointer   :: val(:)
-!  integer                        :: iknd, kknd
-!  integer*8                      :: n, j
-!  load_ao_integrals = 1
-!  open(unit=66,file=filename,FORM='unformatted',STATUS='UNKNOWN')
-!  read(66,err=98,end=98) iknd, kknd
-!  if (iknd /= integral_kind) then
-!    print *,  'Wrong integrals kind in file :', iknd
-!    stop 1
-!  endif
-!  if (kknd /= key_kind) then
-!    print *,  'Wrong key kind in file :', kknd
-!    stop 1
-!  endif
-!  read(66,err=98,end=98) ao_integrals_map%sorted, ao_integrals_map%map_size,&
-!      ao_integrals_map%n_elements
-!  do i=0_8, ao_integrals_map%map_size
-!    read(66,err=99,end=99) ao_integrals_map%map(i)%sorted,          &
-!        ao_integrals_map%map(i)%map_size, ao_integrals_map%map(i)%n_elements
-!    call cache_map_reallocate(ao_integrals_map%map(i),ao_integrals_map%map(i)%map_size)
-!  enddo
-!  do i=0_8, ao_integrals_map%map_size
-!    key => ao_integrals_map%map(i)%key
-!    val => ao_integrals_map%map(i)%value
-!    n = ao_integrals_map%map(i)%n_elements
-!    read(66,err=99,end=99) (key(j), j=1,n), (val(j), j=1,n)
-!  enddo
-!  call map_sort(ao_integrals_map)
-!  load_ao_integrals = 0
-!  return
-!  99 continue
-!  call map_deinit(ao_integrals_map)
-!  98 continue
-!  stop 'Problem reading ao_integrals_map file in work/'
-!
-!end
-!
diff --git a/src/ao_two_e_ints/map_integrals_cplx.irp.f b/src/ao_two_e_ints/map_integrals_cplx.irp.f
new file mode 100644
index 00000000..12d17504
--- /dev/null
+++ b/src/ao_two_e_ints/map_integrals_cplx.irp.f
@@ -0,0 +1,574 @@
+use map_module
+
+
+subroutine idx2_tri_int(i,j,ij)
+  implicit none
+  integer, intent(in)  :: i,j
+  integer, intent(out) :: ij
+  integer :: p,q
+  p = max(i,j)
+  q = min(i,j)
+  ij = q+ishft(p*p-p,-1)
+end
+
+subroutine idx2_tri_key(i,j,ij)
+  use map_module
+  implicit none
+  integer, intent(in)  :: i,j
+  integer(key_kind), intent(out) :: ij
+  integer(key_kind) :: p,q
+  p = max(i,j)
+  q = min(i,j)
+  ij = q+ishft(p*p-p,-1)
+end
+subroutine two_e_integrals_index_complex(i,j,k,l,i1,p,q)
+  use map_module
+  implicit none
+  BEGIN_DOC
+! Gives a unique index for i,j,k,l using permtuation symmetry.
+! i <-> k, j <-> l, and (i,k) <-> (j,l) 
+  END_DOC
+  integer, intent(in)            :: i,j,k,l
+  integer(key_kind), intent(out) :: i1
+  integer(key_kind)              :: r,s,i2
+  integer(key_kind),intent(out)   :: p,q
+  p = min(i,k)
+  r = max(i,k)
+  p = p+shiftr(r*r-r,1)
+  q = min(j,l)
+  s = max(j,l)
+  q = q+shiftr(s*s-s,1)
+  i1 = min(p,q)
+  i2 = max(p,q)
+  i1 = i1+shiftr(i2*i2-i2,1)
+end
+
+
+
+subroutine two_e_integrals_index_reverse_complex_1(i,j,k,l,i1)
+  use map_module
+  implicit none
+  BEGIN_DOC
+! Computes the 4 indices $i,j,k,l$ from a unique index $i_1$.
+! For 2 indices $i,j$ and $i \le j$, we have
+! $p = i(i-1)/2 + j$.
+! The key point is that because $j < i$,
+! $i(i-1)/2 < p \le i(i+1)/2$. So $i$ can be found by solving
+! $i^2 - i - 2p=0$. One obtains $i=1 + \sqrt{1+8p}/2$
+! and $j = p - i(i-1)/2$.
+! This rule is applied 3 times. First for the symmetry of the
+! pairs (i,k) and (j,l), and then for the symmetry within each pair.
+! always returns first set such that i<=k, j<=l, ik<=jl
+  END_DOC
+  integer, intent(out)           :: i(4),j(4),k(4),l(4)
+  integer(key_kind), intent(in)  :: i1
+  integer(key_kind)              :: i2,i3
+  i = 0
+  i2   = ceiling(0.5d0*(dsqrt(dble(shiftl(i1,3)+1))-1.d0))
+  l(1) = ceiling(0.5d0*(dsqrt(dble(shiftl(i2,3)+1))-1.d0))
+  i3   = i1 - shiftr(i2*i2-i2,1)
+  k(1) = ceiling(0.5d0*(dsqrt(dble(shiftl(i3,3)+1))-1.d0))
+  j(1) = int(i2 - shiftr(l(1)*l(1)-l(1),1),4)
+  i(1) = int(i3 - shiftr(k(1)*k(1)-k(1),1),4)
+
+              !ijkl a+ib
+  i(2) = j(1) !jilk a+ib
+  j(2) = i(1)
+  k(2) = l(1)
+  l(2) = k(1)
+
+  i(3) = k(1) !klij a-ib
+  j(3) = l(1)
+  k(3) = i(1)
+  l(3) = j(1)
+
+  i(4) = l(1) !lkji a-ib
+  j(4) = k(1)
+  k(4) = j(1)
+  l(4) = i(1)
+
+  integer :: ii, jj
+  do ii=2,4
+    do jj=1,ii-1
+      if ( (i(ii) == i(jj)).and. &
+           (j(ii) == j(jj)).and. &
+           (k(ii) == k(jj)).and. &
+           (l(ii) == l(jj)) ) then
+         i(ii) = 0
+         exit
+      endif
+    enddo
+  enddo
+end
+
+subroutine two_e_integrals_index_reverse_complex_2(i,j,k,l,i1)
+  use map_module
+  implicit none
+  BEGIN_DOC
+! Computes the 4 indices $i,j,k,l$ from a unique index $i_1$.
+! For 2 indices $i,j$ and $i \le j$, we have
+! $p = i(i-1)/2 + j$.
+! The key point is that because $j < i$,
+! $i(i-1)/2 < p \le i(i+1)/2$. So $i$ can be found by solving
+! $i^2 - i - 2p=0$. One obtains $i=1 + \sqrt{1+8p}/2$
+! and $j = p - i(i-1)/2$.
+! This rule is applied 3 times. First for the symmetry of the
+! pairs (i,k) and (j,l), and then for the symmetry within each pair.
+! always returns first set such that k<=i, j<=l, ik<=jl
+  END_DOC
+  integer, intent(out)           :: i(4),j(4),k(4),l(4)
+  integer(key_kind), intent(in)  :: i1
+  integer(key_kind)              :: i2,i3
+  i = 0
+  i2   = ceiling(0.5d0*(dsqrt(dble(shiftl(i1,3)+1))-1.d0))
+  l(1) = ceiling(0.5d0*(dsqrt(dble(shiftl(i2,3)+1))-1.d0))
+  i3   = i1 - shiftr(i2*i2-i2,1)
+  i(1) = ceiling(0.5d0*(dsqrt(dble(shiftl(i3,3)+1))-1.d0))
+  j(1) = int(i2 - shiftr(l(1)*l(1)-l(1),1),4)
+  k(1) = int(i3 - shiftr(i(1)*i(1)-i(1),1),4)
+
+              !kjil a+ib
+  i(2) = j(1) !jkli a+ib
+  j(2) = i(1)
+  k(2) = l(1)
+  l(2) = k(1)
+
+  i(3) = k(1) !ilkj a-ib
+  j(3) = l(1)
+  k(3) = i(1)
+  l(3) = j(1)
+
+  i(4) = l(1) !lijk a-ib
+  j(4) = k(1)
+  k(4) = j(1)
+  l(4) = i(1)
+
+  integer :: ii, jj
+  do ii=2,4
+    do jj=1,ii-1
+      if ( (i(ii) == i(jj)).and. &
+           (j(ii) == j(jj)).and. &
+           (k(ii) == k(jj)).and. &
+           (l(ii) == l(jj)) ) then
+         i(ii) = 0
+         exit
+      endif
+    enddo
+  enddo
+end
+
+
+BEGIN_PROVIDER [ complex*16, ao_integrals_cache_complex, (0:64*64*64*64) ]
+ implicit none
+ BEGIN_DOC
+ ! Cache of AO integrals for fast access
+ END_DOC
+ PROVIDE ao_two_e_integrals_in_map
+ integer                        :: i,j,k,l,ii
+ integer(key_kind)              :: idx1, idx2
+ real(integral_kind)            :: tmp_re, tmp_im
+ integer(key_kind)              :: idx_re,idx_im
+ complex(integral_kind)         :: integral
+  integer(key_kind)              :: p,q,r,s,ik,jl
+  logical :: ilek, jlel, iklejl
+  complex*16 :: get_ao_two_e_integral_complex_simple
+
+
+ !$OMP PARALLEL DO PRIVATE (ilek,jlel,p,q,r,s, ik,jl,iklejl, &
+ !$OMP                     i,j,k,l,idx1,idx2,tmp_re,tmp_im,idx_re,idx_im,ii,integral)
+ do l=ao_integrals_cache_min,ao_integrals_cache_max
+   do k=ao_integrals_cache_min,ao_integrals_cache_max
+     do j=ao_integrals_cache_min,ao_integrals_cache_max
+       do i=ao_integrals_cache_min,ao_integrals_cache_max
+         !DIR$ FORCEINLINE
+         integral = get_ao_two_e_integral_complex_simple(i,j,k,l,&
+                    ao_integrals_map,ao_integrals_map_2)
+         
+         ii = l-ao_integrals_cache_min
+         ii = ior( shiftl(ii,6), k-ao_integrals_cache_min)
+         ii = ior( shiftl(ii,6), j-ao_integrals_cache_min)
+         ii = ior( shiftl(ii,6), i-ao_integrals_cache_min)
+         ao_integrals_cache_complex(ii) = integral
+       enddo
+     enddo
+   enddo
+ enddo
+ !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+subroutine ao_two_e_integral_complex_map_idx_sign(i,j,k,l,use_map1,idx,sign) 
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! get position of periodic AO integral <ij|kl> 
+  ! use_map1: true if integral is in first ao map, false if integral is in second ao map
+  ! idx: position of real part of integral in map (imag part is at idx+1)
+  ! sign: sign of imaginary part
+  !
+  !
+  !  for <ab|cd>, conditionals are [a<c, b<d, ac<bd]
+  !  last two rows are real (ab==cd)
+  ! +---------+---------+---------+---------+---------+---------+---------+---------+---------+
+  ! | NEW     | <ij|kl> | <ji|lk> | <kl|ij> | <lk|ji> | <kj|il> | <jk|li> | <il|kj> | <li|jk> |
+  ! +---------+---------+---------+---------+---------+---------+---------+---------+---------+
+  ! |         |         m1        |         m1*       |         m2        |         m2*       |
+  ! +---------+---------+---------+---------+---------+---------+---------+---------+---------+
+  ! | <ij|kl> | TTT     | TTF     | FFT     | FFF     | FTT     | TFF     | TFT     | FTF     |
+  ! | <ij|il> | 0TT     | T0F     | 0FT     | F0F     |         |         |         |         |
+  ! | <ij|kj> | T0T     | 0TF     | F0T     | 0FF     |         |         |         |         |
+  ! | <ii|jj> | TT0     |         | FF0     |         | FT0(r)  | TF0(r)  |         |         |
+  ! +---------+---------+---------+---------+---------+---------+---------+---------+---------+
+  ! | <ij|ij> |         |         |         |         | 00T(r)  | 00F(r)  |         |         |
+  ! | <ii|ii> |         |         |         |         | 000     |         |         |         |
+  ! +---------+---------+---------+---------+---------+---------+---------+---------+---------+
+  END_DOC
+  integer, intent(in)            :: i,j,k,l
+  integer(key_kind), intent(out) :: idx
+  logical, intent(out)           :: use_map1
+  double precision, intent(out)  :: sign
+  integer(key_kind)              :: p,q,r,s,ik,jl,ij,kl
+  !DIR$ FORCEINLINE
+  call two_e_integrals_index_complex(i,j,k,l,idx,ik,jl)
+  p = min(i,j)
+  r = max(i,j)
+  ij = p+shiftr(r*r-r,1)
+  q = min(k,l)
+  s = max(k,l)
+  kl = q+shiftr(s*s-s,1)
+
+  idx = 2*idx-1
+
+  if (ij==kl) then !real, J -> map1, K -> map2
+    sign=0.d0
+    use_map1=.False.
+  else
+    if (ik.eq.jl) then
+      if (i.lt.k) then   !TT0
+        sign=1.d0
+        use_map1=.True. 
+      else               !FF0
+        sign=-1.d0
+        use_map1=.True.
+      endif
+    else if (i.eq.k) then
+      if (j.lt.l) then   !0T* 
+        sign=1.d0
+        use_map1=.True.
+      else               !0F*
+        sign=-1.d0
+        use_map1=.True.
+      endif
+    else if (j.eq.l) then
+      if (i.lt.k) then
+        sign=1.d0
+        use_map1=.True.
+      else
+        sign=-1.d0
+        use_map1=.True.
+      endif
+    else if ((i.lt.k).eqv.(j.lt.l)) then
+      if (i.lt.k) then
+        sign=1.d0
+        use_map1=.True.
+      else
+        sign=-1.d0
+        use_map1=.True.
+      endif
+    else
+      if ((j.lt.l).eqv.(ik.lt.jl)) then
+        sign=1.d0
+        use_map1=.False.
+      else
+        sign=-1.d0
+        use_map1=.False.
+      endif
+    endif
+  endif
+end
+
+complex*16 function get_ao_two_e_integral_complex_simple(i,j,k,l,map,map2) result(result)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Gets one AO bi-electronic integral from the AO map
+  END_DOC
+  integer, intent(in)            :: i,j,k,l
+  integer(key_kind)              :: idx1,idx2,idx
+  real(integral_kind)            :: tmp_re, tmp_im
+  integer(key_kind)              :: idx_re,idx_im
+  type(map_type), intent(inout)  :: map,map2
+  integer                        :: ii
+  complex(integral_kind)         :: tmp
+  integer(key_kind)              :: p,q,r,s,ik,jl
+  logical :: ilek, jlel, iklejl,use_map1
+  double precision :: sign
+  ! a.le.c, b.le.d, tri(a,c).le.tri(b,d)
+  PROVIDE ao_two_e_integrals_in_map
+  call ao_two_e_integral_complex_map_idx_sign(i,j,k,l,use_map1,idx,sign)
+  if (use_map1) then
+    call map_get(map,idx,tmp_re)
+    call map_get(map,idx+1,tmp_im)
+    tmp_im *= sign
+  else
+    call map_get(map2,idx,tmp_re)
+    if (sign/=0.d0) then
+      call map_get(map2,idx+1,tmp_im)
+      tmp_im *= sign
+    else
+      tmp_im=0.d0
+    endif
+  endif
+  tmp = dcmplx(tmp_re,tmp_im)
+  result = tmp
+end
+
+
+complex*16 function get_ao_two_e_integral_complex(i,j,k,l,map,map2) result(result)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Gets one AO bi-electronic integral from the AO map
+  END_DOC
+  integer, intent(in)            :: i,j,k,l
+  integer(key_kind)              :: idx1,idx2
+  real(integral_kind)            :: tmp_re, tmp_im
+  integer(key_kind)              :: idx_re,idx_im
+  type(map_type), intent(inout)  :: map,map2
+  integer                        :: ii
+  complex(integral_kind)         :: tmp
+  complex(integral_kind)         :: get_ao_two_e_integral_complex_simple
+  integer(key_kind)              :: p,q,r,s,ik,jl
+  logical :: ilek, jlel, iklejl
+  ! a.le.c, b.le.d, tri(a,c).le.tri(b,d)
+  PROVIDE ao_two_e_integrals_in_map ao_integrals_cache_complex ao_integrals_cache_min
+  !DIR$ FORCEINLINE
+  !logical, external              :: ao_two_e_integral_zero
+  !if (ao_two_e_integral_zero(i,j,k,l)) then
+  !  tmp = (0.d0,0.d0)
+  !else
+  if (.True.) then
+    ii = l-ao_integrals_cache_min
+    ii = ior(ii, k-ao_integrals_cache_min)
+    ii = ior(ii, j-ao_integrals_cache_min)
+    ii = ior(ii, i-ao_integrals_cache_min)
+    if (iand(ii, -64) /= 0) then
+      tmp = get_ao_two_e_integral_complex_simple(i,j,k,l,map,map2)
+    else
+      ii = l-ao_integrals_cache_min
+      ii = ior( shiftl(ii,6), k-ao_integrals_cache_min)
+      ii = ior( shiftl(ii,6), j-ao_integrals_cache_min)
+      ii = ior( shiftl(ii,6), i-ao_integrals_cache_min)
+      tmp = ao_integrals_cache_complex(ii)
+    endif
+  endif
+  result = tmp
+end
+
+
+subroutine get_ao_two_e_integrals_complex(j,k,l,sze,out_val)
+  use map_module
+  BEGIN_DOC
+  ! Gets multiple AO bi-electronic integral from the AO map .
+  ! All i are retrieved for j,k,l fixed.
+  ! physicist convention : <ij|kl> 
+  END_DOC
+  implicit none
+  integer, intent(in)            :: j,k,l, sze
+  complex*16, intent(out) :: out_val(sze)
+
+  integer                        :: i
+  integer(key_kind)              :: hash
+  !logical, external              :: ao_one_e_integral_zero
+  PROVIDE ao_two_e_integrals_in_map ao_integrals_map
+
+  !if (ao_one_e_integral_zero(j,l)) then
+  !  out_val = (0.d0,0.d0)
+  !  return
+  !endif
+
+  complex*16 :: get_ao_two_e_integral_complex
+  do i=1,sze
+    out_val(i) = get_ao_two_e_integral_complex(i,j,k,l,ao_integrals_map,ao_integrals_map_2)
+  enddo
+
+end
+
+subroutine get_ao_two_e_integrals_non_zero_complex(j,k,l,sze,out_val,out_val_index,non_zero_int)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Gets multiple AO bi-electronic integral from the AO map .
+  ! All non-zero i are retrieved for j,k,l fixed.
+  END_DOC
+  integer, intent(in)            :: j,k,l, sze
+  complex(integral_kind), intent(out) :: out_val(sze)
+  integer, intent(out)           :: out_val_index(sze),non_zero_int
+  print*,'not implemented for periodic',irp_here
+  stop -1
+  !placeholder to keep compiler from complaining about out values not assigned
+  out_val=0.d0
+  out_val_index=0
+  non_zero_int=0
+!
+!  integer                        :: i
+!  integer(key_kind)              :: hash
+!  double precision               :: thresh,tmp
+!  if(is_complex) then
+!    print*,'not implemented for periodic:',irp_here
+!    stop -1
+!  endif
+!  PROVIDE ao_two_e_integrals_in_map
+!  thresh = ao_integrals_threshold
+!
+!  non_zero_int = 0
+!  if (ao_overlap_abs(j,l) < thresh) then
+!    out_val = 0.d0
+!    return
+!  endif
+!
+!  non_zero_int = 0
+!  do i=1,sze
+!    integer, external :: ao_l4
+!    double precision, external :: ao_two_e_integral
+!    !DIR$ FORCEINLINE
+!    if (ao_two_e_integral_schwartz(i,k)*ao_two_e_integral_schwartz(j,l) < thresh) then
+!      cycle
+!    endif
+!    call two_e_integrals_index(i,j,k,l,hash)
+!    call map_get(ao_integrals_map, hash,tmp)
+!    if (dabs(tmp) < thresh ) cycle
+!    non_zero_int = non_zero_int+1
+!    out_val_index(non_zero_int) = i
+!    out_val(non_zero_int) = tmp
+!  enddo
+
+end
+
+
+subroutine get_ao_two_e_integrals_non_zero_jl_complex(j,l,thresh,sze_max,sze,out_val,out_val_index,non_zero_int)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Gets multiple AO bi-electronic integral from the AO map .
+  ! All non-zero i are retrieved for j,k,l fixed.
+  END_DOC
+  double precision, intent(in)   :: thresh
+  integer, intent(in)            :: j,l, sze,sze_max
+  complex(integral_kind), intent(out) :: out_val(sze_max)
+  integer, intent(out)           :: out_val_index(2,sze_max),non_zero_int
+  print*,'not implemented for periodic',irp_here
+  stop -1
+  !placeholder to keep compiler from complaining about out values not assigned
+  out_val=0.d0
+  out_val_index=0
+  non_zero_int=0
+!
+!  integer                        :: i,k
+!  integer(key_kind)              :: hash
+!  double precision               :: tmp
+!
+!  if(is_complex) then
+!    print*,'not implemented for periodic:',irp_here
+!    stop -1
+!  endif
+!  PROVIDE ao_two_e_integrals_in_map
+!  non_zero_int = 0
+!  if (ao_overlap_abs(j,l) < thresh) then
+!    out_val = 0.d0
+!    return
+!  endif
+!
+!  non_zero_int = 0
+!  do k = 1, sze
+!   do i = 1, sze
+!     integer, external :: ao_l4
+!     double precision, external :: ao_two_e_integral
+!     !DIR$ FORCEINLINE
+!     if (ao_two_e_integral_schwartz(i,k)*ao_two_e_integral_schwartz(j,l) < thresh) then
+!       cycle
+!     endif
+!     call two_e_integrals_index(i,j,k,l,hash)
+!     call map_get(ao_integrals_map, hash,tmp)
+!     if (dabs(tmp) < thresh ) cycle
+!     non_zero_int = non_zero_int+1
+!     out_val_index(1,non_zero_int) = i
+!     out_val_index(2,non_zero_int) = k
+!     out_val(non_zero_int) = tmp
+!   enddo
+!  enddo
+
+end
+
+
+subroutine get_ao_two_e_integrals_non_zero_jl_from_list_complex(j,l,thresh,list,n_list,sze_max,out_val,out_val_index,non_zero_int)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Gets multiple AO two-electron integrals from the AO map .
+  ! All non-zero i are retrieved for j,k,l fixed.
+  END_DOC
+  double precision, intent(in)   :: thresh
+  integer, intent(in)            :: sze_max
+  integer, intent(in)            :: j,l, n_list,list(2,sze_max)
+  complex(integral_kind), intent(out) :: out_val(sze_max)
+  integer, intent(out)           :: out_val_index(2,sze_max),non_zero_int
+  print*,'not implemented for periodic',irp_here
+  stop -1
+  !placeholder to keep compiler from complaining about out values not assigned
+  out_val=0.d0
+  out_val_index=0
+  non_zero_int=0
+!
+!  integer                        :: i,k
+!  integer(key_kind)              :: hash
+!  double precision               :: tmp
+!
+!  if(is_complex) then
+!    print*,'not implemented for periodic:',irp_here
+!    stop -1
+!  endif
+!  PROVIDE ao_two_e_integrals_in_map
+!  non_zero_int = 0
+!  if (ao_overlap_abs(j,l) < thresh) then
+!    out_val = 0.d0
+!    return
+!  endif
+!
+!  non_zero_int = 0
+! integer :: kk
+!  do kk = 1, n_list
+!   k = list(1,kk)
+!   i = list(2,kk)
+!   integer, external :: ao_l4
+!   double precision, external :: ao_two_e_integral
+!   !DIR$ FORCEINLINE
+!   if (ao_two_e_integral_schwartz(i,k)*ao_two_e_integral_schwartz(j,l) < thresh) then
+!     cycle
+!   endif
+!   call two_e_integrals_index(i,j,k,l,hash)
+!   call map_get(ao_integrals_map, hash,tmp)
+!   if (dabs(tmp) < thresh ) cycle
+!   non_zero_int = non_zero_int+1
+!   out_val_index(1,non_zero_int) = i
+!   out_val_index(2,non_zero_int) = k
+!   out_val(non_zero_int) = tmp
+!  enddo
+
+end
+
+subroutine insert_into_ao_integrals_map_2(n_integrals,buffer_i, buffer_values)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Create new entry into AO map
+  END_DOC
+
+  integer, intent(in)                :: n_integrals
+  integer(key_kind), intent(inout)   :: buffer_i(n_integrals)
+  real(integral_kind), intent(inout) :: buffer_values(n_integrals)
+
+  call map_append(ao_integrals_map_2, buffer_i, buffer_values, n_integrals)
+end
+
+
diff --git a/src/ao_two_e_ints/screening.irp.f b/src/ao_two_e_ints/screening.irp.f
index d3230370..eebe0043 100644
--- a/src/ao_two_e_ints/screening.irp.f
+++ b/src/ao_two_e_ints/screening.irp.f
@@ -3,7 +3,7 @@ logical function ao_two_e_integral_zero(i,j,k,l)
   integer, intent(in) :: i,j,k,l
 
   ao_two_e_integral_zero = .False.
-  if (.not.(read_ao_two_e_integrals.or.is_periodic)) then
+  if (.not.(read_ao_two_e_integrals.or.is_complex)) then
     if (ao_overlap_abs(j,l)*ao_overlap_abs(i,k)  < ao_integrals_threshold) then
         ao_two_e_integral_zero = .True.
         return
diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f
index b6e959d7..c2a48a2f 100644
--- a/src/ao_two_e_ints/two_e_integrals.irp.f
+++ b/src/ao_two_e_ints/two_e_integrals.irp.f
@@ -345,6 +345,25 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ]
 
   double precision               :: map_mb
   PROVIDE read_ao_two_e_integrals io_ao_two_e_integrals
+  if (is_complex) then
+    if (read_ao_two_e_integrals) then
+      print*,'Reading the AO integrals (periodic)'
+      call map_load_from_disk(trim(ezfio_filename)//'/work/ao_ints_complex_1',ao_integrals_map)
+      call map_load_from_disk(trim(ezfio_filename)//'/work/ao_ints_complex_2',ao_integrals_map_2)
+      print*, 'AO integrals provided (periodic)'
+      ao_two_e_integrals_in_map = .True.
+      return
+    else if (read_df_ao_integrals) then
+      call ao_map_fill_from_df
+      print*, 'AO integrals provided from 3-index ao ints (periodic)'
+      ao_two_e_integrals_in_map = .True.
+      return
+    else
+      print*,'calculation of periodic AOs not implemented'
+      stop -1
+    endif
+
+  else
   if (read_ao_two_e_integrals) then
     print*,'Reading the AO integrals'
     call map_load_from_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map)
@@ -361,10 +380,9 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ]
       ! Avoid openMP
       integral = ao_two_e_integral(1,1,1,1)
     endif
-
     integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
     call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'ao_integrals')
-
+  
     character(len=:), allocatable :: task
     allocate(character(len=ao_num*12) :: task)
     write(fmt,*) '(', ao_num, '(I5,X,I5,''|''))'
@@ -417,7 +435,7 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ]
     endif
 
   endif
-
+  endif
 END_PROVIDER
 
 BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz,(ao_num,ao_num)  ]
diff --git a/src/bitmask/bitmasks.irp.f b/src/bitmask/bitmasks.irp.f
index 91617397..a13644cd 100644
--- a/src/bitmask/bitmasks.irp.f
+++ b/src/bitmask/bitmasks.irp.f
@@ -80,9 +80,23 @@ BEGIN_PROVIDER [ integer(bit_kind), HF_bitmask, (N_int,2)]
   integer                        :: occ(elec_alpha_num)
   
   HF_bitmask = 0_bit_kind
-  do i=1,elec_alpha_num
-    occ(i) = i
-  enddo
+  if (is_complex) then
+    integer :: kpt,korb
+    kpt=1
+    korb=1
+    do i=1,elec_alpha_num
+      occ(i) = korb + (kpt-1) * mo_num_per_kpt
+      kpt += 1
+      if (kpt > kpt_num) then
+        kpt = 1
+        korb += 1
+      endif
+    enddo
+  else
+    do i=1,elec_alpha_num
+      occ(i) = i
+    enddo
+  endif
   call list_to_bitstring( HF_bitmask(1,1), occ, elec_alpha_num, N_int)
   ! elec_alpha_num <= elec_beta_num, so occ is already OK.
   call list_to_bitstring( HF_bitmask(1,2), occ, elec_beta_num, N_int)
@@ -240,3 +254,254 @@ BEGIN_PROVIDER [integer(bit_kind), closed_shell_ref_bitmask, (N_int,2)]
     closed_shell_ref_bitmask(i,2) = ior(ref_bitmask(i,2),act_bitmask(i,2))
   enddo
 END_PROVIDER
+ 
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+!BEGIN_PROVIDER [ integer(bit_kind), full_ijkl_bitmask, (N_int) ]
+!  implicit none
+!  BEGIN_DOC
+!  ! Bitmask to include all possible MOs
+!  END_DOC
+!  
+!  integer                        :: i,j,k
+!  k=0
+!  do j=1,N_int
+!    full_ijkl_bitmask(j) = 0_bit_kind
+!    do i=0,bit_kind_size-1
+!      k=k+1
+!      if (mo_class(k) /= 'Deleted') then
+!        full_ijkl_bitmask(j) = ibset(full_ijkl_bitmask(j),i)
+!      endif
+!      if (k == mo_num) exit
+!    enddo
+!  enddo
+!END_PROVIDER
+!
+!BEGIN_PROVIDER [ integer(bit_kind), full_ijkl_bitmask_4, (N_int,4) ]
+!  implicit none
+!  integer                        :: i
+!  do i=1,N_int
+!    full_ijkl_bitmask_4(i,1) = full_ijkl_bitmask(i)
+!    full_ijkl_bitmask_4(i,2) = full_ijkl_bitmask(i)
+!    full_ijkl_bitmask_4(i,3) = full_ijkl_bitmask(i)
+!    full_ijkl_bitmask_4(i,4) = full_ijkl_bitmask(i)
+!  enddo
+!END_PROVIDER
+!
+!BEGIN_PROVIDER [ integer(bit_kind), core_inact_act_bitmask_4, (N_int,4) ]
+!  implicit none
+!  integer                        :: i
+!  do i=1,N_int
+!    core_inact_act_bitmask_4(i,1) = reunion_of_core_inact_act_bitmask(i,1)
+!    core_inact_act_bitmask_4(i,2) = reunion_of_core_inact_act_bitmask(i,1)
+!    core_inact_act_bitmask_4(i,3) = reunion_of_core_inact_act_bitmask(i,1)
+!    core_inact_act_bitmask_4(i,4) = reunion_of_core_inact_act_bitmask(i,1)
+!  enddo
+!END_PROVIDER
+!
+!BEGIN_PROVIDER [ integer(bit_kind), virt_bitmask_4, (N_int,4) ]
+!  implicit none
+!  integer                        :: i
+!  do i=1,N_int
+!    virt_bitmask_4(i,1) = virt_bitmask(i,1)
+!    virt_bitmask_4(i,2) = virt_bitmask(i,1)
+!    virt_bitmask_4(i,3) = virt_bitmask(i,1)
+!    virt_bitmask_4(i,4) = virt_bitmask(i,1)
+!  enddo
+!END_PROVIDER
+!
+!
+!
+!
+BEGIN_PROVIDER [ integer(bit_kind), HF_bitmask_kpts, (N_int,2,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Hartree Fock bit mask
+  END_DOC
+  integer                        :: i,k
+  
+  hf_bitmask_kpts = 0_bit_kind
+  do k=1,kpt_num
+    do i=1,N_int
+      hf_bitmask_kpts(i,1,k) = iand(hf_bitmask(i,1),kpts_bitmask(i,k))
+      hf_bitmask_kpts(i,2,k) = iand(hf_bitmask(i,2),kpts_bitmask(i,k))
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer(bit_kind), ref_bitmask_kpts, (N_int,2,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Reference bit mask, used in Slater rules, chosen as Hartree-Fock bitmask
+  END_DOC
+  ref_bitmask_kpts = HF_bitmask_kpts
+END_PROVIDER
+
+
+
+BEGIN_PROVIDER [ integer(bit_kind), generators_bitmask_kpts, (N_int,2,6,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Bitmasks for generator determinants.
+  ! (N_int, alpha/beta, hole/particle, generator).
+  !
+  ! 3rd index is :
+  !
+  ! * 1 : hole     for single exc
+  !
+  ! * 2 : particle for single exc
+  !
+  ! * 3 : hole     for 1st exc of double
+  !
+  ! * 4 : particle for 1st exc of double
+  !
+  ! * 5 : hole     for 2nd exc of double
+  !
+  ! * 6 : particle for 2nd exc of double
+  !
+  END_DOC
+  logical                        :: exists
+  PROVIDE ezfio_filename full_ijkl_bitmask 
+  
+  integer                        :: ispin, i, k
+  do k=1,kpt_num
+      do ispin=1,2
+          do i=1,N_int
+            generators_bitmask_kpts(i,ispin,s_hole ,k) = reunion_of_inact_act_bitmask_kpts(i,ispin,k)
+            generators_bitmask_kpts(i,ispin,s_part ,k) = reunion_of_act_virt_bitmask_kpts(i,ispin,k)
+            generators_bitmask_kpts(i,ispin,d_hole1,k) = reunion_of_inact_act_bitmask_kpts(i,ispin,k)
+            generators_bitmask_kpts(i,ispin,d_part1,k) = reunion_of_act_virt_bitmask_kpts(i,ispin,k)
+            generators_bitmask_kpts(i,ispin,d_hole2,k) = reunion_of_inact_act_bitmask_kpts(i,ispin,k)
+            generators_bitmask_kpts(i,ispin,d_part2,k) = reunion_of_act_virt_bitmask_kpts(i,ispin,k)
+          enddo
+      enddo
+  enddo
+  
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer(bit_kind), reunion_of_core_inact_bitmask_kpts, (N_int,2,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Reunion of the core and inactive and virtual bitmasks
+  END_DOC
+  integer                        :: i,k
+  do k=1,kpt_num
+    do i = 1, N_int
+      reunion_of_core_inact_bitmask_kpts(i,1,k) = ior(core_bitmask_kpts(i,1,k),inact_bitmask_kpts(i,1,k))
+      reunion_of_core_inact_bitmask_kpts(i,2,k) = ior(core_bitmask_kpts(i,2,k),inact_bitmask_kpts(i,2,k))
+    enddo
+  enddo
+END_PROVIDER
+
+
+BEGIN_PROVIDER [integer(bit_kind), reunion_of_inact_act_bitmask_kpts, (N_int,2,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Reunion of the  inactive and active bitmasks
+  END_DOC
+  integer                        :: i,k
+  
+  do k=1,kpt_num
+    do i = 1, N_int
+      reunion_of_inact_act_bitmask_kpts(i,1,k) = ior(inact_bitmask_kpts(i,1,k),act_bitmask_kpts(i,1,k))
+      reunion_of_inact_act_bitmask_kpts(i,2,k) = ior(inact_bitmask_kpts(i,2,k),act_bitmask_kpts(i,2,k))
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [integer(bit_kind), reunion_of_act_virt_bitmask_kpts, (N_int,2,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Reunion of the  inactive and active bitmasks
+  END_DOC
+  integer                        :: i,k
+  
+  do k=1,kpt_num
+    do i = 1, N_int
+      reunion_of_act_virt_bitmask_kpts(i,1,k) = ior(virt_bitmask_kpts(i,1,k),act_bitmask_kpts(i,1,k))
+      reunion_of_act_virt_bitmask_kpts(i,2,k) = ior(virt_bitmask_kpts(i,2,k),act_bitmask_kpts(i,2,k))
+    enddo
+  enddo
+END_PROVIDER
+
+
+BEGIN_PROVIDER [integer(bit_kind), reunion_of_core_inact_act_bitmask_kpts, (N_int,2,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Reunion of the core, inactive and active bitmasks
+  END_DOC
+  integer                        :: i,k
+
+  do k=1,kpt_num
+    do i = 1, N_int
+      reunion_of_core_inact_act_bitmask_kpts(i,1,k) = ior(reunion_of_core_inact_bitmask_kpts(i,1,k),act_bitmask_kpts(i,1,k))
+      reunion_of_core_inact_act_bitmask_kpts(i,2,k) = ior(reunion_of_core_inact_bitmask_kpts(i,2,k),act_bitmask_kpts(i,2,k))
+    enddo
+  enddo
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ integer(bit_kind), reunion_of_bitmask_kpts, (N_int,2,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Reunion of the inactive, active and virtual bitmasks
+  END_DOC
+  integer                        :: i,k
+  do k=1,kpt_num
+    do i = 1, N_int
+      reunion_of_bitmask_kpts(i,1,k) = ior(ior(act_bitmask_kpts(i,1,k),inact_bitmask_kpts(i,1,k)),virt_bitmask_kpts(i,1,k))
+      reunion_of_bitmask_kpts(i,2,k) = ior(ior(act_bitmask_kpts(i,2,k),inact_bitmask_kpts(i,2,k)),virt_bitmask_kpts(i,2,k))
+    enddo
+  enddo
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ integer(bit_kind), inact_virt_bitmask_kpts, (N_int,2,kpt_num)]
+&BEGIN_PROVIDER [ integer(bit_kind), core_inact_virt_bitmask_kpts, (N_int,2,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Reunion of the inactive and virtual bitmasks
+  END_DOC
+  integer                        :: i,k
+  do k=1,kpt_num
+    do i = 1, N_int
+      inact_virt_bitmask_kpts(i,1,k) = ior(inact_bitmask_kpts(i,1,k),virt_bitmask_kpts(i,1,k))
+      inact_virt_bitmask_kpts(i,2,k) = ior(inact_bitmask_kpts(i,2,k),virt_bitmask_kpts(i,2,k))
+      core_inact_virt_bitmask_kpts(i,1,k) = ior(core_bitmask_kpts(i,1,k),inact_virt_bitmask_kpts(i,1,k))
+      core_inact_virt_bitmask_kpts(i,2,k) = ior(core_bitmask_kpts(i,2,k),inact_virt_bitmask_kpts(i,2,k))
+    enddo
+  enddo
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ integer(bit_kind), unpaired_alpha_electrons_kpts, (N_int,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Bitmask reprenting the unpaired alpha electrons in the HF_bitmask
+  END_DOC
+  integer                        :: i,k
+  unpaired_alpha_electrons_kpts = 0_bit_kind
+  do k = 1, kpt_num
+    do i = 1, N_int
+      unpaired_alpha_electrons_kpts(i,k) = xor(HF_bitmask_kpts(i,1,k),HF_bitmask_kpts(i,2,k))
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [integer(bit_kind), closed_shell_ref_bitmask_kpts, (N_int,2,kpt_num)]
+  implicit none
+  integer                        :: i,k
+
+  closed_shell_ref_bitmask_kpts = 0_bit_kind
+  do k=1,kpt_num
+    do i = 1, N_int
+      closed_shell_ref_bitmask_kpts(i,1,k) = ior(ref_bitmask_kpts(i,1,k),act_bitmask_kpts(i,1,k))
+      closed_shell_ref_bitmask_kpts(i,2,k) = ior(ref_bitmask_kpts(i,2,k),act_bitmask_kpts(i,2,k))
+    enddo
+  enddo
+END_PROVIDER
+ 
diff --git a/src/bitmask/bitmasks_routines.irp.f b/src/bitmask/bitmasks_routines.irp.f
index 5c4bf347..8a374e94 100644
--- a/src/bitmask/bitmasks_routines.irp.f
+++ b/src/bitmask/bitmasks_routines.irp.f
@@ -214,6 +214,37 @@ subroutine print_spindet(string,Nint)
 
 end
 
+subroutine debug_single_spindet(string,Nint)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Subroutine to print the content of a determinant in '+-' notation and
+  ! hexadecimal representation.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: string(Nint)
+  character*(2048)                :: output(1)
+  call bitstring_to_hexa( output(1), string(1), Nint )
+  print *,  trim(output(1))
+  call print_single_spindet(string,Nint)
+
+end
+
+subroutine print_single_spindet(string,Nint)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Subroutine to print the content of a determinant using the '+-' notation
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: string(Nint)
+  character*(2048)                :: output(1)
+
+  call bitstring_to_str( output(1), string(1), Nint )
+  print *,  trim(output(1))
+
+end
+
 logical function is_integer_in_string(bite,string,Nint)
   use bitmasks
  implicit none
diff --git a/src/bitmask/core_inact_act_virt.irp.f b/src/bitmask/core_inact_act_virt.irp.f
index d83d69e9..e304aa84 100644
--- a/src/bitmask/core_inact_act_virt.irp.f
+++ b/src/bitmask/core_inact_act_virt.irp.f
@@ -445,3 +445,515 @@ BEGIN_PROVIDER [integer, list_all_but_del_orb, (n_all_but_del_orb)]
 
 END_PROVIDER 
 
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+BEGIN_PROVIDER [ integer(bit_kind), kpts_bitmask , (N_int,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Bitmask identifying each kpt
+  END_DOC
+  integer :: k,i,di
+  integer :: tmp_mo_list(mo_num_per_kpt)
+  kpts_bitmask  = 0_bit_kind
+  print*,'kpts bitmask'
+  do k=1,kpt_num
+    di=(k-1)*mo_num_per_kpt
+    do i=1,mo_num_per_kpt
+      tmp_mo_list(i) = i+di
+    enddo
+    call list_to_bitstring( kpts_bitmask(1,k), tmp_mo_list, mo_num_per_kpt, N_int)
+    !debugging
+    print*,'k = ',k
+    call debug_single_spindet(kpts_bitmask(1,k),N_int)
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, n_core_orb_kpts, (kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Number of core MOs
+  END_DOC
+  integer                        :: i,k,kshift
+
+  do k=1,kpt_num
+    n_core_orb_kpts(k) = 0
+    kshift = (k-1)*mo_num_per_kpt
+    do i = 1, mo_num_per_kpt
+      if(mo_class(i+kshift) == 'Core')then
+        n_core_orb_kpts(k) += 1
+      endif
+    enddo
+  enddo
+  
+!  call write_int(6,n_core_orb, 'Number of core     MOs')
+   
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, n_inact_orb_kpts, (kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Number of inactive MOs
+  END_DOC
+  integer                        :: i,k,kshift
+
+  do k=1,kpt_num
+    n_inact_orb_kpts(k) = 0
+    kshift = (k-1)*mo_num_per_kpt
+    do i = 1, mo_num_per_kpt
+      if(mo_class(i+kshift) == 'Inactive')then
+        n_inact_orb_kpts(k) += 1
+      endif
+    enddo
+  enddo
+  
+!  call write_int(6,n_inact_orb, 'Number of inactive MOs')
+   
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, n_act_orb_kpts, (kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Number of active MOs
+  END_DOC
+  integer                        :: i,k,kshift
+
+  do k=1,kpt_num
+    n_act_orb_kpts(k) = 0
+    kshift = (k-1)*mo_num_per_kpt
+    do i = 1, mo_num_per_kpt
+      if(mo_class(i+kshift) == 'Active')then
+        n_act_orb_kpts(k) += 1
+      endif
+    enddo
+  enddo
+  
+!  call write_int(6,n_act_orb, 'Number of active   MOs')
+   
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, n_virt_orb_kpts, (kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Number of virtual MOs
+  END_DOC
+  integer                        :: i,k,kshift
+
+  do k=1,kpt_num
+    n_virt_orb_kpts(k) = 0
+    kshift = (k-1)*mo_num_per_kpt
+    do i = 1, mo_num_per_kpt
+      if(mo_class(i+kshift) == 'Virtual')then
+        n_virt_orb_kpts(k) += 1
+      endif
+    enddo
+  enddo
+  
+!  call write_int(6,n_virt_orb, 'Number of virtual  MOs')
+   
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, n_del_orb_kpts, (kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Number of deleted MOs
+  END_DOC
+  integer                        :: i,k,kshift
+
+  do k=1,kpt_num
+    n_del_orb_kpts(k) = 0
+    kshift = (k-1)*mo_num_per_kpt
+    do i = 1, mo_num_per_kpt
+      if(mo_class(i+kshift) == 'Deleted')then
+        n_del_orb_kpts(k) += 1
+      endif
+    enddo
+  enddo
+  
+!  call write_int(6,n_del_orb, 'Number of deleted  MOs')
+   
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, n_core_inact_orb_kpts, (kpt_num) ]
+  !todo: finish implementation for kpts (will need kpts_bitmask)
+  implicit none
+  BEGIN_DOC
+  ! n_core + n_inact
+  END_DOC
+  integer                        :: i,k
+  do k=1,kpt_num
+  n_core_inact_orb_kpts(k) = 0
+  do i = 1, N_int
+    n_core_inact_orb_kpts(k) += popcnt(iand(kpts_bitmask(i,k),reunion_of_core_inact_bitmask(i,1)))
+  enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [integer, n_inact_act_orb_kpts, (kpt_num) ]
+   implicit none
+  BEGIN_DOC
+  ! n_inact + n_act
+  END_DOC
+  integer :: k
+  do k=1,kpt_num
+    n_inact_act_orb_kpts(k) = (n_inact_orb_kpts(k)+n_act_orb_kpts(k))
+  enddo
+END_PROVIDER
+ 
+BEGIN_PROVIDER [integer, dim_list_core_orb_kpts]
+  implicit none
+  BEGIN_DOC
+  ! dimensions for the allocation of list_core.
+  ! it is at least 1
+  END_DOC
+   dim_list_core_orb_kpts = max(maxval(n_core_orb_kpts),1)
+END_PROVIDER
+
+BEGIN_PROVIDER [integer, dim_list_inact_orb_kpts]
+   implicit none
+   BEGIN_DOC
+   ! dimensions for the allocation of list_inact.
+   ! it is at least 1
+   END_DOC
+   dim_list_inact_orb_kpts = max(maxval(n_inact_orb_kpts),1)
+END_PROVIDER
+
+BEGIN_PROVIDER [integer, dim_list_core_inact_orb_kpts]
+  implicit none
+  BEGIN_DOC
+  ! dimensions for the allocation of list_core.
+  ! it is at least 1
+  END_DOC
+   dim_list_core_inact_orb_kpts = max(maxval(n_core_inact_orb_kpts),1)
+END_PROVIDER
+
+BEGIN_PROVIDER [integer, dim_list_act_orb_kpts]
+   implicit none
+   BEGIN_DOC
+   ! dimensions for the allocation of list_act.
+   ! it is at least 1
+   END_DOC
+   dim_list_act_orb_kpts = max(maxval(n_act_orb_kpts),1)
+END_PROVIDER
+
+BEGIN_PROVIDER [integer, dim_list_virt_orb_kpts]
+   implicit none
+   BEGIN_DOC
+   ! dimensions for the allocation of list_virt.
+   ! it is at least 1
+   END_DOC
+   dim_list_virt_orb_kpts = max(maxval(n_virt_orb_kpts),1)
+END_PROVIDER
+
+BEGIN_PROVIDER [integer, dim_list_del_orb_kpts]
+   implicit none
+   BEGIN_DOC
+   ! dimensions for the allocation of list_del.
+   ! it is at least 1
+   END_DOC
+   dim_list_del_orb_kpts = max(maxval(n_del_orb_kpts),1)
+END_PROVIDER
+
+BEGIN_PROVIDER [integer, dim_list_core_inact_act_orb_kpts]
+   implicit none
+   BEGIN_DOC
+   ! dimensions for the allocation of list_core_inact_act.
+   ! it is at least 1
+   END_DOC
+   dim_list_core_inact_act_orb_kpts = max(maxval(n_core_inact_act_orb_kpts),1)
+END_PROVIDER
+
+BEGIN_PROVIDER [integer, dim_list_inact_act_orb_kpts]
+   implicit none
+   BEGIN_DOC
+   ! dimensions for the allocation of list_inact_act.
+   ! it is at least 1
+   END_DOC
+   dim_list_inact_act_orb_kpts = max(maxval(n_inact_act_orb_kpts),1)
+END_PROVIDER
+
+BEGIN_PROVIDER [integer, n_core_inact_act_orb_kpts, (kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  !  Number of core inactive and active MOs
+  END_DOC
+  integer :: k
+  do k=1,kpt_num
+    n_core_inact_act_orb_kpts(k) = (n_core_orb_kpts(k) + n_inact_orb_kpts(k) + n_act_orb_kpts(k))
+  enddo
+END_PROVIDER
+ 
+
+
+
+BEGIN_PROVIDER [ integer(bit_kind), core_bitmask_kpts , (N_int,2,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Bitmask identifying the core MOs 
+  END_DOC
+  integer :: k,i
+  core_bitmask_kpts  = 0_bit_kind
+  do k=1,kpt_num
+    do i=1,N_int
+      core_bitmask_kpts(i,1,k) = iand(core_bitmask(i,1),kpts_bitmask(i,k))
+      core_bitmask_kpts(i,2,k) = iand(core_bitmask(i,2),kpts_bitmask(i,k))
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer(bit_kind), inact_bitmask_kpts , (N_int,2,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Bitmask identifying the inactive MOs 
+  END_DOC
+  integer :: k,i
+  inact_bitmask_kpts  = 0_bit_kind
+  do k=1,kpt_num
+    do i=1,N_int
+      inact_bitmask_kpts(i,1,k) = iand(inact_bitmask(i,1),kpts_bitmask(i,k))
+      inact_bitmask_kpts(i,2,k) = iand(inact_bitmask(i,2),kpts_bitmask(i,k))
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer(bit_kind), act_bitmask_kpts , (N_int,2,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Bitmask identifying the active MOs 
+  END_DOC
+  integer :: k,i
+  act_bitmask_kpts  = 0_bit_kind
+  do k=1,kpt_num
+    do i=1,N_int
+      act_bitmask_kpts(i,1,k) = iand(act_bitmask(i,1),kpts_bitmask(i,k))
+      act_bitmask_kpts(i,2,k) = iand(act_bitmask(i,2),kpts_bitmask(i,k))
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer(bit_kind), virt_bitmask_kpts , (N_int,2,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Bitmask identifying the virtual MOs 
+  END_DOC
+  integer :: k,i
+  virt_bitmask_kpts  = 0_bit_kind
+  do k=1,kpt_num
+    do i=1,N_int
+      virt_bitmask_kpts(i,1,k) = iand(virt_bitmask(i,1),kpts_bitmask(i,k))
+      virt_bitmask_kpts(i,2,k) = iand(virt_bitmask(i,2),kpts_bitmask(i,k))
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer(bit_kind), del_bitmask_kpts , (N_int,2,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Bitmask identifying the deleted MOs 
+  END_DOC
+  integer :: k,i
+  del_bitmask_kpts  = 0_bit_kind
+  do k=1,kpt_num
+    do i=1,N_int
+      del_bitmask_kpts(i,1,k) = iand(del_bitmask(i,1),kpts_bitmask(i,k))
+      del_bitmask_kpts(i,2,k) = iand(del_bitmask(i,2),kpts_bitmask(i,k))
+    enddo
+  enddo
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, list_core_kpts        , (dim_list_core_orb_kpts,kpt_num) ]
+&BEGIN_PROVIDER [ integer, list_core_kpts_reverse, (mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! List of MO indices which are in the core.
+   END_DOC
+   integer                        :: i, n,k,di
+   list_core_kpts = 0
+   list_core_kpts_reverse = 0
+
+   do k=1,kpt_num
+     n=0
+     di = (k-1)*mo_num_per_kpt
+     do i = 1, mo_num_per_kpt
+       if(mo_class(i+di) == 'Core')then
+         n += 1
+         list_core_kpts(n,k) = i
+         list_core_kpts_reverse(i,k) = n
+       endif
+     enddo
+     print *,  'Core MOs: ',k
+     print *,  list_core_kpts(1:n_core_orb_kpts(k),k)
+   enddo
+   
+END_PROVIDER
+ 
+ BEGIN_PROVIDER [ integer, list_inact_kpts        , (dim_list_inact_orb_kpts,kpt_num) ]
+&BEGIN_PROVIDER [ integer, list_inact_kpts_reverse, (mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! List of MO indices which are inactive.
+   END_DOC
+   integer                        :: i, n,k,di
+   list_inact_kpts = 0
+   list_inact_kpts_reverse = 0
+
+   do k=1,kpt_num
+     n=0
+     di = (k-1)*mo_num_per_kpt
+     do i = 1, mo_num_per_kpt
+       if(mo_class(i+di) == 'Inactive')then
+         n += 1
+         list_inact_kpts(n,k) = i
+         list_inact_kpts_reverse(i,k) = n
+       endif
+     enddo
+     print *,  'Inactive MOs: ',k
+     print *,  list_inact_kpts(1:n_inact_orb_kpts(k),k)
+   enddo
+   
+END_PROVIDER
+ 
+ BEGIN_PROVIDER [ integer, list_virt_kpts        , (dim_list_virt_orb_kpts,kpt_num) ]
+&BEGIN_PROVIDER [ integer, list_virt_kpts_reverse, (mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! List of MO indices which are virtual.
+   END_DOC
+   integer                        :: i, n,k,di
+   list_virt_kpts = 0
+   list_virt_kpts_reverse = 0
+
+   do k=1,kpt_num
+     n=0
+     di = (k-1)*mo_num_per_kpt
+     do i = 1, mo_num_per_kpt
+       if(mo_class(i+di) == 'Virtual')then
+         n += 1
+         list_virt_kpts(n,k) = i
+         list_virt_kpts_reverse(i,k) = n
+       endif
+     enddo
+     print *,  'Virtual MOs: ',k
+     print *,  list_virt_kpts(1:n_virt_orb_kpts(k),k)
+   enddo
+   
+END_PROVIDER
+ 
+ BEGIN_PROVIDER [ integer, list_del_kpts        , (dim_list_del_orb_kpts,kpt_num) ]
+&BEGIN_PROVIDER [ integer, list_del_kpts_reverse, (mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! List of MO indices which are deleted.
+   END_DOC
+   integer                        :: i, n,k,di
+   list_del_kpts = 0
+   list_del_kpts_reverse = 0
+
+   do k=1,kpt_num
+     n=0
+     di = (k-1)*mo_num_per_kpt
+     do i = 1, mo_num_per_kpt
+       if(mo_class(i+di) == 'Deleted')then
+         n += 1
+         list_del_kpts(n,k) = i
+         list_del_kpts_reverse(i,k) = n
+       endif
+     enddo
+     print *,  'Deleted MOs: ',k
+     print *,  list_del_kpts(1:n_del_orb_kpts(k),k)
+   enddo
+   
+END_PROVIDER
+ 
+ BEGIN_PROVIDER [ integer, list_act_kpts        , (dim_list_act_orb_kpts,kpt_num) ]
+&BEGIN_PROVIDER [ integer, list_act_kpts_reverse, (mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! List of MO indices which are active.
+   END_DOC
+   integer                        :: i, n,k,di
+   list_act_kpts = 0
+   list_act_kpts_reverse = 0
+
+   do k=1,kpt_num
+     n=0
+     di = (k-1)*mo_num_per_kpt
+     do i = 1, mo_num_per_kpt
+       if(mo_class(i+di) == 'Active')then
+         n += 1
+         list_act_kpts(n,k) = i
+         list_act_kpts_reverse(i,k) = n
+       endif
+     enddo
+     print *,  'Active MOs: ',k
+     print *,  list_act_kpts(1:n_act_orb_kpts(k),k)
+   enddo
+   
+END_PROVIDER
+
+!todo: finish below for kpts
+ 
+ BEGIN_PROVIDER [ integer, list_core_inact_kpts        , (dim_list_core_inact_orb_kpts,kpt_num) ]
+&BEGIN_PROVIDER [ integer, list_core_inact_kpts_reverse, (mo_num_per_kpt,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! List of indices of the core and inactive MOs
+  END_DOC
+  integer                        :: i,itmp,k
+  list_core_inact_kpts_reverse = 0
+  do k=1,kpt_num
+    !call bitstring_to_list(reunion_of_core_inact_bitmask(1,1), list_core_inact, itmp, N_int)
+    call bitstring_to_list(reunion_of_core_inact_bitmask_kpts(1,1,k), list_core_inact_kpts(1,k), itmp, N_int)
+    ASSERT (itmp == n_core_inact_orb_kpts(k))
+    do i = 1, n_core_inact_orb_kpts(k)
+      list_core_inact_kpts_reverse(list_core_inact_kpts(i,k),k) = i
+    enddo
+    print *,  'Core and Inactive MOs: ',k
+    print *,  list_core_inact_kpts(1:n_core_inact_orb_kpts(k),k)
+  enddo
+END_PROVIDER
+ 
+ 
+ BEGIN_PROVIDER [ integer, list_core_inact_act_kpts        , (dim_list_core_inact_act_orb_kpts,kpt_num) ]
+&BEGIN_PROVIDER [ integer, list_core_inact_act_kpts_reverse, (mo_num_per_kpt,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! List of indices of the core inactive and active MOs
+  END_DOC
+  integer                        :: i,itmp,k
+  list_core_inact_act_kpts_reverse = 0
+  do k=1,kpt_num
+    !call bitstring_to_list(reunion_of_core_inact_act_bitmask(1,1), list_core_inact_act, itmp, N_int)
+    call bitstring_to_list(reunion_of_core_inact_act_bitmask_kpts(1,1,k), list_core_inact_act_kpts(1,k), itmp, N_int)
+    ASSERT (itmp == n_core_inact_act_orb_kpts(k))
+    do i = 1, n_core_inact_act_orb_kpts(k)
+      list_core_inact_act_kpts_reverse(list_core_inact_act_kpts(i,k),k) = i
+    enddo
+    print *,  'Core, Inactive and Active MOs: ',k
+    print *,  list_core_inact_act_kpts(1:n_core_inact_act_orb_kpts(k),k)
+  enddo
+END_PROVIDER
+ 
+ 
+ BEGIN_PROVIDER [ integer, list_inact_act_kpts        , (dim_list_inact_act_orb_kpts,kpt_num) ]
+&BEGIN_PROVIDER [ integer, list_inact_act_kpts_reverse, (mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! List of indices of the inactive and active MOs
+   END_DOC
+   integer                        :: i,itmp,k
+   list_inact_act_kpts_reverse = 0
+   do k=1,kpt_num
+     call bitstring_to_list(reunion_of_inact_act_bitmask_kpts(1,1,k), list_inact_act_kpts(1,k), itmp, N_int)
+     ASSERT (itmp == n_inact_act_orb_kpts(k))
+     do i = 1, n_inact_act_orb_kpts(k)
+       list_inact_act_kpts_reverse(list_inact_act_kpts(i,k),k) = i
+     enddo
+     print *,  'Inactive and Active MOs: ',k
+     print *,  list_inact_act_kpts(1:n_inact_act_orb_kpts(k),k)
+   enddo
+END_PROVIDER
+
+!todo: {n,list}_all_but_del_orb_kpts 
diff --git a/src/bitmask/track_orb.irp.f b/src/bitmask/track_orb.irp.f
index 1cdde9cb..9e96cca5 100644
--- a/src/bitmask/track_orb.irp.f
+++ b/src/bitmask/track_orb.irp.f
@@ -7,56 +7,148 @@ BEGIN_PROVIDER [ double precision, mo_coef_begin_iteration, (ao_num,mo_num) ]
    END_DOC
 END_PROVIDER
 
+BEGIN_PROVIDER [ complex*16, mo_coef_begin_iteration_complex, (ao_num,mo_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Void provider to store the coefficients of the |MO| basis at the beginning of the SCF iteration
+   !
+   ! Useful to track some orbitals
+   END_DOC
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, mo_coef_begin_iteration_kpts, (ao_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Void provider to store the coefficients of the |MO| basis at the beginning of the SCF iteration
+   !
+   ! Useful to track some orbitals
+   END_DOC
+END_PROVIDER
+
 subroutine initialize_mo_coef_begin_iteration
  implicit none
  BEGIN_DOC
  !
  ! Initialize :c:data:`mo_coef_begin_iteration` to the current :c:data:`mo_coef`
  END_DOC
- mo_coef_begin_iteration = mo_coef
+ if (is_complex) then
+   !mo_coef_begin_iteration_complex = mo_coef_complex
+   mo_coef_begin_iteration_kpts = mo_coef_kpts
+ else
+   mo_coef_begin_iteration = mo_coef
+ endif
 end
 
 subroutine reorder_core_orb
- implicit none
- BEGIN_DOC
-! routines that takes the current :c:data:`mo_coef` and reorder the core orbitals (see :c:data:`list_core` and :c:data:`n_core_orb`) according to the overlap with :c:data:`mo_coef_begin_iteration`
- END_DOC
- integer :: i,j,iorb
- integer :: k,l
- double precision, allocatable :: accu(:)
- integer, allocatable :: index_core_orb(:),iorder(:)
- double precision, allocatable :: mo_coef_tmp(:,:)
- allocate(accu(mo_num),index_core_orb(n_core_orb),iorder(mo_num))
- allocate(mo_coef_tmp(ao_num,mo_num))
+  implicit none
+  BEGIN_DOC
+  ! TODO: test for complex
+ ! routines that takes the current :c:data:`mo_coef` and reorder the core orbitals (see :c:data:`list_core` and :c:data:`n_core_orb`) according to the overlap with :c:data:`mo_coef_begin_iteration`
+  END_DOC
+  integer :: i,j,iorb
+  integer :: k,l
+  integer, allocatable :: index_core_orb(:),iorder(:)
+  double precision, allocatable :: accu(:)
+  integer :: i1,i2
+  if (is_complex) then
+    complex*16, allocatable :: accu_c(:)
+    !allocate(accu(mo_num),accu_c(mo_num),index_core_orb(n_core_orb),iorder(mo_num))
+    !do i = 1, n_core_orb
+    !  iorb = list_core(i)
+    !  do j = 1, mo_num
+    !    accu(j) = 0.d0
+    !    accu_c(j) = (0.d0,0.d0)
+    !    iorder(j) = j
+    !    do k = 1, ao_num
+    !      do l = 1, ao_num
+    !        accu_c(j) += dconjg(mo_coef_begin_iteration_complex(k,iorb)) * &
+    !                     mo_coef_complex(l,j) * ao_overlap_complex(k,l)
+    !      enddo
+    !    enddo
+    !    accu(j) = -cdabs(accu_c(j))
+    !  enddo
+    !  call dsort(accu,iorder,mo_num)
+    !  index_core_orb(i) = iorder(1)
+    !enddo
 
- do i = 1, n_core_orb
-  iorb = list_core(i)
-  do j = 1, mo_num
-   accu(j) = 0.d0
-   iorder(j) = j
-   do k = 1, ao_num
-    do l = 1, ao_num
-     accu(j) += mo_coef_begin_iteration(k,iorb) * mo_coef(l,j) * ao_overlap(k,l)
+    !complex*16 :: x_c
+    !do j = 1, n_core_orb
+    !  i1 = list_core(j)
+    !  i2 = index_core_orb(j)
+    !  do i=1,ao_num
+    !     x_c = mo_coef_complex(i,i1)
+    !     mo_coef_complex(i,i1) = mo_coef_complex(i,i2)
+    !     mo_coef_complex(i,i2) = x_c
+    !  enddo
+    !enddo
+    !!call loc_cele_routine
+   
+    !deallocate(accu,accu_c,index_core_orb, iorder)
+    allocate(accu(mo_num_per_kpt),accu_c(mo_num_per_kpt),index_core_orb(n_core_orb),iorder(mo_num_per_kpt))
+    integer :: kk
+    do kk=1,kpt_num
+      do i = 1, n_core_orb_kpts(kk)
+        iorb = list_core_kpts(i,kk)
+        do j = 1, mo_num_per_kpt
+          accu(j) = 0.d0
+          accu_c(j) = (0.d0,0.d0)
+          iorder(j) = j
+          do k = 1, ao_num_per_kpt
+            do l = 1, ao_num_per_kpt
+              accu_c(j) += dconjg(mo_coef_begin_iteration_kpts(k,iorb,kk)) * &
+                           mo_coef_kpts(l,j,kk) * ao_overlap_kpts(k,l,kk)
+            enddo
+          enddo
+          accu(j) = -cdabs(accu_c(j))
+        enddo
+        call dsort(accu,iorder,mo_num_per_kpt)
+        index_core_orb(i) = iorder(1)
+      enddo
+
+      complex*16 :: x_c
+      do j = 1, n_core_orb
+        i1 = list_core_kpts(j,kk)
+        i2 = index_core_orb(j)
+        do i=1,ao_num_per_kpt
+           x_c = mo_coef_kpts(i,i1,kk)
+           mo_coef_kpts(i,i1,kk) = mo_coef_kpts(i,i2,kk)
+           mo_coef_kpts(i,i2,kk) = x_c
+        enddo
+      enddo
+      !call loc_cele_routine
     enddo
-   enddo
-   accu(j) = -dabs(accu(j))
-  enddo
-  call dsort(accu,iorder,mo_num)
-  index_core_orb(i) = iorder(1)
- enddo
-
- double precision :: x
- integer :: i1,i2
- do j = 1, n_core_orb
-  i1 = list_core(j)
-  i2 = index_core_orb(j)
-  do i=1,ao_num
-    x = mo_coef(i,i1)
-    mo_coef(i,i1) = mo_coef(i,i2)
-    mo_coef(i,i2) = x
-  enddo
- enddo
-!call loc_cele_routine
-
- deallocate(accu,index_core_orb, iorder)
+    deallocate(accu,accu_c,index_core_orb, iorder)
+  else
+    allocate(accu(mo_num),index_core_orb(n_core_orb),iorder(mo_num))
+   
+    do i = 1, n_core_orb
+      iorb = list_core(i)
+      do j = 1, mo_num
+        accu(j) = 0.d0
+        iorder(j) = j
+        do k = 1, ao_num
+          do l = 1, ao_num
+            accu(j) += mo_coef_begin_iteration(k,iorb) * mo_coef(l,j) * ao_overlap(k,l)
+          enddo
+        enddo
+        accu(j) = -dabs(accu(j))
+      enddo
+      call dsort(accu,iorder,mo_num)
+      index_core_orb(i) = iorder(1)
+    enddo
+   
+    double precision :: x
+    do j = 1, n_core_orb
+      i1 = list_core(j)
+      i2 = index_core_orb(j)
+      do i=1,ao_num
+         x = mo_coef(i,i1)
+         mo_coef(i,i1) = mo_coef(i,i2)
+         mo_coef(i,i2) = x
+      enddo
+    enddo
+    !call loc_cele_routine
+   
+    deallocate(accu,index_core_orb, iorder)
+  endif
 end
diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f
index 34b16ff3..8882ee88 100644
--- a/src/cipsi/cipsi.irp.f
+++ b/src/cipsi/cipsi.irp.f
@@ -38,29 +38,47 @@ subroutine run_cipsi
   pt2_data % rpt2 = -huge(1.e0)
   pt2_data % overlap(:,:) = 0.d0
   pt2_data % variance = huge(1.e0)
+  if (is_complex) then
+    pt2_data % overlap_imag(:,:) = 0.d0
+  endif
+
 
   if (s2_eig) then
     call make_s2_eigenfunction
   endif
-  call diagonalize_CI
+  if (is_complex) then
+    call diagonalize_ci_complex
+  else
+    call diagonalize_CI
+  endif
   call save_wavefunction
 
   call ezfio_has_hartree_fock_energy(has)
   if (has) then
     call ezfio_get_hartree_fock_energy(hf_energy_ref)
   else
-    hf_energy_ref = ref_bitmask_energy
+    hf_energy_ref = ref_bitmask_energy_with_nucl_rep
   endif
 
   if (N_det > N_det_max) then
     psi_det = psi_det_sorted
-    psi_coef = psi_coef_sorted
-    N_det = N_det_max
-    soft_touch N_det psi_det psi_coef
+    if (is_complex) then
+      psi_coef_complex = psi_coef_sorted_complex
+      N_det = N_det_max
+      soft_touch N_det psi_det psi_coef_complex
+    else
+      psi_coef = psi_coef_sorted
+      N_det = N_det_max
+      soft_touch N_det psi_det psi_coef
+    endif
     if (s2_eig) then
       call make_s2_eigenfunction
     endif
-    call diagonalize_CI
+    if (is_complex) then
+      call diagonalize_CI_complex
+    else
+      call diagonalize_CI
+    endif
     call save_wavefunction
   endif
 
@@ -116,11 +134,19 @@ subroutine run_cipsi
     call copy_H_apply_buffer_to_wf()
 !    call save_wavefunction
 
-    PROVIDE  psi_coef
+    if (is_complex) then
+      PROVIDE  psi_coef_complex
+    else
+      PROVIDE  psi_coef
+    endif
     PROVIDE  psi_det
     PROVIDE  psi_det_sorted
 
-    call diagonalize_CI
+    if (is_complex) then
+      call diagonalize_ci_complex
+    else
+      call diagonalize_CI
+    endif
     call save_wavefunction
     call save_energy(psi_energy_with_nucl_rep, zeros)
     if (qp_stop()) exit
@@ -128,7 +154,11 @@ subroutine run_cipsi
 
   if (.not.qp_stop()) then
     if (N_det < N_det_max) then
-        call diagonalize_CI
+        if (is_complex) then
+          call diagonalize_ci_complex
+        else
+          call diagonalize_CI
+        endif
         call save_wavefunction
         call save_energy(psi_energy_with_nucl_rep, zeros)
     endif
diff --git a/src/cipsi/d1_new.irp.f b/src/cipsi/d1_new.irp.f
new file mode 100644
index 00000000..f9a7b7f9
--- /dev/null
+++ b/src/cipsi/d1_new.irp.f
@@ -0,0 +1,714 @@
+subroutine get_d1_kpts_new(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+  !todo: indices should be okay for complex?
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in)  :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  complex*16, intent(in)   :: coefs(N_states)
+  complex*16, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision, external     :: get_phase_bi
+  complex*16, external     :: mo_two_e_integral_complex
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib, k, l
+
+  integer :: kp1,ip1, kp2,ip2, khfix,ihfix, kputi,iputi, kputj,iputj, putj0
+  integer :: kpfix, ipfix, puti0
+  integer :: kputi1,kputi2,puti01,puti02
+  integer :: ii0
+
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer                        :: bant
+  complex*16, allocatable :: hij_cache(:,:),hij_cache2(:,:)
+  complex*16               :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num)
+  complex*16               :: tmp_row_kpts(N_states, mo_num), tmp_row2_kpts(N_states, mo_num)
+  complex*16               :: tmp_row_kpts2(N_states, mo_num_per_kpt), tmp_row2_kpts2(N_states,mo_num_per_kpt)
+  complex*16 :: tmp_mat1(N_states,mo_num,mo_num), tmp_mat2(N_states,mo_num,mo_num)
+  PROVIDE mo_integrals_map N_int
+
+  allocate (lbanned(mo_num, 2))
+  allocate (hij_cache(mo_num,2),hij_cache2(mo_num_per_kpt,2))
+  lbanned = bannedOrb
+
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+
+  bant = 1
+
+  if(sp == 3) then
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+!    kputi = (puti-1)/mo_num_per_kpt + 1
+!    khfix = (hfix-1)/mo_num_per_kpt + 1
+!    kp1   =   (p1-1)/mo_num_per_kpt + 1
+!    kp2   =   (p2-1)/mo_num_per_kpt + 1
+!    iputi = mod(puti-1,mo_num_per_kpt) + 1
+!    ihfix = mod(hfix-1,mo_num_per_kpt) + 1
+!    ip1   = mod(p1-1,  mo_num_per_kpt) + 1
+!    ip2   = mod(p2-1,  mo_num_per_kpt) + 1
+
+    if(.not. bannedOrb(puti, mi)) then
+      !==================
+      call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+      call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      !==================
+!      call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p1,ip1,kp1,p2,ip2,kp2,mo_num_per_kpt,hij_cache2(1,1),mo_integrals_map,mo_integrals_map_2)
+!      call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p2,ip2,kp2,p1,ip1,kp1,mo_num_per_kpt,hij_cache2(1,2),mo_integrals_map,mo_integrals_map_2)
+      tmp_row = (0.d0,0.d0)
+!      tmp_row_kpts2 = (0.d0,0.d0)
+!      kputj = kconserv(kp1,kp2,khfix)
+!      putj0 = (kputj-1)*mo_num_per_kpt
+      !==================
+      do putj=1, hfix-1
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,1) - hij_cache(putj,2)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,2) - hij_cache(putj,1)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+      !===========================
+      ! begin kpts testing
+!      do putj = putj0+1, hfix-1
+!        iputj = putj-putj0
+!        if(lbanned(putj, ma)) cycle
+!        if(banned(putj, puti,bant)) cycle
+!        hij = hij_cache2(iputj,1) - hij_cache2(iputj,2)
+!        if (hij /= (0.d0,0.d0)) then
+!          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            !tmp_row_kpts(k,putj) = tmp_row_kpts(k,putj) + hij * coefs(k)
+!            tmp_row_kpts2(k,iputj) = tmp_row_kpts2(k,iputj) + hij * coefs(k)
+!          enddo
+!        endif
+!      end do
+!      do putj = hfix+1,putj0+mo_num_per_kpt
+!        iputj = putj - putj0
+!        if(lbanned(putj, ma)) cycle
+!        if(banned(putj, puti,bant)) cycle
+!        hij = hij_cache2(iputj,2) - hij_cache2(iputj,1)
+!        if (hij /= (0.d0,0.d0)) then
+!          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            !tmp_row_kpts(k,putj) = tmp_row_kpts(k,putj) + hij * coefs(k)
+!            tmp_row_kpts2(k,iputj) = tmp_row_kpts2(k,iputj) + hij * coefs(k)
+!          enddo
+!        endif
+!      end do
+!      ! end kpts testing
+!      !===========================================================
+!      !print*,'tmp_row_k,tmp_row'
+!      !do ii0=1,mo_num
+!      !  if (cdabs(tmp_row_kpts(1,ii0)-tmp_row(1,ii0)).gt.1.d-12) then
+!      !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG, ',ii0,hfix,p1,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+!      !  endif
+!      !enddo
+!      !===========================================================
+!      tmp_mat1 = (0.d0,0.d0)
+!      tmp_mat2 = (0.d0,0.d0)
+      !===========================================================
+      if(ma == 1) then
+      !===========================================================
+!        tmp_mat1(1:N_states,1:mo_num,puti) = tmp_mat1(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
+!        tmp_mat2(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) = tmp_mat2(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) + &
+!                                         tmp_row_kpts2(1:N_states,1:mo_num_per_kpt)
+!      !===========================================================
+        mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
+!        mat(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) = mat(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) + &
+!                                         tmp_row_kpts2(1:N_states,1:mo_num_per_kpt)
+      else
+      !===========================================================
+!        do l=1,mo_num
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            tmp_mat1(k,puti,l) = tmp_mat1(k,puti,l) + tmp_row(k,l)
+!          enddo
+!        enddo
+!        do l=1,mo_num_per_kpt
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            tmp_mat2(k,puti,l+putj0) = tmp_mat2(k,puti,l+putj0) + tmp_row_kpts2(k,l)
+!          enddo
+!        enddo
+      !===========================================================
+        do l=1,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k,puti,l) = mat(k,puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+        !do l=1,mo_num_per_kpt
+        !  !DIR$ LOOP COUNT AVG(4)
+        !  do k=1,N_states
+        !    mat(k,puti,l+putj0) = mat(k,puti,l+putj0) + tmp_row_kpts2(k,l)
+        !  enddo
+        !enddo
+      end if
+      !===========================================================
+        !do k=1,N_states
+        !  do l=1,mo_num
+        !    do ii0=1,mo_num
+        !      if (cdabs(tmp_mat2(k,l,ii0)-tmp_mat1(k,l,ii0)).gt.1.d-12) then
+        !        print'((A),6(I5),2(2(E25.15),2X))','WarNInG 4a, ',k,l,ii0,hfix,p1,p2,tmp_mat2(k,l,ii0),tmp_mat1(k,l,ii0)
+        !    !  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+        !    !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+        !      endif
+        !    enddo
+        !  enddo
+        !enddo
+      !===========================================================
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)
+!    kpfix = (pfix-1)/mo_num_per_kpt + 1
+!    ipfix = mod(pfix-1,mo_num_per_kpt) + 1
+    tmp_row = (0.d0,0.d0)
+    tmp_row2 = (0.d0,0.d0)
+!    !tmp_row_kpts = (0.d0,0.d0)
+!    !tmp_row2_kpts = (0.d0,0.d0)
+!    tmp_row_kpts2 = (0.d0,0.d0)
+!    tmp_row2_kpts2 = (0.d0,0.d0)
+    !===========================================================
+    call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+    call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+    !===========================================================
+!    call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,pfix,ipfix,kpfix,p1,ip1,kp1,mo_num_per_kpt,hij_cache2(1,1),mo_integrals_map,mo_integrals_map_2)
+!    call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,pfix,ipfix,kpfix,p2,ip2,kp2,mo_num_per_kpt,hij_cache2(1,2),mo_integrals_map,mo_integrals_map_2)
+    putj = p1
+    !============
+    !begin ref
+    do puti=1,mo_num !HOT
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,2)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+          enddo
+        endif
+      end if
+!    enddo
+!      
+      putj = p2
+!    do puti=1,mo_num !HOT
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,1)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+          enddo
+        endif
+      end if
+    end do
+!    !end ref
+!    !===================
+!    !begin kpts
+!    if (kp1.eq.kp2) then
+!    !if (.False.) then
+!      kputi1 = kconserv(kpfix,kp1,khfix)
+!      kputi2 = kputi1
+!      puti01 = (kputi1-1)*mo_num_per_kpt
+!      puti02 = puti01
+!      do iputi=1,mo_num_per_kpt !HOT
+!        puti = puti01 + iputi
+!        if(lbanned(puti,mi)) cycle
+!        !p1 fixed
+!        putj = p1
+!        if(.not. banned(putj,puti,bant)) then
+!          hij = hij_cache2(iputi,2)
+!          if (hij /= (0.d0,0.d0)) then
+!            hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+!            !DIR$ LOOP COUNT AVG(4)
+!            do k=1,N_states
+!              tmp_row_kpts2(k,iputi) = tmp_row_kpts2(k,iputi) + hij * coefs(k)
+!              !tmp_row_kpts(k,puti) = tmp_row_kpts(k,puti) + hij * coefs(k)
+!            enddo
+!          endif
+!        end if
+!!      enddo
+!!        
+!        putj = p2
+!!      do puti=1,mo_num !HOT
+!        if(.not. banned(putj,puti,bant)) then
+!          hij = hij_cache2(iputi,1)
+!          if (hij /= (0.d0,0.d0)) then
+!            hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+!            do k=1,N_states
+!              tmp_row2_kpts2(k,iputi) = tmp_row2_kpts2(k,iputi) + hij * coefs(k)
+!              !tmp_row2_kpts(k,puti) = tmp_row2_kpts(k,puti) + hij * coefs(k)
+!            enddo
+!          endif
+!        end if
+!      end do
+!    else !kp1.ne.kp2
+!      kputi2 = kconserv(kpfix,kp2,khfix)
+!      puti02 = (kputi2-1)*mo_num_per_kpt
+!      putj = p1
+!      do iputi=1,mo_num_per_kpt !HOT
+!        puti = puti02 + iputi
+!        if(lbanned(puti,mi)) cycle
+!        !p1 fixed
+!        if(.not. banned(putj,puti,bant)) then
+!          hij = hij_cache2(iputi,2)
+!          if (hij /= (0.d0,0.d0)) then
+!            hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+!            !DIR$ LOOP COUNT AVG(4)
+!            do k=1,N_states
+!              tmp_row_kpts2(k,iputi) = tmp_row_kpts2(k,iputi) + hij * coefs(k)
+!              !tmp_row_kpts(k,puti) = tmp_row_kpts(k,puti) + hij * coefs(k)
+!            enddo
+!          endif
+!        end if
+!      enddo
+!!        
+!      putj = p2
+!      kputi1 = kconserv(kpfix,kp1,khfix)
+!      puti01 = (kputi1-1)*mo_num_per_kpt
+!      do iputi=1,mo_num_per_kpt !HOT
+!        puti = puti01 + iputi
+!        if(lbanned(puti,mi)) cycle
+!        if(.not. banned(putj,puti,bant)) then
+!          hij = hij_cache2(iputi,1)
+!          if (hij /= (0.d0,0.d0)) then
+!            hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+!            do k=1,N_states
+!              tmp_row2_kpts2(k,iputi) = tmp_row2_kpts2(k,iputi) + hij * coefs(k)
+!              !tmp_row2_kpts(k,puti) = tmp_row2_kpts(k,puti) + hij * coefs(k)
+!            enddo
+!          endif
+!        end if
+!      end do
+!    endif
+!    !end kpts
+!    !===================
+!    !test printing
+!    !print'((A),5(I5))','kpt info1: ',kconserv(kpfix,kp2,khfix),khfix,kpfix,kp2,kputi2
+!    !print'((A),5(I5))','kpt info2: ',kconserv(kpfix,kp1,khfix),khfix,kpfix,kp1,kputi1
+!    !do ii0=1,mo_num
+!    !  if (cdabs(tmp_row_kpts(1,ii0)-tmp_row(1,ii0)).gt.1.d-12) then
+!    !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1a, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+!    !!  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+!    !!    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+!    !  endif
+!    !  if (cdabs(tmp_row2_kpts(1,ii0)-tmp_row2(1,ii0)).gt.1.d-12) then
+!    !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 2a, ',ii0,hfix,pfix,p1,tmp_row2_kpts(1,ii0),tmp_row2(1,ii0)
+!    !!  else if ((cdabs(tmp_row2_kpts(1,ii0))+cdabs(tmp_row2(1,ii0))).gt.1.d-12) then
+!    !!    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 2b, ',ii0,hfix,pfix,p1,tmp_row2_kpts(1,ii0),tmp_row2(1,ii0)
+!    !  endif
+!    !enddo
+!    !===================
+!
+!    tmp_mat1 = (0.d0,0.d0)
+!    tmp_mat2 = (0.d0,0.d0)
+    if(mi == 1) then
+!    !===================
+!      tmp_mat1(:,:,p1) = tmp_mat1(:,:,p1) + tmp_row(:,:)
+!      tmp_mat1(:,:,p2) = tmp_mat1(:,:,p2) + tmp_row2(:,:)
+!      tmp_mat2(:,puti02+1:puti02+mo_num_per_kpt,p1) = tmp_mat2(:,puti02+1:puti02+mo_num_per_kpt,p1) + tmp_row_kpts2(:,:)
+!      tmp_mat2(:,puti01+1:puti01+mo_num_per_kpt,p2) = tmp_mat2(:,puti01+1:puti01+mo_num_per_kpt,p2) + tmp_row2_kpts2(:,:)
+!    !===================
+      mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
+      mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
+!      mat(:,puti02+1:puti02+mo_num_per_kpt,p1) = mat(:,puti02+1:puti02+mo_num_per_kpt,p1) + tmp_row_kpts2(:,:)
+!      mat(:,puti01+1:puti01+mo_num_per_kpt,p2) = mat(:,puti01+1:puti01+mo_num_per_kpt,p2) + tmp_row2_kpts2(:,:)
+    else
+    !===================
+!      do l=1,mo_num
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          tmp_mat1(k,p1,l) = tmp_mat1(k,p1,l) + tmp_row(k,l)
+!          tmp_mat1(k,p2,l) = tmp_mat1(k,p2,l) + tmp_row2(k,l)
+!        enddo
+!      enddo
+!      do l=1,mo_num_per_kpt
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          tmp_mat2(k,p1,l+puti02) = tmp_mat2(k,p1,l+puti02) + tmp_row_kpts2(k,l)
+!          tmp_mat2(k,p2,l+puti01) = tmp_mat2(k,p2,l+puti01) + tmp_row2_kpts2(k,l)
+!        enddo
+!      enddo
+    !===================
+      do l=1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p1,l) = mat(k,p1,l) + tmp_row(k,l)
+          mat(k,p2,l) = mat(k,p2,l) + tmp_row2(k,l)
+        enddo
+      enddo
+!      do l=1,mo_num_per_kpt
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          mat(k,p1,l+puti02) = mat(k,p1,l+puti02) + tmp_row_kpts2(k,l)
+!          mat(k,p2,l+puti01) = mat(k,p2,l+puti01) + tmp_row2_kpts2(k,l)
+!        enddo
+!      enddo
+    end if
+      !===========================================================
+!        do k=1,N_states
+!          do l=1,mo_num
+!            do ii0=1,mo_num
+!              if (cdabs(tmp_mat2(k,l,ii0)-tmp_mat1(k,l,ii0)).gt.1.d-12) then
+!                print'((A),7(I5),2(2(E25.15),2X))','WarNInG 5a, ',k,l,ii0,hfix,pfix,p1,p2,tmp_mat2(k,l,ii0),tmp_mat1(k,l,ii0)
+!            !  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+!            !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+!              endif
+!            enddo
+!          enddo
+!        enddo
+      !===========================================================
+    !todo: kpts okay up to this point in get_d1_complex
+
+  else  ! sp /= 3
+
+    if(p(0,ma) == 3) then
+      do i=1,3
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+!        kputi = (puti-1)/mo_num_per_kpt + 1
+!        khfix = (hfix-1)/mo_num_per_kpt + 1
+!        kp1   =   (p1-1)/mo_num_per_kpt + 1
+!        kp2   =   (p2-1)/mo_num_per_kpt + 1
+!        iputi = mod(puti-1,mo_num_per_kpt) + 1
+!        ihfix = mod(hfix-1,mo_num_per_kpt) + 1
+!        ip1   = mod(p1-1,  mo_num_per_kpt) + 1
+!        ip2   = mod(p2-1,  mo_num_per_kpt) + 1
+        call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+        call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+!        call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p1,ip1,kp1,p2,ip2,kp2,mo_num_per_kpt,hij_cache2(1,1),mo_integrals_map,mo_integrals_map_2)
+!        call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p2,ip2,kp2,p1,ip1,kp1,mo_num_per_kpt,hij_cache2(1,2),mo_integrals_map,mo_integrals_map_2)
+        tmp_row = (0.d0,0.d0)
+        !tmp_row_kpts = (0.d0,0.d0)
+!        tmp_row_kpts2 = (0.d0,0.d0)
+        !===================
+        !begin ref
+        do putj=1,hfix-1
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,1) - hij_cache(putj,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+        do putj=hfix+1,mo_num
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,2) - hij_cache(putj,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+        !end ref
+        !=================
+        !begin kpts
+!        kputj = kconserv(kp1,kp2,khfix)
+!        putj0 = (kputj-1)*mo_num_per_kpt
+!        do putj = putj0+1,hfix-1
+!          iputj = putj - putj0
+!          if(banned(putj,puti,1)) cycle
+!          if(lbanned(putj,ma)) cycle
+!          hij = hij_cache2(iputj,1) - hij_cache2(iputj,2)
+!          if (hij /= (0.d0,0.d0)) then
+!            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+!            !tmp_row_kpts(:,putj) = tmp_row_kpts(:,putj) + hij * coefs(:)
+!            tmp_row_kpts2(:,iputj) = tmp_row_kpts2(:,iputj) + hij * coefs(:)
+!          endif
+!        end do
+!        do putj=hfix+1,putj0+mo_num_per_kpt
+!          iputj = putj - putj0
+!          if(banned(putj,puti,1)) cycle
+!          if(lbanned(putj,ma)) cycle
+!          hij = hij_cache2(iputj,2) - hij_cache2(iputj,1)
+!          if (hij /= (0.d0,0.d0)) then
+!            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+!            !tmp_row_kpts(:,putj) = tmp_row_kpts(:,putj) + hij * coefs(:)
+!            tmp_row_kpts2(:,iputj) = tmp_row_kpts2(:,iputj) + hij * coefs(:)
+!          endif
+!        end do
+!
+!        !end kpts
+!    !do ii0=1,mo_num
+!    !  if (cdabs(tmp_row_kpts(1,ii0)-tmp_row(1,ii0)).gt.1.d-12) then
+!    !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1a, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+!    !!  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+!    !!    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+!    !  endif
+!    !enddo
+!        !=================
+!        tmp_mat1 = (0.d0,0.d0)
+!        tmp_mat2 = (0.d0,0.d0)
+!        tmp_mat1(:, :puti-1, puti) = tmp_mat1(:, :puti-1, puti) + tmp_row(:,:puti-1)
+!        do l=puti,mo_num
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            tmp_mat1(k, puti, l) = tmp_mat1(k, puti,l) + tmp_row(k,l)
+!          enddo
+!        enddo
+!        !=================
+!        if (kputj.lt.kputi) then
+!          tmp_mat2(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) =  &
+!                  tmp_mat2(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) + &
+!                  tmp_row_kpts2(1:N_states,1:mo_num_per_kpt)
+!        else if (kputj.gt.kputi) then
+!          do l=1,mo_num_per_kpt
+!            !DIR$ LOOP COUNT AVG(4)
+!            do k=1,N_states
+!              tmp_mat2(k, puti, l+putj0) = tmp_mat2(k, puti,l+putj0) + tmp_row_kpts2(k,l)
+!            enddo
+!          enddo
+!        else !kputj == kputi
+!          tmp_mat2(1:N_states,putj0+1:puti-1,puti) =  &
+!                  tmp_mat2(1:N_states,putj0+1:puti-1,puti) + &
+!                  tmp_row_kpts2(1:N_states,1:iputi-1)
+!          do l=iputi,mo_num_per_kpt
+!            !DIR$ LOOP COUNT AVG(4)
+!            do k=1,N_states
+!              tmp_mat2(k, puti, l+putj0) = tmp_mat2(k, puti,l+putj0) + tmp_row_kpts2(k,l)
+!            enddo
+!          enddo
+!        endif
+!        !=================
+!        do k=1,N_states
+!          do l=1,mo_num
+!            do ii0=1,mo_num
+!              if (cdabs(tmp_mat2(k,l,ii0)-tmp_mat1(k,l,ii0)).gt.1.d-12) then
+!                print'((A),6(I5),2(2(E25.15),2X))','WarNInG 3a, ',k,l,ii0,hfix,p1,p2,tmp_mat2(k,l,ii0),tmp_mat1(k,l,ii0)
+!            !  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+!            !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+!              endif
+!            enddo
+!          enddo
+!        enddo
+
+        !=================
+        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, l) = mat(k, puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+        !!=================
+        !!todo: check for iputi=1,2
+        !if (kputj.lt.kputi) then
+        !  mat(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) =  &
+        !          mat(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) + &
+        !          tmp_row_kpts2(1:N_states,1:mo_num_per_kpt)
+        !else if (kputj.gt.kputi) then
+        !  do l=1,mo_num_per_kpt
+        !    !DIR$ LOOP COUNT AVG(4)
+        !    do k=1,N_states
+        !      mat(k, puti, l+putj0) = mat(k, puti,l+putj0) + tmp_row_kpts2(k,l)
+        !    enddo
+        !  enddo
+        !else !kputj == kputi
+        !  mat(1:N_states,putj0+1:puti-1,puti) =  &
+        !          mat(1:N_states,putj0+1:puti-1,puti) + &
+        !          tmp_row_kpts2(1:N_states,1:iputi-1)
+        !  do l=iputi,mo_num_per_kpt
+        !    !DIR$ LOOP COUNT AVG(4)
+        !    do k=1,N_states
+        !      mat(k, puti, l+putj0) = mat(k, puti,l+putj0) + tmp_row_kpts2(k,l)
+        !    enddo
+        !  enddo
+        !endif
+      end do
+    else
+      hfix = h(1,mi)
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+!      kpfix = (pfix-1)/mo_num_per_kpt + 1
+!      khfix = (hfix-1)/mo_num_per_kpt + 1
+!      kp1   =   (p1-1)/mo_num_per_kpt + 1
+!      kp2   =   (p2-1)/mo_num_per_kpt + 1
+!      ipfix = mod(pfix-1,mo_num_per_kpt) + 1
+!      ihfix = mod(hfix-1,mo_num_per_kpt) + 1
+!      ip1   = mod(p1-1,  mo_num_per_kpt) + 1
+!      ip2   = mod(p2-1,  mo_num_per_kpt) + 1
+      tmp_row = (0.d0,0.d0)
+      tmp_row2 = (0.d0,0.d0)
+      !tmp_row_kpts = (0.d0,0.d0)
+      !tmp_row2_kpts = (0.d0,0.d0)
+      call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+      call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      !call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p1,ip1,kp1,pfix,ipfix,kpfix,mo_num_per_kpt,hij_cache2(1,1),mo_integrals_map,mo_integrals_map_2)
+      !call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p2,ip2,kp2,pfix,ipfix,kpfix,mo_num_per_kpt,hij_cache2(1,2),mo_integrals_map,mo_integrals_map_2)
+      !===============
+      !begin ref
+      putj = p2
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+      end do
+      !end ref
+      !===============
+      !begin kpts
+      !todo: combine if kp1==kp2
+ !     putj = p2
+ !     kputi1 = kconserv(kp1,kpfix,khfix)
+ !     puti01 = (kputi1-1)*mo_num_per_kpt
+ !     do iputi=1,mo_num_per_kpt
+ !       puti = puti01 + iputi
+ !       if(lbanned(puti,ma)) cycle
+ !       if(.not. banned(puti,putj,1)) then
+ !         hij = hij_cache2(iputi,1)
+ !         if (hij /= (0.d0,0.d0)) then
+ !           hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+ !           !DIR$ LOOP COUNT AVG(4)
+ !           do k=1,N_states
+ !             tmp_row_kpts(k,puti) = tmp_row_kpts(k,puti) + hij * coefs(k)
+ !           enddo
+ !         endif
+ !       end if
+ !     enddo
+ !     putj = p1
+ !     kputi2 = kconserv(kp2,kpfix,khfix)
+ !     puti02 = (kputi2-1)*mo_num_per_kpt
+ !     do iputi=1,mo_num_per_kpt
+ !       puti = puti02 + iputi
+ !       if(lbanned(puti,ma)) cycle
+ !       if(.not. banned(puti,putj,1)) then
+ !         hij = hij_cache2(iputi,2)
+ !         if (hij /= (0.d0,0.d0)) then
+ !           hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+ !           do k=1,N_states
+ !             tmp_row2_kpts(k,puti) = tmp_row2_kpts(k,puti) + hij * coefs(k)
+ !           enddo
+ !         endif
+ !       end if
+ !     end do
+ !     !end kpts
+ !     !===============
+ !   !test printing
+ !   !print'((A),5(I5))','kpt info1: ',kconserv(kpfix,kp2,khfix),khfix,kpfix,kp2,kputi2
+ !   !print'((A),5(I5))','kpt info2: ',kconserv(kpfix,kp1,khfix),khfix,kpfix,kp1,kputi1
+ !   do ii0=1,mo_num
+ !     if (cdabs(tmp_row_kpts(1,ii0)-tmp_row(1,ii0)).gt.1.d-12) then
+ !       print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1a, ',ii0,hfix,p1,pfix,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+ !   !  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+ !   !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+ !     endif
+ !     if (cdabs(tmp_row2_kpts(1,ii0)-tmp_row2(1,ii0)).gt.1.d-12) then
+ !       print'((A),4(I5),2(2(E25.15),2X))','WarNInG 2a, ',ii0,hfix,p2,pfix,tmp_row2_kpts(1,ii0),tmp_row2(1,ii0)
+ !   !  else if ((cdabs(tmp_row2_kpts(1,ii0))+cdabs(tmp_row2(1,ii0))).gt.1.d-12) then
+ !   !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 2b, ',ii0,hfix,pfix,p1,tmp_row2_kpts(1,ii0),tmp_row2(1,ii0)
+ !     endif
+ !   enddo
+    !===================
+      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
+      do l=p2,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p2,l) = mat(k,p2,l) + tmp_row(k,l)
+        enddo
+      enddo
+      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
+      do l=p1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p1,l) = mat(k,p1,l) + tmp_row2(k,l)
+        enddo
+      enddo
+    end if
+  end if
+  deallocate(lbanned,hij_cache)
+
+ !! MONO
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1=1,p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1
+      do i2=ib,p(0,s2)
+        p1 = p(i1,s1)
+        p2 = p(i2,s2)
+        if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+        ! gen is a selector; mask is ionized generator; det is alpha
+        ! hij is contribution to <psi|H|alpha>
+        call i_h_j_complex(gen, det, N_int, hij)
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * dconjg(hij)
+        enddo
+      end do
+    end do
+end
+
+
diff --git a/src/cipsi/d1_old.irp.f b/src/cipsi/d1_old.irp.f
new file mode 100644
index 00000000..0c93b157
--- /dev/null
+++ b/src/cipsi/d1_old.irp.f
@@ -0,0 +1,263 @@
+
+subroutine get_d1_complex_old(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+  !todo: indices should be okay for complex?
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in)  :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  complex*16, intent(in)   :: coefs(N_states)
+  complex*16, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision, external     :: get_phase_bi
+  complex*16, external     :: mo_two_e_integral_complex
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib, k, l
+
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer                        :: bant
+  complex*16, allocatable :: hij_cache(:,:)
+  complex*16               :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num)
+  PROVIDE mo_integrals_map N_int
+
+  allocate (lbanned(mo_num, 2))
+  allocate (hij_cache(mo_num,2))
+  lbanned = bannedOrb
+
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+
+  bant = 1
+
+  if(sp == 3) then
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+    if(.not. bannedOrb(puti, mi)) then
+      call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+      call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      tmp_row = (0.d0,0.d0)
+      do putj=1, hfix-1
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,1) - hij_cache(putj,2)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,2) - hij_cache(putj,1)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+
+      if(ma == 1) then
+        mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
+      else
+        do l=1,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k,puti,l) = mat(k,puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+      end if
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)
+    tmp_row = (0.d0,0.d0)
+    tmp_row2 = (0.d0,0.d0)
+    call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+    call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+    putj = p1
+    do puti=1,mo_num !HOT
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,2)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+          enddo
+        endif
+      end if
+!    enddo
+!      
+      putj = p2
+!    do puti=1,mo_num !HOT
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,1)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+          enddo
+        endif
+      end if
+    end do
+
+    if(mi == 1) then
+      mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
+      mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
+    else
+      do l=1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p1,l) = mat(k,p1,l) + tmp_row(k,l)
+          mat(k,p2,l) = mat(k,p2,l) + tmp_row2(k,l)
+        enddo
+      enddo
+    end if
+
+  else  ! sp /= 3
+
+    if(p(0,ma) == 3) then
+      do i=1,3
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+        call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+        call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+        tmp_row = (0.d0,0.d0)
+        do putj=1,hfix-1
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,1) - hij_cache(putj,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+        do putj=hfix+1,mo_num
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,2) - hij_cache(putj,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+
+        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, l) = mat(k, puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+      end do
+    else
+      hfix = h(1,mi)
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+      tmp_row = (0.d0,0.d0)
+      tmp_row2 = (0.d0,0.d0)
+      call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+      call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      putj = p2
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+      end do
+      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
+      do l=p2,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p2,l) = mat(k,p2,l) + tmp_row(k,l)
+        enddo
+      enddo
+      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
+      do l=p1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p1,l) = mat(k,p1,l) + tmp_row2(k,l)
+        enddo
+      enddo
+    end if
+  end if
+  deallocate(lbanned,hij_cache)
+
+ !! MONO
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1=1,p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1
+      do i2=ib,p(0,s2)
+        p1 = p(i1,s1)
+        p2 = p(i2,s2)
+        if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+        ! gen is a selector; mask is ionized generator; det is alpha
+        ! hij is contribution to <psi|H|alpha>
+        call i_h_j_complex(gen, det, N_int, hij)
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * dconjg(hij)
+        enddo
+      end do
+    end do
+end
+
diff --git a/src/cipsi/energy.irp.f b/src/cipsi/energy.irp.f
index 1d8c6bf5..50dc4620 100644
--- a/src/cipsi/energy.irp.f
+++ b/src/cipsi/energy.irp.f
@@ -17,7 +17,11 @@ BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ]
      pt2_E0_denominator(1:N_states) = psi_energy(1:N_states)
    else if (h0_type == "HF") then
      do i=1,N_states
-       j = maxloc(abs(psi_coef(:,i)),1)
+       if (is_complex) then
+         j = maxloc(cdabs(psi_coef_complex(:,i)),1)
+       else
+         j = maxloc(abs(psi_coef(:,i)),1)
+       endif
        pt2_E0_denominator(i) = psi_det_hii(j)
      enddo
    else if (h0_type == "Barycentric") then
@@ -45,3 +49,11 @@ BEGIN_PROVIDER [ double precision, pt2_overlap, (N_states, N_states) ]
  pt2_overlap(1:N_states,1:N_states) = 0.d0
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, pt2_overlap_imag, (N_states, N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Overlap between the perturbed wave functions
+ END_DOC
+ pt2_overlap_imag(1:N_states,1:N_states) = 0.d0
+END_PROVIDER
+
diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f
index 31f27e1d..c555fe0b 100644
--- a/src/cipsi/pt2_stoch_routines.irp.f
+++ b/src/cipsi/pt2_stoch_routines.irp.f
@@ -63,11 +63,19 @@ logical function testTeethBuilding(minF, N)
 
   double precision :: norm2
   norm2 = 0.d0
-  do i=N_det_generators,1,-1
-    tilde_w(i)  = psi_coef_sorted_gen(i,pt2_stoch_istate) * &
-                  psi_coef_sorted_gen(i,pt2_stoch_istate)
-    norm2 = norm2 + tilde_w(i)
-  enddo
+  if (is_complex) then
+    do i=N_det_generators,1,-1
+      tilde_w(i)  = cdabs(psi_coef_sorted_gen_complex(i,pt2_stoch_istate) * &
+                    psi_coef_sorted_gen_complex(i,pt2_stoch_istate))
+      norm2 = norm2 + tilde_w(i)
+    enddo
+  else
+    do i=N_det_generators,1,-1
+      tilde_w(i)  = psi_coef_sorted_gen(i,pt2_stoch_istate) * &
+                    psi_coef_sorted_gen(i,pt2_stoch_istate)
+      norm2 = norm2 + tilde_w(i)
+    enddo
+  endif
 
   f = 1.d0/norm2
   tilde_w(:) = tilde_w(:) * f
@@ -125,11 +133,19 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
   integer(ZMQ_PTR), external     :: new_zmq_to_qp_run_socket
   type(selection_buffer)         :: b
 
-  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
-  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
-  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
-  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted
-  PROVIDE psi_det_hii selection_weight pseudo_sym
+  if (is_complex) then
+    PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+    PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
+    PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+    PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp_complex psi_det_sorted
+    PROVIDE psi_det_hii selection_weight pseudo_sym
+  else
+    PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+    PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
+    PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+    PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted
+    PROVIDE psi_det_hii selection_weight pseudo_sym
+  endif
 
   if (h0_type == 'SOP') then
     PROVIDE psi_occ_pattern_hii det_to_occ_pattern
@@ -154,8 +170,16 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
       state_average_weight(pt2_stoch_istate) = 1.d0
       TOUCH state_average_weight pt2_stoch_istate selection_weight
 
-      PROVIDE nproc pt2_F mo_two_e_integrals_in_map mo_one_e_integrals pt2_w
-      PROVIDE psi_selectors pt2_u pt2_J pt2_R
+      if (is_complex) then
+        !todo: psi_selectors isn't linked to psi_selectors_coef anymore; should we provide both?
+        !PROVIDE nproc pt2_F mo_two_e_integrals_in_map mo_one_e_integrals_complex pt2_w
+        PROVIDE nproc pt2_F mo_two_e_integrals_in_map mo_one_e_integrals_kpts pt2_w
+        PROVIDE psi_selectors pt2_u pt2_J pt2_R
+      else
+        PROVIDE nproc pt2_F mo_two_e_integrals_in_map mo_one_e_integrals pt2_w
+        PROVIDE psi_selectors pt2_u pt2_J pt2_R
+      endif
+
       call new_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
 
       integer, external              :: zmq_put_psi
@@ -267,6 +291,10 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
               + 2.0d0*(N_int*2*ii)              & ! minilist, fullminilist
               + 1.0d0*(N_states*mo_num*mo_num)  & ! mat
               ) / 1024.d0**3
+        if (is_complex) then
+          ! mat is complex
+          mem = mem + (nproc_target*8.d0*(N_states*mo_num* mo_num)) / 1024.d0**3
+        endif
 
         if (nproc_target == 0) then
           call check_mem(mem,irp_here)
@@ -318,6 +346,13 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
       pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate)
     enddo
     SOFT_TOUCH pt2_overlap
+    if (is_complex) then
+      !TODO: transpose/conjugate?
+      do k=1,N_states
+        pt2_overlap_imag(pt2_stoch_istate,k) = pt2_data % overlap_imag(k,pt2_stoch_istate)
+      enddo
+      SOFT_TOUCH pt2_overlap_imag
+    endif
 
     enddo
     FREE pt2_stoch_istate
@@ -329,12 +364,23 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
        pt2_overlap(j,i) = pt2_overlap(i,j)
      enddo
     enddo
+    
+    if (is_complex) then
+      !TODO: check sign
+      do j=2,N_states
+        do i=1,j-1
+          pt2_overlap_imag(i,j) = 0.5d0 * (pt2_overlap_imag(i,j) - pt2_overlap_imag(j,i))
+          pt2_overlap_imag(j,i) = -pt2_overlap_imag(i,j)
+        enddo
+      enddo
+    endif
 
     print *, 'Overlap of perturbed states:'
     do k=1,N_states
       print *, pt2_overlap(k,:)
     enddo
     print *, '-------'
+    !TODO: print imag part?
 
     if (N_in > 0) then
       b%cur = min(N_in,b%cur)
@@ -397,6 +443,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
   integer, allocatable :: index(:)
 
   double precision :: v, x, x2, x3, avg, avg2, avg3(N_states), eqt, E0, v0, n0(N_states)
+  double precision :: avg3im(N_states), n0im(N_states)
   double precision :: eqta(N_states)
   double precision :: time, time1, time0
 
@@ -439,6 +486,11 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
   pt2_data_err % variance(pt2_stoch_istate) = huge(1.)
   pt2_data % overlap(:,pt2_stoch_istate) = 0.d0
   pt2_data_err % overlap(:,pt2_stoch_istate) = huge(1.)
+  !TODO: init overlap_imag?
+  if (is_complex) then
+    pt2_data % overlap_imag(:,pt2_stoch_istate) = 0.d0
+    pt2_data_err % overlap_imag(:,pt2_stoch_istate) = 0.d0
+  endif
   n = 1
   t = 0
   U = 0
@@ -458,6 +510,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
   E0 = E
   v0 = 0.d0
   n0(:) = 0.d0
+  n0im(:) = 0.d0
   more = 1
   call wall_time(time0)
   time1 = time0
@@ -478,10 +531,14 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
           E0 = 0.d0
           v0 = 0.d0
           n0(:) = 0.d0
+          n0im(:) = 0.d0
           do i=pt2_n_0(t),1,-1
             E0 += pt2_data_I(i) % pt2(pt2_stoch_istate)
             v0 += pt2_data_I(i) % variance(pt2_stoch_istate)
             n0(:) += pt2_data_I(i) % overlap(:,pt2_stoch_istate)
+            if (is_complex) then
+              n0im(:) += pt2_data_I(i) % overlap_imag(:,pt2_stoch_istate)
+            endif
           end do
         else
           exit
@@ -506,6 +563,9 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
         avg  = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c)
         avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c)
         avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c)
+        if (is_complex) then
+          avg3im(:) = n0im(:) + pt2_data_S(t) % overlap_imag(:,pt2_stoch_istate) / dble(c)
+        endif
         if ((avg /= 0.d0) .or. (n == N_det_generators) ) then
           do_exit = .true.
         endif
@@ -515,6 +575,9 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
         pt2_data % pt2(pt2_stoch_istate) = avg
         pt2_data % variance(pt2_stoch_istate) = avg2
         pt2_data % overlap(:,pt2_stoch_istate) = avg3(:)
+        if (is_complex) then
+          pt2_data % overlap_imag(:,pt2_stoch_istate) = avg3im(:)
+        endif
         call wall_time(time)
         ! 1/(N-1.5) : see  Brugger, The American Statistician (23) 4 p. 32 (1969)
         if(c > 2) then
@@ -526,7 +589,13 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
           eqt = sqrt(eqt / (dble(c) - 1.5d0))
           pt2_data_err % variance(pt2_stoch_istate) = eqt
 
-          eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          if (is_complex) then
+            eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - &
+            (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2 - &
+            (pt2_data_S(t) % overlap_imag(:,pt2_stoch_istate)/c)**2 ) ! dabs for numerical stability
+          else
+            eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          endif
           eqta(:) = sqrt(eqta(:) / (dble(c) - 1.5d0))
           pt2_data_err % overlap(:,pt2_stoch_istate) = eqta(:)
 
@@ -794,11 +863,17 @@ END_PROVIDER
      allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators))
 
      tilde_cW(0) = 0d0
-
-     do i=1,N_det_generators
-       tilde_w(i)  = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 !+ 1.d-20
-     enddo
-
+    
+     if (is_complex) then
+       do i=1,N_det_generators
+         tilde_w(i)  = cdabs(psi_coef_sorted_gen_complex(i,pt2_stoch_istate))**2 !+ 1.d-20
+       enddo
+     else
+       do i=1,N_det_generators
+         tilde_w(i)  = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 !+ 1.d-20
+       enddo
+     endif
+     
      double precision               :: norm2
      norm2 = 0.d0
      do i=N_det_generators,1,-1
@@ -816,7 +891,7 @@ END_PROVIDER
      pt2_n_0(1) = 0
      do
      pt2_u_0 = tilde_cW(pt2_n_0(1))
-     r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth)
+     r = tilde_cW(pt2_n_0(1) + pt2_mindetinfirstteeth)
      pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth)
      if(pt2_W_T >= r - pt2_u_0) then
        exit
@@ -842,7 +917,7 @@ END_PROVIDER
      endif
      ASSERT(tooth_width > 0.d0)
      do i=pt2_n_0(t)+1, pt2_n_0(t+1)
-       pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width
+       pt2_w(i) = tilde_w(i) * pt2_w_t / tooth_width
      end do
    end do
 
@@ -856,6 +931,3 @@ END_PROVIDER
 END_PROVIDER
 
 
-
-
-
diff --git a/src/cipsi/pt2_type.irp.f b/src/cipsi/pt2_type.irp.f
index ee90d421..7964a942 100644
--- a/src/cipsi/pt2_type.irp.f
+++ b/src/cipsi/pt2_type.irp.f
@@ -15,6 +15,10 @@ subroutine pt2_alloc(pt2_data,N)
   pt2_data % variance(:)      = 0.d0
   pt2_data % rpt2(:)          = 0.d0
   pt2_data % overlap(:,:)     = 0.d0
+  if (is_complex) then
+    allocate(pt2_data % overlap_imag(N,N))
+    pt2_data % overlap_imag(:,:) = 0.d0
+  endif
 
 end subroutine
 
@@ -27,6 +31,9 @@ subroutine pt2_dealloc(pt2_data)
             ,pt2_data % rpt2        &
             ,pt2_data % overlap     &
             )
+  if (is_complex) then
+    deallocate(pt2_data % overlap_imag)
+  endif
 end subroutine
 
 subroutine pt2_add(p1, w, p2)
@@ -45,6 +52,9 @@ subroutine pt2_add(p1, w, p2)
     p1 % rpt2(:)           = p1 % rpt2(:)          + p2 % rpt2(:)
     p1 % variance(:)       = p1 % variance(:)      + p2 % variance(:)
     p1 % overlap(:,:)      = p1 % overlap(:,:)     + p2 % overlap(:,:)
+    if (is_complex) then
+      p1 % overlap_imag(:,:) = p1 % overlap_imag(:,:) + p2 % overlap_imag(:,:)
+    endif
 
   else
 
@@ -52,6 +62,9 @@ subroutine pt2_add(p1, w, p2)
     p1 % rpt2(:)           = p1 % rpt2(:)          + w * p2 % rpt2(:)
     p1 % variance(:)       = p1 % variance(:)      + w * p2 % variance(:)
     p1 % overlap(:,:)      = p1 % overlap(:,:)     + w * p2 % overlap(:,:)
+    if (is_complex) then
+      p1 % overlap_imag(:,:) = p1 % overlap_imag(:,:) + w * p2 % overlap_imag(:,:)
+    endif
 
   endif
 
@@ -74,6 +87,9 @@ subroutine pt2_add2(p1, w, p2)
     p1 % rpt2(:)          = p1 % rpt2(:)          + p2 % rpt2(:)          * p2 % rpt2(:)
     p1 % variance(:)      = p1 % variance(:)      + p2 % variance(:)      * p2 % variance(:)
     p1 % overlap(:,:)     = p1 % overlap(:,:)     + p2 % overlap(:,:)     * p2 % overlap(:,:)
+    if (is_complex) then
+      p1 % overlap(:,:)   = p1 % overlap(:,:)     + p2 % overlap_imag(:,:) * p2 % overlap_imag(:,:)
+    endif
 
   else
 
@@ -81,6 +97,9 @@ subroutine pt2_add2(p1, w, p2)
     p1 % rpt2(:)          = p1 % rpt2(:)          + w * p2 % rpt2(:)          * p2 % rpt2(:)
     p1 % variance(:)      = p1 % variance(:)      + w * p2 % variance(:)      * p2 % variance(:)
     p1 % overlap(:,:)     = p1 % overlap(:,:)     + w * p2 % overlap(:,:)     * p2 % overlap(:,:)
+    if (is_complex) then
+      p1 % overlap(:,:) = p1 % overlap(:,:)  + w * p2 % overlap_imag(:,:) * p2 % overlap_imag(:,:)
+    endif
 
   endif
 
@@ -104,6 +123,10 @@ subroutine pt2_serialize(pt2_data, n, x)
   x(k+1:k+n)     =  pt2_data % variance(1:n)
   k=k+n
   x(k+1:k+n2)  =  reshape(pt2_data % overlap(1:n,1:n), (/ n2 /))
+  if (is_complex) then
+    k=k+n2
+    x(k+1:k+n2) = reshape(pt2_data % overlap_imag(1:n,1:n), (/ n2 /))
+  endif
 
 end
 
@@ -124,5 +147,9 @@ subroutine pt2_deserialize(pt2_data, n, x)
   pt2_data % variance(1:n)      =   x(k+1:k+n)
   k=k+n
   pt2_data % overlap(1:n,1:n) = reshape(x(k+1:k+n2), (/ n, n /))
+  if (is_complex) then
+    k=k+n2
+    pt2_data % overlap_imag(1:n,1:n) = reshape(x(k+1:k+n2), (/ n, n /))
+  endif
 
 end
diff --git a/src/cipsi/run_selection_slave.irp.f b/src/cipsi/run_selection_slave.irp.f
index c2ba2379..0d06d1d0 100644
--- a/src/cipsi/run_selection_slave.irp.f
+++ b/src/cipsi/run_selection_slave.irp.f
@@ -20,11 +20,17 @@ subroutine run_selection_slave(thread,iproc,energy)
   logical :: done, buffer_ready
   type(pt2_type) :: pt2_data
 
+  
+  !todo: check for providers that are now unlinked for real/complex
   PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
   PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
   PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
   PROVIDE psi_bilinear_matrix_transp_order N_int pt2_F pseudo_sym
-  PROVIDE psi_selectors_coef_transp psi_det_sorted weight_selection
+  if (is_complex) then
+    PROVIDE psi_selectors_coef_transp_complex psi_det_sorted weight_selection
+  else
+    PROVIDE psi_selectors_coef_transp psi_det_sorted weight_selection
+  endif
 
   call pt2_alloc(pt2_data,N_states)
 
diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f
index e599737c..f34be9cf 100644
--- a/src/cipsi/selection.irp.f
+++ b/src/cipsi/selection.irp.f
@@ -179,6 +179,7 @@ end subroutine
 
 
 subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset)
+  !todo: simplify for kpts
   use bitmasks
   use selection_types
   implicit none
@@ -196,6 +197,9 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset)
 
   call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int)
 
+  ! possible holes and particles for this generator
+  ! hole_mask: occupied in this generator   .AND. occupied in generators_bitmask_hole
+  ! part_mask: unoccupied in this generator .AND. occupied in generators_bitmask_part
   do k=1,N_int
       hole_mask(k,1) = iand(generators_bitmask(k,1,s_hole), psi_det_generators(k,1,i_generator))
       hole_mask(k,2) = iand(generators_bitmask(k,2,s_hole), psi_det_generators(k,2,i_generator))
@@ -267,6 +271,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   type(selection_buffer), intent(inout) :: buf
 
   integer                         :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii,sze
+  integer   :: kh1,kh2,kpt12,kk1,kk2,ik01,ik02,ik1,ik2
   integer(bit_kind)               :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2)
   logical                         :: fullMatch, ok
 
@@ -277,9 +282,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :)
   logical, allocatable           :: banned(:,:,:), bannedOrb(:,:)
   double precision, allocatable  :: coef_fullminilist_rev(:,:)
+  complex*16, allocatable  :: coef_fullminilist_rev_complex(:,:)
 
 
   double precision, allocatable   :: mat(:,:,:)
+  complex*16, allocatable   :: mat_complex(:,:,:)
 
   logical :: monoAdo, monoBdo
   integer :: maskInd
@@ -287,12 +294,17 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
   PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
   PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
-  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp
+  PROVIDE psi_bilinear_matrix_transp_order
+  if (is_complex) then
+    PROVIDE psi_selectors_coef_transp_complex
+  else
+    PROVIDE psi_selectors_coef_transp
+  endif
 
   monoAdo = .true.
   monoBdo = .true.
 
-
+  !todo: this is already done in select_connected? why repeat?
   do k=1,N_int
     hole    (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1))
     hole    (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2))
@@ -312,20 +324,40 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   integer, allocatable           :: indices(:), exc_degree(:), iorder(:)
   allocate (indices(N_det),                                          &
       exc_degree(max(N_det_alpha_unique,N_det_beta_unique)))
+  
+  
+  ! S_s = selectors
+  ! S_0 = {|D_G>}                                       (i_generator determinant)
+  ! S_j = {|D_k> : |D_k> \in T_j|D_G> }                 (i.e. S_2 is all dets connected to |D_G> by a double excitation)
+  ! S_2b = S_2 \intersection {|D_k> : a_{h1}|D_k> != 0} (in S_2 and h1 is occupied)
+  ! S_2' = S_2 \ {|D_k> : a_{h1}|D_k> != 0}             (in S_2 and h1 is not occupied)
+  ! S_4b = S_4 \intersection {|D_k> : a_{h1}|D_k> != 0} (in S_4 and h1 is occupied)
+  ! S_4' = S_4 \ {|D_k> : a_{h1}|D_k> != 0}             (in S_4 and h1 is not occupied)
 
+  ! construct the following sets of determinants:
+  !   preinteresting: S_pi = (U_{j=0..4} S_j) \intersection S_s
+  !   prefullinteresting: S_pfi = (U_{j=0..2} S_j) \ S_s
+  !   interesting: S_i = S_pi \ S_4b = ( (U_{j=0..3} S_j) U S_4' ) \intersection S_s
+  !   fullinteresting: S_fi = S_i U (S_pfi \ S_2b) = (S_0 U S_1 U S_2') 
+  !     (in order, first elements are in S_s, later elements are not in S_s) 
+
+
+  ! get indices of all unique dets for which total excitation degree (relative to i_generator) is <= 4
   k=1
+  ! get exc_degree(i) for each unique alpha det(i) from i_generator(alpha)
   do i=1,N_det_alpha_unique
     call get_excitation_degree_spin(psi_det_alpha_unique(1,i),       &
         psi_det_generators(1,1,i_generator), exc_degree(i), N_int)
   enddo
-
+  
+  ! get exc_degree (= nt) for each unique beta det(j) from i_generator(beta)
   do j=1,N_det_beta_unique
     call get_excitation_degree_spin(psi_det_beta_unique(1,j),        &
         psi_det_generators(1,2,i_generator), nt, N_int)
-    if (nt > 2) cycle
+    if (nt > 2) cycle ! don't keep anything more than double beta exc
     do l_a=psi_bilinear_matrix_columns_loc(j), psi_bilinear_matrix_columns_loc(j+1)-1
       i = psi_bilinear_matrix_rows(l_a)
-      if (nt + exc_degree(i) <= 4) then
+      if (nt + exc_degree(i) <= 4) then ! don't keep anything more than 4-fold total exc
         idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a))
         if (psi_average_norm_contrib_sorted(idx) > 1.d-20) then
           indices(k) = idx
@@ -334,6 +366,23 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
       endif
     enddo
   enddo
+  
+
+  !  indices now contains det indices (in psi_det_sorted) of dets which differ from generator by:
+  !  (exc_alpha,exc_beta) in
+  !  (4,0)
+  !  (3,0), (3,1)
+  !  (2,0), (2,1), (2,2)
+  !  (1,0), (1,1), (1,2)
+  !  (0,0), (0,1), (0,2)
+  !
+  !                              (4,0)
+  !                       (3,0), (3,1)
+  !                (2,0), (2,1), (2,2)
+  !         (1,0), (1,1), (1,2)
+  !  (0,0), (0,1), (0,2)
+  !
+  !  below, add (0,3), (0,4), (1,3)
 
   do i=1,N_det_beta_unique
     call get_excitation_degree_spin(psi_det_beta_unique(1,i),        &
@@ -368,7 +417,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
   enddo
   call isort(indices,iorder,nmax)
   deallocate(iorder)
-
+  ! sort indices by location in psi_det_sorted
+  
   ! Start with 32 elements. Size will double along with the filtering.
   allocate(preinteresting(0:32), prefullinteresting(0:32),     &
       interesting(0:32), fullinteresting(0:32))
@@ -382,6 +432,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
 
   do k=1,nmax
     i = indices(k)
+    ! mobMask in psi_det(i) but not in i_generator
+    ! nt = popcnt(mobMask)
     mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i))
     mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i))
     nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
@@ -390,6 +442,9 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
       mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i))
       nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
     end do
+    
+    ! preinteresting:     within a 4-fold excitation from i_generator; in selectors
+    ! prefullinteresting: within a double excitation from i_generator; not in selectors
 
     if(nt <= 4) then
       if(i <= N_det_selectors) then
@@ -426,7 +481,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
 !  !$OMP END CRITICAL
 
   allocate(banned(mo_num, mo_num,2), bannedOrb(mo_num, 2))
+  if (is_complex) then
+  allocate (mat_complex(N_states, mo_num, mo_num))
+  else
   allocate (mat(N_states, mo_num, mo_num))
+  endif
   maskInd = -1
 
   integer                        :: nb_count, maskInd_save
@@ -455,10 +514,16 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
       maskInd = maskInd_save
 
       h1 = hole_list(i1,s1)
+!todo: kpts
+      if (is_complex) then
+        kh1 = (h1-1)/mo_num_per_kpt + 1
+      endif
+      ! pmask is i_generator det with bit at h1 set to zero
       call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int)
 
       negMask = not(pmask)
 
+      ! see set definitions above 
       interesting(0) = 0
       fullinteresting(0) = 0
 
@@ -517,7 +582,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
               endif
             end do
         end select
-
+        
+        ! nt = ( orbs occupied in preinteresting(ii) and not occupied in i_gen(after removing elec from h1) )
         if(nt <= 4) then
           sze = interesting(0)
           if (sze+1 == size(interesting)) then
@@ -579,12 +645,17 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
       allocate (fullminilist (N_int, 2, fullinteresting(0)), &
                     minilist (N_int, 2,     interesting(0)) )
       if(pert_2rdm)then
-        allocate(coef_fullminilist_rev(N_states,fullinteresting(0)))
-        do i=1,fullinteresting(0)
-          do j = 1, N_states
-            coef_fullminilist_rev(j,i) = psi_coef_sorted(fullinteresting(i),j)
+        if (is_complex) then
+          print*,irp_here,' not implemented for complex: pert_2rdm'
+          stop -1
+        else
+          allocate(coef_fullminilist_rev(N_states,fullinteresting(0))) 
+          do i=1,fullinteresting(0)
+            do j = 1, N_states
+              coef_fullminilist_rev(j,i) = psi_coef_sorted(fullinteresting(i),j)
+            enddo
           enddo
-        enddo
+        endif
       endif
 
       do i=1,fullinteresting(0)
@@ -606,23 +677,54 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
         do i2=N_holes(s2),ib,-1   ! Generate low excitations first
 
           h2 = hole_list(i2,s2)
-          call apply_hole(pmask, s2,h2, mask, ok, N_int)
-          banned = .false.
+          if (is_complex) then
+!=============================================================
+!!todo use this once kpts are implemented
+            kh2 = (h2-1)/mo_num_per_kpt + 1
+            kpt12 = kconserv(kh1,kh2,1)
+            ! mask is gen_i with (h1,s1),(h2,s2) removed
+            call apply_hole(pmask, s2,h2, mask, ok, N_int)
+            banned = .true.
+            ! only allow excitations that conserve momentum
+            do kk1=1,kpt_num
+              ! equivalent to kk2 = kconserv(kh1,kh2,kk1)
+              kk2 = kconserv(kpt12,1,kk1)
+              ik01 = (kk1-1) * mo_num_per_kpt + 1 !first mo in kk1
+              ik02 = (kk2-1) * mo_num_per_kpt + 1 !first mo in kk2
+              do ik1 = ik01, ik01 + mo_num_per_kpt - 1 !loop over mos in kk1
+                do ik2 = ik02, ik02 + mo_num_per_kpt - 1 !loop over mos in kk2
+                  ! depending on sp, might not need both of these?
+                  ! sp=1 (a,a) or sp=2 (b,b): only use banned(:,:,1)
+                  ! sp=3 (a,b): banned(alpha,beta,1) is transpose of banned(beta,alpha,2)
+                  banned(ik1,ik2,1) = .false.
+                  banned(ik1,ik2,2) = .false.
+                enddo
+              enddo
+            enddo
+!=============================================================
+!            ! mask is gen_i with (h1,s1),(h2,s2) removed
+!            call apply_hole(pmask, s2,h2, mask, ok, N_int)
+!            banned = .false.
+!=============================================================
+          else
+            call apply_hole(pmask, s2,h2, mask, ok, N_int)
+            banned = .false.
+          endif
           do j=1,mo_num
             bannedOrb(j, 1) = .true.
             bannedOrb(j, 2) = .true.
           enddo
           do s3=1,2
             do i=1,N_particles(s3)
-              bannedOrb(particle_list(i,s3), s3) = .false.
+              bannedOrb(particle_list(i,s3), s3) = .false. ! allow excitation into orbitals in particle_list
             enddo
           enddo
           if(s1 /= s2) then
             if(monoBdo) then
-              bannedOrb(h1,s1) = .false.
+              bannedOrb(h1,s1) = .false. ! allow alpha elec to go back into alpha hole
             end if
             if(monoAdo) then
-              bannedOrb(h2,s2) = .false.
+              bannedOrb(h2,s2) = .false. ! allow beta elec to go back into beta hole
               monoAdo = .false.
             end if
           end if
@@ -635,7 +737,18 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
 ! !$OMP CRITICAL
 !  print *,  'Step3: ', i_generator, h1, interesting(0)
 ! !$OMP END CRITICAL
-
+            if (is_complex) then
+              call splash_pq_complex(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat_complex, interesting)
+              
+              if(.not.pert_2rdm)then
+               !call fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat_complex, buf)
+               call fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat_complex, buf)
+              else
+                print*,irp_here,' not implemented for complex (fill_buffer_double_rdm_complex)'
+                stop -1
+               !call fill_buffer_double_rdm_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat_complex, buf,fullminilist, coef_fullminilist_rev_complex, fullinteresting(0))
+              endif
+            else
             call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting)
 
             if(.not.pert_2rdm)then
@@ -643,18 +756,29 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
             else
              call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0))
             endif
+            endif!complex
           end if
-        enddo
+        enddo !i2
         if(s1 /= s2) monoBdo = .false.
-      enddo
+      enddo !s2
       deallocate(fullminilist,minilist)
       if(pert_2rdm)then
-        deallocate(coef_fullminilist_rev)
+        if (is_complex) then
+          print*,irp_here,' not implemented for complex: pert_2rdm'
+          stop -1
+        else
+          deallocate(coef_fullminilist_rev)
+        endif
       endif
-    enddo
-  enddo
+    enddo ! i1
+  enddo ! s1
   deallocate(preinteresting, prefullinteresting, interesting, fullinteresting)
-  deallocate(banned, bannedOrb,mat)
+  deallocate(banned, bannedOrb)
+  if (is_complex) then
+    deallocate(mat_complex)
+  else
+    deallocate(mat)
+  endif
 end subroutine
 
 
@@ -761,7 +885,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
         if (.not.is_a_1h1p(det)) cycle
       endif
 
-      Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int)
+      Hii = diag_h_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int)
 
       w = 0d0
 
@@ -1547,7 +1671,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting)
 
   genl : do i=1, N
     ! If det(i) can't be generated by the mask, cycle
-    do j=1, N_int
+    do j=1, N_int  ! if all occupied orbs in mask are not also occupied in det(i), go to next det
       if(iand(det(j,1,i), mask(j,1)) /= mask(j, 1)) cycle genl
       if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl
     end do
@@ -1559,11 +1683,14 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting)
     end if
 
     ! Identify the particles
-    do j=1, N_int
+    do j=1, N_int ! if electrons are excited into the orbs given by myMask, resulting determinant will be det(i)
       myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1))
       myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2))
     end do
 
+    ! don't allow excitations into this pair of orbitals?
+    ! should 'banned' have dimensions (mo_num,mo_num,2)?
+    ! is it always true that popcnt(myMask) = 2 ? (sum over N_int and alpha/beta spins)
     call bitstring_to_list_in_selection(myMask(1,1), list(1), na, N_int)
     call bitstring_to_list_in_selection(myMask(1,2), list(na+1), nb, N_int)
     banned(list(1), list(2)) = .true.
@@ -1952,3 +2079,1554 @@ subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
 end
 
 
+!==============================================================================!
+!                                                                              !
+!                                    Complex                                   !
+!                                                                              !
+!==============================================================================!
+
+subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf)
+  !todo: should be okay for complex
+  use bitmasks
+  use selection_types
+  implicit none
+
+  integer, intent(in) :: i_generator, sp, h1, h2
+  complex*16, intent(in) :: mat(N_states, mo_num, mo_num)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num)
+  double precision, intent(in)    :: fock_diag_tmp(mo_num)
+  double precision, intent(in)    :: E0(N_states)
+  type(pt2_type), intent(inout)   :: pt2_date
+  type(selection_buffer), intent(inout) :: buf
+  logical :: ok
+  integer :: s1, s2, p1, p2, ib, j, istate, jstate
+  integer(bit_kind) :: mask(N_int, 2), det(N_int, 2)
+  double precision :: e_pert(n_states), x(n_states)
+  double precision :: delta_E, val, Hii, w, tmp
+  complex*16 ::  alpha_h_psi, coef(n_states), val_c
+  double precision, external :: diag_H_mat_elem_fock
+  double precision :: E_shift
+
+!  logical, external :: detEq
+!  double precision, allocatable :: values(:)
+!  integer, allocatable          :: keys(:,:)
+!  integer                       :: nkeys
+  double precision :: s_weight(n_states,n_states)
+  do jstate=1,n_states
+    do istate=1,n_states
+      s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate))
+    enddo
+  enddo
+
+  if(sp == 3) then
+    s1 = 1
+    s2 = 2
+  else
+    s1 = sp
+    s2 = sp
+  end if
+  call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int)
+  E_shift = 0.d0
+
+  if (h0_type == 'SOP') then
+    j = det_to_occ_pattern(i_generator)
+    E_shift = psi_det_Hii(i_generator) - psi_occ_pattern_Hii(j)
+  endif
+
+  do p1=1,mo_num
+    if(bannedOrb(p1, s1)) cycle
+    ib = 1
+    if(sp /= 3) ib = p1+1
+
+    do p2=ib,mo_num
+
+! -----
+! /!\ Generating only single excited determinants doesn't work because a
+! determinant generated by a single excitation may be doubly excited wrt
+! to a determinant of the future. In that case, the determinant will be
+! detected as already generated when generating in the future with a
+! double excitation.
+!     
+!      if (.not.do_singles) then
+!        if ((h1 == p1) .or. (h2 == p2)) then
+!          cycle
+!        endif
+!      endif
+!
+!      if (.not.do_doubles) then
+!        if ((h1 /= p1).and.(h2 /= p2)) then
+!          cycle
+!        endif
+!      endif
+! -----
+
+      if(bannedOrb(p2, s2)) cycle
+      if(banned(p1,p2)) cycle
+
+      val = maxval(cdabs(mat(1:N_states, p1, p2)))
+      if( val == 0d0) cycle
+      call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+
+      if (do_only_cas) then
+        integer, external :: number_of_holes, number_of_particles
+        if (number_of_particles(det)>0) then
+          cycle
+        endif
+        if (number_of_holes(det)>0) then
+          cycle
+        endif
+      endif
+
+      if (do_ddci) then
+        logical, external  :: is_a_two_holes_two_particles
+        if (is_a_two_holes_two_particles(det)) then
+          cycle
+        endif
+      endif
+
+      if (do_only_1h1p) then
+        logical, external :: is_a_1h1p
+        if (.not.is_a_1h1p(det)) cycle
+      endif
+
+      Hii = diag_h_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int)
+
+      w = 0d0
+
+!      integer(bit_kind) :: occ(N_int,2), n
+!      call occ_pattern_of_det(det,occ,N_int)
+!      call occ_pattern_to_dets_size(occ,n,elec_alpha_num,N_int)
+
+
+      do istate=1,N_states
+        delta_E = E0(istate) - Hii + E_shift
+        alpha_h_psi = mat(istate, p1, p2)
+        val_c = alpha_h_psi + alpha_h_psi
+        tmp = dsqrt(delta_E * delta_E + cdabs(val_c * val_c))
+        if (delta_E < 0.d0) then
+            tmp = -tmp
+        endif
+        e_pert(istate) = 0.5d0 * (tmp - delta_E)
+        !TODO: check conjugate for coef
+        if (cdabs(alpha_h_psi) > 1.d-4) then
+          coef(istate) = e_pert / alpha_h_psi
+        else
+          coef(istate) = alpha_h_psi / delta_E
+        endif
+        if (e_pert(istate) < 0.d0) then
+          x(istate) = -dsqrt(-e_pert(istate))
+        else
+          x(istate) = dsqrt(e_pert(istate))
+        endif
+      enddo
+
+      do istate=1,n_states
+        do jstate=1,n_states
+          val_c = coef(jstate) * dconjg(coef(istate)) 
+          pt2_data % overlap(jstate,istate) += dble(val_c) 
+          pt2_data % overlap_imag(jstate,istate) += dimag(val_c) 
+        enddo
+      enddo
+
+      do istate=1,n_states
+        alpha_h_psi = mat(istate, p1, p2)
+        pt2_data % variance(istate) += cdabs(alpha_h_psi * alpha_h_psi)
+        pt2_data % pt2(istate) += e_pert(istate)
+
+!!!DEBUG
+!        integer :: k
+!        double precision :: alpha_h_psi_2,hij
+!        alpha_h_psi_2 = 0.d0
+!        do k = 1,N_det_selectors
+!         call i_H_j(det,psi_selectors(1,1,k),N_int,hij)
+!         alpha_h_psi_2 = alpha_h_psi_2 + psi_selectors_coef(k,istate) * hij
+!        enddo
+!        if(dabs(alpha_h_psi_2 - alpha_h_psi).gt.1.d-12)then
+!         call debug_det(psi_det_generators(1,1,i_generator),N_int)
+!         call debug_det(det,N_int)                                                                                        
+!         print*,'alpha_h_psi,alpha_h_psi_2 = ',alpha_h_psi,alpha_h_psi_2
+!         stop
+!        endif
+!!!DEBUG
+
+        select case (weight_selection)
+
+          !TODO: check off-diagonals
+          case(5)
+            ! Variance selection
+            w = w - cdabs(alpha_h_psi * alpha_h_psi) * s_weight(istate,istate)
+            do jstate=1,n_states
+              if (istate == jstate) cycle
+              w = w + cdabs(alpha_h_psi * mat(jstate,p1,p2)) * s_weight(istate,jstate)
+            enddo
+
+          case(6)
+            w = w - cdabs(coef(istate) * coef(istate)) * s_weight(istate,istate)
+            do jstate=1,n_states
+              if (istate == jstate) cycle
+              w = w + cdabs(coef(istate)*coef(jstate)) * s_weight(istate,jstate)
+            enddo
+
+          case default
+            ! Energy selection
+            w = w + e_pert(istate) * s_weight(istate,istate)
+            do jstate=1,n_states
+              if (istate == jstate) cycle
+              !TODO: why dabs?
+              w = w - dabs(x(istate))*x(jstate) * s_weight(istate,jstate)
+            enddo
+
+        end select
+      end do
+
+
+      if(pseudo_sym)then
+        if(cdabs(mat(1, p1, p2)).lt.thresh_sym)then 
+          w = 0.d0
+        endif
+      endif
+
+!      w = dble(n) * w
+
+      if(w <= buf%mini) then
+        call add_to_selection_buffer(buf, det, w)
+      end if
+    end do
+  end do
+end
+
+subroutine splash_pq_complex(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+! Computes the contributions A(r,s) by
+! comparing the external determinant to all the internal determinants det(i).
+! an applying two particles (r,s) to the mask.
+  END_DOC
+
+  integer, intent(in)            :: sp, i_gen, N_sel
+  integer, intent(in)            :: interesting(0:N_sel)
+  integer(bit_kind),intent(in)   :: mask(N_int, 2), det(N_int, 2, N_sel)
+  logical, intent(inout)         :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2)
+  ! mat should be out, not inout? (if only called from select_singles_and_doubles)
+  complex*16, intent(inout) :: mat(N_states, mo_num, mo_num)
+
+  integer                        :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt
+  integer(bit_kind)              :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2)
+  integer(bit_kind)             :: phasemask(N_int,2)
+
+  PROVIDE psi_selectors_coef_transp_complex psi_det_sorted
+  mat = 0d0
+
+  do i=1,N_int
+    negMask(i,1) = not(mask(i,1))
+    negMask(i,2) = not(mask(i,2))
+  end do
+
+  do i=1, N_sel 
+    if (interesting(i) < 0) then
+      stop 'prefetch interesting(i) and det(i)'
+    endif
+
+    mobMask(1,1) = iand(negMask(1,1), det(1,1,i))
+    mobMask(1,2) = iand(negMask(1,2), det(1,2,i))
+    nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+
+    if(nt > 4) cycle
+
+    do j=2,N_int
+      mobMask(j,1) = iand(negMask(j,1), det(j,1,i))
+      mobMask(j,2) = iand(negMask(j,2), det(j,2,i))
+      nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
+    end do
+
+    if(nt > 4) cycle
+
+    if (interesting(i) == i_gen) then
+        if(sp == 3) then
+          do k=1,mo_num
+            do j=1,mo_num
+              banned(j,k,2) = banned(k,j,1)
+            enddo
+          enddo
+        else
+          do k=1,mo_num
+          do l=k+1,mo_num
+            banned(l,k,1) = banned(k,l,1)
+          end do
+          end do
+        end if
+    end if
+
+    ! p contains orbs in det that are not in the doubly ionized generator
+    if (interesting(i) >= i_gen) then ! det past i_gen
+        call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
+        call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
+
+        perMask(1,1) = iand(mask(1,1), not(det(1,1,i)))
+        perMask(1,2) = iand(mask(1,2), not(det(1,2,i)))
+        do j=2,N_int
+          perMask(j,1) = iand(mask(j,1), not(det(j,1,i)))
+          perMask(j,2) = iand(mask(j,2), not(det(j,2,i)))
+        end do
+
+      ! h contains orbs in the doubly ionized generator that are not in det
+        call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int)
+        call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int)
+
+        call get_mask_phase(psi_det_sorted(1,1,interesting(i)), phasemask,N_int)
+        if(nt == 4) then ! differ by 6 (2,4)
+          call get_d2_complex(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp_complex(1, interesting(i)))
+        else if(nt == 3) then ! differ by 4 (1,3)
+          !call get_d1_complex(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp_complex(1, interesting(i)))
+          !call get_d1_kpts(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp_complex(1, interesting(i)))
+          call get_d1_kpts_new(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp_complex(1, interesting(i)))
+        else ! differ by 2 (0,2) 
+          call get_d0_complex(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp_complex(1, interesting(i)))
+        end if
+    else if(nt == 4) then ! differ by 6 (2,4); i_gen past det
+        call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
+        call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
+        call past_d2(banned, p, sp)
+    else if(nt == 3) then ! differ by 4 (1,3); i_gen past det
+        call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
+        call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
+        call past_d1(bannedOrb, p)
+    end if
+  end do
+
+end
+
+
+subroutine get_d2_complex(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+  !todo: indices/conjg should be correct for complex
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  complex*16, intent(in) :: coefs(N_states)
+  complex*16, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  double precision, external :: get_phase_bi
+  complex*16, external :: mo_two_e_integral_complex
+
+  integer :: i, j, k, tip, ma, mi, puti, putj
+  integer :: h1, h2, p1, p2, i1, i2
+  double precision :: phase
+  complex*16 :: hij
+
+  integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
+  integer, parameter :: turn2(2) = (/2, 1/)
+  integer, parameter :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer :: bant
+  bant = 1
+
+  tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles
+
+  ma = sp !1:(alpha,alpha); 2:(b,b); 3:(a,b)
+  if(p(0,1) > p(0,2)) ma = 1 ! more alpha particles than beta particles
+  if(p(0,1) < p(0,2)) ma = 2 ! fewer alpha particles than beta particles
+  mi = mod(ma, 2) + 1
+
+  if(sp == 3) then ! if one alpha and one beta xhole 
+    !(where xholes refer to the ionizations from the generator, not the holes occupied in the ionized generator)
+    if(ma == 2) bant = 2 ! if more beta particles than alpha particles
+
+    if(tip == 3) then ! if 3 of one particle spin and 1 of the other particle spin
+      puti = p(1, mi)
+      if(bannedOrb(puti, mi)) return
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+
+      do i = 1, 3    ! loop over all 3 combinations of 2 particles with spin ma
+        putj = p(i, ma)
+        if(banned(putj,puti,bant)) cycle
+        i1 = turn3(1,i)
+        i2 = turn3(2,i)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        
+     ! |G> = |psi_{gen,i}>
+     ! |G'> = a_{x1} a_{x2} |G>
+     ! |alpha> = a_{puti}^{\dagger} a_{putj}^{\dagger} |G'>
+     ! |alpha> = t_{x1,x2}^{puti,putj} |G>
+     ! hij = <psi_{selectors,i}|H|alpha>
+     ! |alpha> = t_{p1,p2}^{h1,h2}|psi_{selectors,i}>
+        !todo: <i|H|j>  =  (<h1,h2|p1,p2> - <h1,h2|p2,p1>) * phase
+        !    <psi|H|j> +=  dconjg(c_i) * <i|H|j>
+        !      <j|H|i>  =  (<p1,p2|h1,h2> - <p2,p1|h1,h2>) * phase
+        !    <j|H|psi> +=  <j|H|i> * c_i
+        hij = mo_two_e_integral_complex(p1, p2, h1, h2) - mo_two_e_integral_complex(p2, p1, h1, h2)
+        if (hij == (0.d0,0.d0)) cycle
+
+        ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+        hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+
+        if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij
+          enddo
+        else            ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+          enddo
+        end if
+      end do
+    else ! if 2 alpha and 2 beta particles
+      h1 = h(1,1)
+      h2 = h(1,2)
+      do j = 1,2 ! loop over all 4 combinations of one alpha and one beta particle
+        putj = p(j, 2)
+        if(bannedOrb(putj, 2)) cycle
+        p2 = p(turn2(j), 2)
+        do i = 1,2
+          puti = p(i, 1)
+
+          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
+          p1 = p(turn2(i), 1)
+
+    ! hij = <psi_{selectors,i}|H|alpha> 
+          hij = mo_two_e_integral_complex(p1, p2, h1, h2)
+          if (hij /= (0.d0,0.d0)) then
+            ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+            hij = dconjg(hij) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+            enddo
+          endif
+        end do
+      end do
+    end if
+
+  else ! if holes are (a,a) or (b,b)
+    if(tip == 0) then ! if particles are (a,a,a,a) or (b,b,b,b)
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+      do i=1,3
+        puti = p(i, ma)
+        if(bannedOrb(puti,ma)) cycle
+        do j=i+1,4
+          putj = p(j, ma)
+          if(bannedOrb(putj,ma)) cycle
+          if(banned(puti,putj,1)) cycle
+
+          i1 = turn2d(1, i, j)
+          i2 = turn2d(2, i, j)
+          p1 = p(i1, ma)
+          p2 = p(i2, ma)
+          hij = mo_two_e_integral_complex(p1, p2, h1, h2) - mo_two_e_integral_complex(p2,p1, h1, h2)
+          if (hij == (0.d0,0.d0)) cycle
+
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+          hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
+          enddo
+        end do
+      end do
+    else if(tip == 3) then ! if particles are (a,a,a,b) (ma=1,mi=2) or (a,b,b,b) (ma=2,mi=1)
+      h1 = h(1, mi)
+      h2 = h(1, ma)
+      p1 = p(1, mi)
+      do i=1,3
+        puti = p(turn3(1,i), ma)
+        if(bannedOrb(puti,ma)) cycle
+        putj = p(turn3(2,i), ma)
+        if(bannedOrb(putj,ma)) cycle
+        if(banned(puti,putj,1)) cycle
+        p2 = p(i, ma)
+
+        hij = mo_two_e_integral_complex(p1, p2, h1, h2)
+        if (hij == (0.d0,0.d0)) cycle
+
+        ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+        hij = dconjg(hij) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        if (puti < putj) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+          enddo
+        else
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij
+          enddo
+        endif
+      end do
+    else ! tip == 4  (a,a,b,b)
+      puti = p(1, sp)
+      putj = p(2, sp)
+      if(.not. banned(puti,putj,1)) then
+        p1 = p(1, mi)
+        p2 = p(2, mi)
+        h1 = h(1, mi)
+        h2 = h(2, mi)
+        hij = (mo_two_e_integral_complex(p1, p2, h1, h2) - mo_two_e_integral_complex(p2,p1, h1, h2))
+        if (hij /= (0.d0,0.d0)) then
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+          hij = dconjg(hij) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+          enddo
+        end if
+      end if
+    end if
+  end if
+end
+
+
+subroutine get_d1_complex(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+  !todo: indices should be okay for complex?
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in)  :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  complex*16, intent(in)   :: coefs(N_states)
+  complex*16, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision, external     :: get_phase_bi
+  complex*16, external     :: mo_two_e_integral_complex
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib, k, l
+
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer                        :: bant
+  complex*16, allocatable :: hij_cache(:,:)
+  complex*16               :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num)
+  PROVIDE mo_integrals_map N_int
+
+  allocate (lbanned(mo_num, 2))
+  allocate (hij_cache(mo_num,2))
+  lbanned = bannedOrb
+
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+
+  bant = 1
+
+  if(sp == 3) then
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+    if(.not. bannedOrb(puti, mi)) then
+      call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+      call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      tmp_row = (0.d0,0.d0)
+      do putj=1, hfix-1
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,1) - hij_cache(putj,2)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,2) - hij_cache(putj,1)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+
+      if(ma == 1) then
+        mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
+      else
+        do l=1,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k,puti,l) = mat(k,puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+      end if
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)
+    tmp_row = (0.d0,0.d0)
+    tmp_row2 = (0.d0,0.d0)
+    call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+    call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+    putj = p1
+    do puti=1,mo_num !HOT
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,2)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+          enddo
+        endif
+      end if
+!    enddo
+!      
+      putj = p2
+!    do puti=1,mo_num !HOT
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,1)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+          enddo
+        endif
+      end if
+    end do
+
+    if(mi == 1) then
+      mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
+      mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
+    else
+      do l=1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p1,l) = mat(k,p1,l) + tmp_row(k,l)
+          mat(k,p2,l) = mat(k,p2,l) + tmp_row2(k,l)
+        enddo
+      enddo
+    end if
+
+  else  ! sp /= 3
+
+    if(p(0,ma) == 3) then
+      do i=1,3
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+        call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+        call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+        tmp_row = (0.d0,0.d0)
+        do putj=1,hfix-1
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,1) - hij_cache(putj,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+        do putj=hfix+1,mo_num
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,2) - hij_cache(putj,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+
+        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, l) = mat(k, puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+      end do
+    else
+      hfix = h(1,mi)
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+      tmp_row = (0.d0,0.d0)
+      tmp_row2 = (0.d0,0.d0)
+      call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+      call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      putj = p2
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+      end do
+      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
+      do l=p2,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p2,l) = mat(k,p2,l) + tmp_row(k,l)
+        enddo
+      enddo
+      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
+      do l=p1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p1,l) = mat(k,p1,l) + tmp_row2(k,l)
+        enddo
+      enddo
+    end if
+  end if
+  deallocate(lbanned,hij_cache)
+
+ !! MONO
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1=1,p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1
+      do i2=ib,p(0,s2)
+        p1 = p(i1,s1)
+        p2 = p(i2,s2)
+        if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+        ! gen is a selector; mask is ionized generator; det is alpha
+        ! hij is contribution to <psi|H|alpha>
+        call i_h_j_complex(gen, det, N_int, hij)
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * dconjg(hij)
+        enddo
+      end do
+    end do
+end
+
+
+
+subroutine get_d0_complex(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+  !todo: indices/conjg should be okay for complex
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind) :: det(N_int, 2)
+  complex*16, intent(in) :: coefs(N_states)
+  complex*16, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  integer :: i, j, k, s, h1, h2, p1, p2, puti, putj
+  double precision :: phase
+  complex*16 :: hij
+  double precision, external :: get_phase_bi
+  complex*16, external :: mo_two_e_integral_complex
+  logical :: ok
+
+  integer, parameter :: bant=1
+  complex*16, allocatable :: hij_cache1(:), hij_cache2(:)
+  allocate (hij_cache1(mo_num),hij_cache2(mo_num))
+
+
+  if(sp == 3) then ! AB
+    h1 = p(1,1)
+    h2 = p(1,2)
+    do p1=1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+      call get_mo_two_e_integrals_complex(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map,mo_integrals_map_2)
+      do p2=1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, bant)) cycle ! rentable?
+        if(p1 == h1 .or. p2 == h2) then
+          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
+          ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this
+          call i_h_j_complex(det, gen, N_int, hij)
+        else
+          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          hij = hij_cache1(p2) * phase
+        end if
+        if (hij == (0.d0,0.d0)) cycle
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij  ! HOTSPOT
+        enddo
+      end do
+    end do
+
+  else ! AA BB
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti=1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+      call get_mo_two_e_integrals_complex(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map,mo_integrals_map_2)
+      call get_mo_two_e_integrals_complex(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map,mo_integrals_map_2)
+      do putj=puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, bant)) cycle ! rentable?
+        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
+          !call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this
+          call i_h_j_complex(det, gen, N_int, hij)
+          if (hij == (0.d0,0.d0)) cycle
+        else
+          hij = (mo_two_e_integral_complex(p1, p2, puti, putj) -  mo_two_e_integral_complex(p2, p1, puti, putj))
+          if (hij == (0.d0,0.d0)) cycle
+          hij = dconjg(hij) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+        end if
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+        enddo
+      end do
+    end do
+  end if
+
+  deallocate(hij_cache1,hij_cache2)
+end
+
+subroutine get_d1_kpts(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+  !todo: indices should be okay for complex?
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in)  :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  complex*16, intent(in)   :: coefs(N_states)
+  complex*16, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision, external     :: get_phase_bi
+  complex*16, external     :: mo_two_e_integral_complex
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib, k, l
+
+  integer :: kp1,ip1, kp2,ip2, khfix,ihfix, kputi,iputi, kputj,iputj, putj0
+  integer :: kpfix, ipfix, puti0
+  integer :: kputi1,kputi2,puti01,puti02
+  integer :: ii0
+
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer                        :: bant
+  complex*16, allocatable :: hij_cache(:,:),hij_cache2(:,:)
+  complex*16               :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num)
+  complex*16               :: tmp_row_kpts(N_states, mo_num), tmp_row2_kpts(N_states, mo_num)
+  complex*16               :: tmp_row_kpts2(N_states, mo_num_per_kpt), tmp_row2_kpts2(N_states,mo_num_per_kpt)
+  complex*16 :: tmp_mat1(N_states,mo_num,mo_num), tmp_mat2(N_states,mo_num,mo_num)
+  PROVIDE mo_integrals_map N_int
+
+  allocate (lbanned(mo_num, 2))
+  allocate (hij_cache(mo_num,2),hij_cache2(mo_num_per_kpt,2))
+  lbanned = bannedOrb
+
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+
+  bant = 1
+
+  if(sp == 3) then
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+    call get_kpt_idx_mo(puti,kputi,iputi)
+    call get_kpt_idx_mo(hfix,khfix,ihfix)
+    call get_kpt_idx_mo(p1,kp1,ip1)
+    call get_kpt_idx_mo(p2,kp2,ip2)
+
+    if(.not. bannedOrb(puti, mi)) then
+      !==================
+      call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+      call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      !==================
+      call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p1,ip1,kp1,p2,ip2,kp2,mo_num_per_kpt,hij_cache2(1,1),mo_integrals_map,mo_integrals_map_2)
+      call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p2,ip2,kp2,p1,ip1,kp1,mo_num_per_kpt,hij_cache2(1,2),mo_integrals_map,mo_integrals_map_2)
+      tmp_row = (0.d0,0.d0)
+      tmp_row_kpts2 = (0.d0,0.d0)
+      kputj = kconserv(kp1,kp2,khfix)
+      putj0 = (kputj-1)*mo_num_per_kpt
+      !==================
+      do putj=1, hfix-1
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,1) - hij_cache(putj,2)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,2) - hij_cache(putj,1)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+      !===========================
+      ! begin kpts testing
+      do putj = putj0+1, hfix-1
+        iputj = putj-putj0
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache2(iputj,1) - hij_cache2(iputj,2)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            !tmp_row_kpts(k,putj) = tmp_row_kpts(k,putj) + hij * coefs(k)
+            tmp_row_kpts2(k,iputj) = tmp_row_kpts2(k,iputj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+      do putj = hfix+1,putj0+mo_num_per_kpt
+        iputj = putj - putj0
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache2(iputj,2) - hij_cache2(iputj,1)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            !tmp_row_kpts(k,putj) = tmp_row_kpts(k,putj) + hij * coefs(k)
+            tmp_row_kpts2(k,iputj) = tmp_row_kpts2(k,iputj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+      ! end kpts testing
+      !===========================================================
+      !print*,'tmp_row_k,tmp_row'
+      !do ii0=1,mo_num
+      !  if (cdabs(tmp_row_kpts(1,ii0)-tmp_row(1,ii0)).gt.1.d-12) then
+      !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG, ',ii0,hfix,p1,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+      !  endif
+      !enddo
+      !===========================================================
+      tmp_mat1 = (0.d0,0.d0)
+      tmp_mat2 = (0.d0,0.d0)
+      !===========================================================
+      if(ma == 1) then
+      !===========================================================
+        tmp_mat1(1:N_states,1:mo_num,puti) = tmp_mat1(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
+        tmp_mat2(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) = tmp_mat2(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) + &
+                                         tmp_row_kpts2(1:N_states,1:mo_num_per_kpt)
+      !===========================================================
+        !mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
+        mat(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) = mat(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) + &
+                                         tmp_row_kpts2(1:N_states,1:mo_num_per_kpt)
+      else
+      !===========================================================
+        do l=1,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_mat1(k,puti,l) = tmp_mat1(k,puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+        do l=1,mo_num_per_kpt
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_mat2(k,puti,l+putj0) = tmp_mat2(k,puti,l+putj0) + tmp_row_kpts2(k,l)
+          enddo
+        enddo
+      !===========================================================
+        !do l=1,mo_num
+        !  !DIR$ LOOP COUNT AVG(4)
+        !  do k=1,N_states
+        !    mat(k,puti,l) = mat(k,puti,l) + tmp_row(k,l)
+        !  enddo
+        !enddo
+        do l=1,mo_num_per_kpt
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k,puti,l+putj0) = mat(k,puti,l+putj0) + tmp_row_kpts2(k,l)
+          enddo
+        enddo
+      end if
+      !===========================================================
+        do k=1,N_states
+          do l=1,mo_num
+            do ii0=1,mo_num
+              if (cdabs(tmp_mat2(k,l,ii0)-tmp_mat1(k,l,ii0)).gt.1.d-12) then
+                print'((A),6(I5),2(2(E25.15),2X))','WarNInG 4a, ',k,l,ii0,hfix,p1,p2,tmp_mat2(k,l,ii0),tmp_mat1(k,l,ii0)
+            !  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+            !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+              endif
+            enddo
+          enddo
+        enddo
+      !===========================================================
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)
+    call get_kpt_idx_mo(pfix,kpfix,ipfix)
+    tmp_row = (0.d0,0.d0)
+    tmp_row2 = (0.d0,0.d0)
+    !tmp_row_kpts = (0.d0,0.d0)
+    !tmp_row2_kpts = (0.d0,0.d0)
+    tmp_row_kpts2 = (0.d0,0.d0)
+    tmp_row2_kpts2 = (0.d0,0.d0)
+    !===========================================================
+    call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+    call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+    !===========================================================
+    call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,pfix,ipfix,kpfix,p1,ip1,kp1,mo_num_per_kpt,hij_cache2(1,1),mo_integrals_map,mo_integrals_map_2)
+    call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,pfix,ipfix,kpfix,p2,ip2,kp2,mo_num_per_kpt,hij_cache2(1,2),mo_integrals_map,mo_integrals_map_2)
+    putj = p1
+    !============
+    !begin ref
+    do puti=1,mo_num !HOT
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,2)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+          enddo
+        endif
+      end if
+!    enddo
+!      
+      putj = p2
+!    do puti=1,mo_num !HOT
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,1)
+        if (hij /= (0.d0,0.d0)) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+          enddo
+        endif
+      end if
+    end do
+    !end ref
+    !===================
+    !begin kpts
+    if (kp1.eq.kp2) then
+    !if (.False.) then
+      kputi1 = kconserv(kpfix,kp1,khfix)
+      kputi2 = kputi1
+      puti01 = (kputi1-1)*mo_num_per_kpt
+      puti02 = puti01
+      do iputi=1,mo_num_per_kpt !HOT
+        puti = puti01 + iputi
+        if(lbanned(puti,mi)) cycle
+        !p1 fixed
+        putj = p1
+        if(.not. banned(putj,puti,bant)) then
+          hij = hij_cache2(iputi,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_row_kpts2(k,iputi) = tmp_row_kpts2(k,iputi) + hij * coefs(k)
+              !tmp_row_kpts(k,puti) = tmp_row_kpts(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+!      enddo
+!        
+        putj = p2
+!      do puti=1,mo_num !HOT
+        if(.not. banned(putj,puti,bant)) then
+          hij = hij_cache2(iputi,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+            do k=1,N_states
+              tmp_row2_kpts2(k,iputi) = tmp_row2_kpts2(k,iputi) + hij * coefs(k)
+              !tmp_row2_kpts(k,puti) = tmp_row2_kpts(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+      end do
+    else !kp1.ne.kp2
+      kputi2 = kconserv(kpfix,kp2,khfix)
+      puti02 = (kputi2-1)*mo_num_per_kpt
+      putj = p1
+      do iputi=1,mo_num_per_kpt !HOT
+        puti = puti02 + iputi
+        if(lbanned(puti,mi)) cycle
+        !p1 fixed
+        if(.not. banned(putj,puti,bant)) then
+          hij = hij_cache2(iputi,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_row_kpts2(k,iputi) = tmp_row_kpts2(k,iputi) + hij * coefs(k)
+              !tmp_row_kpts(k,puti) = tmp_row_kpts(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+      enddo
+!        
+      putj = p2
+      kputi1 = kconserv(kpfix,kp1,khfix)
+      puti01 = (kputi1-1)*mo_num_per_kpt
+      do iputi=1,mo_num_per_kpt !HOT
+        puti = puti01 + iputi
+        if(lbanned(puti,mi)) cycle
+        if(.not. banned(putj,puti,bant)) then
+          hij = hij_cache2(iputi,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+            do k=1,N_states
+              tmp_row2_kpts2(k,iputi) = tmp_row2_kpts2(k,iputi) + hij * coefs(k)
+              !tmp_row2_kpts(k,puti) = tmp_row2_kpts(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+      end do
+    endif
+    !end kpts
+    !===================
+    !test printing
+    !print'((A),5(I5))','kpt info1: ',kconserv(kpfix,kp2,khfix),khfix,kpfix,kp2,kputi2
+    !print'((A),5(I5))','kpt info2: ',kconserv(kpfix,kp1,khfix),khfix,kpfix,kp1,kputi1
+    !do ii0=1,mo_num
+    !  if (cdabs(tmp_row_kpts(1,ii0)-tmp_row(1,ii0)).gt.1.d-12) then
+    !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1a, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+    !!  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+    !!    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+    !  endif
+    !  if (cdabs(tmp_row2_kpts(1,ii0)-tmp_row2(1,ii0)).gt.1.d-12) then
+    !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 2a, ',ii0,hfix,pfix,p1,tmp_row2_kpts(1,ii0),tmp_row2(1,ii0)
+    !!  else if ((cdabs(tmp_row2_kpts(1,ii0))+cdabs(tmp_row2(1,ii0))).gt.1.d-12) then
+    !!    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 2b, ',ii0,hfix,pfix,p1,tmp_row2_kpts(1,ii0),tmp_row2(1,ii0)
+    !  endif
+    !enddo
+    !===================
+
+    tmp_mat1 = (0.d0,0.d0)
+    tmp_mat2 = (0.d0,0.d0)
+    if(mi == 1) then
+    !===================
+      tmp_mat1(:,:,p1) = tmp_mat1(:,:,p1) + tmp_row(:,:)
+      tmp_mat1(:,:,p2) = tmp_mat1(:,:,p2) + tmp_row2(:,:)
+      tmp_mat2(:,puti02+1:puti02+mo_num_per_kpt,p1) = tmp_mat2(:,puti02+1:puti02+mo_num_per_kpt,p1) + tmp_row_kpts2(:,:)
+      tmp_mat2(:,puti01+1:puti01+mo_num_per_kpt,p2) = tmp_mat2(:,puti01+1:puti01+mo_num_per_kpt,p2) + tmp_row2_kpts2(:,:)
+    !===================
+      !mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
+      !mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
+      mat(:,puti02+1:puti02+mo_num_per_kpt,p1) = mat(:,puti02+1:puti02+mo_num_per_kpt,p1) + tmp_row_kpts2(:,:)
+      mat(:,puti01+1:puti01+mo_num_per_kpt,p2) = mat(:,puti01+1:puti01+mo_num_per_kpt,p2) + tmp_row2_kpts2(:,:)
+    else
+    !===================
+      do l=1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          tmp_mat1(k,p1,l) = tmp_mat1(k,p1,l) + tmp_row(k,l)
+          tmp_mat1(k,p2,l) = tmp_mat1(k,p2,l) + tmp_row2(k,l)
+        enddo
+      enddo
+      do l=1,mo_num_per_kpt
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          tmp_mat2(k,p1,l+puti02) = tmp_mat2(k,p1,l+puti02) + tmp_row_kpts2(k,l)
+          tmp_mat2(k,p2,l+puti01) = tmp_mat2(k,p2,l+puti01) + tmp_row2_kpts2(k,l)
+        enddo
+      enddo
+    !===================
+      !do l=1,mo_num
+      !  !DIR$ LOOP COUNT AVG(4)
+      !  do k=1,N_states
+      !    mat(k,p1,l) = mat(k,p1,l) + tmp_row(k,l)
+      !    mat(k,p2,l) = mat(k,p2,l) + tmp_row2(k,l)
+      !  enddo
+      !enddo
+      do l=1,mo_num_per_kpt
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p1,l+puti02) = mat(k,p1,l+puti02) + tmp_row_kpts2(k,l)
+          mat(k,p2,l+puti01) = mat(k,p2,l+puti01) + tmp_row2_kpts2(k,l)
+        enddo
+      enddo
+    end if
+      !===========================================================
+        do k=1,N_states
+          do l=1,mo_num
+            do ii0=1,mo_num
+              if (cdabs(tmp_mat2(k,l,ii0)-tmp_mat1(k,l,ii0)).gt.1.d-12) then
+                print'((A),7(I5),2(2(E25.15),2X))','WarNInG 5a, ',k,l,ii0,hfix,pfix,p1,p2,tmp_mat2(k,l,ii0),tmp_mat1(k,l,ii0)
+            !  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+            !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+              endif
+            enddo
+          enddo
+        enddo
+      !===========================================================
+    !todo: kpts okay up to this point in get_d1_complex
+
+  else  ! sp /= 3
+
+    if(p(0,ma) == 3) then
+      do i=1,3
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+        call get_kpt_idx_mo(puti,kputi,iputi)
+        call get_kpt_idx_mo(hfix,khfix,ihfix)
+        call get_kpt_idx_mo(p1,kp1,ip1)
+        call get_kpt_idx_mo(p2,kp2,ip2)
+        call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+        call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+        call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p1,ip1,kp1,p2,ip2,kp2,mo_num_per_kpt,hij_cache2(1,1),mo_integrals_map,mo_integrals_map_2)
+        call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p2,ip2,kp2,p1,ip1,kp1,mo_num_per_kpt,hij_cache2(1,2),mo_integrals_map,mo_integrals_map_2)
+        tmp_row = (0.d0,0.d0)
+        !tmp_row_kpts = (0.d0,0.d0)
+        tmp_row_kpts2 = (0.d0,0.d0)
+        !===================
+        !begin ref
+        do putj=1,hfix-1
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,1) - hij_cache(putj,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+        do putj=hfix+1,mo_num
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,2) - hij_cache(putj,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+        !end ref
+        !=================
+        !begin kpts
+        kputj = kconserv(kp1,kp2,khfix)
+        putj0 = (kputj-1)*mo_num_per_kpt
+        do putj = putj0+1,hfix-1
+          iputj = putj - putj0
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache2(iputj,1) - hij_cache2(iputj,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            !tmp_row_kpts(:,putj) = tmp_row_kpts(:,putj) + hij * coefs(:)
+            tmp_row_kpts2(:,iputj) = tmp_row_kpts2(:,iputj) + hij * coefs(:)
+          endif
+        end do
+        do putj=hfix+1,putj0+mo_num_per_kpt
+          iputj = putj - putj0
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache2(iputj,2) - hij_cache2(iputj,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            !tmp_row_kpts(:,putj) = tmp_row_kpts(:,putj) + hij * coefs(:)
+            tmp_row_kpts2(:,iputj) = tmp_row_kpts2(:,iputj) + hij * coefs(:)
+          endif
+        end do
+
+        !end kpts
+    !do ii0=1,mo_num
+    !  if (cdabs(tmp_row_kpts(1,ii0)-tmp_row(1,ii0)).gt.1.d-12) then
+    !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1a, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+    !!  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+    !!    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+    !  endif
+    !enddo
+        !=================
+        tmp_mat1 = (0.d0,0.d0)
+        tmp_mat2 = (0.d0,0.d0)
+        tmp_mat1(:, :puti-1, puti) = tmp_mat1(:, :puti-1, puti) + tmp_row(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_mat1(k, puti, l) = tmp_mat1(k, puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+        !=================
+        if (kputj.lt.kputi) then
+          tmp_mat2(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) =  &
+                  tmp_mat2(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) + &
+                  tmp_row_kpts2(1:N_states,1:mo_num_per_kpt)
+        else if (kputj.gt.kputi) then
+          do l=1,mo_num_per_kpt
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_mat2(k, puti, l+putj0) = tmp_mat2(k, puti,l+putj0) + tmp_row_kpts2(k,l)
+            enddo
+          enddo
+        else !kputj == kputi
+          tmp_mat2(1:N_states,putj0+1:puti-1,puti) =  &
+                  tmp_mat2(1:N_states,putj0+1:puti-1,puti) + &
+                  tmp_row_kpts2(1:N_states,1:iputi-1)
+          do l=iputi,mo_num_per_kpt
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_mat2(k, puti, l+putj0) = tmp_mat2(k, puti,l+putj0) + tmp_row_kpts2(k,l)
+            enddo
+          enddo
+        endif
+        !=================
+        do k=1,N_states
+          do l=1,mo_num
+            do ii0=1,mo_num
+              if (cdabs(tmp_mat2(k,l,ii0)-tmp_mat1(k,l,ii0)).gt.1.d-12) then
+                print'((A),6(I5),2(2(E25.15),2X))','WarNInG 3a, ',k,l,ii0,hfix,p1,p2,tmp_mat2(k,l,ii0),tmp_mat1(k,l,ii0)
+            !  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+            !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+              endif
+            enddo
+          enddo
+        enddo
+
+        !=================
+        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, l) = mat(k, puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+        !!=================
+        !!todo: check for iputi=1,2
+        !if (kputj.lt.kputi) then
+        !  mat(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) =  &
+        !          mat(1:N_states,putj0+1:putj0+mo_num_per_kpt,puti) + &
+        !          tmp_row_kpts2(1:N_states,1:mo_num_per_kpt)
+        !else if (kputj.gt.kputi) then
+        !  do l=1,mo_num_per_kpt
+        !    !DIR$ LOOP COUNT AVG(4)
+        !    do k=1,N_states
+        !      mat(k, puti, l+putj0) = mat(k, puti,l+putj0) + tmp_row_kpts2(k,l)
+        !    enddo
+        !  enddo
+        !else !kputj == kputi
+        !  mat(1:N_states,putj0+1:puti-1,puti) =  &
+        !          mat(1:N_states,putj0+1:puti-1,puti) + &
+        !          tmp_row_kpts2(1:N_states,1:iputi-1)
+        !  do l=iputi,mo_num_per_kpt
+        !    !DIR$ LOOP COUNT AVG(4)
+        !    do k=1,N_states
+        !      mat(k, puti, l+putj0) = mat(k, puti,l+putj0) + tmp_row_kpts2(k,l)
+        !    enddo
+        !  enddo
+        !endif
+      end do
+    else
+      hfix = h(1,mi)
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+      call get_kpt_idx_mo(pfix,kpfix,ipfix)
+      call get_kpt_idx_mo(hfix,khfix,ihfix)
+      call get_kpt_idx_mo(p1,kp1,ip1)
+      call get_kpt_idx_mo(p2,kp2,ip2)
+      tmp_row = (0.d0,0.d0)
+      tmp_row2 = (0.d0,0.d0)
+      !tmp_row_kpts = (0.d0,0.d0)
+      !tmp_row2_kpts = (0.d0,0.d0)
+      call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+      call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      !call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p1,ip1,kp1,pfix,ipfix,kpfix,mo_num_per_kpt,hij_cache2(1,1),mo_integrals_map,mo_integrals_map_2)
+      !call get_mo_two_e_integrals_kpts(hfix,ihfix,khfix,p2,ip2,kp2,pfix,ipfix,kpfix,mo_num_per_kpt,hij_cache2(1,2),mo_integrals_map,mo_integrals_map_2)
+      !===============
+      !begin ref
+      putj = p2
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,1)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,2)
+          if (hij /= (0.d0,0.d0)) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+      end do
+      !end ref
+      !===============
+      !begin kpts
+      !todo: combine if kp1==kp2
+ !     putj = p2
+ !     kputi1 = kconserv(kp1,kpfix,khfix)
+ !     puti01 = (kputi1-1)*mo_num_per_kpt
+ !     do iputi=1,mo_num_per_kpt
+ !       puti = puti01 + iputi
+ !       if(lbanned(puti,ma)) cycle
+ !       if(.not. banned(puti,putj,1)) then
+ !         hij = hij_cache2(iputi,1)
+ !         if (hij /= (0.d0,0.d0)) then
+ !           hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+ !           !DIR$ LOOP COUNT AVG(4)
+ !           do k=1,N_states
+ !             tmp_row_kpts(k,puti) = tmp_row_kpts(k,puti) + hij * coefs(k)
+ !           enddo
+ !         endif
+ !       end if
+ !     enddo
+ !     putj = p1
+ !     kputi2 = kconserv(kp2,kpfix,khfix)
+ !     puti02 = (kputi2-1)*mo_num_per_kpt
+ !     do iputi=1,mo_num_per_kpt
+ !       puti = puti02 + iputi
+ !       if(lbanned(puti,ma)) cycle
+ !       if(.not. banned(puti,putj,1)) then
+ !         hij = hij_cache2(iputi,2)
+ !         if (hij /= (0.d0,0.d0)) then
+ !           hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+ !           do k=1,N_states
+ !             tmp_row2_kpts(k,puti) = tmp_row2_kpts(k,puti) + hij * coefs(k)
+ !           enddo
+ !         endif
+ !       end if
+ !     end do
+ !     !end kpts
+ !     !===============
+ !   !test printing
+ !   !print'((A),5(I5))','kpt info1: ',kconserv(kpfix,kp2,khfix),khfix,kpfix,kp2,kputi2
+ !   !print'((A),5(I5))','kpt info2: ',kconserv(kpfix,kp1,khfix),khfix,kpfix,kp1,kputi1
+ !   do ii0=1,mo_num
+ !     if (cdabs(tmp_row_kpts(1,ii0)-tmp_row(1,ii0)).gt.1.d-12) then
+ !       print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1a, ',ii0,hfix,p1,pfix,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+ !   !  else if ((cdabs(tmp_row_kpts(1,ii0))+cdabs(tmp_row(1,ii0))).gt.1.d-12) then
+ !   !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 1b, ',ii0,hfix,pfix,p2,tmp_row_kpts(1,ii0),tmp_row(1,ii0)
+ !     endif
+ !     if (cdabs(tmp_row2_kpts(1,ii0)-tmp_row2(1,ii0)).gt.1.d-12) then
+ !       print'((A),4(I5),2(2(E25.15),2X))','WarNInG 2a, ',ii0,hfix,p2,pfix,tmp_row2_kpts(1,ii0),tmp_row2(1,ii0)
+ !   !  else if ((cdabs(tmp_row2_kpts(1,ii0))+cdabs(tmp_row2(1,ii0))).gt.1.d-12) then
+ !   !    print'((A),4(I5),2(2(E25.15),2X))','WarNInG 2b, ',ii0,hfix,pfix,p1,tmp_row2_kpts(1,ii0),tmp_row2(1,ii0)
+ !     endif
+ !   enddo
+    !===================
+      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
+      do l=p2,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p2,l) = mat(k,p2,l) + tmp_row(k,l)
+        enddo
+      enddo
+      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
+      do l=p1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p1,l) = mat(k,p1,l) + tmp_row2(k,l)
+        enddo
+      enddo
+    end if
+  end if
+  deallocate(lbanned,hij_cache)
+
+ !! MONO
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1=1,p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1
+      do i2=ib,p(0,s2)
+        p1 = p(i1,s1)
+        p2 = p(i2,s2)
+        if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+        ! gen is a selector; mask is ionized generator; det is alpha
+        ! hij is contribution to <psi|H|alpha>
+        call i_h_j_complex(gen, det, N_int, hij)
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * dconjg(hij)
+        enddo
+      end do
+    end do
+end
+
+
diff --git a/src/cipsi/selection_types.f90 b/src/cipsi/selection_types.f90
index 58ce0e03..53250b57 100644
--- a/src/cipsi/selection_types.f90
+++ b/src/cipsi/selection_types.f90
@@ -18,7 +18,12 @@ module selection_types
   integer function pt2_type_size(N)
     implicit none
     integer, intent(in) :: N
-    pt2_type_size = (3*n + n*n)
+    if (is_complex) then
+      pt2_type_size = (3*n + 2*n*n)
+    else
+      pt2_type_size = (3*n + n*n)
+    endif
+
   end function
 
 end module
diff --git a/src/cipsi/slave_cipsi.irp.f b/src/cipsi/slave_cipsi.irp.f
index 1dc3e784..eaa64673 100644
--- a/src/cipsi/slave_cipsi.irp.f
+++ b/src/cipsi/slave_cipsi.irp.f
@@ -14,10 +14,17 @@ subroutine run_slave_cipsi
 end
 
 subroutine provide_everything
-  PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag
-  PROVIDE pt2_e0_denominator mo_num N_int ci_energy mpi_master zmq_state zmq_context
-  PROVIDE psi_det psi_coef threshold_generators state_average_weight
-  PROVIDE N_det_selectors pt2_stoch_istate N_det selection_weight pseudo_sym
+  if (is_complex) then
+    PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators_complex psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag
+    PROVIDE pt2_e0_denominator mo_num_per_kpt N_int ci_energy mpi_master zmq_state zmq_context
+    PROVIDE psi_det psi_coef_complex threshold_generators state_average_weight
+    PROVIDE N_det_selectors pt2_stoch_istate N_det selection_weight pseudo_sym
+  else
+    PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag
+    PROVIDE pt2_e0_denominator mo_num N_int ci_energy mpi_master zmq_state zmq_context
+    PROVIDE psi_det psi_coef threshold_generators state_average_weight
+    PROVIDE N_det_selectors pt2_stoch_istate N_det selection_weight pseudo_sym
+  endif
 end
 
 subroutine run_slave_main
@@ -51,9 +58,15 @@ subroutine run_slave_main
 
   zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
 
-  PROVIDE psi_det psi_coef threshold_generators state_average_weight mpi_master
-  PROVIDE zmq_state N_det_selectors pt2_stoch_istate N_det pt2_e0_denominator
-  PROVIDE N_det_generators N_states N_states_diag pt2_e0_denominator mpi_rank
+  if (is_complex) then
+    PROVIDE psi_det psi_coef_complex threshold_generators state_average_weight mpi_master
+    PROVIDE zmq_state N_det_selectors pt2_stoch_istate N_det pt2_e0_denominator
+    PROVIDE N_det_generators N_states N_states_diag pt2_e0_denominator mpi_rank
+  else
+    PROVIDE psi_det psi_coef threshold_generators state_average_weight mpi_master
+    PROVIDE zmq_state N_det_selectors pt2_stoch_istate N_det pt2_e0_denominator
+    PROVIDE N_det_generators N_states N_states_diag pt2_e0_denominator mpi_rank
+  endif
 
   IRP_IF MPI
     call MPI_BARRIER(MPI_COMM_WORLD, ierr)
@@ -268,6 +281,10 @@ subroutine run_slave_main
                 + 2.0d0*(N_int*2*ii)              & ! minilist, fullminilist
                 + 1.0d0*(N_states*mo_num*mo_num)  & ! mat
                 ) / 1024.d0**3
+          if (is_complex) then
+            ! mat is complex
+            mem = mem + (nproc_target * 8.d0 * (n_states*mo_num*mo_num)) / 1024.d0**3
+          endif
 
           if (nproc_target == 0) then
             call check_mem(mem,irp_here)
diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f
index c529795e..ea3dce9d 100644
--- a/src/cipsi/stochastic_cipsi.irp.f
+++ b/src/cipsi/stochastic_cipsi.irp.f
@@ -37,29 +37,46 @@ subroutine run_stochastic_cipsi
   pt2_data % rpt2  = -huge(1.e0)
   pt2_data % overlap= 0.d0
   pt2_data % variance = huge(1.e0)
+  if (is_complex) then
+    pt2_data % overlap_imag = 0.d0
+  endif
 
   if (s2_eig) then
     call make_s2_eigenfunction
   endif
-  call diagonalize_CI
+  if (is_complex) then
+    call diagonalize_ci_complex
+  else
+    call diagonalize_ci
+  endif
   call save_wavefunction
 
   call ezfio_has_hartree_fock_energy(has)
   if (has) then
     call ezfio_get_hartree_fock_energy(hf_energy_ref)
   else
-    hf_energy_ref = ref_bitmask_energy
+    hf_energy_ref = ref_bitmask_energy_with_nucl_rep
   endif
 
   if (N_det > N_det_max) then
     psi_det = psi_det_sorted
-    psi_coef = psi_coef_sorted
-    N_det = N_det_max
-    soft_touch N_det psi_det psi_coef
+    if (is_complex) then
+      psi_coef_complex = psi_coef_sorted_complex
+      N_det = N_det_max
+      soft_touch N_det psi_det psi_coef_complex
+    else
+      psi_coef = psi_coef_sorted
+      N_det = N_det_max
+      soft_touch N_det psi_det psi_coef
+    endif
     if (s2_eig) then
       call make_s2_eigenfunction
     endif
-    call diagonalize_CI
+    if (is_complex) then
+      call diagonalize_ci_complex
+    else
+      call diagonalize_CI
+    endif
     call save_wavefunction
   endif
 
@@ -103,14 +120,22 @@ subroutine run_stochastic_cipsi
     if (qp_stop()) exit
 
     ! Add selected determinants
-    call copy_H_apply_buffer_to_wf()
+    call copy_h_apply_buffer_to_wf()
 !    call save_wavefunction
 
-    PROVIDE  psi_coef
+    if (is_complex) then
+      PROVIDE  psi_coef_complex
+    else
+      PROVIDE  psi_coef
+    endif
     PROVIDE  psi_det
     PROVIDE  psi_det_sorted
 
-    call diagonalize_CI
+    if (is_complex) then
+      call diagonalize_ci_complex
+    else
+      call diagonalize_CI
+    endif
     call save_wavefunction
     call save_energy(psi_energy_with_nucl_rep, zeros)
     if (qp_stop()) exit
@@ -118,7 +143,11 @@ subroutine run_stochastic_cipsi
 
   if (.not.qp_stop()) then
     if (N_det < N_det_max) then
-        call diagonalize_CI
+        if (is_complex) then
+          call diagonalize_ci_complex
+        else
+          call diagonalize_CI
+        endif
         call save_wavefunction
         call save_energy(psi_energy_with_nucl_rep, zeros)
     endif
diff --git a/src/cipsi/zmq_selection.irp.f b/src/cipsi/zmq_selection.irp.f
index 448b409e..5ac1d6fb 100644
--- a/src/cipsi/zmq_selection.irp.f
+++ b/src/cipsi/zmq_selection.irp.f
@@ -16,6 +16,7 @@ subroutine ZMQ_selection(N_in, pt2_data)
   N = max(N_in,1)
   N = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
   if (.True.) then
+    !todo: some providers have become unlinked for real/complex (det/coef); do these need to be provided?
     PROVIDE pt2_e0_denominator nproc
     PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
     PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
@@ -104,10 +105,24 @@ subroutine ZMQ_selection(N_in, pt2_data)
 
   f(:) = 1.d0
   if (.not.do_pt2) then
+<<<<<<< HEAD
     double precision :: f(N_states), u_dot_u
     do k=1,min(N_det,N_states)
      f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors)
     enddo
+=======
+  double precision :: f(N_states), u_dot_u
+    if (is_complex) then
+      double precision :: u_dot_u_complex
+      do k=1,min(N_det,N_states)
+       f(k) = 1.d0 / u_dot_u_complex(psi_selectors_coef_complex(1,k), N_det_selectors)
+      enddo
+    else
+      do k=1,min(N_det,N_states)
+       f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors)
+      enddo
+    endif
+>>>>>>> origin/cleaning_kpts
   endif
 
   !$OMP PARALLEL DEFAULT(shared)  SHARED(b, pt2_data)  PRIVATE(i) NUM_THREADS(nproc_target+1)
@@ -203,6 +218,9 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2_data)
   pt2_data % pt2(:)      = 0d0
   pt2_data % variance(:) = 0.d0
   pt2_data % overlap(:,:) = 0.d0
+  if (is_complex) then
+    pt2_data % overlap_imag(:,:) = 0.d0
+  endif
   call pt2_alloc(pt2_data_tmp,N_states)
   do while (more == 1)
     call pull_selection_results(zmq_socket_pull, pt2_data_tmp, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask)
@@ -231,3 +249,4 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2_data)
   call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
 end subroutine
 
+
diff --git a/src/cis/cis.irp.f b/src/cis/cis.irp.f
index 816253c5..63b83552 100644
--- a/src/cis/cis.irp.f
+++ b/src/cis/cis.irp.f
@@ -57,10 +57,18 @@ subroutine run
   implicit none
   integer                        :: i
 
-  if(pseudo_sym)then
-   call H_apply_cis_sym
+  if (is_complex) then
+    if(pseudo_sym)then
+     call H_apply_cis_sym_kpts
+    else
+     call H_apply_cis_kpts
+    endif
   else
-   call H_apply_cis
+    if(pseudo_sym)then
+     call H_apply_cis_sym
+    else
+     call H_apply_cis
+    endif
   endif
   print *,  'N_det = ', N_det
   print*,'******************************'
@@ -77,8 +85,13 @@ subroutine run
   endif
 
   call ezfio_set_cis_energy(CI_energy)
-  psi_coef = ci_eigenvectors
-  SOFT_TOUCH psi_coef
+  if (is_complex) then
+    psi_coef_complex = ci_eigenvectors_complex
+    SOFT_TOUCH psi_coef_complex
+  else
+    psi_coef = ci_eigenvectors
+    SOFT_TOUCH psi_coef
+  endif
   call save_wavefunction
 
 end
diff --git a/src/cis/kpts_cis.irp.f b/src/cis/kpts_cis.irp.f
new file mode 100644
index 00000000..cf88fa5f
--- /dev/null
+++ b/src/cis/kpts_cis.irp.f
@@ -0,0 +1,580 @@
+
+subroutine H_apply_cis_kpts_monoexc(key_in, hole_1,particl_1,fock_diag_tmp,i_generator,iproc_in  )
+  use omp_lib
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Generate all single excitations of key_in using the bit masks of holes and
+  ! particles.
+  ! Assume N_int is already provided.
+  END_DOC
+  integer,parameter              :: size_max = 8192
+  
+  integer          ,intent(in)   :: i_generator
+  integer(bit_kind),intent(in)   :: key_in(N_int,2)
+  integer(bit_kind),intent(in)   :: hole_1(N_int,2), particl_1(N_int,2)
+  integer, intent(in)            :: iproc_in
+  double precision, intent(in)   :: fock_diag_tmp(2,mo_num+1)
+  integer(bit_kind),allocatable  :: keys_out(:,:,:)
+  integer(bit_kind),allocatable  :: hole_save(:,:)
+  integer(bit_kind),allocatable  :: key(:,:),hole(:,:), particle(:,:)
+  integer(bit_kind),allocatable  :: hole_tmp(:,:), particle_tmp(:,:)
+  integer(bit_kind),allocatable  :: hole_2(:,:), particl_2(:,:)
+  integer                        :: ii,i,jj,j,k,ispin,l
+  integer,allocatable            :: occ_particle(:,:), occ_hole(:,:)
+  integer,allocatable            :: occ_particle_tmp(:,:), occ_hole_tmp(:,:)
+  integer,allocatable            :: ib_jb_pairs(:,:)
+  integer                        :: kk,pp,other_spin,key_idx
+  integer                        :: N_elec_in_key_hole_1(2),N_elec_in_key_part_1(2)
+  integer                        :: N_elec_in_key_hole_2(2),N_elec_in_key_part_2(2)
+  logical                        :: is_a_two_holes_two_particles
+  integer(bit_kind), allocatable :: key_union_hole_part(:)
+
+  integer, allocatable           :: ia_ja_pairs(:,:,:)
+  logical, allocatable           :: array_pairs(:,:)
+  double precision               :: diag_H_mat_elem
+  integer                        :: iproc
+
+  integer(bit_kind)              :: key_mask(N_int, 2)
+
+  logical :: check_double_excitation
+  logical :: is_a_2h1p
+  logical :: is_a_2h
+  logical :: is_a_1h1p
+  logical :: is_a_1h2p
+  logical :: is_a_1h
+  logical :: is_a_1p
+  logical :: is_a_2p
+  logical :: yes_no
+
+  do k=1,N_int
+    key_mask(k,1) = 0_bit_kind
+    key_mask(k,2) = 0_bit_kind
+  enddo
+
+  iproc = iproc_in
+
+  check_double_excitation = .True.
+  
+
+
+  
+
+  
+!$ iproc = omp_get_thread_num()
+  allocate (keys_out(N_int,2,size_max), hole_save(N_int,2),          &
+      key(N_int,2),hole(N_int,2), particle(N_int,2), hole_tmp(N_int,2),&
+      particle_tmp(N_int,2), occ_particle(N_int*bit_kind_size,2),    &
+      occ_hole(N_int*bit_kind_size,2), occ_particle_tmp(N_int*bit_kind_size,2),&
+      occ_hole_tmp(N_int*bit_kind_size,2),key_union_hole_part(N_int))
+  
+  !!!! First couple hole particle
+  do j = 1, N_int
+    hole(j,1) = iand(hole_1(j,1),key_in(j,1))
+    hole(j,2) = iand(hole_1(j,2),key_in(j,2))
+    particle(j,1) = iand(xor(particl_1(j,1),key_in(j,1)),particl_1(j,1))
+    particle(j,2) = iand(xor(particl_1(j,2),key_in(j,2)),particl_1(j,2))
+  enddo
+
+  call bitstring_to_list_ab(particle,occ_particle,N_elec_in_key_part_1,N_int)
+  call bitstring_to_list_ab(hole,occ_hole,N_elec_in_key_hole_1,N_int)
+  allocate (ia_ja_pairs(2,0:(elec_alpha_num)*mo_num,2))
+
+  do ispin=1,2
+    i=0
+    do ii=N_elec_in_key_hole_1(ispin),1,-1             ! hole
+      i_a = occ_hole(ii,ispin)
+      do jj=1,N_elec_in_key_part_1(ispin)                            !particule
+        j_a = occ_particle(jj,ispin)
+        i += 1
+        ia_ja_pairs(1,i,ispin) = i_a
+        ia_ja_pairs(2,i,ispin) = j_a
+      enddo
+    enddo
+    ia_ja_pairs(1,0,ispin) = i
+  enddo
+
+  key_idx = 0
+
+  integer                        :: i_a,j_a,i_b,j_b,k_a,l_a,k_b,l_b
+  integer(bit_kind)              :: test(N_int,2)
+  double precision               :: accu
+  accu = 0.d0
+  do ispin=1,2
+    other_spin = iand(ispin,1)+1
+    
+    do ii=1,ia_ja_pairs(1,0,ispin)
+      i_a = ia_ja_pairs(1,ii,ispin)
+      j_a = ia_ja_pairs(2,ii,ispin)
+      hole = key_in
+      k = shiftr(i_a-1,bit_kind_shift)+1
+      j = i_a-shiftl(k-1,bit_kind_shift)-1
+  
+      hole(k,ispin) = ibclr(hole(k,ispin),j)
+      k_a = shiftr(j_a-1,bit_kind_shift)+1
+      l_a = j_a-shiftl(k_a-1,bit_kind_shift)-1
+  
+      hole(k_a,ispin) = ibset(hole(k_a,ispin),l_a)
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      key_idx += 1
+      do k=1,N_int
+        keys_out(k,1,key_idx) = hole(k,1)
+        keys_out(k,2,key_idx) = hole(k,2)
+      enddo
+      if (key_idx == size_max) then
+        call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+        key_idx = 0
+      endif
+    enddo  ! ii
+    
+  enddo   ! ispin
+  call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+  
+  deallocate (ia_ja_pairs, &
+      keys_out, hole_save,          &
+      key,hole, particle, hole_tmp,&
+      particle_tmp, occ_particle,    &
+      occ_hole, occ_particle_tmp,&
+      occ_hole_tmp,key_union_hole_part)
+  
+  
+
+end
+
+subroutine H_apply_cis_kpts()
+  implicit none
+  use omp_lib
+  use bitmasks
+  BEGIN_DOC
+  ! Calls H_apply on the |HF| determinant and selects all connected single and double
+  ! excitations (of the same symmetry). Auto-generated by the ``generate_h_apply`` script.
+  END_DOC
+
+  
+
+  integer                        :: i_generator
+  double precision               :: wall_0, wall_1
+  integer(bit_kind), allocatable :: mask(:,:,:)
+  integer(bit_kind), allocatable :: mask_kpts(:,:,:,:)
+  integer                        :: kk
+  integer                        :: ispin, k
+  integer                        :: iproc
+  double precision, allocatable  :: fock_diag_tmp(:,:)
+
+  
+  if (is_complex) then
+    PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators_complex
+  else
+  PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators
+  endif
+
+  call wall_time(wall_0)
+
+  iproc = 0
+  !allocate( mask(N_int,2,6), fock_diag_tmp(2,mo_num+1) )
+  allocate( mask_kpts(N_int,2,6,kpt_num), fock_diag_tmp(2,mo_num+1) )
+  do i_generator=1,N_det_generators
+
+    ! Compute diagonal of the Fock matrix
+    !call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int)
+    fock_diag_tmp=0.d0
+
+    ! Create bit masks for holes and particles
+    do kk=1,kpt_num
+    do ispin=1,2
+      do k=1,N_int
+        mask_kpts(k,ispin,s_hole,kk) =                                      &
+            iand(generators_bitmask_kpts(k,ispin,s_hole,kk),  &
+            psi_det_generators(k,ispin,i_generator) )
+        mask_kpts(k,ispin,s_part,kk) =                                      &
+            iand(generators_bitmask_kpts(k,ispin,s_part,kk),  &
+            not(psi_det_generators(k,ispin,i_generator)) )
+       ! mask_kpts(k,ispin,d_hole1,kk) =                                      &
+       !     iand(generators_bitmask_kpts(k,ispin,d_hole1,kk),  &
+       !     psi_det_generators(k,ispin,i_generator) )
+       ! mask_kpts(k,ispin,d_part1,kk) =                                      &
+       !     iand(generators_bitmask_kpts(k,ispin,d_part1,kk),  &
+       !     not(psi_det_generators(k,ispin,i_generator)) )
+       ! mask_kpts(k,ispin,d_hole2,kk) =                                      &
+       !     iand(generators_bitmask_kpts(k,ispin,d_hole2,kk),  &
+       !     psi_det_generators(k,ispin,i_generator) )
+       ! mask_kpts(k,ispin,d_part2,kk) =                                      &
+       !     iand(generators_bitmask_kpts(k,ispin,d_part2,kk),  &
+       !     not(psi_det_generators(k,ispin,i_generator)) )
+      enddo
+    enddo
+    enddo
+    !if(.False.)then
+    ! call H_apply_cis_kpts_diexc(psi_det_generators(1,1,i_generator),      &
+    !     psi_det_generators(1,1,1),                                   &
+    !     mask(1,1,d_hole1), mask(1,1,d_part1),                        &
+    !     mask(1,1,d_hole2), mask(1,1,d_part2),                        &
+    !     fock_diag_tmp, i_generator, iproc )
+    !endif
+    if(.True.)then
+      do kk=1,kpt_num
+     call H_apply_cis_kpts_monoexc(psi_det_generators(1,1,i_generator),    &
+         mask_kpts(1,1,s_hole,kk), mask_kpts(1,1,s_part,kk ),                        &
+         fock_diag_tmp, i_generator, iproc )
+      enddo
+    endif
+    call wall_time(wall_1)
+    
+    if (wall_1 - wall_0 > 2.d0) then
+        write(6,*)  &
+       100.*float(i_generator)/float(N_det_generators), '% in ', wall_1-wall_0, 's'
+        wall_0 = wall_1
+    endif
+  enddo
+
+  !deallocate( mask, fock_diag_tmp )
+  deallocate( mask_kpts, fock_diag_tmp )
+
+  call copy_H_apply_buffer_to_wf
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  if (is_complex) then
+    SOFT_TOUCH psi_det psi_coef_complex N_det
+  else
+    SOFT_TOUCH psi_det psi_coef N_det
+  endif
+
+  
+  ! Sort H_jj to find the N_states lowest states
+  integer                        :: i
+  integer, allocatable           :: iorder(:)
+  double precision, allocatable  :: H_jj(:)
+  double precision, external     :: diag_h_mat_elem
+  allocate(H_jj(N_det),iorder(N_det))
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP SHARED(psi_det,N_int,H_jj,iorder,N_det)                  &
+      !$OMP PRIVATE(i)
+  !$OMP DO
+  do i = 1, N_det
+    H_jj(i) = diag_h_mat_elem(psi_det(1,1,i),N_int)
+    iorder(i) = i
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dsort(H_jj,iorder,N_det)
+  if (is_complex) then
+    do k=1,N_states
+      psi_coef_complex(iorder(k),k) = (1.d0,0.d0)
+    enddo
+  else
+    do k=1,N_states
+      psi_coef(iorder(k),k) = 1.d0
+    enddo
+  endif
+  deallocate(H_jj,iorder)
+    
+
+end
+
+
+
+
+subroutine H_apply_cis_sym_kpts_monoexc(key_in, hole_1,particl_1,fock_diag_tmp,i_generator,iproc_in  )
+  use omp_lib
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Generate all single excitations of key_in using the bit masks of holes and
+  ! particles.
+  ! Assume N_int is already provided.
+  END_DOC
+  integer,parameter              :: size_max = 8192
+  
+  integer          ,intent(in)   :: i_generator
+  integer(bit_kind),intent(in)   :: key_in(N_int,2)
+  integer(bit_kind),intent(in)   :: hole_1(N_int,2), particl_1(N_int,2)
+  integer, intent(in)            :: iproc_in
+  double precision, intent(in)   :: fock_diag_tmp(2,mo_num+1)
+  integer(bit_kind),allocatable  :: keys_out(:,:,:)
+  integer(bit_kind),allocatable  :: hole_save(:,:)
+  integer(bit_kind),allocatable  :: key(:,:),hole(:,:), particle(:,:)
+  integer(bit_kind),allocatable  :: hole_tmp(:,:), particle_tmp(:,:)
+  integer(bit_kind),allocatable  :: hole_2(:,:), particl_2(:,:)
+  integer                        :: ii,i,jj,j,k,ispin,l
+  integer,allocatable            :: occ_particle(:,:), occ_hole(:,:)
+  integer,allocatable            :: occ_particle_tmp(:,:), occ_hole_tmp(:,:)
+  integer,allocatable            :: ib_jb_pairs(:,:)
+  integer                        :: kk,pp,other_spin,key_idx
+  integer                        :: N_elec_in_key_hole_1(2),N_elec_in_key_part_1(2)
+  integer                        :: N_elec_in_key_hole_2(2),N_elec_in_key_part_2(2)
+  logical                        :: is_a_two_holes_two_particles
+  integer(bit_kind), allocatable :: key_union_hole_part(:)
+
+  integer, allocatable           :: ia_ja_pairs(:,:,:)
+  logical, allocatable           :: array_pairs(:,:)
+  double precision               :: diag_H_mat_elem
+  integer                        :: iproc
+
+  integer(bit_kind)              :: key_mask(N_int, 2)
+
+  logical :: check_double_excitation
+  logical :: is_a_2h1p
+  logical :: is_a_2h
+  logical :: is_a_1h1p
+  logical :: is_a_1h2p
+  logical :: is_a_1h
+  logical :: is_a_1p
+  logical :: is_a_2p
+  logical :: yes_no
+
+  do k=1,N_int
+    key_mask(k,1) = 0_bit_kind
+    key_mask(k,2) = 0_bit_kind
+  enddo
+
+  iproc = iproc_in
+
+  check_double_excitation = .True.
+  
+
+
+  
+
+  
+!$ iproc = omp_get_thread_num()
+  allocate (keys_out(N_int,2,size_max), hole_save(N_int,2),          &
+      key(N_int,2),hole(N_int,2), particle(N_int,2), hole_tmp(N_int,2),&
+      particle_tmp(N_int,2), occ_particle(N_int*bit_kind_size,2),    &
+      occ_hole(N_int*bit_kind_size,2), occ_particle_tmp(N_int*bit_kind_size,2),&
+      occ_hole_tmp(N_int*bit_kind_size,2),key_union_hole_part(N_int))
+  
+  !!!! First couple hole particle
+  do j = 1, N_int
+    hole(j,1) = iand(hole_1(j,1),key_in(j,1))
+    hole(j,2) = iand(hole_1(j,2),key_in(j,2))
+    particle(j,1) = iand(xor(particl_1(j,1),key_in(j,1)),particl_1(j,1))
+    particle(j,2) = iand(xor(particl_1(j,2),key_in(j,2)),particl_1(j,2))
+  enddo
+
+  call bitstring_to_list_ab(particle,occ_particle,N_elec_in_key_part_1,N_int)
+  call bitstring_to_list_ab(hole,occ_hole,N_elec_in_key_hole_1,N_int)
+  allocate (ia_ja_pairs(2,0:(elec_alpha_num)*mo_num,2))
+
+  do ispin=1,2
+    i=0
+    do ii=N_elec_in_key_hole_1(ispin),1,-1             ! hole
+      i_a = occ_hole(ii,ispin)
+      do jj=1,N_elec_in_key_part_1(ispin)                            !particule
+        j_a = occ_particle(jj,ispin)
+        i += 1
+        ia_ja_pairs(1,i,ispin) = i_a
+        ia_ja_pairs(2,i,ispin) = j_a
+      enddo
+    enddo
+    ia_ja_pairs(1,0,ispin) = i
+  enddo
+
+  key_idx = 0
+
+  integer                        :: i_a,j_a,i_b,j_b,k_a,l_a,k_b,l_b
+  integer(bit_kind)              :: test(N_int,2)
+  double precision               :: accu
+  accu = 0.d0
+  do ispin=1,2
+    other_spin = iand(ispin,1)+1
+    
+    do ii=1,ia_ja_pairs(1,0,ispin)
+      i_a = ia_ja_pairs(1,ii,ispin)
+      j_a = ia_ja_pairs(2,ii,ispin)
+      hole = key_in
+      k = shiftr(i_a-1,bit_kind_shift)+1
+      j = i_a-shiftl(k-1,bit_kind_shift)-1
+  
+      hole(k,ispin) = ibclr(hole(k,ispin),j)
+      k_a = shiftr(j_a-1,bit_kind_shift)+1
+      l_a = j_a-shiftl(k_a-1,bit_kind_shift)-1
+  
+      hole(k_a,ispin) = ibset(hole(k_a,ispin),l_a)
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+     call connected_to_hf(hole,yes_no)
+     if (.not.yes_no) cycle
+    
+      key_idx += 1
+      do k=1,N_int
+        keys_out(k,1,key_idx) = hole(k,1)
+        keys_out(k,2,key_idx) = hole(k,2)
+      enddo
+      if (key_idx == size_max) then
+        call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+        key_idx = 0
+      endif
+    enddo  ! ii
+    
+  enddo   ! ispin
+  call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+  
+  deallocate (ia_ja_pairs, &
+      keys_out, hole_save,          &
+      key,hole, particle, hole_tmp,&
+      particle_tmp, occ_particle,    &
+      occ_hole, occ_particle_tmp,&
+      occ_hole_tmp,key_union_hole_part)
+  
+  
+
+end
+
+subroutine H_apply_cis_sym_kpts()
+  implicit none
+  use omp_lib
+  use bitmasks
+  BEGIN_DOC
+  ! Calls H_apply on the |HF| determinant and selects all connected single and double
+  ! excitations (of the same symmetry). Auto-generated by the ``generate_h_apply`` script.
+  END_DOC
+
+  
+
+  integer                        :: i_generator
+  double precision               :: wall_0, wall_1
+  integer(bit_kind), allocatable :: mask(:,:,:)
+  integer(bit_kind), allocatable :: mask_kpts(:,:,:,:)
+  integer                        :: kk
+  integer                        :: ispin, k
+  integer                        :: iproc
+  double precision, allocatable  :: fock_diag_tmp(:,:)
+
+  
+  if (is_complex) then
+    PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators_complex
+  else
+  PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators
+  endif
+
+  call wall_time(wall_0)
+
+  iproc = 0
+  !allocate( mask(N_int,2,6), fock_diag_tmp(2,mo_num+1) )
+  allocate( mask_kpts(N_int,2,6,kpt_num), fock_diag_tmp(2,mo_num+1) )
+  do i_generator=1,N_det_generators
+
+    ! Compute diagonal of the Fock matrix
+    !call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int)
+    fock_diag_tmp=0.d0
+
+    ! Create bit masks for holes and particles
+    do kk=1,kpt_num
+    do ispin=1,2
+      do k=1,N_int
+        mask_kpts(k,ispin,d_hole2,kk) =                                      &
+            iand(generators_bitmask_kpts(k,ispin,d_hole2,kk),  &
+            psi_det_generators(k,ispin,i_generator) )
+        mask_kpts(k,ispin,d_part2,kk) =                                      &
+            iand(generators_bitmask_kpts(k,ispin,d_part2,kk),  &
+            not(psi_det_generators(k,ispin,i_generator)) )
+       ! mask_kpts(k,ispin,d_hole1,kk) =                                      &
+       !     iand(generators_bitmask_kpts(k,ispin,d_hole1,kk),  &
+       !     psi_det_generators(k,ispin,i_generator) )
+       ! mask_kpts(k,ispin,d_part1,kk) =                                      &
+       !     iand(generators_bitmask_kpts(k,ispin,d_part1,kk),  &
+       !     not(psi_det_generators(k,ispin,i_generator)) )
+       ! mask_kpts(k,ispin,d_hole2,kk) =                                      &
+       !     iand(generators_bitmask_kpts(k,ispin,d_hole2,kk),  &
+       !     psi_det_generators(k,ispin,i_generator) )
+       ! mask_kpts(k,ispin,d_part2,kk) =                                      &
+       !     iand(generators_bitmask_kpts(k,ispin,d_part2,kk),  &
+       !     not(psi_det_generators(k,ispin,i_generator)) )
+      enddo
+    enddo
+    enddo
+    !if(.False.)then
+    ! call H_apply_cis_sym_kpts_diexc(psi_det_generators(1,1,i_generator),      &
+    !     psi_det_generators(1,1,1),                                   &
+    !     mask(1,1,d_hole1), mask(1,1,d_part1),                        &
+    !     mask(1,1,d_hole2), mask(1,1,d_part2),                        &
+    !     fock_diag_tmp, i_generator, iproc )
+    !endif
+    if(.True.)then
+      do kk=1,kpt_num
+     call H_apply_cis_sym_kpts_monoexc(psi_det_generators(1,1,i_generator),    &
+         mask_kpts(1,1,s_hole,kk), mask_kpts(1,1,s_part,kk ),                        &
+         fock_diag_tmp, i_generator, iproc )
+      enddo
+    endif
+    call wall_time(wall_1)
+    
+    if (wall_1 - wall_0 > 2.d0) then
+        write(6,*)  &
+       100.*float(i_generator)/float(N_det_generators), '% in ', wall_1-wall_0, 's'
+        wall_0 = wall_1
+    endif
+  enddo
+
+  !deallocate( mask, fock_diag_tmp )
+  deallocate( mask_kpts, fock_diag_tmp )
+
+  call copy_H_apply_buffer_to_wf
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  if (is_complex) then
+    SOFT_TOUCH psi_det psi_coef_complex N_det
+  else
+    SOFT_TOUCH psi_det psi_coef N_det
+  endif
+
+  
+  ! Sort H_jj to find the N_states lowest states
+  integer                        :: i
+  integer, allocatable           :: iorder(:)
+  double precision, allocatable  :: H_jj(:)
+  double precision, external     :: diag_h_mat_elem
+  allocate(H_jj(N_det),iorder(N_det))
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP SHARED(psi_det,N_int,H_jj,iorder,N_det)                  &
+      !$OMP PRIVATE(i)
+  !$OMP DO
+  do i = 1, N_det
+    H_jj(i) = diag_h_mat_elem(psi_det(1,1,i),N_int)
+    iorder(i) = i
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dsort(H_jj,iorder,N_det)
+  if (is_complex) then
+    do k=1,N_states
+      psi_coef_complex(iorder(k),k) = (1.d0,0.d0)
+    enddo
+  else
+    do k=1,N_states
+      psi_coef(iorder(k),k) = 1.d0
+    enddo
+  endif
+  deallocate(H_jj,iorder)
+    
+
+end
+
+
diff --git a/src/cisd/cisd.irp.f b/src/cisd/cisd.irp.f
index 6c55e2ff..cf19e629 100644
--- a/src/cisd/cisd.irp.f
+++ b/src/cisd/cisd.irp.f
@@ -56,21 +56,38 @@ subroutine run
   double precision               :: cisdq(N_states), delta_e
   double precision,external      :: diag_h_mat_elem
 
-  if(pseudo_sym)then
-   call H_apply_cisd_sym
+  if (is_complex) then
+    call H_apply_cisd_kpts
   else
-   call H_apply_cisd
+    if(pseudo_sym)then
+     call H_apply_cisd_sym
+    else
+     call H_apply_cisd
+    endif
+  endif
+  if (is_complex) then
+    psi_coef_complex = ci_eigenvectors_complex
+    SOFT_TOUCH psi_coef_complex
+  else
+    psi_coef = ci_eigenvectors
+    SOFT_TOUCH psi_coef
   endif
-  psi_coef = ci_eigenvectors
-  SOFT_TOUCH psi_coef
   call save_wavefunction
   call ezfio_set_cisd_energy(CI_energy)
 
-  do i = 1,N_states
-    k = maxloc(dabs(psi_coef_sorted(1:N_det,i)),dim=1)
-    delta_E  = CI_electronic_energy(i) - diag_h_mat_elem(psi_det_sorted(1,1,k),N_int)
-    cisdq(i) = CI_energy(i) + delta_E * (1.d0 - psi_coef_sorted(k,i)**2)
-  enddo
+  if (is_complex) then
+    do i = 1,N_states
+      k = maxloc(cdabs(psi_coef_sorted_complex(1:N_det,i)),dim=1)
+      delta_E  = CI_electronic_energy(i) - diag_h_mat_elem(psi_det_sorted(1,1,k),N_int)
+      cisdq(i) = CI_energy(i) + delta_E * (1.d0 - cdabs(psi_coef_sorted_complex(k,i))**2)
+    enddo
+  else
+    do i = 1,N_states
+      k = maxloc(dabs(psi_coef_sorted(1:N_det,i)),dim=1)
+      delta_E  = CI_electronic_energy(i) - diag_h_mat_elem(psi_det_sorted(1,1,k),N_int)
+      cisdq(i) = CI_energy(i) + delta_E * (1.d0 - psi_coef_sorted(k,i)**2)
+    enddo
+  endif
   print *,  'N_det = ', N_det
   print*,''
   print*,'******************************'
diff --git a/src/cisd/cisd_routine.irp.f b/src/cisd/cisd_routine.irp.f
index 93b31e7d..e243e113 100644
--- a/src/cisd/cisd_routine.irp.f
+++ b/src/cisd/cisd_routine.irp.f
@@ -20,8 +20,13 @@ subroutine run_cisd
       print*, i ,CI_energy(i) - CI_energy(1)
     enddo
   endif
-  psi_coef = ci_eigenvectors
-  SOFT_TOUCH psi_coef
+  if (is_complex) then
+    psi_coef_complex = ci_eigenvectors_complex
+    SOFT_TOUCH psi_coef_complex
+  else
+    psi_coef = ci_eigenvectors
+    SOFT_TOUCH psi_coef
+  endif
   call save_wavefunction
   call ezfio_set_cisd_energy(CI_energy)
 
diff --git a/src/cisd/kpts_cisd.irp.f b/src/cisd/kpts_cisd.irp.f
new file mode 100644
index 00000000..8e37956f
--- /dev/null
+++ b/src/cisd/kpts_cisd.irp.f
@@ -0,0 +1,666 @@
+
+subroutine H_apply_cisd_kpts_diexc(key_in, key_prev, hole_1,particl_1, hole_2, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+  implicit none
+  integer(bit_kind), intent(in)         :: key_in(N_int, 2), hole_1(N_int, 2), hole_2(N_int, 2)
+  integer(bit_kind), intent(in)         :: particl_1(N_int, 2), particl_2(N_int, 2)
+  integer(bit_kind)                     :: p1_mask(N_int, 2), p2_mask(N_int, 2), tmp
+  integer,intent(in)                    :: i_generator,iproc_in
+  integer                               :: status(N_int*bit_kind_size, 2)
+  integer                               :: highest, p1,p2,sp,ni,i,mi,nt,ns,k
+  double precision, intent(in)          :: fock_diag_tmp(2,mo_num+1)
+  integer(bit_kind), intent(in)         :: key_prev(N_int, 2, *)
+  PROVIDE N_int
+  PROVIDE N_det
+
+  
+
+  highest = 0
+  do k=1,N_int*bit_kind_size
+    status(k,1) = 0
+    status(k,2) = 0
+  enddo
+  do sp=1,2
+    do ni=1,N_int
+      do i=1,bit_kind_size
+        if(iand(1_bit_kind,shiftr(key_in(ni, sp), (i-1))) == 0) then
+          cycle
+        end if
+        mi = (ni-1)*bit_kind_size+i
+        status(mi, sp) = int(iand(1_bit_kind,shiftr(hole_1(ni,sp),(i-1))),4)
+        status(mi, sp) = status(mi, sp) + 2*int(iand(1_bit_kind,shiftr(hole_2(ni,sp),(i-1))),4)
+        if(status(mi, sp) /= 0 .and. mi > highest) then
+          highest = mi
+        end if
+      end do
+    end do
+  end do
+
+  do sp=1,2
+    do p1=1,highest
+      if(status(p1, sp) == 0) then
+        cycle
+      end if
+      do p2=1,highest
+        if(status(p2, sp) == 0) then
+          cycle
+        end if
+        if((status(p1, sp) == 1 .and. status(p2, sp) > 1) .or. &
+            (status(p1, sp) == 2 .and. status(p2, sp) == 3) .or. &
+            (status(p1, sp) == 3 .and. status(p2, sp) == 3 .and. p2 > p1)) then
+          call H_apply_cisd_kpts_diexcP(key_in, sp, p1, particl_1, sp, p2, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+        end if
+      end do
+    end do
+  end do
+  do p1=1,highest
+    if(status(p1, 1) == 0) then
+      cycle
+    end if
+    do p2=1,highest
+      if(status(p2, 2) == 0) then
+        cycle
+      end if
+      if((status(p1, 1) == 3) .or. &
+          (status(p1, 1) == 1 .and. status(p2, 2) >= 2) .or. &
+          (status(p1, 1) == 2 .and. status(p2, 2) /= 2)) then
+
+          call H_apply_cisd_kpts_diexcP(key_in, 1, p1, particl_1, 2, p2, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+      end if
+    end do
+  end do
+end subroutine
+
+
+subroutine H_apply_cisd_kpts_diexcP(key_in, fs1, fh1, particl_1, fs2, fh2, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+  implicit none
+  integer(bit_kind), intent(in)         :: key_in(N_int, 2), particl_1(N_int, 2), particl_2(N_int, 2)
+  double precision, intent(in)          :: fock_diag_tmp(2,mo_num+1)
+  integer(bit_kind)                     :: p1_mask(N_int, 2), p2_mask(N_int, 2), key_mask(N_int, 2)
+  integer,intent(in)                    :: fs1,fs2,i_generator,iproc_in, fh1,fh2
+  integer(bit_kind)                     :: miniList(N_int, 2, N_det)
+  integer                               :: n_minilist, n_alpha, n_beta, deg(2), i, ni, k
+  
+  integer(bit_kind), parameter :: one = 1_bit_kind
+
+  do k=1,N_int
+    p1_mask(k,1) = 0_bit_kind
+    p1_mask(k,2) = 0_bit_kind
+    p2_mask(k,1) = 0_bit_kind
+    p2_mask(k,2) = 0_bit_kind
+  enddo
+  p1_mask(shiftr(fh1-1,bit_kind_shift) + 1, fs1) = shiftl(one,iand(fh1-1,bit_kind_size-1))
+  p2_mask(shiftr(fh2-1,bit_kind_shift) + 1, fs2) = shiftl(one,iand(fh2-1,bit_kind_size-1))
+
+  do k=1,N_int
+    key_mask(k,1) = key_in(k,1)
+    key_mask(k,2) = key_in(k,2)
+  enddo
+
+  key_mask(shiftr(fh1-1,bit_kind_shift) + 1, fs1) -= shiftl(one,iand(fh1-1,bit_kind_size-1))
+  key_mask(shiftr(fh2-1,bit_kind_shift) + 1, fs2) -= shiftl(one,iand(fh2-1,bit_kind_size-1))
+
+
+  call H_apply_cisd_kpts_diexcOrg(key_in, key_mask, p1_mask, particl_1, p2_mask, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+end subroutine
+
+
+subroutine H_apply_cisd_kpts_diexcOrg(key_in,key_mask,hole_1,particl_1,hole_2, particl_2, fock_diag_tmp, i_generator, iproc_in  )
+  use omp_lib
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Generate all double excitations of key_in using the bit masks of holes and
+  ! particles.
+  ! Assume N_int is already provided.
+  END_DOC
+  integer,parameter              :: size_max = 8192
+  
+  integer          ,intent(in)   :: i_generator
+  integer(bit_kind),intent(in)   :: key_in(N_int,2), key_mask(N_int, 2)
+  integer(bit_kind),allocatable  :: keys_out(:,:,:)
+  integer(bit_kind), intent(in)  :: hole_1(N_int,2), particl_1(N_int,2)
+  integer(bit_kind), intent(in)  :: hole_2(N_int,2), particl_2(N_int,2)
+  integer, intent(in)            :: iproc_in
+  double precision, intent(in)   :: fock_diag_tmp(2,mo_num+1)
+  integer(bit_kind), allocatable :: hole_save(:,:)
+  integer(bit_kind), allocatable :: key(:,:),hole(:,:), particle(:,:)
+  integer(bit_kind), allocatable :: hole_tmp(:,:), particle_tmp(:,:)
+  integer(bit_kind), allocatable :: key_union_hole_part(:)
+  integer                        :: ii,i,jj,j,k,ispin,l
+  integer, allocatable           :: occ_particle(:,:), occ_hole(:,:)
+  integer, allocatable           :: occ_particle_tmp(:,:), occ_hole_tmp(:,:)
+  integer                        :: kk,pp,other_spin,key_idx
+  integer                        :: N_elec_in_key_hole_1(2),N_elec_in_key_part_1(2)
+  integer                        :: N_elec_in_key_hole_2(2),N_elec_in_key_part_2(2)
+
+  double precision               :: mo_two_e_integral
+  logical                        :: is_a_two_holes_two_particles
+  integer, allocatable           :: ia_ja_pairs(:,:,:)
+  integer, allocatable           :: ib_jb_pairs(:,:)
+  double precision               :: diag_H_mat_elem
+  integer                        :: iproc
+  integer                        :: jtest_vvvv
+
+  logical :: check_double_excitation
+  logical :: is_a_1h1p
+  logical :: is_a_1h2p
+  logical :: is_a_1h
+  logical :: is_a_1p
+  logical :: is_a_2p
+  logical :: is_a_2h1p
+  logical :: is_a_2h
+  logical :: b_cycle
+  logical :: yes_no
+  check_double_excitation = .True.
+  iproc = iproc_in
+
+
+  
+
+  
+!$ iproc = omp_get_thread_num()
+  allocate (keys_out(N_int,2,size_max), hole_save(N_int,2),          &
+      key(N_int,2),hole(N_int,2), particle(N_int,2), hole_tmp(N_int,2),&
+      particle_tmp(N_int,2), occ_particle(N_int*bit_kind_size,2),    &
+      occ_hole(N_int*bit_kind_size,2), occ_particle_tmp(N_int*bit_kind_size,2),&
+      occ_hole_tmp(N_int*bit_kind_size,2),key_union_hole_part(N_int))
+
+  
+
+
+
+  !!!! First couple hole particle
+  do j = 1, N_int
+    hole(j,1) = iand(hole_1(j,1),key_in(j,1))
+    hole(j,2) = iand(hole_1(j,2),key_in(j,2))
+    particle(j,1) = iand(xor(particl_1(j,1),key_in(j,1)),particl_1(j,1))
+    particle(j,2) = iand(xor(particl_1(j,2),key_in(j,2)),particl_1(j,2))
+  enddo
+  call bitstring_to_list_ab(particle,occ_particle,N_elec_in_key_part_1,N_int)
+  call bitstring_to_list_ab(hole,occ_hole,N_elec_in_key_hole_1,N_int)
+  allocate (ia_ja_pairs(2,0:(elec_alpha_num)*mo_num,2),          &
+            ib_jb_pairs(2,0:(elec_alpha_num)*mo_num))
+
+  do ispin=1,2
+    i=0
+    do ii=N_elec_in_key_hole_1(ispin),1,-1             ! hole
+      i_a = occ_hole(ii,ispin)
+      ASSERT (i_a > 0)
+      ASSERT (i_a <= mo_num)
+
+      do jj=1,N_elec_in_key_part_1(ispin)              !particle
+        j_a = occ_particle(jj,ispin)
+        ASSERT (j_a > 0)
+        ASSERT (j_a <= mo_num)
+        i += 1
+        ia_ja_pairs(1,i,ispin) = i_a
+        ia_ja_pairs(2,i,ispin) = j_a
+      enddo
+    enddo
+    ia_ja_pairs(1,0,ispin) = i
+  enddo
+
+  key_idx = 0
+
+  integer                        :: i_a,j_a,i_b,j_b,k_a,l_a,k_b,l_b
+  integer(bit_kind)              :: test(N_int,2)
+  double precision               :: accu
+  logical, allocatable           :: array_pairs(:,:)
+  allocate(array_pairs(mo_num,mo_num))
+  accu = 0.d0
+  do ispin=1,2
+    other_spin = iand(ispin,1)+1
+    
+    do ii=1,ia_ja_pairs(1,0,ispin)
+      i_a = ia_ja_pairs(1,ii,ispin)
+      ASSERT (i_a > 0)
+      ASSERT (i_a <= mo_num)
+      j_a = ia_ja_pairs(2,ii,ispin)
+      ASSERT (j_a > 0)
+      ASSERT (j_a <= mo_num)
+      hole = key_in
+      k = shiftr(i_a-1,bit_kind_shift)+1
+      j = i_a-shiftl(k-1,bit_kind_shift)-1
+      hole(k,ispin) = ibclr(hole(k,ispin),j)
+      k_a = shiftr(j_a-1,bit_kind_shift)+1
+      l_a = j_a-shiftl(k_a-1,bit_kind_shift)-1
+      hole(k_a,ispin) = ibset(hole(k_a,ispin),l_a)
+
+      !!!! Second couple hole particle
+      do j = 1, N_int
+        hole_tmp(j,1) = iand(hole_2(j,1),hole(j,1))
+        hole_tmp(j,2) = iand(hole_2(j,2),hole(j,2))
+        particle_tmp(j,1) = iand(xor(particl_2(j,1),hole(j,1)),particl_2(j,1))
+        particle_tmp(j,2) = iand(xor(particl_2(j,2),hole(j,2)),particl_2(j,2))
+      enddo
+
+      call bitstring_to_list_ab(particle_tmp,occ_particle_tmp,N_elec_in_key_part_2,N_int)
+      call bitstring_to_list_ab(hole_tmp,occ_hole_tmp,N_elec_in_key_hole_2,N_int)
+
+      !   hole = a^(+)_j_a(ispin) a_i_a(ispin)|key_in> : single exc :: orb(i_a,ispin) --> orb(j_a,ispin)
+      hole_save = hole
+
+      ! Build array of the non-zero integrals of second excitation
+      array_pairs = .True.
+
+        if (ispin == 1) then
+        integer                        :: jjj
+
+        i=0
+        do kk = 1,N_elec_in_key_hole_2(other_spin)
+          i_b = occ_hole_tmp(kk,other_spin)
+          ASSERT (i_b > 0)
+          ASSERT (i_b <= mo_num)
+          do jjj=1,N_elec_in_key_part_2(other_spin)     ! particle
+            j_b = occ_particle_tmp(jjj,other_spin)
+            ASSERT (j_b > 0)
+            ASSERT (j_b <= mo_num)
+            if (array_pairs(i_b,j_b)) then
+              
+              i+= 1
+              ib_jb_pairs(1,i) = i_b
+              ib_jb_pairs(2,i) = j_b
+            endif
+          enddo
+        enddo
+        ib_jb_pairs(1,0) = i
+
+        do kk = 1,ib_jb_pairs(1,0)
+          hole = hole_save
+          i_b = ib_jb_pairs(1,kk)
+          j_b = ib_jb_pairs(2,kk)
+          k = shiftr(i_b-1,bit_kind_shift)+1
+          j = i_b-shiftl(k-1,bit_kind_shift)-1
+          hole(k,other_spin) = ibclr(hole(k,other_spin),j)
+          key = hole
+          k = shiftr(j_b-1,bit_kind_shift)+1
+          l = j_b-shiftl(k-1,bit_kind_shift)-1
+          key(k,other_spin) = ibset(key(k,other_spin),l)
+          
+          
+          
+          
+          
+          
+          
+          
+          
+          
+          key_idx += 1
+          do k=1,N_int
+            keys_out(k,1,key_idx) = key(k,1)
+            keys_out(k,2,key_idx) = key(k,2)
+          enddo
+          ASSERT (key_idx <= size_max)
+          if (key_idx == size_max) then
+            call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+            key_idx = 0
+          endif
+        enddo
+      endif
+
+      !   does all the single excitations of the same spin
+      i=0
+      do kk = 1,N_elec_in_key_hole_2(ispin)
+        i_b = occ_hole_tmp(kk,ispin)
+        if (i_b <= i_a.or.i_b == j_a) cycle
+        ASSERT (i_b > 0)
+        ASSERT (i_b <= mo_num)
+        do jjj=1,N_elec_in_key_part_2(ispin)     ! particule
+          j_b = occ_particle_tmp(jjj,ispin)
+          ASSERT (j_b > 0)
+          ASSERT (j_b <= mo_num)
+          if (j_b <= j_a) cycle
+          if (array_pairs(i_b,j_b)) then
+            
+            i+= 1
+            ib_jb_pairs(1,i) = i_b
+            ib_jb_pairs(2,i) = j_b
+          endif
+        enddo
+      enddo
+      ib_jb_pairs(1,0) = i
+
+      do kk = 1,ib_jb_pairs(1,0)
+        hole = hole_save
+        i_b = ib_jb_pairs(1,kk)
+        j_b = ib_jb_pairs(2,kk)
+        k = shiftr(i_b-1,bit_kind_shift)+1
+        j = i_b-shiftl(k-1,bit_kind_shift)-1
+        hole(k,ispin) = ibclr(hole(k,ispin),j)
+        key = hole
+        k = shiftr(j_b-1,bit_kind_shift)+1
+        l = j_b-shiftl(k-1,bit_kind_shift)-1
+        key(k,ispin) = ibset(key(k,ispin),l)
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        key_idx += 1
+        do k=1,N_int
+          keys_out(k,1,key_idx) = key(k,1)
+          keys_out(k,2,key_idx) = key(k,2)
+        enddo
+        ASSERT (key_idx <= size_max)
+        if (key_idx == size_max) then
+          call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+          key_idx = 0
+        endif
+      enddo ! kk
+
+    enddo  ! ii
+    
+  enddo   ! ispin
+  call fill_h_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+  
+  deallocate (ia_ja_pairs, ib_jb_pairs,                              &
+      keys_out, hole_save,                                           &
+      key,hole, particle, hole_tmp,                                  &
+      particle_tmp, occ_particle,                                    &
+      occ_hole, occ_particle_tmp,                                    &
+      occ_hole_tmp,array_pairs,key_union_hole_part)
+  
+  
+end
+
+subroutine H_apply_cisd_kpts_monoexc(key_in, hole_1,particl_1,fock_diag_tmp,i_generator,iproc_in  )
+  use omp_lib
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Generate all single excitations of key_in using the bit masks of holes and
+  ! particles.
+  ! Assume N_int is already provided.
+  END_DOC
+  integer,parameter              :: size_max = 8192
+  
+  integer          ,intent(in)   :: i_generator
+  integer(bit_kind),intent(in)   :: key_in(N_int,2)
+  integer(bit_kind),intent(in)   :: hole_1(N_int,2), particl_1(N_int,2)
+  integer, intent(in)            :: iproc_in
+  double precision, intent(in)   :: fock_diag_tmp(2,mo_num+1)
+  integer(bit_kind),allocatable  :: keys_out(:,:,:)
+  integer(bit_kind),allocatable  :: hole_save(:,:)
+  integer(bit_kind),allocatable  :: key(:,:),hole(:,:), particle(:,:)
+  integer(bit_kind),allocatable  :: hole_tmp(:,:), particle_tmp(:,:)
+  integer(bit_kind),allocatable  :: hole_2(:,:), particl_2(:,:)
+  integer                        :: ii,i,jj,j,k,ispin,l
+  integer,allocatable            :: occ_particle(:,:), occ_hole(:,:)
+  integer,allocatable            :: occ_particle_tmp(:,:), occ_hole_tmp(:,:)
+  integer,allocatable            :: ib_jb_pairs(:,:)
+  integer                        :: kk,pp,other_spin,key_idx
+  integer                        :: N_elec_in_key_hole_1(2),N_elec_in_key_part_1(2)
+  integer                        :: N_elec_in_key_hole_2(2),N_elec_in_key_part_2(2)
+  logical                        :: is_a_two_holes_two_particles
+  integer(bit_kind), allocatable :: key_union_hole_part(:)
+
+  integer, allocatable           :: ia_ja_pairs(:,:,:)
+  logical, allocatable           :: array_pairs(:,:)
+  double precision               :: diag_H_mat_elem
+  integer                        :: iproc
+
+  integer(bit_kind)              :: key_mask(N_int, 2)
+
+  logical :: check_double_excitation
+  logical :: is_a_2h1p
+  logical :: is_a_2h
+  logical :: is_a_1h1p
+  logical :: is_a_1h2p
+  logical :: is_a_1h
+  logical :: is_a_1p
+  logical :: is_a_2p
+  logical :: yes_no
+
+  do k=1,N_int
+    key_mask(k,1) = 0_bit_kind
+    key_mask(k,2) = 0_bit_kind
+  enddo
+
+  iproc = iproc_in
+
+  check_double_excitation = .True.
+  
+
+
+  
+
+  
+!$ iproc = omp_get_thread_num()
+  allocate (keys_out(N_int,2,size_max), hole_save(N_int,2),          &
+      key(N_int,2),hole(N_int,2), particle(N_int,2), hole_tmp(N_int,2),&
+      particle_tmp(N_int,2), occ_particle(N_int*bit_kind_size,2),    &
+      occ_hole(N_int*bit_kind_size,2), occ_particle_tmp(N_int*bit_kind_size,2),&
+      occ_hole_tmp(N_int*bit_kind_size,2),key_union_hole_part(N_int))
+  
+  !!!! First couple hole particle
+  do j = 1, N_int
+    hole(j,1) = iand(hole_1(j,1),key_in(j,1))
+    hole(j,2) = iand(hole_1(j,2),key_in(j,2))
+    particle(j,1) = iand(xor(particl_1(j,1),key_in(j,1)),particl_1(j,1))
+    particle(j,2) = iand(xor(particl_1(j,2),key_in(j,2)),particl_1(j,2))
+  enddo
+
+  call bitstring_to_list_ab(particle,occ_particle,N_elec_in_key_part_1,N_int)
+  call bitstring_to_list_ab(hole,occ_hole,N_elec_in_key_hole_1,N_int)
+  allocate (ia_ja_pairs(2,0:(elec_alpha_num)*mo_num,2))
+
+  do ispin=1,2
+    i=0
+    do ii=N_elec_in_key_hole_1(ispin),1,-1             ! hole
+      i_a = occ_hole(ii,ispin)
+      do jj=1,N_elec_in_key_part_1(ispin)                            !particule
+        j_a = occ_particle(jj,ispin)
+        i += 1
+        ia_ja_pairs(1,i,ispin) = i_a
+        ia_ja_pairs(2,i,ispin) = j_a
+      enddo
+    enddo
+    ia_ja_pairs(1,0,ispin) = i
+  enddo
+
+  key_idx = 0
+
+  integer                        :: i_a,j_a,i_b,j_b,k_a,l_a,k_b,l_b
+  integer(bit_kind)              :: test(N_int,2)
+  double precision               :: accu
+  accu = 0.d0
+  do ispin=1,2
+    other_spin = iand(ispin,1)+1
+    
+    do ii=1,ia_ja_pairs(1,0,ispin)
+      i_a = ia_ja_pairs(1,ii,ispin)
+      j_a = ia_ja_pairs(2,ii,ispin)
+      hole = key_in
+      k = shiftr(i_a-1,bit_kind_shift)+1
+      j = i_a-shiftl(k-1,bit_kind_shift)-1
+  
+      hole(k,ispin) = ibclr(hole(k,ispin),j)
+      k_a = shiftr(j_a-1,bit_kind_shift)+1
+      l_a = j_a-shiftl(k_a-1,bit_kind_shift)-1
+  
+      hole(k_a,ispin) = ibset(hole(k_a,ispin),l_a)
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      key_idx += 1
+      do k=1,N_int
+        keys_out(k,1,key_idx) = hole(k,1)
+        keys_out(k,2,key_idx) = hole(k,2)
+      enddo
+      if (key_idx == size_max) then
+        call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+        key_idx = 0
+      endif
+    enddo  ! ii
+    
+  enddo   ! ispin
+  call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)
+  
+  deallocate (ia_ja_pairs, &
+      keys_out, hole_save,          &
+      key,hole, particle, hole_tmp,&
+      particle_tmp, occ_particle,    &
+      occ_hole, occ_particle_tmp,&
+      occ_hole_tmp,key_union_hole_part)
+  
+  
+
+end
+
+subroutine H_apply_cisd_kpts()
+  implicit none
+  use omp_lib
+  use bitmasks
+  BEGIN_DOC
+  ! Calls H_apply on the |HF| determinant and selects all connected single and double
+  ! excitations (of the same symmetry). Auto-generated by the ``generate_h_apply`` script.
+  END_DOC
+
+  
+
+  integer                        :: i_generator
+  double precision               :: wall_0, wall_1
+  integer(bit_kind), allocatable :: mask(:,:,:)
+  integer                        :: ispin, k
+  integer                        :: iproc
+  double precision, allocatable  :: fock_diag_tmp(:,:)
+
+  integer :: kk,kh1,kh2,kp1,kp2
+  integer(bit_kind), allocatable :: mask_kpts(:,:,:,:)
+  
+  if (is_complex) then
+    PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators_complex
+  else
+  PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators
+  endif
+
+  call wall_time(wall_0)
+
+  iproc = 0
+  !allocate( mask(N_int,2,6), fock_diag_tmp(2,mo_num+1) )
+  allocate( mask_kpts(N_int,2,6,kpt_num), fock_diag_tmp(2,mo_num+1) )
+  do i_generator=1,N_det_generators
+
+    ! Compute diagonal of the Fock matrix
+    call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int)
+
+    ! Create bit masks for holes and particles
+    do kk=1,kpt_num
+    do ispin=1,2
+      do k=1,N_int
+        mask_kpts(k,ispin,s_hole,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,s_hole,kk),  &
+            psi_det_generators(k,ispin,i_generator) )
+        mask_kpts(k,ispin,s_part,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,s_part,kk),  &
+            not(psi_det_generators(k,ispin,i_generator)) )
+        mask_kpts(k,ispin,d_hole1,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,d_hole1,kk),  &
+            psi_det_generators(k,ispin,i_generator) )
+        mask_kpts(k,ispin,d_part1,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,d_part1,kk),  &
+            not(psi_det_generators(k,ispin,i_generator)) )
+        mask_kpts(k,ispin,d_hole2,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,d_hole2,kk),  &
+            psi_det_generators(k,ispin,i_generator) )
+        mask_kpts(k,ispin,d_part2,kk) =                                 &
+            iand(generators_bitmask_kpts(k,ispin,d_part2,kk),  &
+            not(psi_det_generators(k,ispin,i_generator)) )
+      enddo
+    enddo
+    enddo
+    if(.True.)then
+      do kh1=1,kpt_num
+        do kh2=1,kpt_num
+          do kp1=1,kpt_num
+            kp2=kconserv(kh1,kh2,kp1)
+            !print*,'kh1h2p1p1',kh1,kh2,kp1,kp2
+            !print*,'size_before: ',h_apply_buffer(iproc)%n_det
+     call H_apply_cisd_kpts_diexc(psi_det_generators(1,1,i_generator),      &
+         psi_det_generators(1,1,1),                                   &
+         mask_kpts(1,1,d_hole1,kh1), mask_kpts(1,1,d_part1,kp1),                        &
+         mask_kpts(1,1,d_hole2,kh2), mask_kpts(1,1,d_part2,kp2),                        &
+         fock_diag_tmp, i_generator, iproc )
+            !print*,'size_after: ',h_apply_buffer(iproc)%n_det
+          enddo
+        enddo
+      enddo
+    endif
+    if(.True.)then
+      do kk=1,kpt_num
+     call H_apply_cisd_kpts_monoexc(psi_det_generators(1,1,i_generator),    &
+         mask_kpts(1,1,s_hole,kk), mask_kpts(1,1,s_part,kk),                        &
+         fock_diag_tmp, i_generator, iproc )
+      enddo
+    endif
+    call wall_time(wall_1)
+    
+    if (wall_1 - wall_0 > 2.d0) then
+        write(6,*)  &
+       100.*float(i_generator)/float(N_det_generators), '% in ', wall_1-wall_0, 's'
+        wall_0 = wall_1
+    endif
+  enddo
+
+  !deallocate( mask, fock_diag_tmp )
+  deallocate( mask_kpts, fock_diag_tmp )
+
+  call copy_H_apply_buffer_to_wf
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  if (is_complex) then
+    SOFT_TOUCH psi_det psi_coef_complex N_det
+  else
+    SOFT_TOUCH psi_det psi_coef N_det
+  endif
+
+  
+  ! Sort H_jj to find the N_states lowest states
+  integer                        :: i
+  integer, allocatable           :: iorder(:)
+  double precision, allocatable  :: H_jj(:)
+  double precision, external     :: diag_h_mat_elem
+  allocate(H_jj(N_det),iorder(N_det))
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP SHARED(psi_det,N_int,H_jj,iorder,N_det)                  &
+      !$OMP PRIVATE(i)
+  !$OMP DO
+  do i = 1, N_det
+    H_jj(i) = diag_h_mat_elem(psi_det(1,1,i),N_int)
+    iorder(i) = i
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dsort(H_jj,iorder,N_det)
+  if (is_complex) then
+    do k=1,N_states
+      psi_coef_complex(iorder(k),k) = (1.d0,0.d0)
+    enddo
+  else
+    do k=1,N_states
+      psi_coef(iorder(k),k) = 1.d0
+    enddo
+  endif
+  deallocate(H_jj,iorder)
+    
+
+end
+
diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f
index aed81063..ae3473d1 100644
--- a/src/davidson/davidson_parallel.irp.f
+++ b/src/davidson/davidson_parallel.irp.f
@@ -89,21 +89,97 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
   character*(512)                :: msg
   integer                        :: imin, imax, ishift, istep
 
-  integer, allocatable           :: psi_det_read(:,:,:)
-  double precision, allocatable  :: v_t(:,:), s_t(:,:), u_t(:,:)
-
-  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_t, s_t
-
-  ! Get wave function (u_t)
-  ! -----------------------
-
   integer                        :: rc, ni, nj
   integer*8                      :: rc8
   integer                        :: N_states_read, N_det_read, psi_det_size_read
   integer                        :: N_det_selectors_read, N_det_generators_read
 
-  integer, external :: zmq_get_dvector
+  integer, allocatable           :: psi_det_read(:,:,:)
+  logical :: sending
+  integer, external :: get_task_from_taskserver
+  integer, external :: task_done_to_taskserver
+  integer :: k
+  integer :: ierr
+ 
+
+!  integer, external :: zmq_get_dvector
   integer, external :: zmq_get_dmatrix
+  integer, external :: zmq_get_cdmatrix
+  IRP_IF MPI
+    include 'mpif.h'
+  IRP_ENDIF
+ 
+  if (is_complex) then
+    complex*16, allocatable  :: v_tc(:,:), s_tc(:,:), u_tc(:,:)
+
+    !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_tc, v_tc, s_tc
+
+
+    ! Get wave function (u_tc)
+    ! -----------------------
+
+    PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
+    PROVIDE psi_bilinear_matrix_transp_values_complex psi_bilinear_matrix_values_complex psi_bilinear_matrix_columns_loc
+    PROVIDE ref_bitmask_energy nproc
+    PROVIDE mpi_initialized
+
+    allocate(u_tc(N_st,N_det))
+    
+    !todo: resize for complex? (should be okay)
+    ! Warning : dimensions are modified for efficiency, It is OK since we get the
+    ! full matrix
+    if (size(u_tc,kind=8) < 8388608_8) then
+      ni = size(u_tc)
+      nj = 1
+    else
+      ni = 8388608
+      nj = int(size(u_tc,kind=8)/8388608_8,4) + 1
+    endif
+
+    do while (zmq_get_cdmatrix(zmq_to_qp_run_socket, worker_id, 'u_tc', u_tc, ni, nj, size(u_tc,kind=8)) == -1)
+      print *,  'mpi_rank, N_states_diag, N_det'
+      print *,  mpi_rank, N_states_diag, N_det
+      stop 'u_tc'
+    enddo
+
+    IRP_IF MPI
+!      include 'mpif.h'
+      call broadcast_chunks_complex_double(u_tc,size(u_tc,kind=8))
+    IRP_ENDIF
+
+    ! Run tasks
+    ! ---------
+
+    sending=.False.
+
+    allocate(v_tc(N_st,N_det), s_tc(N_st,N_det))
+    do
+      if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg) == -1) then
+        exit
+      endif
+      if(task_id == 0) exit
+      read (msg,*) imin, imax, ishift, istep
+      do k=imin,imax
+        v_tc(:,k) = (0.d0,0.d0)
+        s_tc(:,k) = (0.d0,0.d0)
+      enddo
+      call h_s2_u_0_nstates_openmp_work_complex(v_tc,s_tc,u_tc,N_st,N_det,imin,imax,ishift,istep)
+      if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
+          print *,  irp_here, 'Unable to send task_done'
+      endif
+      call davidson_push_results_async_recv(zmq_socket_push, sending)
+      call davidson_push_results_async_send_complex(zmq_socket_push, v_tc, s_tc, imin, imax, task_id, sending)
+    end do
+    deallocate(u_tc,v_tc, s_tc)
+    call davidson_push_results_async_recv(zmq_socket_push, sending)
+  else
+  double precision, allocatable  :: v_t(:,:), s_t(:,:), u_t(:,:)
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_t, s_t
+
+
+  ! Get wave function (u_t)
+  ! -----------------------
 
   PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
   PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
@@ -129,29 +205,22 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
   enddo
 
   IRP_IF MPI
-    include 'mpif.h'
-    integer :: ierr
-
+    !include 'mpif.h'
     call broadcast_chunks_double(u_t,size(u_t,kind=8))
-
   IRP_ENDIF
 
   ! Run tasks
   ! ---------
 
-  logical :: sending
   sending=.False.
 
   allocate(v_t(N_st,N_det), s_t(N_st,N_det))
   do
-    integer, external :: get_task_from_taskserver
-    integer, external :: task_done_to_taskserver
     if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg) == -1) then
       exit
     endif
     if(task_id == 0) exit
     read (msg,*) imin, imax, ishift, istep
-    integer :: k
     do k=imin,imax
       v_t(:,k) = 0.d0
       s_t(:,k) = 0.d0
@@ -165,7 +234,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
   end do
   deallocate(u_t,v_t, s_t)
   call davidson_push_results_async_recv(zmq_socket_push, sending)
-
+  endif
 end subroutine
 
 
@@ -538,6 +607,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
 end
 
 
+
 BEGIN_PROVIDER [ integer, nthreads_davidson ]
  implicit none
  BEGIN_DOC
@@ -648,3 +718,360 @@ integer function zmq_get_N_states_diag(zmq_to_qp_run_socket, worker_id)
   IRP_ENDIF
 end
 
+
+!==============================================================================!
+!                                                                              !
+!                                    Complex                                   !
+!                                                                              !
+!==============================================================================!
+
+subroutine davidson_push_results_complex(zmq_socket_push, v_t, s_t, imin, imax, task_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Push the results of $H | U \rangle$ from a worker to the master.
+  END_DOC
+
+  integer(ZMQ_PTR)    ,intent(in)    :: zmq_socket_push
+  integer             ,intent(in)    :: task_id, imin, imax
+  complex*16          ,intent(in)    :: v_t(N_states_diag,N_det)
+  complex*16          ,intent(in)    :: s_t(N_states_diag,N_det)
+  integer                            :: rc, sz
+  integer*8                          :: rc8
+
+  sz = (imax-imin+1)*N_states_diag
+
+  rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE)
+  if(rc /= 4) stop 'davidson_push_results_complex failed to push task_id'
+
+  rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE)
+  if(rc /= 4) stop 'davidson_push_results_complex failed to push imin'
+
+  rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE)
+  if(rc /= 4) stop 'davidson_push_results_complex failed to push imax'
+
+  !todo: double sz for complex? (done)
+  rc8 = f77_zmq_send8( zmq_socket_push, v_t(1,imin), 8_8*sz*2, ZMQ_SNDMORE)
+  if(rc8 /= 8_8*sz*2) then
+    print*,irp_here,' rc8 = ',rc8
+    print*,irp_here,'  sz = ',sz
+    print*,'rc8 /= sz*8'
+    stop 'davidson_push_results_complex failed to push vt'
+  endif
+
+  !todo: double sz for complex? (done)
+  rc8 = f77_zmq_send8( zmq_socket_push, s_t(1,imin), 8_8*sz*2, 0)
+  if(rc8 /= 8_8*sz*2) stop 'davidson_push_results_complex failed to push st'
+
+! Activate is zmq_socket_push is a REQ
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  character*(2) :: ok
+  rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
+  if ((rc /= 2).and.(ok(1:2)/='ok')) then
+    print *, irp_here, ': f77_zmq_recv( zmq_socket_push, ok, 2, 0)'
+    stop -1
+  endif
+IRP_ENDIF
+
+end subroutine
+
+subroutine davidson_push_results_async_send_complex(zmq_socket_push, v_t, s_t, imin, imax, task_id,sending)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Push the results of $H | U \rangle$ from a worker to the master.
+  END_DOC
+
+  integer(ZMQ_PTR)    ,intent(in)    :: zmq_socket_push
+  integer             ,intent(in)    :: task_id, imin, imax
+  complex*16          ,intent(in)    :: v_t(N_states_diag,N_det)
+  complex*16          ,intent(in)    :: s_t(N_states_diag,N_det)
+  logical             ,intent(inout) :: sending
+  integer                            :: rc, sz
+  integer*8                          :: rc8
+
+  if (sending) then
+    print *,  irp_here, ': sending=true'
+    stop -1
+  endif
+  sending = .True.
+
+  sz = (imax-imin+1)*N_states_diag
+
+  rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE)
+  if(rc /= 4) stop 'davidson_push_results_async_send_complex failed to push task_id'
+
+  rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE)
+  if(rc /= 4) stop 'davidson_push_results_async_send_complex failed to push imin'
+
+  rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE)
+  if(rc /= 4) stop 'davidson_push_results_async_send_complex failed to push imax'
+
+  !todo: double sz for complex? (done)
+  rc8 = f77_zmq_send8( zmq_socket_push, v_t(1,imin), 8_8*sz*2, ZMQ_SNDMORE)
+  if(rc8 /= 8_8*sz*2) then
+    print*,irp_here,' rc8 = ',rc8
+    print*,irp_here,'  sz = ',sz
+    print*,'rc8 /= sz*8'
+    stop 'davidson_push_results_async_send_complex failed to push vt'
+  endif
+
+  !todo: double sz for complex? (done)
+  rc8 = f77_zmq_send8( zmq_socket_push, s_t(1,imin), 8_8*sz*2, 0)
+  if(rc8 /= 8_8*sz*2) stop 'davidson_push_results_async_send_complex failed to push st'
+
+end subroutine
+
+
+subroutine davidson_pull_results_complex(zmq_socket_pull, v_t, s_t, imin, imax, task_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Pull the results of $H | U \rangle$ on the master.
+  END_DOC
+
+  integer(ZMQ_PTR)    ,intent(in)     :: zmq_socket_pull
+  integer             ,intent(out)    :: task_id, imin, imax
+  complex*16          ,intent(out)    :: v_t(N_states_diag,N_det)
+  complex*16          ,intent(out)    :: s_t(N_states_diag,N_det)
+
+  integer                            :: rc, sz
+  integer*8                          :: rc8
+
+  rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
+  if(rc /= 4) stop 'davidson_pull_results failed to pull task_id'
+
+  rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0)
+  if(rc /= 4) stop 'davidson_pull_results failed to pull imin'
+
+  rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0)
+  if(rc /= 4) stop 'davidson_pull_results failed to pull imax'
+
+  sz = (imax-imin+1)*N_states_diag
+
+  !todo: double sz for complex? (done)
+  rc8 = f77_zmq_recv8( zmq_socket_pull, v_t(1,imin), 8_8*sz*2, 0)
+  if(rc8 /= 8*sz*2) stop 'davidson_pull_results_complex failed to pull v_t'
+
+  !todo: double sz for complex? (done)
+  rc8 = f77_zmq_recv8( zmq_socket_pull, s_t(1,imin), 8_8*sz*2, 0)
+  if(rc8 /= 8*sz*2) stop 'davidson_pull_results_complex failed to pull s_t'
+
+! Activate if zmq_socket_pull is a REP
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0)
+  if (rc /= 2) then
+    print *,  irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
+    stop -1
+  endif
+IRP_ENDIF
+
+end subroutine
+
+
+subroutine davidson_collector_complex(zmq_to_qp_run_socket, zmq_socket_pull, v0, s0, sze, N_st)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Routine collecting the results of the workers in Davidson's algorithm.
+  END_DOC
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  integer, intent(in)            :: sze, N_st
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+
+  complex*16      ,intent(inout) :: v0(sze, N_st)
+  complex*16      ,intent(inout) :: s0(sze, N_st)
+
+  integer                          :: more, task_id, imin, imax
+
+  complex*16, allocatable        :: v_t(:,:), s_t(:,:)
+  logical :: sending
+  integer :: i,j
+  integer, external :: zmq_delete_task_async_send
+  integer, external :: zmq_delete_task_async_recv
+
+  allocate(v_t(N_st,N_det), s_t(N_st,N_det))
+  v0 = (0.d0,0.d0)
+  s0 = (0.d0,0.d0)
+  more = 1
+  sending = .False.
+  do while (more == 1)
+    call davidson_pull_results_complex(zmq_socket_pull, v_t, s_t, imin, imax, task_id)
+    if (zmq_delete_task_async_send(zmq_to_qp_run_socket,task_id,sending) == -1) then
+      stop 'davidson: Unable to delete task (send)'
+    endif
+    do j=1,N_st
+      do i=imin,imax
+        v0(i,j) = v0(i,j) + v_t(j,i)
+        s0(i,j) = s0(i,j) + s_t(j,i)
+      enddo
+    enddo
+    if (zmq_delete_task_async_recv(zmq_to_qp_run_socket,more,sending) == -1) then
+      stop 'davidson: Unable to delete task (recv)'
+    endif
+  end do
+  deallocate(v_t,s_t)
+
+end subroutine
+
+
+
+subroutine h_s2_u_0_nstates_zmq_complex(v_0,s_0,u_0,N_st,sze)
+  !todo: maybe make separate zmq_put_psi_complex?
+  !print*,irp_here,' not implemented for complex'
+  !stop -1
+  use omp_lib
+  use bitmasks
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+  ! Computes $v_0 = H | u_0\rangle$ and $s_0 = S^2  | u_0\rangle$
+  !
+  ! n : number of determinants
+  !
+  ! H_jj : array of $\langle j | H | j \rangle$
+  !
+  ! S2_jj : array of $\langle j | S^2 | j \rangle$
+  END_DOC
+  integer, intent(in)            :: N_st, sze
+  complex*16, intent(out)        :: v_0(sze,N_st), s_0(sze,N_st)
+  complex*16, intent(inout)      :: u_0(sze,N_st)
+  integer                        :: i,j,k
+  integer                        :: ithread
+  complex*16, allocatable        :: u_tc(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_tc
+  integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
+  PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
+  PROVIDE psi_bilinear_matrix_transp_values_complex psi_bilinear_matrix_values_complex psi_bilinear_matrix_columns_loc
+  PROVIDE ref_bitmask_energy nproc
+  PROVIDE mpi_initialized
+
+  call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'davidson')
+
+!  integer :: N_states_diag_save
+!  N_states_diag_save = N_states_diag
+!  N_states_diag = N_st
+  if (zmq_put_N_states_diag(zmq_to_qp_run_socket, 1) == -1) then
+    stop 'Unable to put N_states_diag on ZMQ server'
+  endif
+
+  if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
+    stop 'Unable to put psi on ZMQ server'
+  endif
+  energy = 0.d0
+  if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then
+    stop 'Unable to put energy on ZMQ server'
+  endif
+
+
+  ! Create tasks
+  ! ============
+
+  integer :: istep, imin, imax, ishift, ipos
+  integer, external :: add_task_to_taskserver
+  integer, parameter :: tasksize=10000
+  character*(100000) :: task
+  istep=1
+  ishift=0
+  imin=1
+
+
+  ipos=1
+  do imin=1,N_det,tasksize
+    imax = min(N_det,imin-1+tasksize)
+    do ishift=0,istep-1
+      write(task(ipos:ipos+50),'(4(I11,1X),1X,1A)') imin, imax, ishift, istep, '|'
+      ipos = ipos+50
+      if (ipos > 100000-50) then
+        if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+          stop 'Unable to add task'
+        endif
+        ipos=1
+      endif
+    enddo
+  enddo
+
+  if (ipos > 1) then
+    if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+      stop 'Unable to add task'
+    endif
+    ipos=1
+  endif
+
+  allocate(u_tc(N_st,N_det))
+  do k=1,N_st
+    call cdset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
+  enddo
+
+  call cdtranspose(                                                   &
+      u_0,                                                           &
+      size(u_0, 1),                                                  &
+      u_tc,                                                           &
+      size(u_tc, 1),                                                  &
+      N_det, N_st)
+
+
+  ASSERT (N_st == N_states_diag)
+  ASSERT (sze >= N_det)
+
+  integer :: rc, ni, nj
+  integer*8 :: rc8
+  double precision :: energy(N_st)
+
+  integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag
+  integer, external :: zmq_put_cdmatrix
+  if (size(u_tc,kind=8) < 8388608_8) then
+    ni = size(u_tc)
+    nj = 1
+  else
+    ni = 8388608
+    nj = int(size(u_tc,kind=8)/8388608_8,4) + 1
+  endif
+  ! Warning : dimensions are modified for efficiency, It is OK since we get the
+  ! full matrix
+  if (zmq_put_cdmatrix(zmq_to_qp_run_socket, 1, 'u_tc', u_tc, ni, nj, size(u_tc,kind=8)) == -1) then
+    stop 'Unable to put u_tc on ZMQ server'
+  endif
+
+  deallocate(u_tc)
+
+  integer, external :: zmq_set_running
+  if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+    print *,  irp_here, ': Failed in zmq_set_running'
+  endif
+
+  call omp_set_nested(.True.)
+  !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
+  ithread = omp_get_thread_num()
+  if (ithread == 0 ) then
+    call davidson_collector_complex(zmq_to_qp_run_socket, zmq_socket_pull, v_0, s_0, N_det, N_st)
+  else
+    call davidson_slave_inproc(1)
+  endif
+  !$OMP END PARALLEL
+  call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'davidson')
+
+  !$OMP PARALLEL
+  !$OMP SINGLE
+  do k=1,N_st
+    !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det)
+    call cdset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
+    !$OMP END TASK
+    !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det)
+    call cdset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
+    !$OMP END TASK
+    !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det)
+    call cdset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
+    !$OMP END TASK
+  enddo
+  !$OMP END SINGLE
+  !$OMP TASKWAIT
+  !$OMP END PARALLEL
+
+!  N_states_diag = N_states_diag_save
+!  SOFT_TOUCH N_states_diag
+end
+
diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f
index aa748628..ccde8965 100644
--- a/src/davidson/diagonalization_hs2_dressed.irp.f
+++ b/src/davidson/diagonalization_hs2_dressed.irp.f
@@ -33,9 +33,16 @@ BEGIN_PROVIDER [ integer, dressed_column_idx, (N_states) ]
  integer :: i
  double precision :: tmp
  integer, external :: idamax
+ if (is_complex) then
+   do i=1,N_states
+     !todo: check for complex
+     dressed_column_idx(i) = idamax(N_det, cdabs(psi_coef_complex(1,i)), 1)
+   enddo
+ else
  do i=1,N_states
    dressed_column_idx(i) = idamax(N_det, psi_coef(1,i), 1)
  enddo
+ endif
 END_PROVIDER
 
 subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_diag,Nint,dressing_state,converged)
@@ -721,7 +728,754 @@ end
 
 
 
+!==============================================================================!
+!                                                                              !
+!                                    Complex                                   !
+!                                                                              !
+!==============================================================================!
+
+subroutine davidson_diag_hs2_complex(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_diag,Nint,dressing_state,converged)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Davidson diagonalization.
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten
+  !   on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  ! Initial guess vectors are not necessarily orthonormal
+  END_DOC
+  integer, intent(in)            :: dim_in, sze, N_st, N_st_diag, Nint
+  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
+  complex*16, intent(inout)      :: u_in(dim_in,N_st_diag)
+  double precision, intent(out)  :: energies(N_st_diag), s2_out(N_st_diag)
+  integer, intent(in)            :: dressing_state
+  logical, intent(out)           :: converged
+  double precision, allocatable  :: H_jj(:)
+
+  double precision, external     :: diag_H_mat_elem, diag_S_mat_elem
+  integer                        :: i,k
+  ASSERT (N_st > 0)
+  ASSERT (sze > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  PROVIDE mo_two_e_integrals_in_map
+  allocate(H_jj(sze))
+
+  H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint)
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP  SHARED(sze,H_jj, dets_in,Nint)                    &
+      !$OMP  PRIVATE(i)
+  !$OMP DO SCHEDULE(static)
+  do i=2,sze
+    H_jj(i)  = diag_H_mat_elem(dets_in(1,1,i),Nint)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  if (dressing_state > 0) then
+    !todo: implement for complex
+    print*,irp_here,' not implemented for complex if dressing_state > 0'
+    stop -1
+    do k=1,N_st
+      do i=1,sze
+        H_jj(i)  += dble(u_in(i,k) * dressing_column_h(i,k))
+      enddo
+    enddo
+  endif
+
+  call davidson_diag_hjj_sjj_complex(dets_in,u_in,H_jj,S2_out,energies,dim_in,sze,N_st,N_st_diag,Nint,dressing_state,converged)
+  deallocate (H_jj)
+end
+
+
+subroutine davidson_diag_hjj_sjj_complex(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_st,N_st_diag_in,Nint,dressing_state,converged)
+  use bitmasks
+  use mmap_module
+  implicit none
+  BEGIN_DOC
+  ! Davidson diagonalization with specific diagonal elements of the H matrix
+  !
+  ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
+  !
+  ! S2_out : Output : s^2
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten
+  !   on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze
+  !
+  ! Initial guess vectors are not necessarily orthonormal
+  END_DOC
+  integer, intent(in)            :: dim_in, sze, N_st, N_st_diag_in, Nint
+  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
+  double precision,  intent(in)  :: H_jj(sze)
+  integer, intent(in)            :: dressing_state
+  double precision, intent(inout) :: s2_out(N_st_diag_in)
+  complex*16,      intent(inout) :: u_in(dim_in,N_st_diag_in)
+  double precision, intent(out)  :: energies(N_st_diag_in)
+
+  integer                        :: iter, N_st_diag
+  integer                        :: i,j,k,l,m
+  logical, intent(inout)         :: converged
+
+  double precision, external     :: u_dot_u_complex
+  complex*16, external           :: u_dot_v_complex
+
+  integer                        :: k_pairs, kl
+
+  integer                        :: iter2, itertot
+  double precision, allocatable  :: lambda(:), s2(:)
+  complex*16, allocatable  :: y(:,:), h(:,:), h_p(:,:)
+  complex*8, allocatable              :: y_s(:,:)
+  complex*16, allocatable  :: s_(:,:), s_tmp(:,:)
+  double precision               :: diag_h_mat_elem
+  double precision, allocatable  :: residual_norm(:)
+  character*(16384)              :: write_buffer
+  double precision               :: to_print(3,N_st)
+  double precision               :: cpu, wall
+  integer                        :: shift, shift2, itermax, istate
+  double precision               :: r1, r2, alpha
+  logical                        :: state_ok(N_st_diag_in*davidson_sze_max)
+  integer                        :: nproc_target
+  integer                        :: order(N_st_diag_in)
+  double precision               :: cmax
+  double precision, allocatable  :: overlap(:,:)
+  complex*16, allocatable  :: y_tmp(:,:)
+  complex*16, allocatable  :: S_d(:,:)
+  complex*16, allocatable  :: U(:,:)
+  complex*16, pointer      :: W(:,:)
+  complex*8, pointer                  :: S(:,:)
+  logical                        :: disk_based
+  double precision               :: energy_shift(N_st_diag_in*davidson_sze_max)
+
+  include 'constants.include.F'
+
+  N_st_diag = N_st_diag_in
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, y_s, S_d, h, lambda
+  if (N_st_diag*3 > sze) then
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_full to ', N_st_diag*3
+    stop -1
+  endif
+
+  itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1
+  itertot = 0
+
+  if (state_following) then
+    allocate(overlap(N_st_diag*itermax, N_st_diag*itermax))
+    allocate(y_tmp(N_st_diag*itermax, N_st_diag*itermax))
+  else
+    allocate(overlap(1,1))
+    allocate(y_tmp(1,1))  ! avoid 'if' for deallocate
+  endif
+  overlap = 0.d0
+  y_tmp = (0.d0,0.d0)
+
+  !todo: provide psi_bilinear_matrix_values? (unlinked now)
+  PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2
+
+  call write_time(6)
+  write(6,'(A)') ''
+  write(6,'(A)') 'Davidson Diagonalization'
+  write(6,'(A)') '------------------------'
+  write(6,'(A)') ''
+
+  ! Find max number of cores to fit in memory
+  ! -----------------------------------------
+
+  nproc_target = nproc
+  double precision :: rss
+  integer :: maxab
+  maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
+
+  m=1
+  disk_based = .False.
+  call resident_memory(rss)
+  do
+    !r1 = 8.d0 *                                   &! bytes
+    !     ( dble(sze)*(N_st_diag*itermax)          &! U
+    !     + 1.5d0*dble(sze*m)*(N_st_diag*itermax)  &! W,S
+    !     + 1.d0*dble(sze)*(N_st_diag)             &! S_d
+    !     + 4.5d0*(N_st_diag*itermax)**2           &! h,y,y_s,s_,s_tmp
+    !     + 2.d0*(N_st_diag*itermax)               &! s2,lambda
+    !     + 1.d0*(N_st_diag)                       &! residual_norm
+    !                                               ! In H_S2_u_0_nstates_zmq
+    !     + 3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on collector
+    !     + 3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on slave
+    !     + 0.5d0*maxab                            &! idx0 in H_S2_u_0_nstates_openmp_work_*
+    !     + nproc_target *                         &! In OMP section
+    !       ( 1.d0*(N_int*maxab)                   &! buffer
+    !       + 3.5d0*(maxab) )                      &! singles_a, singles_b, doubles, idx
+    !     ) / 1024.d0**3
+    r1 = 8.d0 *                                   &! bytes
+         ( 2*dble(sze)*(N_st_diag*itermax)          &! U
+         + 2*1.5d0*dble(sze*m)*(N_st_diag*itermax)  &! W,S
+         + 2*1.d0*dble(sze)*(N_st_diag)             &! S_d
+         + 2*4.5d0*(N_st_diag*itermax)**2           &! h,y,y_s,s_,s_tmp
+         + 2.d0*(N_st_diag*itermax)               &! s2,lambda
+         + 1.d0*(N_st_diag)                       &! residual_norm
+                                                   ! In H_S2_u_0_nstates_zmq
+         + 2*3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on collector
+         + 2*3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on slave
+         + 0.5d0*maxab                            &! idx0 in H_S2_u_0_nstates_openmp_work_*
+         + nproc_target *                         &! In OMP section
+           ( 1.d0*(N_int*maxab)                   &! buffer
+           + 3.5d0*(maxab) )                      &! singles_a, singles_b, doubles, idx
+         ) / 1024.d0**3
+
+    if (nproc_target == 0) then
+      call check_mem(r1,irp_here)
+      nproc_target = 1
+      exit
+    endif
+
+    if (r1+rss < qp_max_mem) then
+      exit
+    endif
+
+    if (itermax > 4) then
+      itermax = itermax - 1
+    else if (m==1.and.disk_based_davidson) then
+      m=0
+      disk_based = .True.
+      itermax = 6
+    else
+      nproc_target = nproc_target - 1
+    endif
+
+  enddo
+  nthreads_davidson = nproc_target
+  TOUCH nthreads_davidson
+  call write_int(6,N_st,'Number of states')
+  call write_int(6,N_st_diag,'Number of states in diagonalization')
+  call write_int(6,sze,'Number of determinants')
+  call write_int(6,nproc_target,'Number of threads for diagonalization')
+  call write_double(6, r1, 'Memory(Gb)')
+  if (disk_based) then
+    print *, 'Using swap space to reduce RAM'
+  endif
+
+  !---------------
+
+  write(6,'(A)') ''
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ =========== ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = 'Iter'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//'       Energy          S^2       Residual '
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ =========== ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+
+  !todo: already resized, but do we need to change c_f_pointer for complex?
+  if (disk_based) then
+    ! Create memory-mapped files for W and S
+    type(c_ptr) :: ptr_w, ptr_s
+    integer :: fd_s, fd_w
+    call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),&
+        8*2, fd_w, .False., ptr_w)
+    call mmap(trim(ezfio_work_dir)//'davidson_s', (/int(sze,8),int(N_st_diag*itermax,8)/),&
+        4*2, fd_s, .False., ptr_s)
+    call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/))
+    call c_f_pointer(ptr_s, s, (/sze,N_st_diag*itermax/))
+  else
+    !allocate(W(sze,N_st_diag*itermax), S(sze,N_st_diag*itermax))
+    allocate(W(sze,N_st_diag*itermax))
+    allocate(S(sze,N_st_diag*itermax))
+  endif
+
+  !allocate(                                                          &
+  !    ! Large
+  !    U(sze,N_st_diag*itermax),                                      &
+  !    S_d(sze,N_st_diag),                                            &
+
+  !    ! Small
+  !    h(N_st_diag*itermax,N_st_diag*itermax),                        &
+  !    h_p(N_st_diag*itermax,N_st_diag*itermax),                      &
+  !    y(N_st_diag*itermax,N_st_diag*itermax),                        &
+  !    s_(N_st_diag*itermax,N_st_diag*itermax),                       &
+  !    s_tmp(N_st_diag*itermax,N_st_diag*itermax),                    &
+  !    residual_norm(N_st_diag),                                      &
+  !    s2(N_st_diag*itermax),                                         &
+  !    y_s(N_st_diag*itermax,N_st_diag*itermax),                      &
+  !    lambda(N_st_diag*itermax))
+      allocate(U(sze,N_st_diag*itermax))
+      allocate(S_d(sze,N_st_diag))
+      allocate(h(N_st_diag*itermax,N_st_diag*itermax))
+      allocate(h_p(N_st_diag*itermax,N_st_diag*itermax))
+      allocate(y(N_st_diag*itermax,N_st_diag*itermax))
+      allocate(s_(N_st_diag*itermax,N_st_diag*itermax))
+      allocate(s_tmp(N_st_diag*itermax,N_st_diag*itermax))
+      allocate(residual_norm(N_st_diag))
+      allocate(s2(N_st_diag*itermax))
+      allocate(y_s(N_st_diag*itermax,N_st_diag*itermax))
+      allocate(lambda(N_st_diag*itermax))
+
+  h = (0.d0,0.d0)
+  U = (0.d0,0.d0)
+  y = (0.d0,0.d0)
+  s_ = (0.d0,0.d0)
+  s_tmp = (0.d0,0.d0)
+  W = (0.d0,0.d0)
+  S = (0.e0,0.e0)
+  S_d = (0.d0,0.d0)
+  h_p = (0.d0,0.d0)
+  residual_norm = 0.d0
+  s2 = 0.d0
+  y_s = (0.e0,0.e0)
+  lambda = 0.d0
 
 
 
+  ASSERT (N_st > 0)
+  ASSERT (N_st_diag >= N_st)
+  ASSERT (sze > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+
+  ! Davidson iterations
+  ! ===================
+
+  converged = .False.
+
+  do k=N_st+1,N_st_diag
+    u_in(k,k) = (10.d0,0.d0)
+    do i=1,sze
+      call random_number(r1)
+      call random_number(r2)
+      r1 = dsqrt(-2.d0*dlog(r1))
+      r2 = dtwo_pi*r2
+      !todo: real or complex? rescale for complex? sqrt(2)?
+      u_in(i,k) = dcmplx(r1*dcos(r2),0.d0)
+      !u_in(i,k) = dcmplx(r1*dcos(r2),r1*dsin(r2))
+    enddo
+    u_in(k,k) = (10.d0,0.d0)
+  enddo
+  do k=1,N_st_diag
+    call normalize_complex(u_in(1,k),sze)
+  enddo
+
+  do k=1,N_st_diag
+    do i=1,sze
+      U(i,k) = u_in(i,k)
+    enddo
+  enddo
+
+
+  do while (.not.converged)
+    itertot = itertot+1
+    if (itertot == 8) then
+      exit
+    endif
+
+    do iter=1,itermax-1
+
+      shift  = N_st_diag*(iter-1)
+      shift2 = N_st_diag*iter
+
+      if ((iter > 1).or.(itertot == 1)) then
+        ! Compute |W_k> = \sum_i |i><i|H|u_k>
+        ! -----------------------------------
+
+        if (disk_based) then
+          call ortho_qr_unblocked_complex(U,size(U,1),sze,shift2)
+          call ortho_qr_unblocked_complex(U,size(U,1),sze,shift2)
+        else
+          call ortho_qr_complex(U,size(U,1),sze,shift2)
+          call ortho_qr_complex(U,size(U,1),sze,shift2)
+        endif
+
+        ! |W> = H|U>
+        ! |S_d> = S^2|U>
+        if ((sze > 100000).and.distributed_davidson) then
+            call h_s2_u_0_nstates_zmq_complex(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
+        else
+            call h_s2_u_0_nstates_openmp_complex(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
+        endif
+        S(1:sze,shift+1:shift+N_st_diag) = cmplx(S_d(1:sze,1:N_st_diag))
+      else
+         ! Already computed in update below
+         continue
+      endif
+
+!      if (dressing_state > 0) then
+!        !todo: implement for complex
+!        print*,irp_here,' not implemented for complex (dressed)'
+!        stop -1
+!!
+!!        if (N_st == 1) then
+!!
+!!          l = dressed_column_idx(1)
+!!          complex*16 :: f
+!!          !todo: check for complex
+!!          f = (1.0d0,0.d0)/psi_coef(l,1)
+!!          do istate=1,N_st_diag
+!!            do i=1,sze
+!!              !todo: conjugate?
+!!              W(i,shift+istate) += dressing_column_h_complex(i,1) *f * U(l,shift+istate)
+!!              W(l,shift+istate) += dressing_column_h_complex(i,1) *f * U(i,shift+istate)
+!!              S(i,shift+istate) += cmplx(dressing_column_s_complex(i,1) *f * U(l,shift+istate))
+!!              S(l,shift+istate) += cmplx(dressing_column_s_complex(i,1) *f * U(i,shift+istate))
+!!            enddo
+!!
+!!          enddo
+!!
+!!        else
+!!
+!!          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
+!!            psi_coef, size(psi_coef,1), &
+!!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!!
+!!          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
+!!            dressing_column_h, size(dressing_column_h,1), s_tmp, size(s_tmp,1), &
+!!            1.d0, W(1,shift+1), size(W,1))
+!!
+!!          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
+!!            dressing_column_s, size(dressing_column_s,1), s_tmp, size(s_tmp,1), &
+!!            1.d0, S_d, size(S_d,1))
+!!
+!!
+!!          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
+!!            dressing_column_h, size(dressing_column_h,1), &
+!!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!!
+!!          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
+!!            psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), &
+!!            1.d0, W(1,shift+1), size(W,1))
+!!
+!!          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
+!!            dressing_column_s, size(dressing_column_s,1), &
+!!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!!
+!!          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
+!!            psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), &
+!!            1.d0, S_d, size(S_d,1))
+!!
+!!        endif
+!      endif
+
+      ! Compute s_kl = <u_k | S_l> = <u_k| S2 |u_l>
+      ! -------------------------------------------
+
+       !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k) COLLAPSE(2)
+       do j=1,shift2
+         do i=1,shift2
+           s_(i,j) = (0.d0,0.d0)
+           do k=1,sze
+             s_(i,j) = s_(i,j) + dconjg(U(k,i)) * dcmplx(S(k,j))
+           enddo
+          enddo
+        enddo
+        !$OMP END PARALLEL DO
+
+      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
+      ! -------------------------------------------
+
+      !todo: why not size(h,1)?
+      call zgemm('C','N', shift2, shift2, sze,                       &
+          (1.d0,0.d0), U, size(U,1), W, size(W,1),                   &
+          (0.d0,0.d0), h, size(h,1))
+
+      ! Penalty method
+      ! --------------
+
+      if (s2_eig) then
+        h_p = s_
+        do k=1,shift2
+          h_p(k,k) = h_p(k,k) + (S_z2_Sz - expected_s2)
+        enddo
+        if (only_expected_s2) then
+          alpha = 0.1d0
+          h_p = h + alpha*h_p
+        else
+          alpha = 0.0001d0
+          h_p = h + alpha*h_p
+        endif
+      else
+        h_p = h
+        alpha = 0.d0
+      endif
+
+      ! Diagonalize h_p
+      ! ---------------
+
+      call lapack_diag_complex(lambda,y,h_p,size(h_p,1),shift2)
+
+      ! Compute Energy for each eigenvector
+      ! -----------------------------------
+
+      call zgemm('N','N',shift2,shift2,shift2,                       &
+          (1.d0,0.d0), h, size(h,1), y, size(y,1),                   &
+          (0.d0,0.d0), s_tmp, size(s_tmp,1))
+
+      call zgemm('C','N',shift2,shift2,shift2,                       &
+          (1.d0,0.d0), y, size(y,1), s_tmp, size(s_tmp,1),           &
+          (0.d0,0.d0), h, size(h,1))
+
+      do k=1,shift2
+        lambda(k) = dble(h(k,k))
+      enddo
+
+      ! Compute S2 for each eigenvector
+      ! -------------------------------
+
+      call zgemm('N','N',shift2,shift2,shift2,                       &
+          (1.d0,0.d0), s_, size(s_,1), y, size(y,1),                 &
+          (0.d0,0.d0), s_tmp, size(s_tmp,1))
+
+      call zgemm('C','N',shift2,shift2,shift2,                       &
+          (1.d0,0.d0), y, size(y,1), s_tmp, size(s_tmp,1),           &
+          (0.d0,0.d0), s_, size(s_,1))
+
+      do k=1,shift2
+        s2(k) = dble(s_(k,k)) + S_z2_Sz
+      enddo
+
+      if (only_expected_s2) then
+          do k=1,shift2
+            state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          enddo
+      else
+        do k=1,size(state_ok)
+          state_ok(k) = .True.
+        enddo
+      endif
+
+      do k=1,shift2
+        if (.not. state_ok(k)) then
+          do l=k+1,shift2
+            if (state_ok(l)) then
+              call zswap(shift2, y(1,k), 1, y(1,l), 1)
+              call dswap(1, s2(k), 1, s2(l), 1)
+              call dswap(1, lambda(k), 1, lambda(l), 1)
+              state_ok(k) = .True.
+              state_ok(l) = .False.
+              exit
+            endif
+          enddo
+        endif
+      enddo
+
+      if (state_following) then
+
+        overlap = -1.d0
+        do k=1,shift2
+          do i=1,shift2
+            overlap(k,i) = cdabs(y(k,i))
+          enddo
+        enddo
+        do k=1,N_st
+          cmax = -1.d0
+          do i=1,N_st
+            if (overlap(i,k) > cmax) then
+              cmax = overlap(i,k)
+              order(k) = i
+            endif
+          enddo
+          do i=1,N_st_diag
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        y_tmp = y
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = y_tmp(1:shift2,l)
+          endif
+        enddo
+        do k=1,N_st
+          overlap(k,1) = lambda(k)
+          overlap(k,2) = s2(k)
+        enddo
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            lambda(k) = overlap(l,1)
+            s2(k) = overlap(l,2)
+          endif
+        enddo
+
+      endif
+
+
+      ! Express eigenvectors of h in the determinant basis
+      ! --------------------------------------------------
+      !todo: check for complex
+      call zgemm('N','N', sze, N_st_diag, shift2,                    &
+          (1.d0,0.d0), U, size(U,1), y, size(y,1), (0.d0,0.d0), U(1,shift2+1), size(U,1))
+      call zgemm('N','N', sze, N_st_diag, shift2,                    &
+          (1.d0,0.d0), W, size(W,1), y, size(y,1), (0.d0,0.d0), W(1,shift2+1), size(W,1))
+
+      y_s(:,:) = cmplx(y(:,:))
+      call cgemm('N','N', sze, N_st_diag, shift2,                    &
+          (1.e0,0.e0), S, size(S,1), y_s, size(y_s,1), (0.e0,0.e0), S(1,shift2+1), size(S,1))
+
+      ! Compute residual vector and davidson step
+      ! -----------------------------------------
+
+      !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      do k=1,N_st_diag
+        do i=1,sze
+          U(i,shift2+k) =  &
+            (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
+              /max(H_jj(i) - lambda (k),1.d-2)
+        enddo
+
+        if (k <= N_st) then
+          residual_norm(k) = u_dot_u_complex(U(1,shift2+k),sze)
+          to_print(1,k) = lambda(k) + nuclear_repulsion
+          to_print(2,k) = s2(k)
+          to_print(3,k) = residual_norm(k)
+        endif
+      enddo
+      !$OMP END PARALLEL DO
+
+
+      if ((itertot>1).and.(iter == 1)) then
+        !don't print 
+        continue
+      else
+        write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:3,1:N_st)
+      endif
+
+      ! Check convergence
+      if (iter > 1) then
+          converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2
+      endif   
+      
+
+      do k=1,N_st
+        if (residual_norm(k) > 1.e8) then
+          print *, 'Davidson failed'
+          stop -1
+        endif
+      enddo
+      if (converged) then
+        exit
+      endif
+
+      logical, external :: qp_stop
+      if (qp_stop()) then
+        converged = .True.
+        exit
+      endif
+
+
+    enddo
+
+    ! Re-contract U and update S and W
+    ! --------------------------------
+
+    call cgemm('N','N', sze, N_st_diag, shift2, (1.e0,0.e0),      &
+        S, size(S,1), y_s, size(y_s,1), (0.e0,0.e0), S(1,shift2+1), size(S,1))
+    do k=1,N_st_diag
+      do i=1,sze
+        S(i,k) = S(i,shift2+k)
+      enddo
+    enddo
+
+    call zgemm('N','N', sze, N_st_diag, shift2, (1.d0,0.d0),      &
+        W, size(W,1), y, size(y,1), (0.d0,0.d0), u_in, size(u_in,1))
+    do k=1,N_st_diag
+      do i=1,sze
+        W(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    call zgemm('N','N', sze, N_st_diag, shift2, (1.d0,0.d0),      &
+        U, size(U,1), y, size(y,1), (0.d0,0.d0), u_in, size(u_in,1))
+    do k=1,N_st_diag
+      do i=1,sze
+        U(i,k) = u_in(i,k)
+      enddo
+    enddo
+    if (disk_based) then
+      call ortho_qr_unblocked_complex(U,size(U,1),sze,N_st_diag)
+      call ortho_qr_unblocked_complex(U,size(U,1),sze,N_st_diag)
+    else
+      call ortho_qr_complex(U,size(U,1),sze,N_st_diag)
+      call ortho_qr_complex(U,size(U,1),sze,N_st_diag)
+    endif
+    do j=1,N_st_diag
+      k=1
+      do while ((k<sze).and.(U(k,j) == (0.d0,0.d0)))
+        k = k+1
+      enddo
+      !if (U(k,j) * u_in(k,j) < 0.d0) then
+      !todo: complex! maybe change criterion here?
+      ! if U is close to u_in, then arg(conjg(U)*u_in) will be near zero
+      if (dble(dconjg(U(k,j)) * u_in(k,j)) < 0.d0) then
+        do i=1,sze
+          W(i,j) = -W(i,j)
+          S(i,j) = -S(i,j)
+        enddo
+      endif
+    enddo
+    do j=1,N_st_diag
+      do i=1,sze
+        S_d(i,j) = cmplx(S(i,j))
+      enddo
+    enddo
+
+  enddo
+
+  do k=1,N_st_diag
+    energies(k) = lambda(k)
+    s2_out(k) = s2(k)
+  enddo
+  write_buffer = '======'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ =========== ==========='
+  enddo
+  write(6,'(A)') trim(write_buffer)
+  write(6,'(A)') ''
+  call write_time(6)
+
+  if (disk_based)then
+    ! Remove temp files
+    integer, external :: getUnitAndOpen
+    call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 2*8, fd_w, ptr_w )
+    fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r')
+    close(fd_w,status='delete')
+    call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 2*4, fd_s, ptr_s )
+    fd_s = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_s','r')
+    close(fd_s,status='delete')
+  else
+    deallocate(W,S)
+  endif
+
+  deallocate (                                                       &
+      residual_norm,                                                 &
+      U, overlap, y_tmp,                                             &
+      h, y_s, S_d,                                                   &
+      y, s_, s_tmp,                                                  &
+      lambda                                                         &
+      )
+  FREE nthreads_davidson
+end
+
 
diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f
index 33e9478a..1dde4c63 100644
--- a/src/davidson/diagonalize_ci.irp.f
+++ b/src/davidson/diagonalize_ci.irp.f
@@ -20,8 +20,21 @@ BEGIN_PROVIDER [ double precision, CI_energy, (N_states_diag) ]
 END_PROVIDER
 
  BEGIN_PROVIDER [ double precision, CI_electronic_energy, (N_states_diag) ]
-&BEGIN_PROVIDER [ double precision, CI_eigenvectors, (N_det,N_states_diag) ]
 &BEGIN_PROVIDER [ double precision, CI_s2, (N_states_diag) ]
+  implicit none
+  if (is_complex) then
+    ci_s2(1:N_states_diag) = ci_s2_complex(1:N_states_diag)
+    ci_electronic_energy(1:N_states_diag) = ci_electronic_energy_complex(1:N_states_diag)
+  else
+    ci_s2(1:N_states_diag) = ci_s2_real(1:N_states_diag)
+    ci_electronic_energy(1:N_states_diag) = ci_electronic_energy_real(1:N_states_diag)
+  endif
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ double precision, CI_electronic_energy_real, (N_states_diag) ]
+&BEGIN_PROVIDER [ double precision, CI_eigenvectors, (N_det,N_states_diag) ]
+&BEGIN_PROVIDER [ double precision, CI_s2_real, (N_states_diag) ]
    BEGIN_DOC
    ! Eigenvectors/values of the |CI| matrix
    END_DOC
@@ -57,8 +70,8 @@ END_PROVIDER
 
    if (diag_algorithm == "Davidson") then
 
-     call davidson_diag_HS2(psi_det,CI_eigenvectors, CI_s2, &
-         size(CI_eigenvectors,1),CI_electronic_energy,               &
+     call davidson_diag_HS2(psi_det,CI_eigenvectors, CI_s2_real, &
+         size(CI_eigenvectors,1),CI_electronic_energy_real,               &
          N_det,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged)
 
      integer :: N_states_diag_save
@@ -75,17 +88,17 @@ END_PROVIDER
         allocate (CI_eigenvectors_tmp (N_det,N_states_diag) )
         allocate (CI_s2_tmp (N_states_diag) )
 
-        CI_electronic_energy_tmp(1:N_states_diag_save) = CI_electronic_energy(1:N_states_diag_save)
+        CI_electronic_energy_tmp(1:N_states_diag_save) = CI_electronic_energy_real(1:N_states_diag_save)
         CI_eigenvectors_tmp(1:N_det,1:N_states_diag_save) = CI_eigenvectors(1:N_det,1:N_states_diag_save)
-        CI_s2_tmp(1:N_states_diag_save) = CI_s2(1:N_states_diag_save)
+        CI_s2_tmp(1:N_states_diag_save) = CI_s2_real(1:N_states_diag_save)
 
         call davidson_diag_HS2(psi_det,CI_eigenvectors_tmp, CI_s2_tmp, &
             size(CI_eigenvectors_tmp,1),CI_electronic_energy_tmp,               &
             N_det,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged)
 
-        CI_electronic_energy(1:N_states_diag_save) = CI_electronic_energy_tmp(1:N_states_diag_save)
+        CI_electronic_energy_real(1:N_states_diag_save) = CI_electronic_energy_tmp(1:N_states_diag_save)
         CI_eigenvectors(1:N_det,1:N_states_diag_save) = CI_eigenvectors_tmp(1:N_det,1:N_states_diag_save)
-        CI_s2(1:N_states_diag_save) = CI_s2_tmp(1:N_states_diag_save)
+        CI_s2_real(1:N_states_diag_save) = CI_s2_tmp(1:N_states_diag_save)
 
         deallocate (CI_electronic_energy_tmp)
         deallocate (CI_eigenvectors_tmp)
@@ -110,7 +123,7 @@ END_PROVIDER
          H_prime(j,j) = H_prime(j,j) - alpha*expected_s2
        enddo
        call lapack_diag(eigenvalues,eigenvectors,H_prime,size(H_prime,1),N_det)
-       CI_electronic_energy(:) = 0.d0
+       CI_electronic_energy_real(:) = 0.d0
        i_state = 0
        allocate (s2_eigvalues(N_det))
        allocate(index_good_state_array(N_det),good_state_array(N_det))
@@ -141,8 +154,8 @@ END_PROVIDER
            do i=1,N_det
              CI_eigenvectors(i,j) = eigenvectors(i,index_good_state_array(j))
            enddo
-           CI_electronic_energy(j) = eigenvalues(index_good_state_array(j))
-           CI_s2(j) = s2_eigvalues(index_good_state_array(j))
+           CI_electronic_energy_real(j) = eigenvalues(index_good_state_array(j))
+           CI_s2_real(j) = s2_eigvalues(index_good_state_array(j))
          enddo
          i_other_state = 0
          do j = 1, N_det
@@ -154,8 +167,8 @@ END_PROVIDER
            do i=1,N_det
              CI_eigenvectors(i,i_state+i_other_state) = eigenvectors(i,j)
            enddo
-           CI_electronic_energy(i_state+i_other_state) = eigenvalues(j)
-           CI_s2(i_state+i_other_state) = s2_eigvalues(i_state+i_other_state)
+           CI_electronic_energy_real(i_state+i_other_state) = eigenvalues(j)
+           CI_s2_real(i_state+i_other_state) = s2_eigvalues(i_state+i_other_state)
          enddo
 
        else
@@ -172,8 +185,8 @@ END_PROVIDER
            do i=1,N_det
              CI_eigenvectors(i,j) = eigenvectors(i,j)
            enddo
-           CI_electronic_energy(j) = eigenvalues(j)
-           CI_s2(j) = s2_eigvalues(j)
+           CI_electronic_energy_real(j) = eigenvalues(j)
+           CI_s2_real(j) = s2_eigvalues(j)
          enddo
        endif
        deallocate(index_good_state_array,good_state_array)
@@ -181,22 +194,22 @@ END_PROVIDER
      else
        call lapack_diag(eigenvalues,eigenvectors,                    &
            H_matrix_all_dets,size(H_matrix_all_dets,1),N_det)
-       CI_electronic_energy(:) = 0.d0
-       call u_0_S2_u_0(CI_s2,eigenvectors,N_det,psi_det,N_int,       &
-           min(N_det,N_states_diag),size(eigenvectors,1))
+       CI_electronic_energy_real(:) = 0.d0
+       call u_0_S2_u_0(CI_s2_real,eigenvectors,N_det,psi_det,N_int,&
+          min(N_det,N_states_diag),size(eigenvectors,1))
        ! Select the "N_states_diag" states of lowest energy
        do j=1,min(N_det,N_states_diag)
          do i=1,N_det
            CI_eigenvectors(i,j) = eigenvectors(i,j)
          enddo
-         CI_electronic_energy(j) = eigenvalues(j)
+         CI_electronic_energy_real(j) = eigenvalues(j)
        enddo
      endif
      do k=1,N_states_diag
-       CI_electronic_energy(k) = 0.d0
+       CI_electronic_energy_real(k) = 0.d0
        do j=1,N_det
          do i=1,N_det
-           CI_electronic_energy(k) +=                                &
+           CI_electronic_energy_real(k) +=                                &
                CI_eigenvectors(i,k) * CI_eigenvectors(j,k) *         &
                H_matrix_all_dets(i,j)
          enddo
@@ -207,7 +220,225 @@ END_PROVIDER
 
 END_PROVIDER
 
-subroutine diagonalize_CI
+ BEGIN_PROVIDER [ double precision, CI_electronic_energy_complex, (N_states_diag) ]
+&BEGIN_PROVIDER [ complex*16, CI_eigenvectors_complex, (N_det,N_states_diag) ]
+&BEGIN_PROVIDER [ double precision, CI_s2_complex, (N_states_diag) ]
+   BEGIN_DOC
+   ! Eigenvectors/values of the |CI| matrix
+   END_DOC
+   implicit none
+   double precision               :: ovrlp
+   complex*16                     :: u_dot_v_complex
+   integer                        :: i_good_state
+   integer, allocatable           :: index_good_state_array(:)
+   logical, allocatable           :: good_state_array(:)
+   double precision, allocatable  :: s2_values_tmp(:)
+   integer                        :: i_other_state
+   double precision, allocatable  :: eigenvalues(:)
+   complex*16, allocatable        :: eigenvectors(:,:), H_prime(:,:)
+   integer                        :: i_state
+   double precision               :: e_0
+   integer                        :: i,j,k
+   double precision, allocatable  :: s2_eigvalues(:)
+   double precision, allocatable  :: e_array(:)
+   integer, allocatable           :: iorder(:)
+   logical                        :: converged
+
+   PROVIDE threshold_davidson nthreads_davidson
+   ! Guess values for the "N_states" states of the |CI| eigenvectors
+   do j=1,min(N_states,N_det)
+     do i=1,N_det
+       ci_eigenvectors_complex(i,j) = psi_coef_complex(i,j)
+     enddo
+   enddo
+
+   do j=min(N_states,N_det)+1,N_states_diag
+     do i=1,N_det
+       ci_eigenvectors_complex(i,j) = (0.d0,0.d0)
+     enddo
+   enddo
+
+   if (diag_algorithm == "Davidson") then
+
+     call davidson_diag_hs2_complex(psi_det,ci_eigenvectors_complex, ci_s2_complex, &
+         size(ci_eigenvectors_complex,1),ci_electronic_energy_complex,               &
+         N_det,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged)
+
+     integer :: N_states_diag_save
+     N_states_diag_save = N_states_diag
+     do while (.not.converged)
+        double precision, allocatable :: ci_electronic_energy_tmp (:)
+        complex*16, allocatable :: ci_eigenvectors_tmp (:,:)
+        double precision, allocatable :: ci_s2_tmp (:)
+
+        N_states_diag  *= 2
+        TOUCH N_states_diag
+
+        allocate (ci_electronic_energy_tmp (N_states_diag) )
+        allocate (ci_eigenvectors_tmp (N_det,N_states_diag) )
+        allocate (ci_s2_tmp (N_states_diag) )
+
+        ci_electronic_energy_tmp(1:N_states_diag_save) = ci_electronic_energy_complex(1:N_states_diag_save)
+        ci_eigenvectors_tmp(1:N_det,1:N_states_diag_save) = ci_eigenvectors_complex(1:N_det,1:N_states_diag_save)
+        ci_s2_tmp(1:N_states_diag_save) = ci_s2_complex(1:N_states_diag_save)
+
+        call davidson_diag_hs2_complex(psi_det,ci_eigenvectors_tmp, ci_s2_tmp, &
+            size(ci_eigenvectors_tmp,1),ci_electronic_energy_tmp,               &
+            N_det,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged)
+
+        ci_electronic_energy_complex(1:N_states_diag_save) = ci_electronic_energy_tmp(1:N_states_diag_save)
+        ci_eigenvectors_complex(1:N_det,1:N_states_diag_save) = ci_eigenvectors_tmp(1:N_det,1:N_states_diag_save)
+        ci_s2_complex(1:N_states_diag_save) = ci_s2_tmp(1:N_states_diag_save)
+
+        deallocate (ci_electronic_energy_tmp)
+        deallocate (ci_eigenvectors_tmp)
+        deallocate (ci_s2_tmp)
+     enddo
+     if (N_states_diag > N_states_diag_save) then
+       N_states_diag = N_states_diag_save
+       TOUCH N_states_diag
+     endif
+
+   else if (diag_algorithm == "Lapack") then
+
+     print *,  'Diagonalization of H using Lapack'
+     allocate (eigenvectors(size(h_matrix_all_dets_complex,1),N_det))
+     allocate (eigenvalues(N_det))
+     if (s2_eig) then
+       double precision, parameter :: alpha = 0.1d0
+       allocate (H_prime(N_det,N_det) )
+       H_prime(1:N_det,1:N_det) = h_matrix_all_dets_complex(1:N_det,1:N_det) +  &
+         alpha * s2_matrix_all_dets(1:N_det,1:N_det)
+       do j=1,N_det
+         H_prime(j,j) = H_prime(j,j) + alpha*(s_z2_sz - expected_s2)
+       enddo
+       call lapack_diag_complex(eigenvalues,eigenvectors,H_prime,size(H_prime,1),N_det)
+       ci_electronic_energy_complex(:) = 0.d0
+       i_state = 0
+       allocate (s2_eigvalues(N_det))
+       allocate(index_good_state_array(N_det),good_state_array(N_det))
+       good_state_array = .False.
+       call u_0_s2_u_0_complex(s2_eigvalues,eigenvectors,N_det,psi_det,N_int,&
+         N_det,size(eigenvectors,1))
+       if (only_expected_s2) then
+          do j=1,N_det
+            ! Select at least n_states states with S^2 values closed to "expected_s2"
+            if(dabs(s2_eigvalues(j)-expected_s2).le.0.5d0)then
+              i_state +=1
+              index_good_state_array(i_state) = j
+              good_state_array(j) = .True.
+            endif
+            if(i_state.eq.N_states) then
+              exit
+            endif
+          enddo
+       else
+          do j=1,N_det
+             index_good_state_array(j) = j
+             good_state_array(j) = .True.
+          enddo
+       endif
+       if(i_state .ne.0)then
+         ! Fill the first "i_state" states that have a correct S^2 value
+         do j = 1, i_state
+           do i=1,N_det
+             ci_eigenvectors_complex(i,j) = eigenvectors(i,index_good_state_array(j))
+           enddo
+           ci_electronic_energy_complex(j) = eigenvalues(index_good_state_array(j))
+           ci_s2_complex(j) = s2_eigvalues(index_good_state_array(j))
+         enddo
+         i_other_state = 0
+         do j = 1, N_det
+           if(good_state_array(j))cycle
+           i_other_state +=1
+           if(i_state+i_other_state.gt.n_states_diag)then
+             exit
+           endif
+           do i=1,N_det
+             ci_eigenvectors_complex(i,i_state+i_other_state) = eigenvectors(i,j)
+           enddo
+           ci_electronic_energy_complex(i_state+i_other_state) = eigenvalues(j)
+           ci_s2_complex(i_state+i_other_state) = s2_eigvalues(i_state+i_other_state)
+         enddo
+
+       else
+         print*,''
+         print*,'!!!!!!!!   WARNING  !!!!!!!!!'
+         print*,'  Within the ',N_det,'determinants selected'
+         print*,'  and the ',N_states_diag,'states requested'
+         print*,'  We did not find any state with S^2 values close to ',expected_s2
+         print*,'  We will then set the first N_states eigenvectors of the H matrix'
+         print*,'  as the ci_eigenvectors_complex'
+         print*,'  You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space'
+         print*,''
+         do j=1,min(N_states_diag,N_det)
+           do i=1,N_det
+             ci_eigenvectors_complex(i,j) = eigenvectors(i,j)
+           enddo
+           ci_electronic_energy_complex(j) = eigenvalues(j)
+           ci_s2_complex(j) = s2_eigvalues(j)
+         enddo
+       endif
+       deallocate(index_good_state_array,good_state_array)
+       deallocate(s2_eigvalues)
+     else
+       call lapack_diag_complex(eigenvalues,eigenvectors,                      &
+           H_matrix_all_dets_complex,size(H_matrix_all_dets_complex,1),N_det)
+       ci_electronic_energy_complex(:) = 0.d0
+       call u_0_S2_u_0_complex(ci_s2_complex,eigenvectors,N_det,psi_det,N_int,&
+          min(N_det,N_states_diag),size(eigenvectors,1))
+       ! Select the "N_states_diag" states of lowest energy
+       do j=1,min(N_det,N_states_diag)
+         do i=1,N_det
+           ci_eigenvectors_complex(i,j) = eigenvectors(i,j)
+         enddo
+         ci_electronic_energy_complex(j) = eigenvalues(j)
+       enddo
+     endif
+     do k=1,N_states_diag
+       ci_electronic_energy_complex(k) = 0.d0
+       do j=1,N_det
+         do i=1,N_det
+           !todo: accumulate imag parts to test? (should sum to zero)
+           ci_electronic_energy_complex(k) +=                                &
+               dble(dconjg(ci_eigenvectors_complex(i,k)) * ci_eigenvectors_complex(j,k) *  &
+               H_matrix_all_dets_complex(i,j))
+         enddo
+       enddo
+     enddo
+     deallocate(eigenvectors,eigenvalues)
+   endif
+
+END_PROVIDER
+
+subroutine diagonalize_ci
+  implicit none
+  if (is_complex) then
+    call diagonalize_ci_complex
+  else
+    call diagonalize_ci_real
+  endif
+end
+
+subroutine diagonalize_CI_complex
+  implicit none
+  BEGIN_DOC
+!  Replace the coefficients of the |CI| states by the coefficients of the
+!  eigenstates of the |CI| matrix.
+  END_DOC
+  integer :: i,j
+  do j=1,N_states
+    do i=1,N_det
+      psi_coef_complex(i,j) = ci_eigenvectors_complex(i,j)
+    enddo
+  enddo
+  psi_energy(1:N_states) = CI_electronic_energy(1:N_states)
+  psi_s2(1:N_states) = CI_s2(1:N_states)
+  !todo: touch ci_{s2,electronic_energy}?
+  SOFT_TOUCH psi_coef_complex CI_electronic_energy_complex ci_energy CI_eigenvectors_complex CI_s2_complex psi_energy psi_s2
+end
+
+subroutine diagonalize_CI_real
   implicit none
   BEGIN_DOC
 !  Replace the coefficients of the |CI| states by the coefficients of the
@@ -222,5 +453,6 @@ subroutine diagonalize_CI
   psi_energy(1:N_states) = CI_electronic_energy(1:N_states)
   psi_s2(1:N_states) = CI_s2(1:N_states)
 
-  SOFT_TOUCH psi_coef CI_electronic_energy CI_energy CI_eigenvectors CI_s2 psi_energy psi_s2
+  !todo: touch ci_{s2,electronic_energy}?
+  SOFT_TOUCH psi_coef CI_electronic_energy_real ci_energy CI_eigenvectors CI_s2_real psi_energy psi_s2
 end
diff --git a/src/davidson/print_e_components.irp.f b/src/davidson/print_e_components.irp.f
index ddf83474..fcea369d 100644
--- a/src/davidson/print_e_components.irp.f
+++ b/src/davidson/print_e_components.irp.f
@@ -5,7 +5,8 @@ subroutine print_energy_components()
   END_DOC
   integer, save :: ifirst = 0
   double precision :: Vee, Ven, Vnn, Vecp, T, f
-  integer  :: i,j,k
+  complex*16 :: fc
+  integer  :: i,j,k,kk
 
   Vnn = nuclear_repulsion
 
@@ -17,15 +18,32 @@ subroutine print_energy_components()
     Ven  = 0.d0
     Vecp = 0.d0
     T    = 0.d0
-
-    do j=1,mo_num
-      do i=1,mo_num
-        f = one_e_dm_mo_alpha(i,j,k) + one_e_dm_mo_beta(i,j,k)
-        Ven  = Ven  + f * mo_integrals_n_e(i,j)
-        Vecp = Vecp + f * mo_pseudo_integrals(i,j)
-        T    = T    + f * mo_kinetic_integrals(i,j)
+    
+    if (is_complex) then
+      do kk=1,kpt_num
+        do j=1,mo_num_per_kpt
+          do i=1,mo_num_per_kpt
+            !fc = one_e_dm_mo_alpha_complex(i,j,k) + one_e_dm_mo_beta_complex(i,j,k)
+            !Ven  = Ven  + dble(fc * mo_integrals_n_e_complex(j,i))
+            !Vecp = Vecp + dble(fc * mo_pseudo_integrals_complex(j,i))
+            !T    = T    + dble(fc * mo_kinetic_integrals_complex(j,i))
+            fc = one_e_dm_mo_alpha_kpts(i,j,kk,k) + one_e_dm_mo_beta_kpts(i,j,kk,k)
+            Ven  = Ven  + dble(fc * mo_integrals_n_e_kpts(j,i,kk))
+            Vecp = Vecp + dble(fc * mo_pseudo_integrals_kpts(j,i,kk))
+            T    = T    + dble(fc * mo_kinetic_integrals_kpts(j,i,kk))
+          enddo
+        enddo
       enddo
-    enddo
+    else
+      do j=1,mo_num
+        do i=1,mo_num
+          f = one_e_dm_mo_alpha(i,j,k) + one_e_dm_mo_beta(i,j,k)
+          Ven  = Ven  + f * mo_integrals_n_e(i,j)
+          Vecp = Vecp + f * mo_pseudo_integrals(i,j)
+          T    = T    + f * mo_kinetic_integrals(i,j)
+        enddo
+      enddo
+    endif
     Vee = psi_energy(k) - Ven - Vecp - T
     
     if (ifirst == 0) then
diff --git a/src/davidson/u0_h_u0.irp.f b/src/davidson/u0_h_u0.irp.f
index 302b8423..dabfb150 100644
--- a/src/davidson/u0_h_u0.irp.f
+++ b/src/davidson/u0_h_u0.irp.f
@@ -4,9 +4,14 @@
   BEGIN_DOC
 ! psi_energy(i) = $\langle \Psi_i | H | \Psi_i \rangle$
 ! 
-! psi_s2(i) = $\langle \Psi_i | S^2 | \Psi_i \rangle$
+! psi_s2(i) = $\langle \Psi_i | S^2 | \Psi_i \rangle$
+! real and complex
   END_DOC
-  call u_0_H_u_0(psi_energy,psi_s2,psi_coef,N_det,psi_det,N_int,N_states,psi_det_size)
+  if (is_complex) then
+    call u_0_h_u_0_complex(psi_energy,psi_s2,psi_coef_complex,N_det,psi_det,N_int,N_states,psi_det_size)
+  else
+    call u_0_H_u_0(psi_energy,psi_s2,psi_coef,N_det,psi_det,N_int,N_states,psi_det_size)
+  endif
   integer :: i
   do i=N_det+1,N_states
     psi_energy(i) = 0.d0
@@ -708,3 +713,705 @@ N_int;;
 END_TEMPLATE
 
 
+!==============================================================================!
+!                                                                              !
+!                                    Complex                                   !
+!                                                                              !
+!==============================================================================!
+
+subroutine u_0_H_u_0_complex(e_0,s_0,u_0,n,keys_tmp,Nint,N_st,sze)
+  !todo: check normalization for complex
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $E_0 = \frac{\langle u_0 | H | u_0 \rangle}{\langle u_0 | u_0 \rangle}$
+  !
+  ! and      $S_0 = \frac{\langle u_0 | S^2 | u_0 \rangle}{\langle u_0 | u_0 \rangle}$
+  !
+  ! n : number of determinants
+  !
+  END_DOC
+  integer, intent(in)             :: n,Nint, N_st, sze
+  double precision, intent(out)   :: e_0(N_st),s_0(N_st)
+  complex*16, intent(inout)       :: u_0(sze,N_st)
+  integer(bit_kind),intent(in)    :: keys_tmp(Nint,2,n)
+
+  complex*16, allocatable         :: v_0(:,:), s_vec(:,:), u_1(:,:)
+  double precision                :: u_dot_u_complex,diag_H_mat_elem
+  complex*16                      :: u_dot_v_complex
+  integer                         :: i,j, istate
+
+  if ((n > 100000).and.distributed_davidson) then
+    allocate (v_0(n,N_states_diag),s_vec(n,N_states_diag), u_1(n,N_states_diag))
+    u_1(:,:) = (0.d0,0.d0)
+    u_1(1:n,1:N_st) = u_0(1:n,1:N_st)
+    call h_s2_u_0_nstates_zmq_complex(v_0,s_vec,u_1,N_states_diag,n)
+  else if (n < n_det_max_full) then
+    allocate (v_0(n,N_st),s_vec(n,N_st), u_1(n,N_st))
+    v_0(:,:) = (0.d0,0.d0)
+    u_1(:,:) = (0.d0,0.d0)
+    s_vec(:,:) = (0.d0,0.d0)
+    u_1(1:n,1:N_st) = u_0(1:n,1:N_st)
+    do istate = 1,N_st
+      do j=1,n
+        do i=1,n
+          v_0(i,istate) = v_0(i,istate) + h_matrix_all_dets_complex(i,j) * u_0(j,istate)
+          s_vec(i,istate) = s_vec(i,istate) + S2_matrix_all_dets(i,j) * u_0(j,istate)
+        enddo
+      enddo
+    enddo
+  else
+    allocate (v_0(n,N_st),s_vec(n,N_st),u_1(n,N_st))
+    u_1(:,:) = (0.d0,0.d0)
+    u_1(1:n,1:N_st) = u_0(1:n,1:N_st)
+    call h_s2_u_0_nstates_openmp_complex(v_0,s_vec,u_1,N_st,n)
+  endif
+  u_0(1:n,1:N_st) = u_1(1:n,1:N_st)
+  deallocate(u_1)
+  double precision :: norm
+  !$OMP PARALLEL DO PRIVATE(i,norm) DEFAULT(SHARED)
+  do i=1,N_st
+    norm = u_dot_u_complex(u_0(1,i),n)
+    if (norm /= 0.d0) then
+      !todo: should these be normalized? is u_0 already normalized? (if so, where?)
+      e_0(i) = dble(u_dot_v_complex(v_0(1,i),u_0(1,i),n))
+      s_0(i) = dble(u_dot_v_complex(s_vec(1,i),u_0(1,i),n))
+    else
+      e_0(i) = 0.d0
+      s_0(i) = 0.d0
+    endif
+  enddo
+  !$OMP END PARALLEL DO
+  deallocate (s_vec, v_0)
+end
+
+
+subroutine H_S2_u_0_nstates_openmp_complex(v_0,s_0,u_0,N_st,sze)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $v_0 = H | u_0\rangle$ and $s_0 = S^2  | u_0\rangle$.
+  !
+  ! Assumes that the determinants are in psi_det
+  !
+  ! istart, iend, ishift, istep are used in ZMQ parallelization.
+  END_DOC
+  integer, intent(in)            :: N_st,sze
+  !complex*16, intent(out)    :: v_0(sze,N_st), s_0(sze,N_st)
+  !complex*16, intent(inout)  :: u_0(sze,N_st)
+  complex*16, intent(inout)  :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st)
+  integer :: k
+  complex*16, allocatable  :: u_t(:,:), v_t(:,:), s_t(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
+  allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det))
+
+  do k=1,N_st
+    call cdset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
+  enddo
+  v_t = (0.d0,0.d0)
+  s_t = (0.d0,0.d0)
+  call cdtranspose(                                                   &
+      u_0,                                                           &
+      size(u_0, 1),                                                  &
+      u_t,                                                           &
+      size(u_t, 1),                                                  &
+      N_det, N_st)
+
+  call h_s2_u_0_nstates_openmp_work_complex(v_t,s_t,u_t,N_st,sze,1,N_det,0,1)
+  deallocate(u_t)
+
+  call cdtranspose(                                                   &
+      v_t,                                                           &
+      size(v_t, 1),                                                  &
+      v_0,                                                           &
+      size(v_0, 1),                                                  &
+      N_st, N_det)
+  call cdtranspose(                                                   &
+      s_t,                                                           &
+      size(s_t, 1),                                                  &
+      s_0,                                                           &
+      size(s_0, 1),                                                  &
+      N_st, N_det)
+  deallocate(v_t,s_t)
+
+  do k=1,N_st
+    call cdset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
+    call cdset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
+    call cdset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
+  enddo
+
+end
+subroutine h_s2_u_0_nstates_openmp_work_complex(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $v_t = H | u_t\rangle$ and $s_t = S^2  | u_t\rangle$
+  !
+  ! Default should be 1,N_det,0,1
+  END_DOC
+  integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
+  complex*16, intent(in)   :: u_t(N_st,N_det)
+  complex*16, intent(out)  :: v_t(N_st,sze), s_t(N_st,sze)
+
+
+  PROVIDE ref_bitmask_energy N_int
+
+  select case (N_int)
+    case (1)
+      call H_S2_u_0_nstates_openmp_work_complex_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
+    case (2)
+      call H_S2_u_0_nstates_openmp_work_complex_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
+    case (3)
+      call H_S2_u_0_nstates_openmp_work_complex_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
+    case (4)
+      call H_S2_u_0_nstates_openmp_work_complex_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
+    case default
+      call H_S2_u_0_nstates_openmp_work_complex_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
+  end select
+end
+
+BEGIN_TEMPLATE
+
+subroutine H_S2_u_0_nstates_openmp_work_complex_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $v_t = H | u_t \\rangle$ and $s_t = S^2 | u_t\\rangle$
+  !
+  ! Default should be 1,N_det,0,1
+  END_DOC
+  integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
+  complex*16, intent(in)         :: u_t(N_st,N_det)
+  complex*16, intent(out)        :: v_t(N_st,sze), s_t(N_st,sze)
+
+  complex*16                     :: hij
+  double precision               :: sij
+  integer                        :: i,j,k,l,kk
+  integer                        :: k_a, k_b, l_a, l_b, m_a, m_b
+  integer                        :: istate
+  integer                        :: krow, kcol, krow_b, kcol_b
+  integer                        :: lrow, lcol
+  integer                        :: mrow, mcol
+  integer(bit_kind)              :: spindet($N_int)
+  integer(bit_kind)              :: tmp_det($N_int,2)
+  integer(bit_kind)              :: tmp_det2($N_int,2)
+  integer(bit_kind)              :: tmp_det3($N_int,2)
+  integer(bit_kind), allocatable :: buffer(:,:)
+  integer                        :: n_doubles
+  integer, allocatable           :: doubles(:)
+  integer, allocatable           :: singles_a(:)
+  integer, allocatable           :: singles_b(:)
+  integer, allocatable           :: idx(:), idx0(:)
+  integer                        :: maxab, n_singles_a, n_singles_b, kcol_prev
+  integer*8                      :: k8
+  logical                        :: compute_singles
+  integer*8                      :: last_found, left, right, right_max
+  double precision               :: rss, mem, ratio
+  complex*16, allocatable        :: utl(:,:)
+  integer, parameter             :: block_size=128
+
+!  call resident_memory(rss)
+!  mem = dble(singles_beta_csc_size) / 1024.d0**3
+!
+!  compute_singles = (mem+rss > qp_max_mem)
+!
+!  if (.not.compute_singles) then
+!    provide singles_beta_csc
+!  endif
+compute_singles=.True.
+
+  maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
+  allocate(idx0(maxab))
+
+  do i=1,maxab
+    idx0(i) = i
+  enddo
+
+  ! Prepare the array of all alpha single excitations
+  ! -------------------------------------------------
+
+  PROVIDE N_int nthreads_davidson
+  !$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson)        &
+      !$OMP   SHARED(psi_bilinear_matrix_rows, N_det,                &
+      !$OMP          psi_bilinear_matrix_columns,                    &
+      !$OMP          psi_det_alpha_unique, psi_det_beta_unique,      &
+      !$OMP          n_det_alpha_unique, n_det_beta_unique, N_int,   &
+      !$OMP          psi_bilinear_matrix_transp_rows,                &
+      !$OMP          psi_bilinear_matrix_transp_columns,             &
+      !$OMP          psi_bilinear_matrix_transp_order, N_st,         &
+      !$OMP          psi_bilinear_matrix_order_transp_reverse,       &
+      !$OMP          psi_bilinear_matrix_columns_loc,                &
+      !$OMP          psi_bilinear_matrix_transp_rows_loc,            &
+      !$OMP          istart, iend, istep, irp_here, v_t, s_t,        &
+      !$OMP          ishift, idx0, u_t, maxab, compute_singles,      &
+      !$OMP          singles_alpha_csc,singles_alpha_csc_idx,        &
+      !$OMP          singles_beta_csc,singles_beta_csc_idx)          &
+      !$OMP   PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i,     &
+      !$OMP          lcol, lrow, l_a, l_b, utl, kk,                  &
+      !$OMP          buffer, doubles, n_doubles,                     &
+      !$OMP          tmp_det2, hij, sij, idx, l, kcol_prev,          &
+      !$OMP          singles_a, n_singles_a, singles_b, ratio,       &
+      !$OMP          n_singles_b, k8, last_found,left,right,right_max)
+
+  ! Alpha/Beta double excitations
+  ! =============================
+
+  allocate( buffer($N_int,maxab),                                     &
+      singles_a(maxab),                                              &
+      singles_b(maxab),                                              &
+      doubles(maxab),                                                &
+      idx(maxab), utl(N_st,block_size))
+
+  kcol_prev=-1
+
+  ASSERT (iend <= N_det)
+  ASSERT (istart > 0)
+  ASSERT (istep  > 0)
+
+  !$OMP DO SCHEDULE(guided,64)
+  do k_a=istart+ishift,iend,istep
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+
+    if (kcol /= kcol_prev) then
+      tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+      if (compute_singles) then
+        call get_all_spin_singles_$N_int(                              &
+            psi_det_beta_unique, idx0,                                 &
+            tmp_det(1,2), N_det_beta_unique,                           &
+            singles_b, n_singles_b)
+      else
+        n_singles_b = 0
+        !DIR$ LOOP COUNT avg(1000)
+        do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1
+          n_singles_b = n_singles_b+1
+          singles_b(n_singles_b) = singles_beta_csc(k8)
+        enddo
+      endif
+    endif
+    kcol_prev = kcol
+
+    ! Loop over singly excited beta columns
+    ! -------------------------------------
+
+    !DIR$ LOOP COUNT avg(1000)
+    do i=1,n_singles_b
+      lcol = singles_b(i)
+
+      tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol)
+
+!---
+!      if (compute_singles) then
+
+        l_a = psi_bilinear_matrix_columns_loc(lcol)
+        ASSERT (l_a <= N_det)
+
+        !DIR$ UNROLL(8)
+        !DIR$ LOOP COUNT avg(50000)
+        do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
+          lrow = psi_bilinear_matrix_rows(l_a)
+          ASSERT (lrow <= N_det_alpha_unique)
+
+          buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow)  ! hot spot
+
+          ASSERT (l_a <= N_det)
+          idx(j) = l_a
+          l_a = l_a+1
+        enddo
+        j = j-1
+
+        call get_all_spin_singles_$N_int(                              &
+            buffer, idx, tmp_det(1,1), j,                              &
+            singles_a, n_singles_a )
+
+!-----
+!      else
+!
+! ! Search for singles
+!
+!call cpu_time(time0)
+! ! Right boundary
+!        l_a = psi_bilinear_matrix_columns_loc(lcol+1)-1
+!        ASSERT (l_a <= N_det)
+!        do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
+!          lrow = psi_bilinear_matrix_rows(l_a)
+!          ASSERT (lrow <= N_det_alpha_unique)
+!
+!          left = singles_alpha_csc_idx(krow)
+!          right_max = -1_8
+!          right = singles_alpha_csc_idx(krow+1)
+!          do while (right-left>0_8)
+!            k8 = shiftr(right+left,1)
+!            if (singles_alpha_csc(k8) > lrow) then
+!              right = k8
+!            else if (singles_alpha_csc(k8) < lrow) then
+!              left = k8 + 1_8
+!            else
+!              right_max = k8+1_8
+!              exit
+!            endif
+!          enddo
+!          if (right_max > 0_8) exit
+!          l_a = l_a-1
+!        enddo
+!        if (right_max < 0_8) right_max = singles_alpha_csc_idx(krow)
+!
+! ! Search
+!        n_singles_a = 0
+!        l_a = psi_bilinear_matrix_columns_loc(lcol)
+!        ASSERT (l_a <= N_det)
+!
+!        last_found = singles_alpha_csc_idx(krow)
+!        do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
+!          lrow = psi_bilinear_matrix_rows(l_a)
+!          ASSERT (lrow <= N_det_alpha_unique)
+!
+!          left = last_found
+!          right = right_max
+!          do while (right-left>0_8)
+!            k8 = shiftr(right+left,1)
+!            if (singles_alpha_csc(k8) > lrow) then
+!              right = k8
+!            else if (singles_alpha_csc(k8) < lrow) then
+!              left = k8 + 1_8
+!            else
+!              n_singles_a += 1
+!              singles_a(n_singles_a) = l_a
+!              last_found = k8+1_8
+!              exit
+!            endif
+!          enddo
+!          l_a = l_a+1
+!        enddo
+!        j = j-1
+!
+!      endif
+!-----
+
+      ! Loop over alpha singles
+      ! -----------------------
+
+      !DIR$ LOOP COUNT avg(1000)
+      do k = 1,n_singles_a,block_size
+        ! Prefetch u_t(:,l_a)
+        do kk=0,block_size-1
+          if (k+kk > n_singles_a) exit
+          l_a = singles_a(k+kk)
+          ASSERT (l_a <= N_det)
+
+          do l=1,N_st
+            utl(l,kk+1) = u_t(l,l_a)
+          enddo
+        enddo
+
+        do kk=0,block_size-1
+          if (k+kk > n_singles_a) exit
+          l_a = singles_a(k+kk)
+          lrow = psi_bilinear_matrix_rows(l_a)
+          ASSERT (lrow <= N_det_alpha_unique)
+
+          tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+          !todo: check arg order conjg/noconjg (should be okay)
+          call i_h_j_double_alpha_beta_complex(tmp_det,tmp_det2,$N_int,hij)
+          call get_s2(tmp_det,tmp_det2,$N_int,sij)
+          !DIR$ LOOP COUNT AVG(4)
+          do l=1,N_st
+          !todo: check arg order conjg/noconjg (should be okay)
+            v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+            s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1)
+          enddo
+        enddo
+      enddo
+
+    enddo
+
+  enddo
+  !$OMP END DO
+
+  !$OMP DO SCHEDULE(guided,64)
+  do k_a=istart+ishift,iend,istep
+
+
+    ! Single and double alpha excitations
+    ! ===================================
+
+
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    ! Initial determinant is at k_b in beta-major representation
+    ! ----------------------------------------------------------------------
+
+    k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
+    ASSERT (k_b <= N_det)
+
+    spindet(1:$N_int) = tmp_det(1:$N_int,1)
+
+    ! Loop inside the beta column to gather all the connected alphas
+    lcol = psi_bilinear_matrix_columns(k_a)
+    l_a = psi_bilinear_matrix_columns_loc(lcol)
+
+    !DIR$ LOOP COUNT avg(200000)
+    do i=1,N_det_alpha_unique
+      if (l_a > N_det) exit
+      lcol = psi_bilinear_matrix_columns(l_a)
+      if (lcol /= kcol) exit
+      lrow = psi_bilinear_matrix_rows(l_a)
+      ASSERT (lrow <= N_det_alpha_unique)
+
+      buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot
+      idx(i) = l_a
+      l_a = l_a+1
+    enddo
+    i = i-1
+
+    call get_all_spin_singles_and_doubles_$N_int(                    &
+        buffer, idx, spindet, i,                                     &
+        singles_a, doubles, n_singles_a, n_doubles )
+
+    ! Compute Hij for all alpha singles
+    ! ----------------------------------
+
+    tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+    !DIR$ LOOP COUNT avg(1000)
+    do i=1,n_singles_a,block_size
+      ! Prefetch u_t(:,l_a)
+      do kk=0,block_size-1
+        if (i+kk > n_singles_a) exit
+        l_a = singles_a(i+kk)
+        ASSERT (l_a <= N_det)
+
+        do l=1,N_st
+          utl(l,kk+1) = u_t(l,l_a)
+        enddo
+      enddo
+
+      do kk=0,block_size-1
+        if (i+kk > n_singles_a) exit
+        l_a = singles_a(i+kk)
+        lrow = psi_bilinear_matrix_rows(l_a)
+        ASSERT (lrow <= N_det_alpha_unique)
+
+        tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+        !todo: check arg order conjg/noconjg (should be okay)
+        call i_h_j_single_spin_complex( tmp_det, tmp_det2, $N_int, 1, hij)
+
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+        !todo: check arg order conjg/noconjg (should be okay)
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+          ! single => sij = 0
+        enddo
+      enddo
+    enddo
+
+
+    ! Compute Hij for all alpha doubles
+    ! ----------------------------------
+
+    !DIR$ LOOP COUNT avg(50000)
+    do i=1,n_doubles,block_size
+      ! Prefetch u_t(:,l_a)
+      do kk=0,block_size-1
+        if (i+kk > n_doubles) exit
+        l_a = doubles(i+kk)
+        ASSERT (l_a <= N_det)
+
+        do l=1,N_st
+          utl(l,kk+1) = u_t(l,l_a)
+        enddo
+      enddo
+
+      do kk=0,block_size-1
+        if (i+kk > n_doubles) exit
+        l_a = doubles(i+kk)
+        lrow = psi_bilinear_matrix_rows(l_a)
+        ASSERT (lrow <= N_det_alpha_unique)
+
+        !todo: check arg order conjg/noconjg (should be okay)
+        call i_h_j_double_spin_complex( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+        !todo: check arg order conjg/noconjg (should be okay)
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+          ! same spin => sij = 0
+        enddo
+      enddo
+    enddo
+
+
+    ! Single and double beta excitations
+    ! ==================================
+
+
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    kcol = psi_bilinear_matrix_columns(k_a)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    spindet(1:$N_int) = tmp_det(1:$N_int,2)
+
+    ! Initial determinant is at k_b in beta-major representation
+    ! -----------------------------------------------------------------------
+
+    k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
+    ASSERT (k_b <= N_det)
+
+    ! Loop inside the alpha row to gather all the connected betas
+    lrow = psi_bilinear_matrix_transp_rows(k_b)
+    l_b = psi_bilinear_matrix_transp_rows_loc(lrow)
+    !DIR$ LOOP COUNT avg(200000)
+    do i=1,N_det_beta_unique
+      if (l_b > N_det) exit
+      lrow = psi_bilinear_matrix_transp_rows(l_b)
+      if (lrow /= krow) exit
+      lcol = psi_bilinear_matrix_transp_columns(l_b)
+      ASSERT (lcol <= N_det_beta_unique)
+
+      buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol)
+      idx(i) = l_b
+      l_b = l_b+1
+    enddo
+    i = i-1
+
+    call get_all_spin_singles_and_doubles_$N_int(                    &
+        buffer, idx, spindet, i,                                     &
+        singles_b, doubles, n_singles_b, n_doubles )
+
+    ! Compute Hij for all beta singles
+    ! ----------------------------------
+
+    tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    !DIR$ LOOP COUNT avg(1000)
+    do i=1,n_singles_b,block_size
+      do kk=0,block_size-1
+        if (i+kk > n_singles_b) exit
+        l_b = singles_b(i+kk)
+        ASSERT (l_b <= N_det)
+
+        l_a = psi_bilinear_matrix_transp_order(l_b)
+        ASSERT (l_a <= N_det)
+
+        do l=1,N_st
+          utl(l,kk+1) = u_t(l,l_a)
+        enddo
+      enddo
+
+      do kk=0,block_size-1
+        if (i+kk > n_singles_b) exit
+        l_b = singles_b(i+kk)
+        l_a = psi_bilinear_matrix_transp_order(l_b)
+        lcol = psi_bilinear_matrix_transp_columns(l_b)
+        ASSERT (lcol <= N_det_beta_unique)
+
+        tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
+        call i_h_j_single_spin_complex( tmp_det, tmp_det2, $N_int, 2, hij)
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+          !todo: check arg order conjg/noconjg (should be okay)
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+          ! single => sij = 0
+        enddo
+      enddo
+    enddo
+
+    ! Compute Hij for all beta doubles
+    ! ----------------------------------
+
+    !DIR$ LOOP COUNT avg(50000)
+    do i=1,n_doubles,block_size
+      do kk=0,block_size-1
+        if (i+kk > n_doubles) exit
+        l_b = doubles(i+kk)
+        ASSERT (l_b <= N_det)
+        l_a = psi_bilinear_matrix_transp_order(l_b)
+        ASSERT (l_a <= N_det)
+
+        do l=1,N_st
+          utl(l,kk+1) = u_t(l,l_a)
+        enddo
+      enddo
+
+      do kk=0,block_size-1
+        if (i+kk > n_doubles) exit
+        l_b = doubles(i+kk)
+        l_a = psi_bilinear_matrix_transp_order(l_b)
+        lcol = psi_bilinear_matrix_transp_columns(l_b)
+        ASSERT (lcol <= N_det_beta_unique)
+
+        !todo: check arg order conjg/noconjg (should be okay)
+        call i_h_j_double_spin_complex( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij)
+
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+          !todo: check arg order conjg/noconjg (should be okay)
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+          ! same spin => sij = 0
+        enddo
+      enddo
+    enddo
+
+
+    ! Diagonal contribution
+    ! =====================
+
+
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    double precision, external :: diag_H_mat_elem, diag_S_mat_elem
+
+    hij = dcmplx(diag_H_mat_elem(tmp_det,$N_int),0.d0)
+    sij = diag_s_mat_elem(tmp_det,$N_int)
+    !DIR$ LOOP COUNT AVG(4)
+    do l=1,N_st
+      v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a)
+      s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a)
+    enddo
+
+  end do
+  !$OMP END DO
+  deallocate(buffer, singles_a, singles_b, doubles, idx, utl)
+  !$OMP END PARALLEL
+
+end
+
+SUBST [ N_int ]
+
+1;;
+2;;
+3;;
+4;;
+N_int;;
+
+END_TEMPLATE
+
+
diff --git a/src/determinants/EZFIO.cfg b/src/determinants/EZFIO.cfg
index 662c6fbb..b03ab374 100644
--- a/src/determinants/EZFIO.cfg
+++ b/src/determinants/EZFIO.cfg
@@ -83,6 +83,12 @@ doc: Coefficients of the wave function
 type: double precision 
 size: (determinants.n_det,determinants.n_states)
 
+[psi_coef_complex]
+interface: ezfio
+doc: Coefficients of the wave function
+type: double precision 
+size: (2,determinants.n_det,determinants.n_states)
+
 [psi_det]
 interface: ezfio
 doc: Determinants of the variational space
@@ -95,6 +101,12 @@ doc: Coefficients of the wave function
 type: double precision 
 size: (determinants.n_det_qp_edit,determinants.n_states)
 
+[psi_coef_complex_qp_edit]
+interface: ezfio
+doc: Coefficients of the wave function
+type: double precision 
+size: (2,determinants.n_det_qp_edit,determinants.n_states)
+
 [psi_det_qp_edit]
 interface: ezfio
 doc: Determinants of the variational space
diff --git a/src/determinants/create_excitations.irp.f b/src/determinants/create_excitations.irp.f
index cec87901..f3b19afa 100644
--- a/src/determinants/create_excitations.irp.f
+++ b/src/determinants/create_excitations.irp.f
@@ -80,6 +80,33 @@ subroutine build_singly_excited_wavefunction(i_hole,i_particle,ispin,det_out,coe
   enddo
 end
 
+subroutine build_singly_excited_wavefunction_complex(i_hole,i_particle,ispin,det_out,coef_out)
+  implicit none
+  BEGIN_DOC
+  ! Applies the single excitation operator : a^{dager}_(i_particle) a_(i_hole) of
+  ! spin = ispin to the current wave function (psi_det, psi_coef)
+  END_DOC
+  integer, intent(in)            :: i_hole,i_particle,ispin
+  integer(bit_kind), intent(out) :: det_out(N_int,2,N_det)
+  complex*16, intent(out)  :: coef_out(N_det,N_states)
+
+  integer :: k
+  integer :: i_ok
+  double precision :: phase
+  do k=1,N_det
+    coef_out(k,:) = psi_coef(k,:)
+    det_out(:,:,k) = psi_det(:,:,k)
+    call do_single_excitation(det_out(1,1,k),i_hole,i_particle,ispin,i_ok)
+    if (i_ok == 1) then
+      call get_phase(psi_det(1,1,k), det_out(1,1,k),phase,N_int)
+      coef_out(k,:) = phase * coef_out(k,:)
+    else
+      coef_out(k,:) = (0.d0,0.d0)
+      det_out(:,:,k) = psi_det(:,:,k)
+    endif
+  enddo
+end
+
 logical function is_spin_flip_possible(key_in,i_flip,ispin)
   implicit none
   BEGIN_DOC
diff --git a/src/determinants/density_matrix.irp.f b/src/determinants/density_matrix.irp.f
index e69a1803..ac3157e4 100644
--- a/src/determinants/density_matrix.irp.f
+++ b/src/determinants/density_matrix.irp.f
@@ -248,29 +248,58 @@ BEGIN_PROVIDER [ double precision, one_e_spin_density_mo, (mo_num,mo_num) ]
 END_PROVIDER
 
 subroutine set_natural_mos
-   implicit none
-   BEGIN_DOC
-   ! Set natural orbitals, obtained by diagonalization of the one-body density matrix
-   ! in the |MO| basis
-   END_DOC
-   character*(64)                 :: label
-   double precision, allocatable  :: tmp(:,:)
+  implicit none
+  BEGIN_DOC
+  ! Set natural orbitals, obtained by diagonalization of the one-body density matrix
+  ! in the |MO| basis
+  END_DOC
+  character*(64)                 :: label
+  double precision, allocatable  :: tmp(:,:)
 
-   label = "Natural"
-    integer :: i,j,iorb,jorb
-    do i = 1, n_virt_orb
-     iorb = list_virt(i)
-     do j = 1, n_core_inact_act_orb
-      jorb = list_core_inact_act(j)
-      if(one_e_dm_mo(iorb,jorb).ne. 0.d0)then
-        print*,'AHAHAH'
-        print*,iorb,jorb,one_e_dm_mo(iorb,jorb)
-        stop
-      endif
-     enddo
+  label = "Natural"
+  integer :: i,j,iorb,jorb,k
+  if (is_complex) then
+
+    !todo: implement for kpts
+    do k=1,kpt_num
+      do i = 1, n_virt_orb_kpts(k)
+        iorb = list_virt_kpts(i,k)
+        do j = 1, n_core_inact_act_orb_kpts(k)
+          jorb = list_core_inact_act_kpts(j,k)
+          if(cdabs(one_e_dm_mo_kpts(iorb,jorb,k)).ne. 0.d0)then
+            print*,'AHAHAH'
+            print*,iorb,jorb,k,one_e_dm_mo_kpts(iorb,jorb,k)
+            stop
+          endif
+        enddo
+      enddo
     enddo
-   call mo_as_svd_vectors_of_mo_matrix_eig(one_e_dm_mo,size(one_e_dm_mo,1),mo_num,mo_num,mo_occ,label)
-   soft_touch mo_occ
+    !print*,'1RDM'
+    !do k=1,kpt_num
+    !  do j=1,mo_num_per_kpt
+    !    do i=1,mo_num_per_kpt
+    !      print'(3(I5),2(E25.15))',i,j,k,one_e_dm_mo_kpts(i,j,k)
+    !    enddo
+    !  enddo
+    !enddo
+!    call mo_as_svd_vectors_of_mo_matrix_eig_complex(one_e_dm_mo_complex,size(one_e_dm_mo_complex,1),mo_num,mo_num,mo_occ,label)
+    call mo_as_svd_vectors_of_mo_matrix_eig_kpts(one_e_dm_mo_kpts,size(one_e_dm_mo_kpts,1),mo_num_per_kpt,mo_num_per_kpt,kpt_num,mo_occ_kpts,label)
+    soft_touch mo_occ_kpts
+  else
+    do i = 1, n_virt_orb
+      iorb = list_virt(i)
+      do j = 1, n_core_inact_act_orb
+        jorb = list_core_inact_act(j)
+        if(one_e_dm_mo(iorb,jorb).ne. 0.d0)then
+          print*,'AHAHAH'
+          print*,iorb,jorb,one_e_dm_mo(iorb,jorb)
+          stop
+        endif
+      enddo
+    enddo
+    call mo_as_svd_vectors_of_mo_matrix_eig(one_e_dm_mo,size(one_e_dm_mo,1),mo_num,mo_num,mo_occ,label)
+    soft_touch mo_occ
+  endif
 
 end
 subroutine save_natural_mos
@@ -292,11 +321,19 @@ BEGIN_PROVIDER [ double precision, c0_weight, (N_states) ]
    if (N_states > 1) then
      integer                        :: i
      double precision               :: c
+     if (is_complex) then
+       do i=1,N_states
+         c0_weight(i) = 1.d-31
+         c = maxval(cdabs(psi_coef_complex(:,i) * psi_coef_complex(:,i)))
+         c0_weight(i) = 1.d0/(c+1.d-20)
+       enddo
+     else
      do i=1,N_states
        c0_weight(i) = 1.d-31
        c = maxval(psi_coef(:,i) * psi_coef(:,i))
        c0_weight(i) = 1.d0/(c+1.d-20)
      enddo
+     endif
      c = 1.d0/minval(c0_weight(:))
      do i=1,N_states
        c0_weight(i) = c0_weight(i) * c
@@ -398,8 +435,23 @@ subroutine get_occupation_from_dets(istate,occupation)
   ASSERT (istate <= N_states)
 
   occupation = 0.d0
-  double precision, external :: u_dot_u
+  
+  if (is_complex) then
+    double precision, external :: u_dot_u_complex
+    norm_2 = 1.d0/u_dot_u_complex(psi_coef_complex(1,istate),N_det)
 
+    do i=1,N_det
+      c = cdabs(psi_coef_complex(i,istate)*psi_coef_complex(i,istate))*norm_2
+      call bitstring_to_list_ab(psi_det(1,1,i), list, n_elements, N_int)
+      do ispin=1,2
+        do j=1,n_elements(ispin)
+          ASSERT ( list(j,ispin) < mo_num )
+          occupation( list(j,ispin) ) += c
+        enddo
+      enddo
+    enddo
+  else
+  double precision, external :: u_dot_u
   norm_2 = 1.d0/u_dot_u(psi_coef(1,istate),N_det)
 
   do i=1,N_det
@@ -412,5 +464,6 @@ subroutine get_occupation_from_dets(istate,occupation)
       enddo
     enddo
   enddo
+  endif
 end
 
diff --git a/src/determinants/density_matrix_cplx.irp.f b/src/determinants/density_matrix_cplx.irp.f
new file mode 100644
index 00000000..e5d74347
--- /dev/null
+++ b/src/determinants/density_matrix_cplx.irp.f
@@ -0,0 +1,690 @@
+ BEGIN_PROVIDER [ complex*16, one_e_dm_mo_alpha_average_complex, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ complex*16, one_e_dm_mo_beta_average_complex, (mo_num,mo_num) ]
+   implicit none
+   BEGIN_DOC
+   ! $\alpha$ and $\beta$ one-body density matrix for each state
+   END_DOC
+   integer                        :: i
+   one_e_dm_mo_alpha_average_complex = (0.d0,0.d0)
+   one_e_dm_mo_beta_average_complex  = (0.d0,0.d0)
+   do i = 1,N_states
+     one_e_dm_mo_alpha_average_complex(:,:) += one_e_dm_mo_alpha_complex(:,:,i) * state_average_weight(i)
+     one_e_dm_mo_beta_average_complex(:,:) += one_e_dm_mo_beta_complex(:,:,i) * state_average_weight(i)
+   enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, one_e_dm_mo_diff_complex, (mo_num,mo_num,2:N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Difference of the one-body density matrix with respect to the ground state
+  END_DOC
+  integer                        :: i,j, istate
+
+  do istate=2,N_states
+    do j=1,mo_num
+      do i=1,mo_num
+        one_e_dm_mo_diff_complex(i,j,istate) =                            &
+            one_e_dm_mo_alpha_complex(i,j,istate) - one_e_dm_mo_alpha_complex(i,j,1) +&
+            one_e_dm_mo_beta_complex (i,j,istate) - one_e_dm_mo_beta_complex (i,j,1)
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ complex*16, one_e_dm_mo_spin_index_complex, (mo_num,mo_num,N_states,2) ]
+  implicit none
+  integer                        :: i,j,ispin,istate
+  ispin = 1
+  do istate = 1, N_states
+    do j = 1, mo_num
+      do i = 1, mo_num
+        one_e_dm_mo_spin_index_complex(i,j,istate,ispin) = one_e_dm_mo_alpha_complex(i,j,istate)
+      enddo
+    enddo
+  enddo
+
+  ispin = 2
+  do istate = 1, N_states
+    do j = 1, mo_num
+      do i = 1, mo_num
+        one_e_dm_mo_spin_index_complex(i,j,istate,ispin) = one_e_dm_mo_beta_complex(i,j,istate)
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ complex*16, one_e_dm_dagger_mo_spin_index_complex, (mo_num,mo_num,N_states,2) ]
+  print*,irp_here,' not implemented for complex'
+  stop -1
+!   implicit none
+!   integer                        :: i,j,ispin,istate
+!   ispin = 1
+!   do istate = 1, N_states
+!     do j = 1, mo_num
+!       one_e_dm_dagger_mo_spin_index(j,j,istate,ispin) = 1 - one_e_dm_mo_alpha(j,j,istate)
+!       do i = j+1, mo_num
+!         one_e_dm_dagger_mo_spin_index(i,j,istate,ispin) = -one_e_dm_mo_alpha(i,j,istate)
+!         one_e_dm_dagger_mo_spin_index(j,i,istate,ispin) = -one_e_dm_mo_alpha(i,j,istate)
+!       enddo
+!     enddo
+!   enddo
+!
+!   ispin = 2
+!   do istate = 1, N_states
+!     do j = 1, mo_num
+!       one_e_dm_dagger_mo_spin_index(j,j,istate,ispin) = 1 - one_e_dm_mo_beta(j,j,istate)
+!       do i = j+1, mo_num
+!         one_e_dm_dagger_mo_spin_index(i,j,istate,ispin) = -one_e_dm_mo_beta(i,j,istate)
+!         one_e_dm_dagger_mo_spin_index(j,i,istate,ispin) = -one_e_dm_mo_beta(i,j,istate)
+!       enddo
+!     enddo
+!   enddo
+!
+END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, one_e_dm_mo_alpha_complex, (mo_num,mo_num,N_states) ]
+&BEGIN_PROVIDER [ complex*16, one_e_dm_mo_beta_complex, (mo_num,mo_num,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! $\alpha$ and $\beta$ one-body density matrix for each state
+  ! $\gamma_{\mu\nu} = \langle\Psi|a_{\nu}^{\dagger}a_{\mu}|\Psi\rangle$
+  ! $\gamma_{\mu\nu} = \langle a_{\nu} \Psi|a_{\mu} \Psi\rangle$
+  ! $\gamma_{\mu\nu} = \sum_{IJ} c^*_J c_I \langle a_{\nu} I|a_{\mu} J\rangle$
+  END_DOC
+
+  integer                        :: j,k,l,m,k_a,k_b
+  integer                        :: occ(N_int*bit_kind_size,2)
+  complex*16               :: ck, cl, ckl
+  double precision               :: phase
+  integer                        :: h1,h2,p1,p2,s1,s2, degree
+  integer(bit_kind)              :: tmp_det(N_int,2), tmp_det2(N_int)
+  integer                        :: exc(0:2,2),n_occ(2)
+  complex*16, allocatable  :: tmp_a(:,:,:), tmp_b(:,:,:)
+  integer                        :: krow, kcol, lrow, lcol
+
+  PROVIDE psi_det psi_coef_complex
+
+  one_e_dm_mo_alpha_complex = (0.d0,0.d0)
+  one_e_dm_mo_beta_complex  = (0.d0,0.d0)
+  !$OMP PARALLEL DEFAULT(NONE)                                      &
+      !$OMP PRIVATE(j,k,k_a,k_b,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc,&
+      !$OMP  tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)&
+      !$OMP SHARED(psi_det,psi_coef_complex,N_int,N_states,elec_alpha_num,  &
+      !$OMP  elec_beta_num,one_e_dm_mo_alpha_complex,one_e_dm_mo_beta_complex,N_det,&
+      !$OMP  mo_num,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns,&
+      !$OMP  psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns,&
+      !$OMP  psi_bilinear_matrix_order_reverse, psi_det_alpha_unique, psi_det_beta_unique,&
+      !$OMP  psi_bilinear_matrix_values_complex, psi_bilinear_matrix_transp_values_complex,&
+      !$OMP  N_det_alpha_unique,N_det_beta_unique,irp_here)
+  allocate(tmp_a(mo_num,mo_num,N_states), tmp_b(mo_num,mo_num,N_states) )
+  tmp_a = (0.d0,0.d0)
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_a=1,N_det
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow)
+    tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol)
+
+    ! Diagonal part
+    ! -------------
+
+    call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int)
+    do m=1,N_states
+      ck = cdabs(psi_bilinear_matrix_values_complex(k_a,m)*psi_bilinear_matrix_values_complex(k_a,m))
+      do l=1,elec_alpha_num
+        j = occ(l,1)
+        tmp_a(j,j,m) += ck
+      enddo
+    enddo
+
+    if (k_a == N_det) cycle
+    l = k_a+1
+    lrow = psi_bilinear_matrix_rows(l)
+    lcol = psi_bilinear_matrix_columns(l)
+    ! Fix beta determinant, loop over alphas
+    do while ( lcol == kcol )
+      tmp_det2(:) = psi_det_alpha_unique(:, lrow)
+      call get_excitation_degree_spin(tmp_det(1,1),tmp_det2,degree,N_int)
+      if (degree == 1) then
+        exc = 0
+        call get_single_excitation_spin(tmp_det(1,1),tmp_det2,exc,phase,N_int)
+        call decode_exc_spin(exc,h1,p1,h2,p2)
+        ! h1 occ in k
+        ! p1 occ in l
+        do m=1,N_states
+          ckl = dconjg(psi_bilinear_matrix_values_complex(k_a,m))*psi_bilinear_matrix_values_complex(l,m) * phase
+          tmp_a(h1,p1,m) += dconjg(ckl)
+          tmp_a(p1,h1,m) += ckl
+        enddo
+      endif
+      l = l+1
+      if (l>N_det) exit
+      lrow = psi_bilinear_matrix_rows(l)
+      lcol = psi_bilinear_matrix_columns(l)
+    enddo
+
+  enddo
+  !$OMP END DO NOWAIT
+
+  !$OMP CRITICAL
+  one_e_dm_mo_alpha_complex(:,:,:) = one_e_dm_mo_alpha_complex(:,:,:) + tmp_a(:,:,:)
+  !$OMP END CRITICAL
+  deallocate(tmp_a)
+
+  tmp_b = (0.d0,0.d0)
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_b=1,N_det
+    krow = psi_bilinear_matrix_transp_rows(k_b)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_transp_columns(k_b)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow)
+    tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol)
+
+    ! Diagonal part
+    ! -------------
+
+    call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int)
+    do m=1,N_states
+      ck = cdabs(psi_bilinear_matrix_transp_values_complex(k_b,m)*psi_bilinear_matrix_transp_values_complex(k_b,m))
+      do l=1,elec_beta_num
+        j = occ(l,2)
+        tmp_b(j,j,m) += ck
+      enddo
+    enddo
+
+    if (k_b == N_det) cycle
+    l = k_b+1
+    lrow = psi_bilinear_matrix_transp_rows(l)
+    lcol = psi_bilinear_matrix_transp_columns(l)
+    ! Fix beta determinant, loop over alphas
+    do while ( lrow == krow )
+      tmp_det2(:) = psi_det_beta_unique(:, lcol)
+      call get_excitation_degree_spin(tmp_det(1,2),tmp_det2,degree,N_int)
+      if (degree == 1) then
+        exc = 0
+        call get_single_excitation_spin(tmp_det(1,2),tmp_det2,exc,phase,N_int)
+        call decode_exc_spin(exc,h1,p1,h2,p2)
+        do m=1,N_states
+          ckl = dconjg(psi_bilinear_matrix_transp_values_complex(k_b,m))*psi_bilinear_matrix_transp_values_complex(l,m) * phase
+          tmp_b(h1,p1,m) += dconjg(ckl)
+          tmp_b(p1,h1,m) += ckl
+        enddo
+      endif
+      l = l+1
+      if (l>N_det) exit
+      lrow = psi_bilinear_matrix_transp_rows(l)
+      lcol = psi_bilinear_matrix_transp_columns(l)
+    enddo
+
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP CRITICAL
+  one_e_dm_mo_beta_complex(:,:,:)  = one_e_dm_mo_beta_complex(:,:,:)  + tmp_b(:,:,:)
+  !$OMP END CRITICAL
+
+  deallocate(tmp_b)
+  !$OMP END PARALLEL
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, one_e_dm_mo_complex, (mo_num,mo_num) ]
+   implicit none
+   BEGIN_DOC
+   ! One-body density matrix
+   END_DOC
+   one_e_dm_mo_complex = one_e_dm_mo_alpha_average_complex + one_e_dm_mo_beta_average_complex
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, one_e_spin_density_mo_complex, (mo_num,mo_num) ]
+   implicit none
+   BEGIN_DOC
+   ! $\rho(\alpha) - \rho(\beta)$
+   END_DOC
+   one_e_spin_density_mo_complex = one_e_dm_mo_alpha_average_complex - one_e_dm_mo_beta_average_complex
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ complex*16, one_e_spin_density_ao_complex, (ao_num,ao_num) ]
+   BEGIN_DOC
+   ! One body spin density matrix on the |AO| basis : $\rho_{AO}(\alpha) - \rho_{AO}(\beta)$
+   ! todo: verify that this is correct for complex
+   ! equivalent to using mo_to_ao_no_overlap?
+   END_DOC
+   implicit none
+   integer                        :: i,j,k,l
+   complex*16               :: dm_mo
+
+   one_e_spin_density_ao_complex = (0.d0,0.d0)
+   do k = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, mo_num
+         do j = 1, mo_num
+           dm_mo = one_e_spin_density_mo_complex(j,i)
+           !    if(dabs(dm_mo).le.1.d-10)cycle
+           one_e_spin_density_ao_complex(l,k) += dconjg(mo_coef_complex(k,i)) * mo_coef_complex(l,j) * dm_mo
+
+         enddo
+       enddo
+     enddo
+   enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, one_e_dm_ao_alpha_complex, (ao_num,ao_num) ]
+&BEGIN_PROVIDER [ complex*16, one_e_dm_ao_beta_complex, (ao_num,ao_num) ]
+   BEGIN_DOC
+   ! One body density matrix on the |AO| basis : $\rho_{AO}(\alpha), \rho_{AO}(\beta)$.
+   END_DOC
+   implicit none
+   integer                        :: i,j,k,l
+   complex*16               :: mo_alpha,mo_beta
+
+   one_e_dm_ao_alpha_complex = (0.d0,0.d0)
+   one_e_dm_ao_beta_complex = (0.d0,0.d0)
+   do k = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, mo_num
+         do j = 1, mo_num
+           mo_alpha = one_e_dm_mo_alpha_average_complex(j,i)
+           mo_beta = one_e_dm_mo_beta_average_complex(j,i)
+           !    if(dabs(dm_mo).le.1.d-10)cycle
+           one_e_dm_ao_alpha_complex(l,k) += dconjg(mo_coef_complex(k,i)) * mo_coef_complex(l,j) *  mo_alpha
+           one_e_dm_ao_beta_complex(l,k) += dconjg(mo_coef_complex(k,i)) * mo_coef_complex(l,j)  *  mo_beta
+         enddo
+       enddo
+     enddo
+   enddo
+
+END_PROVIDER
+
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+ BEGIN_PROVIDER [ complex*16, one_e_dm_mo_alpha_average_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+&BEGIN_PROVIDER [ complex*16, one_e_dm_mo_beta_average_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! $\alpha$ and $\beta$ one-body density matrix for each state
+   END_DOC
+   integer                        :: i,k
+   one_e_dm_mo_alpha_average_kpts = (0.d0,0.d0)
+   one_e_dm_mo_beta_average_kpts  = (0.d0,0.d0)
+   do i = 1,N_states
+     do k=1,kpt_num
+       one_e_dm_mo_alpha_average_kpts(:,:,k) += one_e_dm_mo_alpha_kpts(:,:,k,i) * state_average_weight(i)
+       one_e_dm_mo_beta_average_kpts(:,:,k) += one_e_dm_mo_beta_kpts(:,:,k,i) * state_average_weight(i)
+     enddo
+   enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, one_e_dm_mo_diff_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num,2:N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Difference of the one-body density matrix with respect to the ground state
+  END_DOC
+  integer                        :: i,j, istate,k
+
+  do istate=2,N_states
+    do k=1,kpt_num
+      do j=1,mo_num_per_kpt
+        do i=1,mo_num_per_kpt
+          one_e_dm_mo_diff_kpts(i,j,k,istate) =                            &
+              one_e_dm_mo_alpha_kpts(i,j,k,istate) - one_e_dm_mo_alpha_kpts(i,j,k,1) +&
+              one_e_dm_mo_beta_kpts (i,j,k,istate) - one_e_dm_mo_beta_kpts (i,j,k,1)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ complex*16, one_e_dm_mo_spin_index_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num,N_states,2) ]
+  implicit none
+  integer                        :: i,j,k,ispin,istate
+  ispin = 1
+  do istate = 1, N_states
+    do k=1,kpt_num
+      do j = 1, mo_num_per_kpt
+        do i = 1, mo_num_per_kpt
+          one_e_dm_mo_spin_index_kpts(i,j,k,istate,ispin) = one_e_dm_mo_alpha_kpts(i,j,k,istate)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ispin = 2
+  do istate = 1, N_states
+    do k=1,kpt_num
+      do j = 1, mo_num_per_kpt
+        do i = 1, mo_num_per_kpt
+          one_e_dm_mo_spin_index_kpts(i,j,k,istate,ispin) = one_e_dm_mo_beta_kpts(i,j,k,istate)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ complex*16, one_e_dm_dagger_mo_spin_index_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num,N_states,2) ]
+  print*,irp_here,' not implemented for kpts'
+  stop -1
+!   implicit none
+!   integer                        :: i,j,ispin,istate
+!   ispin = 1
+!   do istate = 1, N_states
+!     do j = 1, mo_num
+!       one_e_dm_dagger_mo_spin_index(j,j,istate,ispin) = 1 - one_e_dm_mo_alpha(j,j,istate)
+!       do i = j+1, mo_num
+!         one_e_dm_dagger_mo_spin_index(i,j,istate,ispin) = -one_e_dm_mo_alpha(i,j,istate)
+!         one_e_dm_dagger_mo_spin_index(j,i,istate,ispin) = -one_e_dm_mo_alpha(i,j,istate)
+!       enddo
+!     enddo
+!   enddo
+!
+!   ispin = 2
+!   do istate = 1, N_states
+!     do j = 1, mo_num
+!       one_e_dm_dagger_mo_spin_index(j,j,istate,ispin) = 1 - one_e_dm_mo_beta(j,j,istate)
+!       do i = j+1, mo_num
+!         one_e_dm_dagger_mo_spin_index(i,j,istate,ispin) = -one_e_dm_mo_beta(i,j,istate)
+!         one_e_dm_dagger_mo_spin_index(j,i,istate,ispin) = -one_e_dm_mo_beta(i,j,istate)
+!       enddo
+!     enddo
+!   enddo
+!
+END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, one_e_dm_mo_alpha_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num,N_states) ]
+&BEGIN_PROVIDER [ complex*16, one_e_dm_mo_beta_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! $\alpha$ and $\beta$ one-body density matrix for each state
+  ! $\gamma_{\mu\nu} = \langle\Psi|a_{\nu}^{\dagger}a_{\mu}|\Psi\rangle$
+  ! $\gamma_{\mu\nu} = \langle a_{\nu} \Psi|a_{\mu} \Psi\rangle$
+  ! $\gamma_{\mu\nu} = \sum_{IJ} c^*_J c_I \langle a_{\nu} I|a_{\mu} J\rangle$
+  END_DOC
+  !todo: implement for kpts
+  integer                        :: j,k,l,m,k_a,k_b
+  integer                        :: occ(N_int*bit_kind_size,2)
+  complex*16               :: ck, cl, ckl
+  double precision               :: phase
+  integer                        :: h1,h2,p1,p2,s1,s2, degree
+  integer                        :: ih1,ip1,kh1,kp1,kk,k_shft,ii
+  integer(bit_kind)              :: tmp_det(N_int,2), tmp_det2(N_int)
+  integer(bit_kind)              :: tmp_det_kpts(N_int,2)
+  integer                        :: exc(0:2,2),n_occ(2)
+  complex*16, allocatable  :: tmp_a(:,:,:,:), tmp_b(:,:,:,:)
+  integer                        :: krow, kcol, lrow, lcol
+
+  PROVIDE psi_det psi_coef_complex
+
+  one_e_dm_mo_alpha_kpts = (0.d0,0.d0)
+  one_e_dm_mo_beta_kpts  = (0.d0,0.d0)
+  !$OMP PARALLEL DEFAULT(NONE)                                      &
+      !$OMP PRIVATE(j,k,k_a,k_b,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc,&
+      !$OMP  tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2,ih1,ip1,kh1,kp1,kk,&
+      !$OMP  tmp_det_kpts,k_shft,ii)&
+      !$OMP SHARED(psi_det,psi_coef_complex,N_int,N_states,  &
+      !$OMP  one_e_dm_mo_alpha_kpts,one_e_dm_mo_beta_kpts,N_det,&
+      !$OMP  mo_num_per_kpt,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns,&
+      !$OMP  psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns,&
+      !$OMP  psi_bilinear_matrix_order_reverse, psi_det_alpha_unique, psi_det_beta_unique,&
+      !$OMP  psi_bilinear_matrix_values_complex, psi_bilinear_matrix_transp_values_complex,&
+      !$OMP  N_det_alpha_unique,N_det_beta_unique,irp_here,kpt_num,kpts_bitmask)
+  allocate(tmp_a(mo_num_per_kpt,mo_num_per_kpt,kpt_num,N_states), tmp_b(mo_num_per_kpt,mo_num_per_kpt,kpt_num,N_states) )
+  tmp_a = (0.d0,0.d0)
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_a=1,N_det
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow)
+    tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol)
+
+    ! Diagonal part
+    ! -------------
+
+    do kk=1,kpt_num
+      k_shft = (kk-1)*mo_num_per_kpt
+      do ii=1,N_int
+        tmp_det_kpts(ii,1) = iand(tmp_det(ii,1),kpts_bitmask(ii,kk))
+        tmp_det_kpts(ii,2) = iand(tmp_det(ii,2),kpts_bitmask(ii,kk))
+      enddo
+      call bitstring_to_list_ab(tmp_det_kpts, occ, n_occ, N_int)
+      do m=1,N_states
+        ck = cdabs(psi_bilinear_matrix_values_complex(k_a,m)*psi_bilinear_matrix_values_complex(k_a,m))
+        !do l=1,elec_alpha_num_kpts(kk)
+        do l=1,n_occ(1)
+          j = occ(l,1) - k_shft
+          tmp_a(j,j,kk,m) += ck
+        enddo
+      enddo
+    enddo
+
+    if (k_a == N_det) cycle
+    l = k_a+1
+    lrow = psi_bilinear_matrix_rows(l)
+    lcol = psi_bilinear_matrix_columns(l)
+    ! Fix beta determinant, loop over alphas
+    do while ( lcol == kcol )
+      tmp_det2(:) = psi_det_alpha_unique(:, lrow)
+      call get_excitation_degree_spin(tmp_det(1,1),tmp_det2,degree,N_int)
+      if (degree == 1) then
+        exc = 0
+        call get_single_excitation_spin(tmp_det(1,1),tmp_det2,exc,phase,N_int)
+        call decode_exc_spin(exc,h1,p1,h2,p2)
+        ! h1 occ in k
+        ! p1 occ in l
+        call get_kpt_idx_mo(h1,kh1,ih1)
+        call get_kpt_idx_mo(p1,kp1,ip1)
+        if (kh1.ne.kp1) then
+          print *,'problem in: ',irp_here,'a'
+          print *,' h1 = ',h1
+          print *,' p1 = ',p1
+          print *,'ih1 = ',ih1
+          print *,'ip1 = ',ip1
+          print *,'kh1 = ',kh1
+          print *,'kp1 = ',kp1
+          !call debug_det(tmp_det,N_int)
+          call debug_single_spindet(tmp_det(1,1),N_int)
+          call debug_single_spindet(tmp_det2,N_int)
+          call debug_single_spindet(tmp_det(1,2),N_int)
+          !call print_spindet(tmp_det2,N_int)
+          stop -2
+        endif
+        do m=1,N_states
+          ckl = dconjg(psi_bilinear_matrix_values_complex(k_a,m))*psi_bilinear_matrix_values_complex(l,m) * phase
+          tmp_a(ih1,ip1,kh1,m) += dconjg(ckl)
+          tmp_a(ip1,ih1,kh1,m) += ckl
+        enddo
+      endif
+      l = l+1
+      if (l>N_det) exit
+      lrow = psi_bilinear_matrix_rows(l)
+      lcol = psi_bilinear_matrix_columns(l)
+    enddo
+
+  enddo
+  !$OMP END DO NOWAIT
+
+  !$OMP CRITICAL
+  one_e_dm_mo_alpha_kpts(:,:,:,:) = one_e_dm_mo_alpha_kpts(:,:,:,:) + tmp_a(:,:,:,:)
+  !$OMP END CRITICAL
+  deallocate(tmp_a)
+
+  tmp_b = (0.d0,0.d0)
+  !$OMP DO SCHEDULE(dynamic,64)
+  do k_b=1,N_det
+    krow = psi_bilinear_matrix_transp_rows(k_b)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_transp_columns(k_b)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow)
+    tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol)
+
+    ! Diagonal part
+    ! -------------
+
+    do kk=1,kpt_num
+      k_shft = (kk-1)*mo_num_per_kpt
+      do ii=1,N_int
+        tmp_det_kpts(ii,1) = iand(tmp_det(ii,1),kpts_bitmask(ii,kk))
+        tmp_det_kpts(ii,2) = iand(tmp_det(ii,2),kpts_bitmask(ii,kk))
+      enddo
+      call bitstring_to_list_ab(tmp_det_kpts, occ, n_occ, N_int)
+      do m=1,N_states
+        ck = cdabs(psi_bilinear_matrix_transp_values_complex(k_b,m)*psi_bilinear_matrix_transp_values_complex(k_b,m))
+        do l=1,n_occ(2)
+          j = occ(l,2) - k_shft
+          tmp_b(j,j,kk,m) += ck
+        enddo
+      enddo
+    enddo
+
+    if (k_b == N_det) cycle
+    l = k_b+1
+    lrow = psi_bilinear_matrix_transp_rows(l)
+    lcol = psi_bilinear_matrix_transp_columns(l)
+    ! Fix beta determinant, loop over alphas
+    do while ( lrow == krow )
+      tmp_det2(:) = psi_det_beta_unique(:, lcol)
+      call get_excitation_degree_spin(tmp_det(1,2),tmp_det2,degree,N_int)
+      if (degree == 1) then
+        exc = 0
+        call get_single_excitation_spin(tmp_det(1,2),tmp_det2,exc,phase,N_int)
+        call decode_exc_spin(exc,h1,p1,h2,p2)
+        call get_kpt_idx_mo(h1,kh1,ih1)
+        call get_kpt_idx_mo(p1,kp1,ip1)
+        if (kh1.ne.kp1) then
+          print *,'problem in: ',irp_here,'b'
+          print *,' h1 = ',h1
+          print *,' p1 = ',p1
+          print *,'ih1 = ',ih1
+          print *,'ip1 = ',ip1
+          print *,'kh1 = ',kh1
+          print *,'kp1 = ',kp1
+          call debug_single_spindet(tmp_det(1,2),N_int)
+          call debug_single_spindet(tmp_det2,N_int)
+          call debug_single_spindet(tmp_det(1,1),N_int)
+          stop -3
+        endif
+        do m=1,N_states
+          ckl = dconjg(psi_bilinear_matrix_transp_values_complex(k_b,m))*psi_bilinear_matrix_transp_values_complex(l,m) * phase
+          tmp_b(ih1,ip1,kh1,m) += dconjg(ckl)
+          tmp_b(ip1,ih1,kh1,m) += ckl
+        enddo
+      endif
+      l = l+1
+      if (l>N_det) exit
+      lrow = psi_bilinear_matrix_transp_rows(l)
+      lcol = psi_bilinear_matrix_transp_columns(l)
+    enddo
+
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP CRITICAL
+  one_e_dm_mo_beta_kpts(:,:,:,:)  = one_e_dm_mo_beta_kpts(:,:,:,:)  + tmp_b(:,:,:,:)
+  !$OMP END CRITICAL
+
+  deallocate(tmp_b)
+  !$OMP END PARALLEL
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, one_e_dm_mo_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! One-body density matrix
+   END_DOC
+   one_e_dm_mo_kpts = one_e_dm_mo_alpha_average_kpts + one_e_dm_mo_beta_average_kpts
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, one_e_spin_density_mo_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! $\rho(\alpha) - \rho(\beta)$
+   END_DOC
+   one_e_spin_density_mo_kpts = one_e_dm_mo_alpha_average_kpts - one_e_dm_mo_beta_average_kpts
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ complex*16, one_e_spin_density_ao_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+   BEGIN_DOC
+   ! One body spin density matrix on the |AO| basis : $\rho_{AO}(\alpha) - \rho_{AO}(\beta)$
+   ! todo: verify that this is correct for complex
+   ! equivalent to using mo_to_ao_no_overlap?
+   END_DOC
+   implicit none
+   integer                        :: i,j,k,l,kk
+   complex*16               :: dm_mo
+
+   one_e_spin_density_ao_kpts = (0.d0,0.d0)
+   do kk=1,kpt_num
+     do k = 1, ao_num_per_kpt
+       do l = 1, ao_num_per_kpt
+         do i = 1, mo_num_per_kpt
+           do j = 1, mo_num_per_kpt
+             dm_mo = one_e_spin_density_mo_kpts(j,i,kk)
+             !    if(dabs(dm_mo).le.1.d-10)cycle
+             one_e_spin_density_ao_kpts(l,k,kk) += dconjg(mo_coef_kpts(k,i,kk)) * mo_coef_kpts(l,j,kk) * dm_mo
+
+           enddo
+         enddo
+       enddo
+     enddo
+   enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, one_e_dm_ao_alpha_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+&BEGIN_PROVIDER [ complex*16, one_e_dm_ao_beta_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+   BEGIN_DOC
+   ! One body density matrix on the |AO| basis : $\rho_{AO}(\alpha), \rho_{AO}(\beta)$.
+   END_DOC
+   implicit none
+   integer                        :: i,j,k,l,kk
+   complex*16               :: mo_alpha,mo_beta
+
+   one_e_dm_ao_alpha_kpts = (0.d0,0.d0)
+   one_e_dm_ao_beta_kpts = (0.d0,0.d0)
+   do kk=1,kpt_num
+     do k = 1, ao_num_per_kpt
+       do l = 1, ao_num_per_kpt
+         do i = 1, mo_num_per_kpt
+           do j = 1, mo_num_per_kpt
+             mo_alpha = one_e_dm_mo_alpha_average_kpts(j,i,kk)
+             mo_beta = one_e_dm_mo_beta_average_kpts(j,i,kk)
+             !    if(dabs(dm_mo).le.1.d-10)cycle
+             one_e_dm_ao_alpha_kpts(l,k,kk) += dconjg(mo_coef_kpts(k,i,kk)) * mo_coef_kpts(l,j,kk) *  mo_alpha
+             one_e_dm_ao_beta_kpts(l,k,kk) += dconjg(mo_coef_kpts(k,i,kk)) * mo_coef_kpts(l,j,kk)  *  mo_beta
+           enddo
+         enddo
+       enddo
+     enddo
+   enddo
+
+END_PROVIDER
+
+
diff --git a/src/determinants/determinants.irp.f b/src/determinants/determinants.irp.f
index 71ee3d89..cccfb34f 100644
--- a/src/determinants/determinants.irp.f
+++ b/src/determinants/determinants.irp.f
@@ -113,7 +113,12 @@ BEGIN_PROVIDER [ integer(bit_kind), psi_det, (N_int,2,psi_det_size) ]
   logical                        :: exists
   character*(64)                 :: label
 
-  PROVIDE read_wf N_det mo_label ezfio_filename HF_bitmask mo_coef
+  PROVIDE read_wf N_det mo_label ezfio_filename HF_bitmask
+  if (is_complex) then
+    PROVIDE mo_coef_complex
+  else
+    PROVIDE mo_coef
+  endif
   psi_det = 0_bit_kind
   if (mpi_master) then
     if (read_wf) then
@@ -244,12 +249,21 @@ BEGIN_PROVIDER [ double precision, psi_average_norm_contrib, (psi_det_size) ]
   double precision               :: f
 
   psi_average_norm_contrib(:) = 0.d0
+  if (is_complex) then
+    do k=1,N_states
+      do i=1,N_det
+        psi_average_norm_contrib(i) = psi_average_norm_contrib(i) +    &
+            cdabs(psi_coef_complex(i,k)*psi_coef_complex(i,k))*state_average_weight(k)
+      enddo
+    enddo
+  else
   do k=1,N_states
     do i=1,N_det
       psi_average_norm_contrib(i) = psi_average_norm_contrib(i) +    &
           psi_coef(i,k)*psi_coef(i,k)*state_average_weight(k)
     enddo
   enddo
+  endif
   f = 1.d0/sum(psi_average_norm_contrib(1:N_det))
   do i=1,N_det
     psi_average_norm_contrib(i) = psi_average_norm_contrib(i)*f
@@ -266,7 +280,6 @@ END_PROVIDER
 
 
  BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted, (N_int,2,psi_det_size) ]
-&BEGIN_PROVIDER [ double precision, psi_coef_sorted, (psi_det_size,N_states) ]
 &BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_sorted, (psi_det_size) ]
 &BEGIN_PROVIDER [ integer, psi_det_sorted_order, (psi_det_size) ]
    implicit none
@@ -288,9 +301,6 @@ END_PROVIDER
        psi_det_sorted(j,1,i) = psi_det(j,1,iorder(i))
        psi_det_sorted(j,2,i) = psi_det(j,2,iorder(i))
      enddo
-     do k=1,N_states
-       psi_coef_sorted(i,k) = psi_coef(iorder(i),k)
-     enddo
      psi_average_norm_contrib_sorted(i) = -psi_average_norm_contrib_sorted(i)
    enddo
    do i=1,N_det
@@ -298,29 +308,74 @@ END_PROVIDER
    enddo
 
    psi_det_sorted(:,:,N_det+1:psi_det_size) = 0_bit_kind
-   psi_coef_sorted(N_det+1:psi_det_size,:) = 0.d0
    psi_average_norm_contrib_sorted(N_det+1:psi_det_size) = 0.d0
    psi_det_sorted_order(N_det+1:psi_det_size) = 0
 
    deallocate(iorder)
 
+END_PROVIDER
+BEGIN_PROVIDER [ double precision, psi_coef_sorted, (psi_det_size,N_states) ]
+  implicit none
+  integer                        :: i,j,k
+  do i=1,N_det
+    j=psi_det_sorted_order(i)
+    do k=1,N_states
+      psi_coef_sorted(j,k) = psi_coef(i,k)
+    enddo
+  enddo
+  psi_coef_sorted(N_det+1:psi_det_size,:) = 0.d0
 END_PROVIDER
 
  BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_bit, (N_int,2,psi_det_size) ]
-&BEGIN_PROVIDER [ double precision, psi_coef_sorted_bit, (psi_det_size,N_states) ]
-   implicit none
-   BEGIN_DOC
-   ! Determinants on which we apply $\langle i|H|psi \rangle$ for perturbation.
-   ! They are sorted by determinants interpreted as integers. Useful
-   ! to accelerate the search of a random determinant in the wave
-   ! function.
-   END_DOC
+&BEGIN_PROVIDER [ integer, psi_det_sorted_bit_order, (psi_det_size) ]
+  implicit none
+  integer :: i,j
+  integer*8, allocatable         :: bit_tmp(:)
+  integer*8, external            :: det_search_key
 
-   call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, size(psi_coef,1),       &
-       psi_det_sorted_bit, psi_coef_sorted_bit, N_states)
+  allocate(bit_tmp(N_det))
 
+  do i=1,N_det
+    psi_det_sorted_bit_order(i) = i
+    !$DIR FORCEINLINE
+    bit_tmp(i) = det_search_key(psi_det(1,1,i),N_int)
+  enddo
+  call i8sort(bit_tmp,psi_det_sorted_bit_order,N_det)
+  do i=1,N_det
+    do j=1,N_int
+      psi_det_sorted_bit(j,1,i) = psi_det(j,1,psi_det_sorted_bit_order(i))
+      psi_det_sorted_bit(j,2,i) = psi_det(j,2,psi_det_sorted_bit_order(i))
+    enddo
+  enddo
+  deallocate(bit_tmp)
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, psi_coef_sorted_bit, (psi_det_size,N_states) ]
+  implicit none
+  integer :: i,k
+  do i=1,N_det
+    do k=1,N_states
+      psi_coef_sorted_bit(i,k) = psi_coef(psi_det_sorted_bit_order(i),k)
+    enddo
+  enddo
+END_PROVIDER
+
+
+! BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_bit, (N_int,2,psi_det_size) ]
+!&BEGIN_PROVIDER [ double precision, psi_coef_sorted_bit, (psi_det_size,N_states) ]
+!   implicit none
+!   BEGIN_DOC
+!   ! Determinants on which we apply $\langle i|H|psi \rangle$ for perturbation.
+!   ! They are sorted by determinants interpreted as integers. Useful
+!   ! to accelerate the search of a random determinant in the wave
+!   ! function.
+!   END_DOC
+!
+!   call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, size(psi_coef,1),       &
+!       psi_det_sorted_bit, psi_coef_sorted_bit, N_states)
+!
+!END_PROVIDER
+
 subroutine sort_dets_by_det_search_key(Ndet, det_in, coef_in, sze, det_out, coef_out, N_st)
    use bitmasks
    implicit none
@@ -369,24 +424,46 @@ end
 
  BEGIN_PROVIDER [ double precision, psi_coef_max, (N_states) ]
 &BEGIN_PROVIDER [ double precision, psi_coef_min, (N_states) ]
-&BEGIN_PROVIDER [ double precision, abs_psi_coef_max, (N_states) ]
-&BEGIN_PROVIDER [ double precision, abs_psi_coef_min, (N_states) ]
-   implicit none
-   BEGIN_DOC
-   ! Max and min values of the coefficients
-   END_DOC
-   integer                        :: i
-   do i=1,N_states
-     psi_coef_min(i) = minval(psi_coef(:,i))
-     psi_coef_max(i) = maxval(psi_coef(:,i))
-     abs_psi_coef_min(i) = minval( dabs(psi_coef(:,i)) )
-     abs_psi_coef_max(i) = maxval( dabs(psi_coef(:,i)) )
-     call write_double(6,psi_coef_max(i), 'Max coef')
-     call write_double(6,psi_coef_min(i), 'Min coef')
-     call write_double(6,abs_psi_coef_max(i), 'Max abs coef')
-     call write_double(6,abs_psi_coef_min(i), 'Min abs coef')
-   enddo
+  implicit none
+  BEGIN_DOC
+  ! Max and min values of the coefficients
+  END_DOC
+  integer                        :: i
+  if (is_complex) then
+    print*,irp_here,' not implemented for complex'
+    stop -1
+  endif
+  do i=1,N_states
+    psi_coef_min(i) = minval(psi_coef(:,i))
+    psi_coef_max(i) = maxval(psi_coef(:,i))
+    call write_double(6,psi_coef_max(i), 'Max coef')
+    call write_double(6,psi_coef_min(i), 'Min coef')
+  enddo
+END_PROVIDER
 
+
+ BEGIN_PROVIDER [ double precision, abs_psi_coef_max, (N_states) ]
+&BEGIN_PROVIDER [ double precision, abs_psi_coef_min, (N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Max and min magnitudes of the coefficients
+  END_DOC
+  integer                        :: i
+  if (is_complex) then
+    do i=1,N_states
+      abs_psi_coef_min(i) = minval( cdabs(psi_coef_complex(:,i)) )
+      abs_psi_coef_max(i) = maxval( cdabs(psi_coef_complex(:,i)) )
+      call write_double(6,abs_psi_coef_max(i), 'Max abs coef')
+      call write_double(6,abs_psi_coef_min(i), 'Min abs coef')
+    enddo
+  else
+    do i=1,N_states
+      abs_psi_coef_min(i) = minval( dabs(psi_coef(:,i)) )
+      abs_psi_coef_max(i) = maxval( dabs(psi_coef(:,i)) )
+      call write_double(6,abs_psi_coef_max(i), 'Max abs coef')
+      call write_double(6,abs_psi_coef_min(i), 'Min abs coef')
+    enddo
+  endif
 END_PROVIDER
 
 
@@ -442,10 +519,17 @@ end
 subroutine save_ref_determinant
   implicit none
   use bitmasks
+  if (is_complex) then
+    complex*16 :: buffer_c(1,N_states)
+    buffer_c = (0.d0,0.d0)
+    buffer_c(1,1) = (1.d0,0.d0)
+    call save_wavefunction_general_complex(1,N_states,ref_bitmask,1,buffer_c)
+  else
   double precision               :: buffer(1,N_states)
   buffer = 0.d0
   buffer(1,1) = 1.d0
   call save_wavefunction_general(1,N_states,ref_bitmask,1,buffer)
+  endif
 end
 
 
@@ -467,7 +551,12 @@ subroutine save_wavefunction_truncated(thr)
     endif
   enddo
   if (mpi_master) then
+    if (is_complex) then
+      call save_wavefunction_general_complex(N_det_save,min(N_states,N_det_save),&
+                psi_det_sorted,size(psi_coef_sorted_complex,1),psi_coef_sorted_complex)
+    else
     call save_wavefunction_general(N_det_save,min(N_states,N_det_save),psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted)
+    endif
   endif
 end
 
@@ -485,7 +574,12 @@ subroutine save_wavefunction
     return
   endif
   if (mpi_master) then
+    if (is_complex) then
+      call save_wavefunction_general_complex(N_det,N_states,&
+                  psi_det_sorted,size(psi_coef_sorted_complex,1),psi_coef_sorted_complex)
+    else
     call save_wavefunction_general(N_det,N_states,psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted)
+    endif
   endif
 end
 
@@ -497,7 +591,12 @@ subroutine save_wavefunction_unsorted
   !  Save the wave function into the |EZFIO| file
   END_DOC
   if (mpi_master) then
+    if (is_complex) then
+      call save_wavefunction_general_complex(N_det,min(N_states,N_det),&
+                   psi_det,size(psi_coef_complex,1),psi_coef_complex)
+    else
     call save_wavefunction_general(N_det,min(N_states,N_det),psi_det,size(psi_coef,1),psi_coef)
+    endif
   endif
 end
 
diff --git a/src/determinants/determinants_cplx.irp.f b/src/determinants/determinants_cplx.irp.f
new file mode 100644
index 00000000..76e4d9fc
--- /dev/null
+++ b/src/determinants/determinants_cplx.irp.f
@@ -0,0 +1,350 @@
+use bitmasks
+
+
+
+
+BEGIN_PROVIDER [ complex*16, psi_coef_complex, (psi_det_size,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! The wave function coefficients. Initialized with Hartree-Fock if the |EZFIO| file
+  ! is empty.
+  END_DOC
+
+  integer                        :: i,k, N_int2
+  logical                        :: exists
+  character*(64)                 :: label
+
+  PROVIDE read_wf N_det mo_label ezfio_filename
+  psi_coef_complex = (0.d0,0.d0)
+  do i=1,min(N_states,psi_det_size)
+    psi_coef_complex(i,i) = (1.d0,0.d0)
+  enddo
+
+  if (mpi_master) then
+    if (read_wf) then
+      call ezfio_has_determinants_psi_coef_complex(exists)
+      if (exists) then
+        call ezfio_has_determinants_mo_label(exists)
+        if (exists) then
+          call ezfio_get_determinants_mo_label(label)
+          exists = (label == mo_label)
+        endif
+      endif
+
+      if (exists) then
+
+        complex*16, allocatable  :: psi_coef_read(:,:)
+        allocate (psi_coef_read(N_det,N_states))
+        print *,  'Read psi_coef_complex', N_det, N_states
+        call ezfio_get_determinants_psi_coef_complex(psi_coef_read)
+        do k=1,N_states
+          do i=1,N_det
+            psi_coef_complex(i,k) = psi_coef_read(i,k)
+          enddo
+        enddo
+        deallocate(psi_coef_read)
+
+      endif
+    endif
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer                        :: ierr
+    call     MPI_BCAST( psi_coef_complex, size(psi_coef_complex), MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read psi_coef_complex with MPI'
+    endif
+  IRP_ENDIF
+
+
+
+END_PROVIDER
+
+!==============================================================================!
+!                                                                              !
+!                               Sorting providers                              !
+!                                                                              !
+!==============================================================================!
+
+BEGIN_PROVIDER [ complex*16, psi_coef_sorted_complex, (psi_det_size,N_states) ]
+  implicit none
+  integer                        :: i,j,k
+  do i=1,N_det
+    j=psi_det_sorted_order(i)
+    do k=1,N_states
+      psi_coef_sorted_complex(j,k) = psi_coef_complex(i,k)
+    enddo
+  enddo
+  psi_coef_sorted_complex(N_det+1:psi_det_size,:) = (0.d0,0.d0)
+END_PROVIDER
+
+!!TODO: implement for complex (new psi_det_sorted? reuse? combine complex provider with real?)
+! BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_complex, (N_int,2,psi_det_size) ]
+!&BEGIN_PROVIDER [ complex*16, psi_coef_sorted_complex, (psi_det_size,N_states) ]
+!&BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_sorted_complex, (psi_det_size) ]
+!&BEGIN_PROVIDER [ integer, psi_det_sorted_order_complex, (psi_det_size) ]
+!   implicit none
+!   BEGIN_DOC
+!   ! Wave function sorted by determinants contribution to the norm (state-averaged)
+!   !
+!   ! psi_det_sorted_order(i) -> k : index in psi_det
+!   END_DOC
+!   integer                        :: i,j,k
+!   integer, allocatable           :: iorder(:)
+!   allocate ( iorder(N_det) )
+!   do i=1,N_det
+!     psi_average_norm_contrib_sorted_complex(i) = -psi_average_norm_contrib(i)
+!     iorder(i) = i
+!   enddo
+!   call dsort(psi_average_norm_contrib_sorted_complex,iorder,N_det)
+!   do i=1,N_det
+!     do j=1,N_int
+!       psi_det_sorted_complex(j,1,i) = psi_det(j,1,iorder(i))
+!       psi_det_sorted_complex(j,2,i) = psi_det(j,2,iorder(i))
+!     enddo
+!     do k=1,N_states
+!       psi_coef_sorted_complex(i,k) = psi_coef_complex(iorder(i),k)
+!     enddo
+!     psi_average_norm_contrib_sorted_complex(i) = -psi_average_norm_contrib_sorted_complex(i)
+!   enddo
+!   do i=1,N_det
+!     psi_det_sorted_order_complex(iorder(i)) = i
+!   enddo
+!
+!   psi_det_sorted_complex(:,:,N_det+1:psi_det_size) = 0_bit_kind
+!   psi_coef_sorted_complex(N_det+1:psi_det_size,:) = (0.d0,0.d0)
+!   psi_average_norm_contrib_sorted_complex(N_det+1:psi_det_size) = 0.d0
+!   psi_det_sorted_order_complex(N_det+1:psi_det_size) = 0
+!
+!   deallocate(iorder)
+!
+!END_PROVIDER
+
+
+BEGIN_PROVIDER [ complex*16, psi_coef_sorted_bit_complex, (psi_det_size,N_states) ]
+  implicit none
+  integer :: i,k
+  do i=1,N_det
+    do k=1,N_states
+      psi_coef_sorted_bit_complex(i,k) = psi_coef_complex(psi_det_sorted_bit_order(i),k)
+    enddo
+  enddo
+END_PROVIDER
+
+subroutine sort_dets_by_det_search_key_complex(Ndet, det_in, coef_in, sze, det_out, coef_out, N_st)
+   use bitmasks
+   implicit none
+   integer, intent(in)            :: Ndet, N_st, sze
+   integer(bit_kind), intent(in)  :: det_in  (N_int,2,sze)
+   complex*16 , intent(in)  :: coef_in(sze,N_st)
+   integer(bit_kind), intent(out) :: det_out (N_int,2,sze)
+   complex*16 , intent(out) :: coef_out(sze,N_st)
+   BEGIN_DOC
+   ! Determinants are sorted according to their :c:func:`det_search_key`.
+   ! Useful to accelerate the search of a random determinant in the wave
+   ! function.
+   !
+   ! /!\ The first dimension of coef_out and coef_in need to be psi_det_size
+   !
+   END_DOC
+   integer                        :: i,j,k
+   integer, allocatable           :: iorder(:)
+   integer*8, allocatable         :: bit_tmp(:)
+   integer*8, external            :: det_search_key
+
+   allocate ( iorder(Ndet), bit_tmp(Ndet) )
+
+   do i=1,Ndet
+     iorder(i) = i
+     !$DIR FORCEINLINE
+     bit_tmp(i) = det_search_key(det_in(1,1,i),N_int)
+   enddo
+   call i8sort(bit_tmp,iorder,Ndet)
+   !DIR$ IVDEP
+   do i=1,Ndet
+     do j=1,N_int
+       det_out(j,1,i) = det_in(j,1,iorder(i))
+       det_out(j,2,i) = det_in(j,2,iorder(i))
+     enddo
+     do k=1,N_st
+       coef_out(i,k) = coef_in(iorder(i),k)
+     enddo
+   enddo
+
+   deallocate(iorder, bit_tmp)
+
+end
+
+
+!==============================================================================!
+!                                                                              !
+!                             Read/write routines                              !
+!                                                                              !
+!==============================================================================!
+
+
+
+subroutine save_wavefunction_general_complex(ndet,nstates,psidet,dim_psicoef,psicoef)
+  implicit none
+  BEGIN_DOC
+  !  Save the wave function into the |EZFIO| file
+  END_DOC
+  use bitmasks
+  include 'constants.include.F'
+  integer, intent(in)            :: ndet,nstates,dim_psicoef
+  integer(bit_kind), intent(in)  :: psidet(N_int,2,ndet)
+  complex*16, intent(in)   :: psicoef(dim_psicoef,nstates)
+  integer*8, allocatable         :: psi_det_save(:,:,:)
+  complex*16, allocatable  :: psi_coef_save(:,:)
+
+  double precision               :: accu_norm
+  integer                        :: i,j,k, ndet_qp_edit
+
+  if (mpi_master) then
+    ndet_qp_edit = min(ndet,N_det_qp_edit)
+
+    call ezfio_set_determinants_N_int(N_int)
+    call ezfio_set_determinants_bit_kind(bit_kind)
+    call ezfio_set_determinants_N_det(ndet)
+    call ezfio_set_determinants_N_det_qp_edit(ndet_qp_edit)
+    call ezfio_set_determinants_n_states(nstates)
+    call ezfio_set_determinants_mo_label(mo_label)
+
+    allocate (psi_det_save(N_int,2,ndet))
+    do i=1,ndet
+      do j=1,2
+        do k=1,N_int
+          psi_det_save(k,j,i) = transfer(psidet(k,j,i),1_8)
+        enddo
+      enddo
+    enddo
+    call ezfio_set_determinants_psi_det(psi_det_save)
+    call ezfio_set_determinants_psi_det_qp_edit(psi_det_save)
+    deallocate (psi_det_save)
+
+    allocate (psi_coef_save(ndet,nstates))
+    do k=1,nstates
+      do i=1,ndet
+        psi_coef_save(i,k) = psicoef(i,k)
+      enddo
+      call normalize_complex(psi_coef_save(1,k),ndet)
+    enddo
+
+    call ezfio_set_determinants_psi_coef_complex(psi_coef_save)
+    deallocate (psi_coef_save)
+
+    allocate (psi_coef_save(ndet_qp_edit,nstates))
+    do k=1,nstates
+      do i=1,ndet_qp_edit
+        psi_coef_save(i,k) = psicoef(i,k)
+      enddo
+      call normalize_complex(psi_coef_save(1,k),ndet_qp_edit)
+    enddo
+
+    call ezfio_set_determinants_psi_coef_complex_qp_edit(psi_coef_save)
+    deallocate (psi_coef_save)
+
+    call write_int(6,ndet,'Saved determinants')
+  endif
+end
+
+
+
+subroutine save_wavefunction_specified_complex(ndet,nstates,psidet,psicoef,ndetsave,index_det_save)
+  implicit none
+  BEGIN_DOC
+  !  Save the wave function into the |EZFIO| file
+  END_DOC
+  use bitmasks
+  integer, intent(in)            :: ndet,nstates
+  integer(bit_kind), intent(in)  :: psidet(N_int,2,ndet)
+  complex*16, intent(in)   :: psicoef(ndet,nstates)
+  integer, intent(in)            :: index_det_save(ndet)
+  integer, intent(in)            :: ndetsave
+  integer*8, allocatable         :: psi_det_save(:,:,:)
+  complex*16, allocatable  :: psi_coef_save(:,:)
+  integer*8                      :: det_8(100)
+  integer(bit_kind)              :: det_bk((100*8)/bit_kind)
+  integer                        :: N_int2
+  equivalence (det_8, det_bk)
+
+  integer                        :: i,j,k, ndet_qp_edit
+
+  if (mpi_master) then
+    ndet_qp_edit = min(ndetsave,N_det_qp_edit)
+    call ezfio_set_determinants_N_int(N_int)
+    call ezfio_set_determinants_bit_kind(bit_kind)
+    call ezfio_set_determinants_N_det(ndetsave)
+    call ezfio_set_determinants_N_det_qp_edit(ndet_qp_edit)
+    call ezfio_set_determinants_n_states(nstates)
+    call ezfio_set_determinants_mo_label(mo_label)
+
+    N_int2 = (N_int*bit_kind)/8
+    allocate (psi_det_save(N_int2,2,ndetsave))
+    do i=1,ndetsave
+      do k=1,N_int
+        det_bk(k) = psidet(k,1,index_det_save(i))
+      enddo
+      do k=1,N_int2
+        psi_det_save(k,1,i) = det_8(k)
+      enddo
+      do k=1,N_int
+        det_bk(k) = psidet(k,2,index_det_save(i))
+      enddo
+      do k=1,N_int2
+        psi_det_save(k,2,i) = det_8(k)
+      enddo
+    enddo
+    call ezfio_set_determinants_psi_det(psi_det_save)
+    call ezfio_set_determinants_psi_det_qp_edit(psi_det_save)
+    deallocate (psi_det_save)
+
+    allocate (psi_coef_save(ndetsave,nstates))
+    double precision               :: accu_norm(nstates)
+    accu_norm = 0.d0
+    do k=1,nstates
+      do i=1,ndetsave
+        accu_norm(k) = accu_norm(k) + cdabs(psicoef(index_det_save(i),k) * psicoef(index_det_save(i),k))
+        psi_coef_save(i,k) = psicoef(index_det_save(i),k)
+      enddo
+    enddo
+    do k = 1, nstates
+      accu_norm(k) = 1.d0/dsqrt(accu_norm(k))
+    enddo
+    do k=1,nstates
+      do i=1,ndetsave
+        psi_coef_save(i,k) = psi_coef_save(i,k) * accu_norm(k)
+      enddo
+    enddo
+
+    call ezfio_set_determinants_psi_coef_complex(psi_coef_save)
+    deallocate (psi_coef_save)
+
+    allocate (psi_coef_save(ndet_qp_edit,nstates))
+    accu_norm = 0.d0
+    do k=1,nstates
+      do i=1,ndet_qp_edit
+        accu_norm(k) = accu_norm(k) + cdabs(psicoef(index_det_save(i),k) * psicoef(index_det_save(i),k))
+        psi_coef_save(i,k) = psicoef(index_det_save(i),k)
+      enddo
+    enddo
+    do k = 1, nstates
+      accu_norm(k) = 1.d0/dsqrt(accu_norm(k))
+    enddo
+    do k=1,nstates
+      do i=1,ndet_qp_edit
+        psi_coef_save(i,k) = psi_coef_save(i,k) * accu_norm(k)
+      enddo
+    enddo
+    !TODO: should this be psi_coef_complex_qp_edit?
+    call ezfio_set_determinants_psi_coef_complex(psi_coef_save)
+    deallocate (psi_coef_save)
+
+    call write_int(6,ndet,'Saved determinants')
+  endif
+end
+
diff --git a/src/determinants/energy.irp.f b/src/determinants/energy.irp.f
index 63be7971..79e110eb 100644
--- a/src/determinants/energy.irp.f
+++ b/src/determinants/energy.irp.f
@@ -21,11 +21,19 @@ BEGIN_PROVIDER [ double precision, barycentric_electronic_energy, (N_states) ]
 
  barycentric_electronic_energy(:) = 0.d0
 
+ if (is_complex) then
+   do istate=1,N_states
+     do i=1,N_det
+       barycentric_electronic_energy(istate) += cdabs(psi_coef_complex(i,istate)*psi_coef_complex(i,istate))*diagonal_H_matrix_on_psi_det(i)
+     enddo
+   enddo
+ else
  do istate=1,N_states
    do i=1,N_det
      barycentric_electronic_energy(istate) += psi_coef(i,istate)*psi_coef(i,istate)*diagonal_H_matrix_on_psi_det(i)
    enddo
  enddo
+ endif
 
 END_PROVIDER
 
diff --git a/src/determinants/fock_diag.irp.f b/src/determinants/fock_diag.irp.f
index a8ce33b8..5c8f3603 100644
--- a/src/determinants/fock_diag.irp.f
+++ b/src/determinants/fock_diag.irp.f
@@ -29,12 +29,12 @@ subroutine build_fock_tmp(fock_diag_tmp,det_ref,Nint)
     call debug_det(det_ref,N_int)
     stop -1
   endif
-
+  
   ! Occupied MOs
   do ii=1,elec_alpha_num
     i = occ(ii,1)
-    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
-    E0 = E0 + mo_one_e_integrals(i,i)
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals_diag(i)
+    E0 = E0 + mo_one_e_integrals_diag(i)
     do jj=1,elec_alpha_num
       j = occ(jj,1)
       if (i==j) cycle
@@ -49,8 +49,8 @@ subroutine build_fock_tmp(fock_diag_tmp,det_ref,Nint)
   enddo
   do ii=1,elec_beta_num
     i = occ(ii,2)
-    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
-    E0 = E0 + mo_one_e_integrals(i,i)
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals_diag(i)
+    E0 = E0 + mo_one_e_integrals_diag(i)
     do jj=1,elec_beta_num
       j = occ(jj,2)
       if (i==j) cycle
@@ -66,7 +66,7 @@ subroutine build_fock_tmp(fock_diag_tmp,det_ref,Nint)
   ! Virtual MOs
   do i=1,mo_num
     if (fock_diag_tmp(1,i) /= 0.d0) cycle
-    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals_diag(i)
     do jj=1,elec_alpha_num
       j = occ(jj,1)
       fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
@@ -78,7 +78,7 @@ subroutine build_fock_tmp(fock_diag_tmp,det_ref,Nint)
   enddo
   do i=1,mo_num
     if (fock_diag_tmp(2,i) /= 0.d0) cycle
-    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals_diag(i)
     do jj=1,elec_beta_num
       j = occ(jj,2)
       fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
diff --git a/src/determinants/h_apply.irp.f b/src/determinants/h_apply.irp.f
index 98fafb4a..fcce1645 100644
--- a/src/determinants/h_apply.irp.f
+++ b/src/determinants/h_apply.irp.f
@@ -6,6 +6,7 @@ type H_apply_buffer_type
   integer                        :: sze
   integer(bit_kind), pointer     :: det(:,:,:)
   double precision , pointer     :: coef(:,:)
+  complex*16 , pointer           :: coef_complex(:,:)
   double precision , pointer     :: e2(:,:)
 end type H_apply_buffer_type
 
@@ -26,17 +27,22 @@ type(H_apply_buffer_type), pointer :: H_apply_buffer(:)
     allocate(H_apply_buffer(0:nproc-1))
     iproc = 0
     !$OMP PARALLEL PRIVATE(iproc) DEFAULT(NONE)  &
-    !$OMP SHARED(H_apply_buffer,N_int,sze,N_states,H_apply_buffer_lock)
+    !$OMP SHARED(H_apply_buffer,N_int,sze,N_states,H_apply_buffer_lock,is_complex)
     !$   iproc = omp_get_thread_num()
     H_apply_buffer(iproc)%N_det = 0
     H_apply_buffer(iproc)%sze = sze
     allocate (                                                       &
         H_apply_buffer(iproc)%det(N_int,2,sze),                      &
-        H_apply_buffer(iproc)%coef(sze,N_states),                    &
         H_apply_buffer(iproc)%e2(sze,N_states)                       &
         )
+    if (is_complex) then
+      allocate(H_apply_buffer(iproc)%coef_complex(sze,N_states))
+      H_apply_buffer(iproc)%coef_complex = (0.d0,0.d0)
+    else
+      allocate(H_apply_buffer(iproc)%coef(sze,N_states))
+      H_apply_buffer(iproc)%coef = 0.d0
+    endif
     H_apply_buffer(iproc)%det  = 0_bit_kind
-    H_apply_buffer(iproc)%coef = 0.d0
     H_apply_buffer(iproc)%e2   = 0.d0
     call omp_init_lock(H_apply_buffer_lock(1,iproc))
     !$OMP END PARALLEL
@@ -59,6 +65,7 @@ subroutine resize_H_apply_buffer(new_size,iproc)
   integer, intent(in)            :: new_size, iproc
   integer(bit_kind), pointer     :: buffer_det(:,:,:)
   double precision,  pointer     :: buffer_coef(:,:)
+  complex*16,  pointer           :: buffer_coef_complex(:,:)
   double precision,  pointer     :: buffer_e2(:,:)
   integer                        :: i,j,k
   integer                        :: Ndet
@@ -74,9 +81,14 @@ subroutine resize_H_apply_buffer(new_size,iproc)
   ASSERT (iproc < nproc)
 
   allocate ( buffer_det(N_int,2,new_size),                           &
-      buffer_coef(new_size,N_states),                                &
       buffer_e2(new_size,N_states) )
-  buffer_coef = 0.d0
+  if (is_complex) then
+    allocate(buffer_coef_complex(new_size,N_states))
+    buffer_coef_complex = (0.d0,0.d0)
+  else
+    allocate(buffer_coef(new_size,N_states))
+    buffer_coef = 0.d0
+  endif
   buffer_e2 = 0.d0
   do i=1,min(new_size,H_apply_buffer(iproc)%N_det)
     do k=1,N_int
@@ -89,6 +101,15 @@ subroutine resize_H_apply_buffer(new_size,iproc)
   deallocate(H_apply_buffer(iproc)%det)
   H_apply_buffer(iproc)%det => buffer_det
 
+  if (is_complex) then
+    do k=1,N_states
+      do i=1,min(new_size,H_apply_buffer(iproc)%N_det)
+        buffer_coef_complex(i,k) = H_apply_buffer(iproc)%coef_complex(i,k)
+      enddo
+    enddo
+    deallocate(H_apply_buffer(iproc)%coef_complex)
+    H_apply_buffer(iproc)%coef_complex => buffer_coef_complex
+  else
   do k=1,N_states
     do i=1,min(new_size,H_apply_buffer(iproc)%N_det)
       buffer_coef(i,k) = H_apply_buffer(iproc)%coef(i,k)
@@ -96,6 +117,7 @@ subroutine resize_H_apply_buffer(new_size,iproc)
   enddo
   deallocate(H_apply_buffer(iproc)%coef)
   H_apply_buffer(iproc)%coef => buffer_coef
+  endif
 
   do k=1,N_states
     do i=1,min(new_size,H_apply_buffer(iproc)%N_det)
@@ -119,6 +141,7 @@ subroutine copy_H_apply_buffer_to_wf
   END_DOC
   integer(bit_kind), allocatable :: buffer_det(:,:,:)
   double precision, allocatable  :: buffer_coef(:,:)
+  complex*16, allocatable        :: buffer_coef_complex(:,:)
   integer                        :: i,j,k
   integer                        :: N_det_old
 
@@ -128,7 +151,12 @@ subroutine copy_H_apply_buffer_to_wf
   ASSERT (N_int > 0)
   ASSERT (N_det > 0)
 
-  allocate ( buffer_det(N_int,2,N_det), buffer_coef(N_det,N_states) )
+  allocate ( buffer_det(N_int,2,N_det))
+  if (is_complex) then
+    allocate(buffer_coef_complex(N_det,N_states))
+  else
+    allocate(buffer_coef(N_det,N_states))
+  endif
 
   ! Backup determinants
   j=0
@@ -142,6 +170,17 @@ subroutine copy_H_apply_buffer_to_wf
   N_det_old = j
 
   ! Backup coefficients
+  if (is_complex) then
+    do k=1,N_states
+      j=0
+      do i=1,N_det
+        if (pruned(i)) cycle  ! Pruned determinants
+        j += 1
+        buffer_coef_complex(j,k) = psi_coef_complex(i,k)
+      enddo
+      ASSERT ( j == N_det_old )
+    enddo
+  else
   do k=1,N_states
     j=0
     do i=1,N_det
@@ -151,6 +190,7 @@ subroutine copy_H_apply_buffer_to_wf
     enddo
     ASSERT ( j == N_det_old )
   enddo
+  endif
 
   ! Update N_det
   N_det = N_det_old
@@ -170,13 +210,56 @@ subroutine copy_H_apply_buffer_to_wf
     ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num)
     ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num )
   enddo
+  if (is_complex) then
+    do k=1,N_states
+      do i=1,N_det_old
+        psi_coef_complex(i,k) = buffer_coef_complex(i,k)
+      enddo
+    enddo
+  else
   do k=1,N_states
     do i=1,N_det_old
       psi_coef(i,k) = buffer_coef(i,k)
     enddo
   enddo
+  endif
 
   ! Copy new buffers
+  logical :: found_duplicates
+
+  if (is_complex) then
+    !$OMP PARALLEL DEFAULT(SHARED)                                     &
+        !$OMP PRIVATE(j,k,i) FIRSTPRIVATE(N_det_old)                   &
+        !$OMP SHARED(N_int,H_apply_buffer,psi_det,psi_coef_complex,N_states,psi_det_size)
+    j=0
+    !$ j=omp_get_thread_num()
+    do k=0,j-1
+      N_det_old += H_apply_buffer(k)%N_det
+    enddo
+    do i=1,H_apply_buffer(j)%N_det
+      do k=1,N_int
+        psi_det(k,1,i+N_det_old) = H_apply_buffer(j)%det(k,1,i)
+        psi_det(k,2,i+N_det_old) = H_apply_buffer(j)%det(k,2,i)
+      enddo
+      ASSERT (sum(popcnt(psi_det(:,1,i+N_det_old))) == elec_alpha_num)
+      ASSERT (sum(popcnt(psi_det(:,2,i+N_det_old))) == elec_beta_num )
+    enddo
+    do k=1,N_states
+      do i=1,H_apply_buffer(j)%N_det
+        psi_coef_complex(i+N_det_old,k) = H_apply_buffer(j)%coef_complex(i,k)
+      enddo
+    enddo
+    !$OMP BARRIER
+    H_apply_buffer(j)%N_det = 0
+    !$OMP END PARALLEL
+    SOFT_TOUCH N_det psi_det psi_coef_complex
+
+    call remove_duplicates_in_psi_det(found_duplicates)
+    do k=1,N_states
+      call normalize_complex(psi_coef_complex(1,k),N_det)
+    enddo
+    SOFT_TOUCH N_det psi_det psi_coef_complex
+  else
 
   !$OMP PARALLEL DEFAULT(SHARED)                                     &
       !$OMP PRIVATE(j,k,i) FIRSTPRIVATE(N_det_old)                   &
@@ -204,13 +287,13 @@ subroutine copy_H_apply_buffer_to_wf
   !$OMP END PARALLEL
   SOFT_TOUCH N_det psi_det psi_coef
 
-  logical :: found_duplicates
   call remove_duplicates_in_psi_det(found_duplicates)
   do k=1,N_states
     call normalize(psi_coef(1,k),N_det)
   enddo
   SOFT_TOUCH N_det psi_det psi_coef
 
+  endif
 end
 
 subroutine remove_duplicates_in_psi_det(found_duplicates)
@@ -275,6 +358,29 @@ subroutine remove_duplicates_in_psi_det(found_duplicates)
   !$OMP END DO
   !$OMP END PARALLEL
 
+  if (is_complex) then
+    if (found_duplicates) then
+      k=0
+      do i=1,N_det
+        if (.not.duplicate(i)) then
+          k += 1
+          psi_det(:,:,k) = psi_det_sorted_bit (:,:,i)
+          psi_coef_complex(k,:)  = psi_coef_sorted_bit_complex(i,:)
+        else
+          if (sum(cdabs(psi_coef_sorted_bit_complex(i,:))) /= 0.d0 ) then
+            psi_coef_complex(k,:)  = psi_coef_sorted_bit_complex(i,:)
+          endif
+        endif
+      enddo
+      N_det = k
+      psi_det_sorted_bit(:,:,1:N_det) = psi_det(:,:,1:N_det)
+      psi_coef_sorted_bit_complex(1:N_det,:) = psi_coef_complex(1:N_det,:)
+      TOUCH N_det psi_det psi_coef_complex psi_det_sorted_bit psi_coef_sorted_bit_complex c0_weight
+    endif
+    psi_det = psi_det_sorted
+    psi_coef_complex = psi_coef_sorted_complex
+    SOFT_TOUCH psi_det psi_coef_complex psi_det_sorted_bit psi_coef_sorted_bit_complex
+  else
   if (found_duplicates) then
     k=0
     do i=1,N_det
@@ -296,6 +402,7 @@ subroutine remove_duplicates_in_psi_det(found_duplicates)
   psi_det = psi_det_sorted
   psi_coef = psi_coef_sorted
   SOFT_TOUCH psi_det psi_coef psi_det_sorted_bit psi_coef_sorted_bit
+  endif
   deallocate (duplicate,bit_tmp)
 end
 
@@ -329,11 +436,19 @@ subroutine fill_H_apply_buffer_no_selection(n_selected,det_buffer,Nint,iproc)
     ASSERT (sum(popcnt(H_apply_buffer(iproc)%det(:,1,i+H_apply_buffer(iproc)%N_det)) )== elec_alpha_num)
     ASSERT (sum(popcnt(H_apply_buffer(iproc)%det(:,2,i+H_apply_buffer(iproc)%N_det))) == elec_beta_num)
   enddo
+  if (is_complex) then
+    do j=1,N_states
+      do i=1,N_selected
+        H_apply_buffer(iproc)%coef_complex(i+H_apply_buffer(iproc)%N_det,j) = (0.d0,0.d0)
+      enddo
+    enddo
+  else
   do j=1,N_states
     do i=1,N_selected
       H_apply_buffer(iproc)%coef(i+H_apply_buffer(iproc)%N_det,j) = 0.d0
     enddo
   enddo
+  endif
   H_apply_buffer(iproc)%N_det = new_size
   do i=1,H_apply_buffer(iproc)%N_det
     ASSERT (sum(popcnt(H_apply_buffer(iproc)%det(:,1,i)) )== elec_alpha_num)
@@ -341,4 +456,3 @@ subroutine fill_H_apply_buffer_no_selection(n_selected,det_buffer,Nint,iproc)
   enddo
   call omp_unset_lock(H_apply_buffer_lock(1,iproc))
 end
-
diff --git a/src/determinants/h_apply_nozmq.template.f b/src/determinants/h_apply_nozmq.template.f
index bd261bbe..6d769556 100644
--- a/src/determinants/h_apply_nozmq.template.f
+++ b/src/determinants/h_apply_nozmq.template.f
@@ -17,8 +17,11 @@ subroutine $subroutine($params_main)
   double precision, allocatable  :: fock_diag_tmp(:,:)
 
   $initialization
+  if (is_complex) then
+    PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators_complex
+  else
   PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators
-
+  endif
 
   call wall_time(wall_0)
 
diff --git a/src/determinants/occ_pattern.irp.f b/src/determinants/occ_pattern.irp.f
index d4d8c42b..b3bce3f5 100644
--- a/src/determinants/occ_pattern.irp.f
+++ b/src/determinants/occ_pattern.irp.f
@@ -401,12 +401,21 @@ BEGIN_PROVIDER [ double precision, weight_occ_pattern, (N_occ_pattern,N_states)
  END_DOC
  integer :: i,j,k
  weight_occ_pattern = 0.d0
+ if (is_complex) then
+ do i=1,N_det
+  j = det_to_occ_pattern(i)
+  do k=1,N_states
+    weight_occ_pattern(j,k) += cdabs(psi_coef_complex(i,k) * psi_coef_complex(i,k))
+  enddo
+ enddo
+ else
  do i=1,N_det
   j = det_to_occ_pattern(i)
   do k=1,N_states
     weight_occ_pattern(j,k) += psi_coef(i,k) * psi_coef(i,k)
   enddo
  enddo
+ endif
 END_PROVIDER
 
 BEGIN_PROVIDER [ double precision, weight_occ_pattern_average, (N_occ_pattern) ]
@@ -416,12 +425,21 @@ BEGIN_PROVIDER [ double precision, weight_occ_pattern_average, (N_occ_pattern) ]
  END_DOC
  integer :: i,j,k
  weight_occ_pattern_average(:) = 0.d0
+ if (is_complex) then
+  do i=1,N_det
+   j = det_to_occ_pattern(i)
+   do k=1,N_states
+     weight_occ_pattern_average(j) += cdabs(psi_coef_complex(i,k) * psi_coef_complex(i,k)) * state_average_weight(k)
+   enddo
+  enddo
+ else
  do i=1,N_det
   j = det_to_occ_pattern(i)
   do k=1,N_states
     weight_occ_pattern_average(j) += psi_coef(i,k) * psi_coef(i,k) * state_average_weight(k)
   enddo
  enddo
+ endif
 END_PROVIDER
 
  BEGIN_PROVIDER [ integer(bit_kind), psi_occ_pattern_sorted, (N_int,2,N_occ_pattern) ]
@@ -495,7 +513,7 @@ subroutine make_s2_eigenfunction
       N_det_new += 1
       det_buffer(:,:,N_det_new) = d(:,:,j)
       if (N_det_new == bufsze) then
-        call fill_H_apply_buffer_no_selection(bufsze,det_buffer,N_int,ithread)
+        call fill_h_apply_buffer_no_selection(bufsze,det_buffer,N_int,ithread)
         N_det_new = 0
       endif
     enddo
@@ -510,8 +528,12 @@ subroutine make_s2_eigenfunction
   !$OMP END PARALLEL
 
   if (update) then
-    call copy_H_apply_buffer_to_wf
+    call copy_h_apply_buffer_to_wf
+    if (is_complex) then
+      TOUCH N_det psi_coef_complex psi_det psi_occ_pattern N_occ_pattern
+    else
     TOUCH N_det psi_coef psi_det psi_occ_pattern N_occ_pattern
+    endif
   endif
   call write_time(6)
 
diff --git a/src/determinants/psi_cas.irp.f b/src/determinants/psi_cas.irp.f
index 19a1c260..d262622c 100644
--- a/src/determinants/psi_cas.irp.f
+++ b/src/determinants/psi_cas.irp.f
@@ -150,7 +150,20 @@ END_PROVIDER
  double precision :: hij,norm,u_dot_v
   psi_cas_energy = 0.d0
 
-
+  if (is_complex) then
+    complex*16 :: hij_c
+    do k = 1, N_states
+      norm = 0.d0
+      do i = 1, N_det_cas_complex
+        norm += cdabs(psi_cas_coef_complex(i,k) * psi_cas_coef_complex(i,k))
+        do j = 1, N_det_cas_complex
+          !TODO: accum imag parts to ensure that sum is zero?
+          psi_cas_energy(k) += dble(dconjg(psi_cas_coef_complex(i,k)) * psi_cas_coef_complex(j,k) * H_matrix_cas_complex(i,j))
+        enddo
+      enddo
+      psi_cas_energy(k) = psi_cas_energy(k) /norm
+    enddo
+  else
   do k = 1, N_states
    norm = 0.d0
    do i = 1, N_det_cas
@@ -161,6 +174,7 @@ END_PROVIDER
    enddo
    psi_cas_energy(k) = psi_cas_energy(k) /norm
   enddo
+  endif
 
 END_PROVIDER
 
diff --git a/src/determinants/psi_cas_cplx.irp.f b/src/determinants/psi_cas_cplx.irp.f
new file mode 100644
index 00000000..ccc0abef
--- /dev/null
+++ b/src/determinants/psi_cas_cplx.irp.f
@@ -0,0 +1,145 @@
+use bitmasks
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_cas_complex, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ complex*16, psi_cas_coef_complex,  (psi_det_size,n_states) ]
+&BEGIN_PROVIDER [ integer, idx_cas_complex, (psi_det_size) ]
+&BEGIN_PROVIDER [ integer, N_det_cas_complex ]
+  implicit none
+  BEGIN_DOC
+  ! |CAS| wave function, defined from the application of the |CAS| bitmask on the
+  ! determinants. idx_cas gives the indice of the |CAS| determinant in psi_det.
+  END_DOC
+  integer                        :: i, k, l
+  logical                        :: good
+  n_det_cas_complex = 0
+  do i=1,N_det
+    do l = 1, N_states
+     psi_cas_coef_complex(i,l) = (0.d0,0.d0)
+    enddo
+    good = .True.
+    do k=1,N_int
+      good = good .and. (                                          &
+          iand(not(act_bitmask(k,1)), psi_det(k,1,i)) ==         &
+          iand(not(act_bitmask(k,1)), hf_bitmask(k,1)) ) .and. (  &
+          iand(not(act_bitmask(k,2)), psi_det(k,2,i)) ==         &
+          iand(not(act_bitmask(k,2)), hf_bitmask(k,2)) )
+    enddo
+    if (good) then
+      exit
+    endif
+    if (good) then
+      n_det_cas_complex = n_det_cas_complex+1
+      do k=1,N_int
+        psi_cas_complex(k,1,n_det_cas_complex) = psi_det(k,1,i)
+        psi_cas_complex(k,2,n_det_cas_complex) = psi_det(k,2,i)
+      enddo
+      idx_cas(n_det_cas_complex) = i
+      do k=1,N_states
+        psi_cas_coef_complex(n_det_cas_complex,k) = psi_coef_complex(i,k)
+      enddo
+    endif
+  enddo
+  call write_int(6,n_det_cas_complex, 'Number of determinants in the CAS')
+
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_cas_sorted_bit_complex, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ complex*16, psi_cas_coef_sorted_bit_complex, (psi_det_size,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! |CAS| determinants sorted to accelerate the search of a random determinant in the wave
+ ! function.
+ END_DOC
+ call sort_dets_by_det_search_key_complex(n_det_cas_complex, psi_cas_complex, psi_cas_coef_complex, size(psi_cas_coef_complex,1), &
+     psi_cas_sorted_bit_complex, psi_cas_coef_sorted_bit_complex, N_states)
+
+END_PROVIDER
+
+
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_non_cas_complex,  (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ complex*16, psi_non_cas_coef_complex, (psi_det_size,n_states) ]
+&BEGIN_PROVIDER [ integer, idx_non_cas_complex,  (psi_det_size) ]
+&BEGIN_PROVIDER [ integer, N_det_non_cas_complex ]
+ implicit none
+ BEGIN_DOC
+  ! Set of determinants which are not part of the |CAS|, defined from the application
+  ! of the |CAS| bitmask on the determinants.
+  ! idx_non_cas gives the indice of the determinant in psi_det.
+ END_DOC
+ integer                        :: i_non_cas,j,k
+ integer                        :: degree
+ logical                        :: in_cas
+ i_non_cas =0
+ do k=1,N_det
+   in_cas = .False.
+   do j=1,N_det_cas_complex
+     call get_excitation_degree(psi_cas_complex(1,1,j), psi_det(1,1,k), degree, N_int)
+     if (degree == 0) then
+       in_cas = .True.
+       exit
+     endif
+   enddo
+   if (.not.in_cas) then
+     double precision :: hij
+     i_non_cas += 1
+     do j=1,N_int
+       psi_non_cas_complex(j,1,i_non_cas) = psi_det(j,1,k)
+       psi_non_cas_complex(j,2,i_non_cas) = psi_det(j,2,k)
+     enddo
+     do j=1,N_states
+       psi_non_cas_coef_complex(i_non_cas,j) = psi_coef_complex(k,j)
+     enddo
+     idx_non_cas_complex(i_non_cas) = k
+   endif
+ enddo
+ N_det_non_cas_complex = i_non_cas
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_non_cas_sorted_bit_complex, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ complex*16, psi_non_cas_coef_sorted_bit_complex, (psi_det_size,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! |CAS| determinants sorted to accelerate the search of a random determinant in the wave
+ ! function.
+ END_DOC
+ !TODO: should this be n_det_non_cas_complex?
+ call sort_dets_by_det_search_key_complex(N_det_cas_complex, psi_non_cas_complex, psi_non_cas_coef_complex, size(psi_non_cas_coef_complex,1), &
+     psi_non_cas_sorted_bit_complex, psi_non_cas_coef_sorted_bit_complex, N_states)
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [complex*16, H_matrix_cas_complex, (N_det_cas_complex,N_det_cas_complex)]
+ implicit none
+ integer :: i,j
+ complex*16 :: hij
+  do i = 1, N_det_cas_complex
+   do j = 1, N_det_cas_complex
+    call i_h_j_complex(psi_cas_complex(1,1,i),psi_cas_complex(1,1,j),N_int,hij)
+    H_matrix_cas_complex(i,j) = hij
+   enddo
+  enddo
+END_PROVIDER
+
+ BEGIN_PROVIDER [complex*16, psi_coef_cas_diagonalized_complex, (N_det_cas_complex,N_states)]
+&BEGIN_PROVIDER [double precision, psi_cas_energy_diagonalized_complex, (N_states)]
+ implicit none
+ integer :: i,j
+  double precision, allocatable  :: eigenvalues(:)
+  complex*16, allocatable  :: eigenvectors(:,:)
+  allocate (eigenvectors(size(H_matrix_cas,1),N_det_cas))
+  allocate (eigenvalues(N_det_cas))
+  call lapack_diag_complex(eigenvalues,eigenvectors,                       &
+      H_matrix_cas_complex,size(H_matrix_cas_complex,1),N_det_cas_complex)
+  do i = 1, N_states
+   psi_cas_energy_diagonalized_complex(i) = eigenvalues(i)
+   do j = 1, N_det_cas_complex
+    psi_coef_cas_diagonalized_complex(j,i) = eigenvectors(j,i)
+   enddo
+  enddo
+
+
+ END_PROVIDER
+
diff --git a/src/determinants/psi_energy_mono_elec.irp.f b/src/determinants/psi_energy_mono_elec.irp.f
index 74e69160..1f6d69d5 100644
--- a/src/determinants/psi_energy_mono_elec.irp.f
+++ b/src/determinants/psi_energy_mono_elec.irp.f
@@ -9,7 +9,26 @@
 ! computed using the :c:data:`one_e_dm_mo_alpha` +
 ! :c:data:`one_e_dm_mo_beta` and :c:data:`mo_one_e_integrals`
   END_DOC
+  double precision :: accu
   psi_energy_h_core = 0.d0
+  if (is_complex) then
+    do i = 1, N_states
+      do j = 1, mo_num
+        do k = 1, mo_num
+          psi_energy_h_core(i) += dble(mo_one_e_integrals_complex(k,j) * &
+                      (one_e_dm_mo_alpha_complex(j,k,i) + one_e_dm_mo_beta_complex(j,k,i)))
+        enddo
+      enddo
+    enddo
+    do i = 1, N_states
+      accu = 0.d0
+      do j = 1, mo_num
+        accu += dble(one_e_dm_mo_alpha_complex(j,j,i) + one_e_dm_mo_beta_complex(j,j,i))
+      enddo
+      accu = (elec_alpha_num + elec_beta_num ) / accu
+      psi_energy_h_core(i) = psi_energy_h_core(i) * accu
+    enddo
+  else
   do i = 1, N_states
    do j = 1, mo_num
     do k = 1, mo_num
@@ -17,7 +36,6 @@
     enddo
    enddo
   enddo
- double precision :: accu
  do i = 1, N_states
   accu = 0.d0
   do j = 1, mo_num
@@ -26,4 +44,5 @@
   accu = (elec_alpha_num + elec_beta_num ) / accu
   psi_energy_h_core(i) = psi_energy_h_core(i) * accu
  enddo
+ endif
 END_PROVIDER
diff --git a/src/determinants/ref_bitmask.irp.f b/src/determinants/ref_bitmask.irp.f
index 4e029ceb..d53be414 100644
--- a/src/determinants/ref_bitmask.irp.f
+++ b/src/determinants/ref_bitmask.irp.f
@@ -6,6 +6,7 @@
 &BEGIN_PROVIDER [ double precision, ref_bitmask_energy_ab ]
 &BEGIN_PROVIDER [ double precision, ref_bitmask_energy_bb ]
 &BEGIN_PROVIDER [ double precision, ref_bitmask_energy_aa ]
+&BEGIN_PROVIDER [ double precision, ref_bitmask_energy_with_nucl_rep ]
 
   use bitmasks
   implicit none
@@ -27,15 +28,15 @@
   ref_bitmask_two_e_energy = 0.d0
 
   do i = 1, elec_beta_num
-    ref_bitmask_energy += mo_one_e_integrals(occ(i,1),occ(i,1)) + mo_one_e_integrals(occ(i,2),occ(i,2))
-    ref_bitmask_kinetic_energy += mo_kinetic_integrals(occ(i,1),occ(i,1)) + mo_kinetic_integrals(occ(i,2),occ(i,2))
-    ref_bitmask_n_e_energy += mo_integrals_n_e(occ(i,1),occ(i,1)) + mo_integrals_n_e(occ(i,2),occ(i,2))
+    ref_bitmask_energy += mo_one_e_integrals_diag(occ(i,1)) + mo_one_e_integrals_diag(occ(i,2))
+    ref_bitmask_kinetic_energy += mo_kinetic_integrals_diag(occ(i,1)) + mo_kinetic_integrals_diag(occ(i,2))
+    ref_bitmask_n_e_energy += mo_integrals_n_e_diag(occ(i,1)) + mo_integrals_n_e_diag(occ(i,2))
   enddo
 
   do i = elec_beta_num+1,elec_alpha_num
-    ref_bitmask_energy += mo_one_e_integrals(occ(i,1),occ(i,1))
-    ref_bitmask_kinetic_energy += mo_kinetic_integrals(occ(i,1),occ(i,1))
-    ref_bitmask_n_e_energy += mo_integrals_n_e(occ(i,1),occ(i,1))
+    ref_bitmask_energy += mo_one_e_integrals_diag(occ(i,1))
+    ref_bitmask_kinetic_energy += mo_kinetic_integrals_diag(occ(i,1))
+    ref_bitmask_n_e_energy += mo_integrals_n_e_diag(occ(i,1))
   enddo
 
   do j= 1, elec_alpha_num
@@ -80,7 +81,7 @@
  enddo
  ref_bitmask_energy_bb = ref_bitmask_energy_bb * 0.5d0
 
-
+ ref_bitmask_energy_with_nucl_rep = ref_bitmask_energy + nuclear_repulsion 
 
 END_PROVIDER
 
diff --git a/src/determinants/s2.irp.f b/src/determinants/s2.irp.f
index d73b2dbf..031334a5 100644
--- a/src/determinants/s2.irp.f
+++ b/src/determinants/s2.irp.f
@@ -7,6 +7,7 @@ double precision function diag_S_mat_elem(key_i,Nint)
   integer(bit_kind), intent(in)  :: key_i(Nint,2)
   BEGIN_DOC
 ! Returns <i|S^2|i>
+! returns <i|S_+ S_-|i> = <i|S^2|i> - S_z*(S_z-1)
   END_DOC
   integer                        :: nup, ntot, i
   integer(bit_kind)              :: xorvec(N_int_max), upvec(N_int_max)
@@ -44,7 +45,7 @@ subroutine get_s2(key_i,key_j,Nint,s2)
   implicit none
   use bitmasks
   BEGIN_DOC
-  ! Returns $\langle S^2 \rangle - S_z^2 S_z$
+  ! Returns $\langle S^2 \rangle - (S_z^2-S_z)$
   END_DOC
   integer, intent(in)            :: Nint
   integer(bit_kind), intent(in)  :: key_i(Nint,2)
@@ -109,7 +110,11 @@ BEGIN_PROVIDER [ double precision, s2_values, (N_states) ]
 ! array of the averaged values of the S^2 operator on the various states
  END_DOC
  integer :: i
+ if (is_complex) then
+    call u_0_S2_u_0_complex(s2_values,psi_coef_complex,n_det,psi_det,N_int,N_states,psi_det_size)
+ else
  call u_0_S2_u_0(s2_values,psi_coef,n_det,psi_det,N_int,N_states,psi_det_size)
+ endif
 
 END_PROVIDER
 
diff --git a/src/determinants/s2_cplx.irp.f b/src/determinants/s2_cplx.irp.f
new file mode 100644
index 00000000..4db82f14
--- /dev/null
+++ b/src/determinants/s2_cplx.irp.f
@@ -0,0 +1,288 @@
+subroutine u_0_S2_u_0_complex(e_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes e_0 = <u_0|S2|u_0>/<u_0|u_0>
+  !
+  ! n : number of determinants
+  !
+  END_DOC
+  integer, intent(in)            :: n,Nint, N_st, sze_8
+  double precision, intent(out)  :: e_0(N_st)
+  complex*16, intent(in)   :: u_0(sze_8,N_st)
+  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
+
+  complex*16, allocatable  :: v_0(:,:)
+  double precision               :: u_dot_u_complex
+  complex*16                     :: u_dot_v_complex
+  integer :: i,j
+  allocate (v_0(sze_8,N_st))
+
+  call s2_u_0_nstates_complex(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
+  do i=1,N_st
+    e_0(i) = dble(u_dot_v_complex(u_0(1,i),v_0(1,i),n))/u_dot_u_complex(u_0(1,i),n) + S_z2_Sz
+  enddo
+end
+
+
+
+subroutine S2_u_0_complex(v_0,u_0,n,keys_tmp,Nint)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes v_0 = S^2|u_0>
+  !
+  ! n : number of determinants
+  !
+  END_DOC
+  integer, intent(in)            :: n,Nint
+  complex*16, intent(out)  :: v_0(n)
+  complex*16, intent(in)   :: u_0(n)
+  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
+  call s2_u_0_nstates_complex(v_0,u_0,n,keys_tmp,Nint,1,n)
+end
+
+subroutine S2_u_0_nstates_complex(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes v_0  = S^2|u_0>
+  !
+  ! n : number of determinants
+  !
+  END_DOC
+  integer, intent(in)            :: N_st,n,Nint, sze_8
+  complex*16, intent(out)  :: v_0(sze_8,N_st)
+  complex*16, intent(in)   :: u_0(sze_8,N_st)
+  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
+  double precision               :: s2_tmp
+  complex*16, allocatable  :: vt(:,:)
+  integer                        :: i,j,k,l, jj,ii
+  integer                        :: i0, j0
+
+  integer, allocatable           :: shortcut(:,:), sort_idx(:,:)
+  integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
+  integer(bit_kind)              :: sorted_i(Nint)
+
+  integer                        :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
+
+
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  ASSERT (n>0)
+  PROVIDE ref_bitmask_energy
+
+  allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
+  v_0 = (0.d0,0.d0)
+
+  call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
+  call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
+
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP PRIVATE(i,s2_tmp,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
+      !$OMP SHARED(n,u_0,keys_tmp,Nint,v_0,sorted,shortcut,sort_idx,version,N_st,sze_8)
+  allocate(vt(sze_8,N_st))
+  vt = (0.d0,0.d0)
+
+  do sh=1,shortcut(0,1)
+    !$OMP DO SCHEDULE(static,1)
+    do sh2=sh,shortcut(0,1)
+      exa = 0
+      do ni=1,Nint
+        exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
+      end do
+      if(exa > 2) then
+        cycle
+      end if
+
+      do i=shortcut(sh,1),shortcut(sh+1,1)-1
+        org_i = sort_idx(i,1)
+        if(sh==sh2) then
+          endi = i-1
+        else
+          endi = shortcut(sh2+1,1)-1
+        end if
+        do ni=1,Nint
+          sorted_i(ni) = sorted(ni,i,1)
+        enddo
+
+        do j=shortcut(sh2,1),endi
+          org_j = sort_idx(j,1)
+          ext = exa
+          do ni=1,Nint
+            ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
+          end do
+          if(ext <= 4) then
+            call get_s2(keys_tmp(1,1,org_i),keys_tmp(1,1,org_j),Nint,s2_tmp)
+            do istate=1,N_st
+              vt (org_i,istate) = vt (org_i,istate) + s2_tmp*u_0(org_j,istate)
+              vt (org_j,istate) = vt (org_j,istate) + s2_tmp*u_0(org_i,istate)
+            enddo
+          endif
+        enddo
+      enddo
+    enddo
+    !$OMP END DO NOWAIT
+  enddo
+
+  do sh=1,shortcut(0,2)
+    !$OMP DO
+    do i=shortcut(sh,2),shortcut(sh+1,2)-1
+      org_i = sort_idx(i,2)
+      do j=shortcut(sh,2),i-1
+        org_j = sort_idx(j,2)
+        ext = 0
+        do ni=1,Nint
+          ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
+        end do
+        if(ext == 4) then
+          call get_s2(keys_tmp(1,1,org_i),keys_tmp(1,1,org_j),Nint,s2_tmp)
+          do istate=1,N_st
+            vt (org_i,istate) = vt (org_i,istate) + s2_tmp*u_0(org_j,istate)
+            vt (org_j,istate) = vt (org_j,istate) + s2_tmp*u_0(org_i,istate)
+          enddo
+        end if
+      end do
+    end do
+    !$OMP END DO NOWAIT
+  enddo
+  !$OMP BARRIER
+
+  do istate=1,N_st
+    do i=n,1,-1
+      !$OMP ATOMIC
+      v_0(i,istate) = v_0(i,istate) + vt(i,istate)
+    enddo
+  enddo
+
+  deallocate(vt)
+  !$OMP END PARALLEL
+
+  do i=1,n
+    call get_s2(keys_tmp(1,1,i),keys_tmp(1,1,i),Nint,s2_tmp)
+    do istate=1,N_st
+      v_0(i,istate) += s2_tmp * u_0(i,istate)
+    enddo
+  enddo
+
+  deallocate (shortcut, sort_idx, sorted, version)
+end
+
+
+
+
+
+
+
+subroutine get_uJ_s2_uI_complex(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nstates)
+  !todo: modify/implement for complex
+  print*,irp_here,' not implemented for complex'
+  stop -1
+!  implicit none
+!  use bitmasks
+!  integer, intent(in)            :: n,nmax_coefs,nmax_keys,nstates
+!  integer(bit_kind), intent(in)  :: psi_keys_tmp(N_int,2,nmax_keys)
+!  complex*16, intent(in)   :: psi_coefs_tmp(nmax_coefs,nstates)
+!  complex*16, intent(out)  :: s2(nstates,nstates)
+!  double precision               :: s2_tmp
+!  complex*16                     :: accu
+!  integer                        :: i,j,l,jj,ll,kk
+!  integer, allocatable           :: idx(:)
+!  BEGIN_DOC
+!  ! returns the matrix elements of S^2 "s2(i,j)" between the "nstates" states
+!  ! psi_coefs_tmp(:,i) and psi_coefs_tmp(:,j)
+!  END_DOC
+!  s2 = (0.d0,0.d0)
+!  do ll = 1, nstates
+!    do jj = 1, nstates
+!      accu = (0.d0,0.d0)
+!      !$OMP PARALLEL DEFAULT(NONE)                                   &
+!          !$OMP PRIVATE (i,j,kk,idx,s2_tmp)                          &
+!          !$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)&
+!          !$OMP REDUCTION(+:accu)
+!      allocate(idx(0:n))
+!      !$OMP DO SCHEDULE(dynamic)
+!      do i = n,1,-1   ! Better OMP scheduling
+!        call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp)
+!        accu += dconjg(psi_coefs_tmp(i,ll)) * s2_tmp * psi_coefs_tmp(i,jj)
+!        call filter_connected(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx)
+!        do kk=1,idx(0)
+!          j = idx(kk)
+!          call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),N_int,s2_tmp)
+!          accu += dconjg(psi_coefs_tmp(i,ll)) * s2_tmp * psi_coefs_tmp(j,jj) + psi_coefs_tmp(i,jj) * s2_tmp * psi_coefs_tmp(j,ll)
+!        enddo
+!      enddo
+!      !$OMP END DO
+!      deallocate(idx)
+!      !$OMP END PARALLEL
+!      s2(ll,jj) += accu
+!    enddo
+!  enddo
+!  do i = 1, nstates
+!    do j =i+1,nstates
+!      accu = 0.5d0 * (s2(i,j) + s2(j,i))
+!      s2(i,j) = accu
+!      s2(j,i) = accu
+!    enddo
+!  enddo
+end
+
+
+subroutine i_S2_psi_minilist_complex(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max,Nstate,i_S2_psi_array)
+  !todo: modify/implement for complex
+  print*,irp_here,' not implemented for complex'
+  stop -1
+!  use bitmasks
+!  implicit none
+!  integer, intent(in)            :: Nint, Ndet,Ndet_max,Nstate,idx_key(Ndet), N_minilist
+!  integer(bit_kind), intent(in)  :: keys(Nint,2,Ndet)
+!  integer(bit_kind), intent(in)  :: key(Nint,2)
+!  double precision, intent(in)   :: coef(Ndet_max,Nstate)
+!  double precision, intent(out)  :: i_S2_psi_array(Nstate)
+!
+!  integer                        :: i, ii,j, i_in_key, i_in_coef
+!  double precision               :: phase
+!  integer                        :: exc(0:2,2,2)
+!  double precision               :: s2ij
+!  integer                        :: idx(0:Ndet)
+!  BEGIN_DOC
+!! Computes $\langle i|S^2|\Psi \rangle = \sum_J c_J \langle i|S^2|J \rangle$.
+!!
+!! Uses filter_connected_i_H_psi0 to get all the $|J\rangle$ to which $|i\rangle$
+!! is connected. The $|J\rangle$ are searched in short pre-computed lists.
+!  END_DOC
+!
+!  ASSERT (Nint > 0)
+!  ASSERT (N_int == Nint)
+!  ASSERT (Nstate > 0)
+!  ASSERT (Ndet > 0)
+!  ASSERT (Ndet_max >= Ndet)
+!  i_S2_psi_array = 0.d0
+!
+!  call filter_connected_i_H_psi0(keys,key,Nint,N_minilist,idx)
+!  if (Nstate == 1) then
+!
+!    do ii=1,idx(0)
+!      i_in_key = idx(ii)
+!      i_in_coef = idx_key(idx(ii))
+!      !DIR$ FORCEINLINE
+!      call get_s2(keys(1,1,i_in_key),key,Nint,s2ij)
+!      ! TODO : Cache misses
+!      i_S2_psi_array(1) = i_S2_psi_array(1) + coef(i_in_coef,1)*s2ij
+!    enddo
+!
+!  else
+!
+!    do ii=1,idx(0)
+!      i_in_key = idx(ii)
+!      i_in_coef = idx_key(idx(ii))
+!      !DIR$ FORCEINLINE
+!      call get_s2(keys(1,1,i_in_key),key,Nint,s2ij)
+!      do j = 1, Nstate
+!        i_S2_psi_array(j) = i_S2_psi_array(j) + coef(i_in_coef,j)*s2ij
+!      enddo
+!    enddo
+!
+!  endif
+!
+end
diff --git a/src/determinants/single_excitation_two_e.irp.f b/src/determinants/single_excitation_two_e.irp.f
index f150f531..9fb03619 100644
--- a/src/determinants/single_excitation_two_e.irp.f
+++ b/src/determinants/single_excitation_two_e.irp.f
@@ -133,4 +133,138 @@ BEGIN_PROVIDER [double precision, fock_wee_closed_shell, (mo_num, mo_num) ]
 
 END_PROVIDER
 
+subroutine single_excitation_wee_complex(det_1,det_2,h,p,spin,phase,hij)
+ use bitmasks
+ implicit none
+ integer,intent(in) :: h,p,spin
+ double precision, intent(in)  :: phase
+ integer(bit_kind), intent(in) :: det_1(N_int,2), det_2(N_int,2)
+ complex*16, intent(out) :: hij
+ integer(bit_kind) :: differences(N_int,2)
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: partcl(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_partcl(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
+ integer :: i0,i
+ do i = 1, N_int
+  differences(i,1) = xor(det_1(i,1),ref_closed_shell_bitmask(i,1))
+  differences(i,2) = xor(det_1(i,2),ref_closed_shell_bitmask(i,2))
+  hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1))
+  hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2))
+  partcl(i,1) = iand(differences(i,1),det_1(i,1))
+  partcl(i,2) = iand(differences(i,2),det_1(i,2))
+ enddo
+ call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int)
+ call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int)
+ hij = fock_wee_closed_shell_complex(h,p)
+ ! holes :: direct terms
+ do i0 = 1, n_occ_ab_hole(1)
+  i = occ_hole(i0,1)
+  hij -= big_array_coulomb_integrals_complex(i,h,p) ! get_mo_two_e_integral_schwartz(h,i,p,i,mo_integrals_map)
+ enddo
+ do i0 = 1, n_occ_ab_hole(2)
+  i = occ_hole(i0,2)
+  hij -= big_array_coulomb_integrals_complex(i,h,p) !get_mo_two_e_integral_schwartz(h,i,p,i,mo_integrals_map)
+ enddo
+
+ ! holes :: exchange terms
+ do i0 = 1, n_occ_ab_hole(spin)
+  i = occ_hole(i0,spin)
+  hij += big_array_exchange_integrals_complex(i,h,p) ! get_mo_two_e_integral_schwartz(h,i,i,p,mo_integrals_map)
+ enddo
+
+ ! particles :: direct terms
+ do i0 = 1, n_occ_ab_partcl(1)
+  i = occ_partcl(i0,1)
+  hij += big_array_coulomb_integrals_complex(i,h,p)!get_mo_two_e_integral_schwartz(h,i,p,i,mo_integrals_map)
+ enddo
+ do i0 = 1, n_occ_ab_partcl(2)
+  i = occ_partcl(i0,2)
+  hij += big_array_coulomb_integrals_complex(i,h,p) !get_mo_two_e_integral_schwartz(h,i,p,i,mo_integrals_map)
+ enddo
+
+ ! particles :: exchange terms
+ do i0 = 1, n_occ_ab_partcl(spin)
+  i = occ_partcl(i0,spin)
+  hij -= big_array_exchange_integrals_complex(i,h,p)!get_mo_two_e_integral_schwartz(h,i,i,p,mo_integrals_map)
+ enddo
+ hij = hij * phase
+
+end
+
+
+BEGIN_PROVIDER [complex*16, fock_wee_closed_shell_complex, (mo_num, mo_num) ]
+ implicit none
+ integer :: i0,j0,i,j,k0,k
+ integer :: n_occ_ab(2)
+ integer :: occ(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_virt(2)
+ integer :: occ_virt(N_int*bit_kind_size,2)
+ integer(bit_kind) :: key_test(N_int)
+ integer(bit_kind) :: key_virt(N_int,2)
+ complex*16 :: accu
+
+ call bitstring_to_list_ab(ref_closed_shell_bitmask, occ, n_occ_ab, N_int)
+ do i = 1, N_int
+  key_virt(i,1) = full_ijkl_bitmask(i)
+  key_virt(i,2) = full_ijkl_bitmask(i)
+  key_virt(i,1) = xor(key_virt(i,1),ref_closed_shell_bitmask(i,1))
+  key_virt(i,2) = xor(key_virt(i,2),ref_closed_shell_bitmask(i,2))
+ enddo
+ complex*16 :: array_coulomb(mo_num),array_exchange(mo_num)
+ call bitstring_to_list_ab(key_virt, occ_virt, n_occ_ab_virt, N_int)
+ ! docc ---> virt single excitations
+ do i0 = 1,  n_occ_ab(1)
+  i=occ(i0,1)
+  do j0 = 1, n_occ_ab_virt(1)
+   j = occ_virt(j0,1)
+   call get_mo_two_e_integrals_coulomb_ii_complex(i,j,mo_num,array_coulomb,mo_integrals_map,mo_integrals_map_2)
+   call get_mo_two_e_integrals_exch_ii_complex(i,j,mo_num,array_exchange,mo_integrals_map,mo_integrals_map_2)
+   accu = (0.d0,0.d0)
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * array_coulomb(k) - array_exchange(k)
+   enddo
+   fock_wee_closed_shell_complex(i,j) = accu
+   fock_wee_closed_shell_complex(j,i) = dconjg(accu)
+  enddo
+ enddo
+
+ ! virt ---> virt single excitations
+ do i0 = 1,  n_occ_ab_virt(1)
+  i=occ_virt(i0,1)
+  do j0 = 1, n_occ_ab_virt(1)
+   j = occ_virt(j0,1)
+   call get_mo_two_e_integrals_coulomb_ii_complex(i,j,mo_num,array_coulomb,mo_integrals_map,mo_integrals_map_2)
+   call get_mo_two_e_integrals_exch_ii_complex(i,j,mo_num,array_exchange,mo_integrals_map,mo_integrals_map_2)
+   accu = (0.d0,0.d0)
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * array_coulomb(k) - array_exchange(k)
+   enddo
+   fock_wee_closed_shell_complex(i,j) = accu
+   fock_wee_closed_shell_complex(j,i) = dconjg(accu)
+  enddo
+ enddo
+
+ ! docc ---> docc single excitations
+ do i0 = 1,  n_occ_ab(1)
+  i=occ(i0,1)
+  do j0 = 1, n_occ_ab(1)
+   j = occ(j0,1)
+   call get_mo_two_e_integrals_coulomb_ii_complex(i,j,mo_num,array_coulomb,mo_integrals_map,mo_integrals_map_2)
+   call get_mo_two_e_integrals_exch_ii_complex(i,j,mo_num,array_exchange,mo_integrals_map,mo_integrals_map_2)
+   accu = (0.d0,0.d0)
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * array_coulomb(k) - array_exchange(k)
+   enddo
+   fock_wee_closed_shell_complex(i,j) = accu
+   fock_wee_closed_shell_complex(j,i) = dconjg(accu)
+  enddo
+ enddo
+
+END_PROVIDER
+
 
diff --git a/src/determinants/single_excitations.irp.f b/src/determinants/single_excitations.irp.f
index ccfeaa2e..7199ef35 100644
--- a/src/determinants/single_excitations.irp.f
+++ b/src/determinants/single_excitations.irp.f
@@ -1,7 +1,7 @@
   use bitmasks
 BEGIN_PROVIDER [integer(bit_kind), ref_closed_shell_bitmask, (N_int,2)]
  implicit none
- integer :: i,i0
+ integer :: i,i0,k
  integer :: n_occ_ab(2)
  integer :: occ(N_int*bit_kind_size,2)
  call bitstring_to_list_ab(ref_bitmask, occ, n_occ_ab, N_int)
@@ -10,16 +10,24 @@ BEGIN_PROVIDER [integer(bit_kind), ref_closed_shell_bitmask, (N_int,2)]
   ref_closed_shell_bitmask(i,1) = ref_bitmask(i,1)
   ref_closed_shell_bitmask(i,2) = ref_bitmask(i,2)
  enddo
- do i0 = elec_beta_num+1, elec_alpha_num
-   i=occ(i0,1)
-   call clear_bit_to_integer(i,ref_closed_shell_bitmask(1,1),N_int)
- enddo
-
-
+ if (is_complex) then
+   !todo: check this
+   do k=1,kpt_num
+     call bitstring_to_list_ab(ref_bitmask_kpts(1,1,k),occ,n_occ_ab,N_int)
+     do i0=elec_beta_num_kpts(k)+1,elec_alpha_num_kpts(k)
+       i=occ(i0,1)
+       call clear_bit_to_integer(i,ref_closed_shell_bitmask(1,1),N_int)
+     enddo
+   enddo
+ else
+   do i0 = elec_beta_num+1, elec_alpha_num
+     i=occ(i0,1)
+     call clear_bit_to_integer(i,ref_closed_shell_bitmask(1,1),N_int)
+   enddo
+ endif
 END_PROVIDER
 
-
-BEGIN_PROVIDER [double precision, fock_operator_closed_shell_ref_bitmask, (mo_num, mo_num) ]
+BEGIN_PROVIDER [double precision, fock_op_cshell_ref_bitmask, (mo_num, mo_num) ]
  implicit none
  integer :: i0,j0,i,j,k0,k
  integer :: n_occ_ab(2)
@@ -52,8 +60,8 @@ BEGIN_PROVIDER [double precision, fock_operator_closed_shell_ref_bitmask, (mo_nu
     k = occ(k0,1)
     accu += 2.d0 * array_coulomb(k) - array_exchange(k)
    enddo
-   fock_operator_closed_shell_ref_bitmask(i,j) = accu + mo_one_e_integrals(i,j)
-   fock_operator_closed_shell_ref_bitmask(j,i) = accu + mo_one_e_integrals(i,j)
+   fock_op_cshell_ref_bitmask(i,j) = accu + mo_one_e_integrals(i,j)
+   fock_op_cshell_ref_bitmask(j,i) = accu + mo_one_e_integrals(i,j)
   enddo
  enddo
 
@@ -69,8 +77,8 @@ BEGIN_PROVIDER [double precision, fock_operator_closed_shell_ref_bitmask, (mo_nu
     k = occ(k0,1)
     accu += 2.d0 * array_coulomb(k) - array_exchange(k)
    enddo
-   fock_operator_closed_shell_ref_bitmask(i,j) = accu+ mo_one_e_integrals(i,j)
-   fock_operator_closed_shell_ref_bitmask(j,i) = accu+ mo_one_e_integrals(i,j)
+   fock_op_cshell_ref_bitmask(i,j) = accu+ mo_one_e_integrals(i,j)
+   fock_op_cshell_ref_bitmask(j,i) = accu+ mo_one_e_integrals(i,j)
   enddo
  enddo
 
@@ -86,8 +94,8 @@ BEGIN_PROVIDER [double precision, fock_operator_closed_shell_ref_bitmask, (mo_nu
     k = occ(k0,1)
     accu += 2.d0 * array_coulomb(k) - array_exchange(k)
    enddo
-   fock_operator_closed_shell_ref_bitmask(i,j) = accu+ mo_one_e_integrals(i,j)
-   fock_operator_closed_shell_ref_bitmask(j,i) = accu+ mo_one_e_integrals(i,j)
+   fock_op_cshell_ref_bitmask(i,j) = accu+ mo_one_e_integrals(i,j)
+   fock_op_cshell_ref_bitmask(j,i) = accu+ mo_one_e_integrals(i,j)
   enddo
  enddo
  deallocate(array_coulomb,array_exchange)
@@ -123,7 +131,7 @@ subroutine get_single_excitation_from_fock(det_1,det_2,h,p,spin,phase,hij)
  enddo
  call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int)
  call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int)
- hij = fock_operator_closed_shell_ref_bitmask(h,p)
+ hij = fock_op_cshell_ref_bitmask(h,p)
  ! holes :: direct terms
  do i0 = 1, n_occ_ab_hole(1)
   i = occ_hole(i0,1)
@@ -159,3 +167,350 @@ subroutine get_single_excitation_from_fock(det_1,det_2,h,p,spin,phase,hij)
 
 end
 
+!============================================!
+!                                            !
+!                 complex                    !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [complex*16, fock_op_cshell_ref_bitmask_cplx, (mo_num, mo_num) ]
+ implicit none
+ integer :: i0,j0,i,j,k0,k
+ integer :: n_occ_ab(2)
+ integer :: occ(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_virt(2)
+ integer :: occ_virt(N_int*bit_kind_size,2)
+ integer(bit_kind) :: key_test(N_int)
+ integer(bit_kind) :: key_virt(N_int,2)
+ complex*16 :: accu
+
+ call bitstring_to_list_ab(ref_closed_shell_bitmask, occ, n_occ_ab, N_int)
+ do i = 1, N_int
+  key_virt(i,1) = full_ijkl_bitmask(i)
+  key_virt(i,2) = full_ijkl_bitmask(i)
+  key_virt(i,1) = xor(key_virt(i,1),ref_closed_shell_bitmask(i,1))
+  key_virt(i,2) = xor(key_virt(i,2),ref_closed_shell_bitmask(i,2))
+ enddo
+ complex*16, allocatable :: array_coulomb(:),array_exchange(:)
+ allocate (array_coulomb(mo_num),array_exchange(mo_num))
+ call bitstring_to_list_ab(key_virt, occ_virt, n_occ_ab_virt, N_int)
+ ! docc ---> virt single excitations
+ do i0 = 1,  n_occ_ab(1)
+  i=occ(i0,1)
+  do j0 = 1, n_occ_ab_virt(1)
+   j = occ_virt(j0,1)
+   ! <ia|ja>
+   call get_mo_two_e_integrals_coulomb_ii_complex(i,j,mo_num,array_coulomb,mo_integrals_map,mo_integrals_map_2)
+   ! <ia|aj>
+   call get_mo_two_e_integrals_exch_ii_complex(i,j,mo_num,array_exchange,mo_integrals_map,mo_integrals_map_2)
+   accu = (0.d0,0.d0)
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * array_coulomb(k) - array_exchange(k)
+   enddo
+   fock_op_cshell_ref_bitmask_cplx(i,j) = accu + mo_one_e_integrals_complex(i,j)
+   !fock_op_cshell_ref_bitmask_cplx(j,i) = dconjg(accu) + mo_one_e_integrals_complex(j,i)
+   fock_op_cshell_ref_bitmask_cplx(j,i) = dconjg(fock_op_cshell_ref_bitmask_cplx(i,j))
+  enddo
+ enddo
+
+ ! virt ---> virt single excitations
+ do i0 = 1,  n_occ_ab_virt(1)
+  i=occ_virt(i0,1)
+  do j0 = 1, n_occ_ab_virt(1)
+   j = occ_virt(j0,1)
+   call get_mo_two_e_integrals_coulomb_ii_complex(i,j,mo_num,array_coulomb,mo_integrals_map,mo_integrals_map_2)
+   call get_mo_two_e_integrals_exch_ii_complex(i,j,mo_num,array_exchange,mo_integrals_map,mo_integrals_map_2)
+   accu = (0.d0,0.d0)
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * array_coulomb(k) - array_exchange(k)
+   enddo
+   fock_op_cshell_ref_bitmask_cplx(i,j) = accu+ mo_one_e_integrals_complex(i,j)
+   fock_op_cshell_ref_bitmask_cplx(j,i) = dconjg(accu)+ mo_one_e_integrals_complex(j,i)
+  enddo
+ enddo
+
+ ! docc ---> docc single excitations
+ do i0 = 1,  n_occ_ab(1)
+  i=occ(i0,1)
+  do j0 = 1, n_occ_ab(1)
+   j = occ(j0,1)
+   call get_mo_two_e_integrals_coulomb_ii_complex(i,j,mo_num,array_coulomb,mo_integrals_map,mo_integrals_map_2)
+   call get_mo_two_e_integrals_exch_ii_complex(i,j,mo_num,array_exchange,mo_integrals_map,mo_integrals_map_2)
+   accu = (0.d0,0.d0)
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * array_coulomb(k) - array_exchange(k)
+   enddo
+   fock_op_cshell_ref_bitmask_cplx(i,j) = accu+ mo_one_e_integrals_complex(i,j)
+   fock_op_cshell_ref_bitmask_cplx(j,i) = dconjg(accu)+ mo_one_e_integrals_complex(j,i)
+  enddo
+ enddo
+ deallocate(array_coulomb,array_exchange)
+
+END_PROVIDER
+
+subroutine get_single_excitation_from_fock_complex(det_1,det_2,h,p,spin,phase,hij)
+ use bitmasks
+ implicit none
+ integer,intent(in) :: h,p,spin
+ double precision, intent(in)  :: phase
+ integer(bit_kind), intent(in) :: det_1(N_int,2), det_2(N_int,2)
+ complex*16, intent(out) :: hij
+ integer(bit_kind) :: differences(N_int,2)
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: partcl(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_partcl(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
+ integer :: i0,i
+ complex*16 :: buffer_c(mo_num),buffer_x(mo_num)
+ do i=1, mo_num
+   buffer_c(i) = big_array_coulomb_integrals_complex(i,h,p)
+   buffer_x(i) = big_array_exchange_integrals_complex(i,h,p)
+ enddo
+ do i = 1, N_int
+  differences(i,1) = xor(det_1(i,1),ref_closed_shell_bitmask(i,1))
+  differences(i,2) = xor(det_1(i,2),ref_closed_shell_bitmask(i,2))
+  hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1))
+  hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2))
+  partcl(i,1) = iand(differences(i,1),det_1(i,1))
+  partcl(i,2) = iand(differences(i,2),det_1(i,2))
+ enddo
+ call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int)
+ call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int)
+ hij = fock_op_cshell_ref_bitmask_cplx(h,p)
+ ! holes :: direct terms
+ do i0 = 1, n_occ_ab_hole(1)
+  i = occ_hole(i0,1)
+  hij -= buffer_c(i)
+ enddo
+ do i0 = 1, n_occ_ab_hole(2)
+  i = occ_hole(i0,2)
+  hij -= buffer_c(i)
+ enddo
+
+ ! holes :: exchange terms
+ do i0 = 1, n_occ_ab_hole(spin)
+  i = occ_hole(i0,spin)
+  hij += buffer_x(i)
+ enddo
+
+ ! particles :: direct terms
+ do i0 = 1, n_occ_ab_partcl(1)
+  i = occ_partcl(i0,1)
+  hij += buffer_c(i)
+ enddo
+ do i0 = 1, n_occ_ab_partcl(2)
+  i = occ_partcl(i0,2)
+  hij += buffer_c(i)
+ enddo
+
+ ! particles :: exchange terms
+ do i0 = 1, n_occ_ab_partcl(spin)
+  i = occ_partcl(i0,spin)
+  hij -= buffer_x(i)
+ enddo
+ hij = hij * phase
+
+end
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [integer(bit_kind), ref_closed_shell_bitmask_kpts, (N_int,2,kpt_num)]
+ implicit none
+ integer :: i,k
+ do k = 1, kpt_num
+   do i = 1, N_int
+     ref_closed_shell_bitmask_kpts(i,1,k) = iand(ref_closed_shell_bitmask(i,1),kpts_bitmask(i,k))
+     ref_closed_shell_bitmask_kpts(i,2,k) = iand(ref_closed_shell_bitmask(i,2),kpts_bitmask(i,k))
+   enddo
+ enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, fock_op_cshell_ref_bitmask_kpts, (mo_num_per_kpt, mo_num_per_kpt,kpt_num) ]
+ implicit none
+ integer :: i0,j0,i,j,k0,k,kblock,kvirt
+ integer :: i_i, i_j, i_k, kocc
+ integer :: n_occ_ab(2,kpt_num)
+ integer :: occ(N_int*bit_kind_size,2,kpt_num)
+ integer :: n_occ_ab_virt(2)
+ integer :: occ_virt(N_int*bit_kind_size,2)
+ integer(bit_kind) :: key_test(N_int)
+ integer(bit_kind) :: key_virt(N_int,2)
+ complex*16 :: accu
+ complex*16, allocatable :: array_coulomb(:),array_exchange(:)
+
+ do kblock = 1,kpt_num
+   call bitstring_to_list_ab(ref_closed_shell_bitmask_kpts(1,1,kblock), &
+                            occ(1,1,kblock), n_occ_ab(1,kblock), N_int)
+ enddo
+ allocate (array_coulomb(mo_num_per_kpt),array_exchange(mo_num_per_kpt))
+ do kblock = 1,kpt_num
+   ! get virt orbs for this kpt
+   do i = 1, N_int
+     key_virt(i,1) = iand(full_ijkl_bitmask(i),kpts_bitmask(i,kblock))
+     key_virt(i,2) = iand(full_ijkl_bitmask(i),kpts_bitmask(i,kblock))
+     key_virt(i,1) = xor(key_virt(i,1),ref_closed_shell_bitmask_kpts(i,1,kblock))
+     key_virt(i,2) = xor(key_virt(i,2),ref_closed_shell_bitmask_kpts(i,2,kblock))
+   enddo
+   call bitstring_to_list_ab(key_virt, occ_virt, n_occ_ab_virt, N_int)
+   ! docc ---> virt single excitations
+   do i0 = 1,  n_occ_ab(1,kblock)
+    i=occ(i0,1,kblock)
+    i_i = mod(i-1,mo_num_per_kpt)+1
+    do j0 = 1, n_occ_ab_virt(1)
+     j = occ_virt(j0,1)
+     i_j = mod(j-1,mo_num_per_kpt)+1
+     accu = (0.d0,0.d0)
+     do kocc = 1,kpt_num
+      ! <ia|ja>
+      array_coulomb(1:mo_num_per_kpt) = big_array_coulomb_integrals_kpts(1:mo_num_per_kpt,kocc,i_i,i_j,kblock)
+      ! <ia|aj>
+      array_exchange(1:mo_num_per_kpt) = big_array_exchange_integrals_kpts(1:mo_num_per_kpt,kocc,i_i,i_j,kblock)
+      do k0 = 1, n_occ_ab(1,kocc)
+       k = occ(k0,1,kocc)
+       i_k = mod(k-1,mo_num_per_kpt)+1
+       accu += 2.d0 * array_coulomb(i_k) - array_exchange(i_k)
+      enddo
+     enddo
+     fock_op_cshell_ref_bitmask_kpts(i_i,i_j,kblock) = accu + mo_one_e_integrals_kpts(i_i,i_j,kblock)
+     !fock_op_cshell_ref_bitmask_cplx(j,i) = dconjg(accu) + mo_one_e_integrals_complex(j,i)
+     fock_op_cshell_ref_bitmask_kpts(i_j,i_i,kblock) = dconjg(fock_op_cshell_ref_bitmask_kpts(i_i,i_j,kblock))
+    enddo
+   enddo
+  
+   ! virt ---> virt single excitations
+   do i0 = 1,  n_occ_ab_virt(1)
+    i=occ_virt(i0,1)
+    i_i = mod(i-1,mo_num_per_kpt)+1
+    do j0 = 1, n_occ_ab_virt(1)
+     j = occ_virt(j0,1)
+     i_j = mod(j-1,mo_num_per_kpt)+1
+     accu = (0.d0,0.d0)
+     do kocc = 1,kpt_num
+      array_coulomb(1:mo_num_per_kpt) = big_array_coulomb_integrals_kpts(1:mo_num_per_kpt,kocc,i_i,i_j,kblock)
+      array_exchange(1:mo_num_per_kpt) = big_array_exchange_integrals_kpts(1:mo_num_per_kpt,kocc,i_i,i_j,kblock)
+      do k0 = 1, n_occ_ab(1,kocc)
+       k = occ(k0,1,kocc)
+       i_k = mod(k-1,mo_num_per_kpt)+1
+       accu += 2.d0 * array_coulomb(i_k) - array_exchange(i_k)
+      enddo
+     enddo
+     fock_op_cshell_ref_bitmask_kpts(i_i,i_j,kblock) = accu + mo_one_e_integrals_kpts(i_i,i_j,kblock)
+     fock_op_cshell_ref_bitmask_kpts(i_j,i_i,kblock) = dconjg(fock_op_cshell_ref_bitmask_kpts(i_i,i_j,kblock))
+    enddo
+   enddo
+  
+   ! docc ---> docc single excitations
+   do i0 = 1,  n_occ_ab(1,kblock)
+    i=occ(i0,1,kblock)
+    i_i = mod(i-1,mo_num_per_kpt)+1
+    do j0 = 1, n_occ_ab(1,kblock)
+     j = occ(j0,1,kblock)
+     i_j = mod(j-1,mo_num_per_kpt)+1
+     accu = (0.d0,0.d0)
+     do kocc = 1,kpt_num
+      array_coulomb(1:mo_num_per_kpt) = big_array_coulomb_integrals_kpts(1:mo_num_per_kpt,kocc,i_i,i_j,kblock)
+      array_exchange(1:mo_num_per_kpt) = big_array_exchange_integrals_kpts(1:mo_num_per_kpt,kocc,i_i,i_j,kblock)
+      do k0 = 1, n_occ_ab(1,kocc)
+       k = occ(k0,1,kocc)
+       i_k = mod(k-1,mo_num_per_kpt)+1
+       accu += 2.d0 * array_coulomb(i_k) - array_exchange(i_k)
+      enddo
+     enddo
+     fock_op_cshell_ref_bitmask_kpts(i_i,i_j,kblock) = accu + mo_one_e_integrals_kpts(i_i,i_j,kblock)
+     fock_op_cshell_ref_bitmask_kpts(i_j,i_i,kblock) = dconjg(fock_op_cshell_ref_bitmask_kpts(i_i,i_j,kblock))
+    enddo
+   enddo
+  enddo
+ deallocate(array_coulomb,array_exchange)
+
+END_PROVIDER
+
+subroutine get_single_excitation_from_fock_kpts(det_1,det_2,ih,ip,spin,phase,hij)
+  use bitmasks
+  !called by i_h_j{,_s2,_single_spin}_complex
+  ! ih, ip are indices in total mo list (not per kpt)
+  implicit none
+  integer,intent(in) :: ih,ip,spin
+  double precision, intent(in)  :: phase
+  integer(bit_kind), intent(in) :: det_1(N_int,2), det_2(N_int,2)
+  complex*16, intent(out) :: hij
+  integer(bit_kind) :: differences(N_int,2)
+  integer(bit_kind) :: hole(N_int,2)
+  integer(bit_kind) :: partcl(N_int,2)
+  integer :: occ_hole(N_int*bit_kind_size,2)
+  integer :: occ_partcl(N_int*bit_kind_size,2)
+  integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
+  integer :: i0,i,h,p
+  integer :: ki,khp,kh
+  complex*16 :: buffer_c(mo_num_per_kpt),buffer_x(mo_num_per_kpt)
+
+  call get_kpt_idx_mo(ip,khp,p)
+  call get_kpt_idx_mo(ih,kh,h)
+  ASSERT (kh==khp)
+  !todo: omp kpts
+  hij = fock_op_cshell_ref_bitmask_kpts(h,p,khp)
+  do ki=1,kpt_num
+    do i=1, mo_num_per_kpt
+      !<hi|pi>
+      buffer_c(i) = big_array_coulomb_integrals_kpts(i,ki,h,p,khp)
+      !<hi|ip>
+      buffer_x(i) = big_array_exchange_integrals_kpts(i,ki,h,p,khp)
+    enddo
+    do i = 1, N_int
+      !holes in ref, not in det1
+      !part in det1, not in ref
+      differences(i,1) = iand(xor(det_1(i,1),ref_closed_shell_bitmask(i,1)),kpts_bitmask(i,ki))
+      differences(i,2) = iand(xor(det_1(i,2),ref_closed_shell_bitmask(i,2)),kpts_bitmask(i,ki))
+      !differences(i,1) = xor(det_1(i,1),ref_closed_shell_bitmask_kpts(i,1,ki))
+      !differences(i,2) = xor(det_1(i,2),ref_closed_shell_bitmask_kpts(i,2,ki))
+      hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask_kpts(i,1,ki))
+      hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask_kpts(i,2,ki))
+      partcl(i,1) = iand(differences(i,1),det_1(i,1))
+      partcl(i,2) = iand(differences(i,2),det_1(i,2))
+    enddo
+    call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int)
+    call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int)
+    ! holes :: direct terms
+    do i0 = 1, n_occ_ab_hole(1)
+      i = occ_hole(i0,1) - (ki-1)*mo_num_per_kpt
+      hij -= buffer_c(i)
+    enddo
+    do i0 = 1, n_occ_ab_hole(2)
+      i = occ_hole(i0,2) - (ki-1)*mo_num_per_kpt
+      hij -= buffer_c(i)
+    enddo
+   
+    ! holes :: exchange terms
+    do i0 = 1, n_occ_ab_hole(spin)
+      i = occ_hole(i0,spin) - (ki-1)*mo_num_per_kpt
+      hij += buffer_x(i)
+    enddo
+   
+    ! particles :: direct terms
+    do i0 = 1, n_occ_ab_partcl(1)
+      i = occ_partcl(i0,1) - (ki-1)*mo_num_per_kpt
+      hij += buffer_c(i)
+    enddo
+    do i0 = 1, n_occ_ab_partcl(2)
+      i = occ_partcl(i0,2) - (ki-1)*mo_num_per_kpt
+      hij += buffer_c(i)
+    enddo
+   
+    ! particles :: exchange terms
+    do i0 = 1, n_occ_ab_partcl(spin)
+      i = occ_partcl(i0,spin) - (ki-1)*mo_num_per_kpt
+      hij -= buffer_x(i)
+    enddo
+  enddo
+  hij = hij * phase
+
+end
+
diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f
index ea8e0284..bcca451b 100644
--- a/src/determinants/slater_rules.irp.f
+++ b/src/determinants/slater_rules.irp.f
@@ -1581,8 +1581,6 @@ subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,idx)
 end
 
 
-
-
 double precision function diag_H_mat_elem_fock(det_ref,det_pert,fock_diag_tmp,Nint)
   use bitmasks
   implicit none
@@ -1745,7 +1743,7 @@ subroutine a_operator(iorb,ispin,key,hjj,Nint,na,nb)
   call bitstring_to_list_ab(key, occ, tmp, Nint)
   na = na-1
 
-  hjj = hjj - mo_one_e_integrals(iorb,iorb)
+  hjj = hjj - mo_one_e_integrals_diag(iorb)
 
   ! Same spin
   do i=1,na
@@ -1803,7 +1801,7 @@ subroutine ac_operator(iorb,ispin,key,hjj,Nint,na,nb)
   key(k,ispin) = ibset(key(k,ispin),l)
   other_spin = iand(ispin,1)+1
 
-  hjj = hjj + mo_one_e_integrals(iorb,iorb)
+  hjj = hjj + mo_one_e_integrals_diag(iorb)
 
   ! Same spin
   do i=1,na
@@ -2292,3 +2290,606 @@ subroutine connected_to_hf(key_i,yes_no)
   yes_no = .True.
  endif
 end
+
+
+!==============================================================================!
+!                                                                              !
+!                                    Complex                                   !
+!                                                                              !
+!==============================================================================!
+
+
+subroutine i_H_j_s2_complex(key_i,key_j,Nint,hij,s2)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns $\langle i|H|j \rangle$ and $\langle i|S^2|j \rangle$
+  ! where $i$ and $j$ are determinants.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij
+  double precision, intent(out)  :: s2
+
+  integer                        :: exc(0:2,2,2)
+  integer                        :: degree
+  complex*16               :: get_two_e_integral_complex
+  integer                        :: m,n,p,q
+  integer                        :: i,j,k
+  integer                        :: occ(Nint*bit_kind_size,2)
+  double precision               :: diag_h_mat_elem, phase
+  integer                        :: n_occ_ab(2)
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals_complex
+
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  ASSERT (sum(popcnt(key_i(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(key_i(:,2))) == elec_beta_num)
+  ASSERT (sum(popcnt(key_j(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(key_j(:,2))) == elec_beta_num)
+
+  hij = (0.d0,0.d0)
+  s2 = 0.d0
+  !DIR$ FORCEINLINE
+  call get_excitation_degree(key_i,key_j,degree,Nint)
+  integer :: spin
+  select case (degree)
+    case (2)
+      call get_double_excitation(key_i,key_j,exc,phase,Nint)
+      ! Single alpha, single beta
+      if (exc(0,1,1) == 1) then
+        if ( (exc(1,1,1) == exc(1,2,2)).and.(exc(1,1,2) == exc(1,2,1)) ) then
+          s2 =  -phase
+        endif
+        if(exc(1,1,1) == exc(1,2,2) )then
+          hij = phase * big_array_exchange_integrals_complex(exc(1,1,1),exc(1,1,2),exc(1,2,1))
+        else if (exc(1,2,1) ==exc(1,1,2))then
+          hij = phase * big_array_exchange_integrals_complex(exc(1,2,1),exc(1,1,1),exc(1,2,2))
+        else
+          hij = phase*get_two_e_integral_complex(                      &
+              exc(1,1,1),                                              &
+              exc(1,1,2),                                              &
+              exc(1,2,1),                                              &
+              exc(1,2,2) ,mo_integrals_map,mo_integrals_map_2)
+        endif
+      ! Double alpha
+      else if (exc(0,1,1) == 2) then
+        hij = phase*(get_two_e_integral_complex(                     &
+            exc(1,1,1),                                              &
+            exc(2,1,1),                                              &
+            exc(1,2,1),                                              &
+            exc(2,2,1) ,mo_integrals_map,mo_integrals_map_2) -       &
+            get_two_e_integral_complex(                              &
+            exc(1,1,1),                                              &
+            exc(2,1,1),                                              &
+            exc(2,2,1),                                              &
+            exc(1,2,1) ,mo_integrals_map,mo_integrals_map_2) )
+      ! Double beta
+      else if (exc(0,1,2) == 2) then
+        hij = phase*(get_two_e_integral_complex(                     &
+            exc(1,1,2),                                              &
+            exc(2,1,2),                                              &
+            exc(1,2,2),                                              &
+            exc(2,2,2) ,mo_integrals_map,mo_integrals_map_2) -       &
+            get_two_e_integral_complex(                              &
+            exc(1,1,2),                                              &
+            exc(2,1,2),                                              &
+            exc(2,2,2),                                              &
+            exc(1,2,2) ,mo_integrals_map,mo_integrals_map_2) )
+      endif
+    case (1)
+      call get_single_excitation(key_i,key_j,exc,phase,Nint)
+      !DIR$ FORCEINLINE
+      call bitstring_to_list_ab(key_i, occ, n_occ_ab, Nint)
+      ! Single alpha
+      if (exc(0,1,1) == 1) then
+        m = exc(1,1,1)
+        p = exc(1,2,1)
+        spin = 1
+      ! Single beta
+      else
+        m = exc(1,1,2)
+        p = exc(1,2,2)
+        spin = 2
+      endif
+      call get_single_excitation_from_fock_complex(key_i,key_j,m,p,spin,phase,hij)
+
+    case (0)
+      double precision, external :: diag_S_mat_elem
+      s2 = diag_S_mat_elem(key_i,Nint)
+      hij = dcmplx(diag_H_mat_elem(key_i,Nint),0.d0)
+  end select
+end
+
+
+
+subroutine i_H_j_complex(key_i,key_j,Nint,hij)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns $\langle i|H|j \rangle$ where $i$ and $j$ are determinants.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij
+
+  integer                        :: exc(0:2,2,2)
+  integer                        :: degree
+  complex*16               :: get_two_e_integral_complex
+  integer                        :: m,n,p,q
+  integer                        :: i,j,k
+  integer :: ih1,ih2,ip1,ip2,kh1,kh2,kp1,kp2
+  integer                        :: occ(Nint*bit_kind_size,2)
+  double precision               :: diag_H_mat_elem, phase
+  integer                        :: n_occ_ab(2)
+  logical :: is_allowed
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals_complex
+
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  ASSERT (sum(popcnt(key_i(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(key_i(:,2))) == elec_beta_num)
+  ASSERT (sum(popcnt(key_j(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(key_j(:,2))) == elec_beta_num)
+
+
+  hij = (0.d0,0.d0)
+  !DIR$ FORCEINLINE
+  call get_excitation_degree(key_i,key_j,degree,Nint)
+  integer :: spin
+  select case (degree)
+    case (2)
+      call get_double_excitation(key_i,key_j,exc,phase,Nint)
+      if (exc(0,1,1) == 1) then
+        call double_allowed_mo_kpts(exc(1,1,1),exc(1,1,2),exc(1,2,1),exc(1,2,2),is_allowed)
+        if (.not.is_allowed) then
+          ! excitation doesn't conserve momentum
+          hij = (0.d0,0.d0)
+          return
+        endif
+        ! Single alpha, single beta
+        if(exc(1,1,1) == exc(1,2,2) )then
+          !h1(a) = p2(b)
+          call get_kpt_idx_mo(exc(1,1,1),kh1,ih1)
+          call get_kpt_idx_mo(exc(1,1,2),kh2,ih2)
+          call get_kpt_idx_mo(exc(1,2,1),kp1,ip1)
+
+          if(kp1.ne.kh2) then
+            !if h1==p2 then kp1==kh2
+            print*,'problem with hij kpts: ',irp_here
+            print*,is_allowed
+            print*,exc(1,1,1),exc(1,1,2),exc(1,2,1),exc(1,2,2)
+            print*,ih1,kh1,ih2,kh2,ip1,kp1
+            stop -4
+          endif
+          hij = phase * big_array_exchange_integrals_kpts(ih1,kh1,ih2,ip1,kp1)
+          !hij = phase * big_array_exchange_integrals_complex(exc(1,1,1),exc(1,1,2),exc(1,2,1))
+        else if (exc(1,2,1) ==exc(1,1,2))then
+          !p1(a)==h2(b)
+          call get_kpt_idx_mo(exc(1,1,1),kh1,ih1)
+          call get_kpt_idx_mo(exc(1,2,1),kp1,ip1)
+          call get_kpt_idx_mo(exc(1,2,2),kp2,ip2)
+          if(kp2.ne.kh1) then
+            print*,'problem with hij kpts: ',irp_here
+            print*,is_allowed
+            print*,exc(1,1,1),exc(1,1,2),exc(1,2,1),exc(1,2,2)
+            print*,ip1,kp1,ip2,kp2,ih1,kh1
+            stop -5
+          endif
+          hij = phase * big_array_exchange_integrals_kpts(ip1,kp1,ih1,ip2,kp2)
+          !hij = phase * big_array_exchange_integrals_complex(exc(1,2,1),exc(1,1,1),exc(1,2,2))
+        else
+          hij = phase*get_two_e_integral_complex(                     &
+              exc(1,1,1),                                             &
+              exc(1,1,2),                                             &
+              exc(1,2,1),                                             &
+              exc(1,2,2) ,mo_integrals_map,mo_integrals_map_2)
+        endif
+      else if (exc(0,1,1) == 2) then
+        call double_allowed_mo_kpts(exc(1,1,1),exc(2,1,1),exc(1,2,1),exc(2,2,1),is_allowed)
+        if (.not.is_allowed) then
+          hij = (0.d0,0.d0)
+          return
+        endif
+        ! Double alpha
+        hij = phase*(get_two_e_integral_complex(                     &
+            exc(1,1,1),                                              &
+            exc(2,1,1),                                              &
+            exc(1,2,1),                                              &
+            exc(2,2,1) ,mo_integrals_map,mo_integrals_map_2) -       &
+            get_two_e_integral_complex(                              &
+            exc(1,1,1),                                              &
+            exc(2,1,1),                                              &
+            exc(2,2,1),                                              &
+            exc(1,2,1) ,mo_integrals_map,mo_integrals_map_2) )
+      else if (exc(0,1,2) == 2) then
+        call double_allowed_mo_kpts(exc(1,1,2),exc(2,1,2),exc(1,2,2),exc(2,2,2),is_allowed)
+        if (.not.is_allowed) then
+          hij = (0.d0,0.d0)
+          return
+        endif
+        ! Double beta
+        hij = phase*(get_two_e_integral_complex(                     &
+            exc(1,1,2),                                              &
+            exc(2,1,2),                                              &
+            exc(1,2,2),                                              &
+            exc(2,2,2) ,mo_integrals_map,mo_integrals_map_2) -       &
+            get_two_e_integral_complex(                              &
+            exc(1,1,2),                                              &
+            exc(2,1,2),                                              &
+            exc(2,2,2),                                              &
+            exc(1,2,2) ,mo_integrals_map,mo_integrals_map_2) )
+      endif
+    case (1)
+      call get_single_excitation(key_i,key_j,exc,phase,Nint)
+      !DIR$ FORCEINLINE
+      call bitstring_to_list_ab(key_i, occ, n_occ_ab, Nint)
+      if (exc(0,1,1) == 1) then
+        ! Single alpha
+        m = exc(1,1,1)
+        p = exc(1,2,1)
+        spin = 1
+      else
+        ! Single beta
+        m = exc(1,1,2)
+        p = exc(1,2,2)
+        spin = 2
+      endif
+      !if m,p not from same kpt, single not allowed
+      if (int((m-1)/mo_num_per_kpt + 1).ne.int((p-1)/mo_num_per_kpt + 1)) then
+        hij = (0.d0,0.d0)
+        return
+      endif
+      !call get_single_excitation_from_fock_complex(key_i,key_j,m,p,spin,phase,hij)
+      call get_single_excitation_from_fock_kpts(key_i,key_j,m,p,spin,phase,hij)
+
+    case (0)
+      hij = dcmplx(diag_H_mat_elem(key_i,Nint),0.d0)
+  end select
+end
+
+
+
+
+
+subroutine i_H_j_verbose_complex(key_i,key_j,Nint,hij,hmono,hdouble,phase)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns $\langle i|H|j \rangle$ where $i$ and $j$ are determinants.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij,hmono,hdouble
+  double precision, intent(out)  :: phase
+
+  integer                        :: exc(0:2,2,2)
+  integer                        :: degree
+  complex*16               :: get_two_e_integral_complex
+  integer                        :: m,n,p,q
+  integer                        :: i,j,k
+  integer                        :: occ(Nint*bit_kind_size,2)
+  double precision               :: diag_H_mat_elem
+  integer                        :: n_occ_ab(2)
+  logical                        :: has_mipi(Nint*bit_kind_size)
+  complex*16               :: mipi(Nint*bit_kind_size), miip(Nint*bit_kind_size)
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_map
+
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  ASSERT (sum(popcnt(key_i(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(key_i(:,2))) == elec_beta_num)
+  ASSERT (sum(popcnt(key_j(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(key_j(:,2))) == elec_beta_num)
+
+  hij = (0.d0,0.d0)
+  hmono = (0.d0,0.d0)
+  hdouble = (0.d0,0.d0)
+  !DIR$ FORCEINLINE
+  call get_excitation_degree(key_i,key_j,degree,Nint)
+  select case (degree)
+    case (2)
+      call get_double_excitation(key_i,key_j,exc,phase,Nint)
+      if (exc(0,1,1) == 1) then
+        ! Single alpha, single beta
+        hij = phase*get_two_e_integral_complex(                      &
+            exc(1,1,1),                                              &
+            exc(1,1,2),                                              &
+            exc(1,2,1),                                              &
+            exc(1,2,2) ,mo_integrals_map,mo_integrals_map_2)
+      else if (exc(0,1,1) == 2) then
+        ! Double alpha
+        hij = phase*(get_two_e_integral_complex(                     &
+            exc(1,1,1),                                              &
+            exc(2,1,1),                                              &
+            exc(1,2,1),                                              &
+            exc(2,2,1) ,mo_integrals_map,mo_integrals_map_2) -       &
+            get_two_e_integral_complex(                              &
+            exc(1,1,1),                                              &
+            exc(2,1,1),                                              &
+            exc(2,2,1),                                              &
+            exc(1,2,1) ,mo_integrals_map,mo_integrals_map_2) )
+
+      else if (exc(0,1,2) == 2) then
+        ! Double beta
+        hij = phase*(get_two_e_integral_complex(                     &
+            exc(1,1,2),                                              &
+            exc(2,1,2),                                              &
+            exc(1,2,2),                                              &
+            exc(2,2,2) ,mo_integrals_map,mo_integrals_map_2) -       &
+            get_two_e_integral_complex(                              &
+            exc(1,1,2),                                              &
+            exc(2,1,2),                                              &
+            exc(2,2,2),                                              &
+            exc(1,2,2) ,mo_integrals_map,mo_integrals_map_2) )
+      endif
+    case (1)
+      call get_single_excitation(key_i,key_j,exc,phase,Nint)
+      !DIR$ FORCEINLINE
+      call bitstring_to_list_ab(key_i, occ, n_occ_ab, Nint)
+      has_mipi = .False.
+      if (exc(0,1,1) == 1) then
+        ! Single alpha
+        m = exc(1,1,1)
+        p = exc(1,2,1)
+        do k = 1, elec_alpha_num
+          i = occ(k,1)
+          if (.not.has_mipi(i)) then
+            mipi(i) = get_two_e_integral_complex(m,i,p,i,mo_integrals_map,mo_integrals_map_2)
+            miip(i) = get_two_e_integral_complex(m,i,i,p,mo_integrals_map,mo_integrals_map_2)
+            has_mipi(i) = .True.
+          endif
+        enddo
+        do k = 1, elec_beta_num
+          i = occ(k,2)
+          if (.not.has_mipi(i)) then
+            mipi(i) = get_two_e_integral_complex(m,i,p,i,mo_integrals_map,mo_integrals_map_2)
+            has_mipi(i) = .True.
+          endif
+        enddo
+
+        do k = 1, elec_alpha_num
+          hdouble = hdouble + mipi(occ(k,1)) - miip(occ(k,1))
+        enddo
+        do k = 1, elec_beta_num
+          hdouble = hdouble + mipi(occ(k,2))
+        enddo
+
+      else
+        ! Single beta
+        m = exc(1,1,2)
+        p = exc(1,2,2)
+        do k = 1, elec_beta_num
+          i = occ(k,2)
+          if (.not.has_mipi(i)) then
+            mipi(i) = get_two_e_integral_complex(m,i,p,i,mo_integrals_map,mo_integrals_map_2)
+            miip(i) = get_two_e_integral_complex(m,i,i,p,mo_integrals_map,mo_integrals_map_2)
+            has_mipi(i) = .True.
+          endif
+        enddo
+        do k = 1, elec_alpha_num
+          i = occ(k,1)
+          if (.not.has_mipi(i)) then
+            mipi(i) = get_two_e_integral_complex(m,i,p,i,mo_integrals_map,mo_integrals_map_2)
+            has_mipi(i) = .True.
+          endif
+        enddo
+
+        do k = 1, elec_alpha_num
+          hdouble = hdouble + mipi(occ(k,1))
+        enddo
+        do k = 1, elec_beta_num
+          hdouble = hdouble + mipi(occ(k,2)) - miip(occ(k,2))
+        enddo
+
+      endif
+      hmono = mo_one_e_integrals_complex(m,p)
+      hij = phase*(hdouble + hmono)
+
+    case (0)
+      phase = 1.d0
+      hij = dcmplx(diag_H_mat_elem(key_i,Nint),0.d0)
+  end select
+end
+
+
+subroutine i_H_psi_complex(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+! Computes $\langle i|H|Psi \rangle  = \sum_J c_J \langle i | H | J \rangle$.
+!
+! Uses filter_connected_i_H_psi0 to get all the $|J \rangle$ to which $|i \rangle$
+! is connected.
+! The i_H_psi_minilist is much faster but requires to build the
+! minilists.
+  END_DOC
+  integer, intent(in)            :: Nint, Ndet,Ndet_max,Nstate
+  integer(bit_kind), intent(in)  :: keys(Nint,2,Ndet)
+  integer(bit_kind), intent(in)  :: key(Nint,2)
+  complex*16, intent(in)   :: coef(Ndet_max,Nstate)
+  complex*16, intent(out)  :: i_H_psi_array(Nstate)
+
+  integer                        :: i, ii,j
+  double precision               :: phase
+  integer                        :: exc(0:2,2,2)
+  complex*16               :: hij
+  integer, allocatable           :: idx(:)
+
+  ASSERT (Nint > 0)
+  ASSERT (N_int == Nint)
+  ASSERT (Nstate > 0)
+  ASSERT (Ndet > 0)
+  ASSERT (Ndet_max >= Ndet)
+  allocate(idx(0:Ndet))
+
+  i_H_psi_array = (0.d0,0.d0)
+
+  call filter_connected_i_h_psi0(keys,key,Nint,Ndet,idx)
+  if (Nstate == 1) then
+
+    do ii=1,idx(0)
+      i = idx(ii)
+      !DIR$ FORCEINLINE
+      call i_h_j_complex(key,keys(1,1,i),Nint,hij)
+      i_H_psi_array(1) = i_H_psi_array(1) + coef(i,1)*hij
+    enddo
+
+  else
+
+    do ii=1,idx(0)
+      i = idx(ii)
+      !DIR$ FORCEINLINE
+      call i_h_j_complex(key,keys(1,1,i),Nint,hij)
+      do j = 1, Nstate
+        i_H_psi_array(j) = i_H_psi_array(j) + coef(i,j)*hij
+      enddo
+    enddo
+
+  endif
+
+end
+
+
+subroutine i_H_psi_minilist_complex(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array)
+  use bitmasks
+  implicit none
+  integer, intent(in)            :: Nint, Ndet,Ndet_max,Nstate,idx_key(Ndet), N_minilist
+  integer(bit_kind), intent(in)  :: keys(Nint,2,Ndet)
+  integer(bit_kind), intent(in)  :: key(Nint,2)
+  complex*16, intent(in)   :: coef(Ndet_max,Nstate)
+  complex*16, intent(out)  :: i_H_psi_array(Nstate)
+
+  integer                        :: i, ii,j, i_in_key, i_in_coef
+  double precision               :: phase
+  integer                        :: exc(0:2,2,2)
+  complex*16               :: hij
+  integer, allocatable           :: idx(:)
+  BEGIN_DOC
+! Computes $\langle i|H|\Psi \rangle = \sum_J c_J \langle i|H|J\rangle$.
+!
+! Uses filter_connected_i_H_psi0 to get all the $|J \rangle$ to which $|i \rangle$
+! is connected. The $|J\rangle$ are searched in short pre-computed lists.
+  END_DOC
+
+  ASSERT (Nint > 0)
+  ASSERT (N_int == Nint)
+  ASSERT (Nstate > 0)
+  ASSERT (Ndet > 0)
+  ASSERT (Ndet_max >= Ndet)
+  allocate(idx(0:Ndet))
+  i_H_psi_array = 0.d0
+
+  call filter_connected_i_h_psi0(keys,key,Nint,N_minilist,idx)
+  if (Nstate == 1) then
+
+    do ii=1,idx(0)
+      i_in_key = idx(ii)
+      i_in_coef = idx_key(idx(ii))
+      !DIR$ FORCEINLINE
+      call i_h_j_complex(key,keys(1,1,i_in_key),Nint,hij)
+      ! TODO : Cache misses
+      i_H_psi_array(1) = i_H_psi_array(1) + coef(i_in_coef,1)*hij
+    enddo
+
+  else
+
+    do ii=1,idx(0)
+      i_in_key = idx(ii)
+      i_in_coef = idx_key(idx(ii))
+      !DIR$ FORCEINLINE
+      call i_h_j_complex(key,keys(1,1,i_in_key),Nint,hij)
+      do j = 1, Nstate
+        i_H_psi_array(j) = i_H_psi_array(j) + coef(i_in_coef,j)*hij
+      enddo
+    enddo
+
+  endif
+
+end
+
+
+
+subroutine i_H_j_single_spin_complex(key_i,key_j,Nint,spin,hij)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns $\langle i|H|j \rangle$ where $i$ and $j$ are determinants differing by
+  ! a single excitation.
+  END_DOC
+  integer, intent(in)            :: Nint, spin
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij
+
+  integer                        :: exc(0:2,2)
+  double precision               :: phase
+
+  !PROVIDE big_array_exchange_integrals_complex mo_two_e_integrals_in_map
+  PROVIDE big_array_exchange_integrals_kpts mo_two_e_integrals_in_map
+
+  call get_single_excitation_spin(key_i(1,spin),key_j(1,spin),exc,phase,Nint)
+  !call get_single_excitation_from_fock_complex(key_i,key_j,exc(1,1),exc(1,2),spin,phase,hij)
+  call get_single_excitation_from_fock_kpts(key_i,key_j,exc(1,1),exc(1,2),spin,phase,hij)
+end
+
+subroutine i_H_j_double_spin_complex(key_i,key_j,Nint,hij)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns $\langle i|H|j \rangle$ where $i$ and $j$ are determinants differing by
+  ! a same-spin double excitation.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint), key_j(Nint)
+  complex*16, intent(out)  :: hij
+
+  integer                        :: exc(0:2,2)
+  double precision               :: phase
+  complex*16, external     :: get_two_e_integral_complex
+
+  PROVIDE big_array_exchange_integrals_complex mo_two_e_integrals_in_map
+  call get_double_excitation_spin(key_i,key_j,exc,phase,Nint)
+  hij = phase*(get_two_e_integral_complex(                             &
+      exc(1,1),                                                    &
+      exc(2,1),                                                    &
+      exc(1,2),                                                    &
+      exc(2,2), mo_integrals_map,mo_integrals_map_2) -                                &
+      get_two_e_integral_complex(                                      &
+      exc(1,1),                                                    &
+      exc(2,1),                                                    &
+      exc(2,2),                                                    &
+      exc(1,2), mo_integrals_map,mo_integrals_map_2) )
+end
+
+subroutine i_H_j_double_alpha_beta_complex(key_i,key_j,Nint,hij)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns $\langle i|H|j \rangle$ where $i$ and $j$ are determinants differing by
+  ! an opposite-spin double excitation.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij
+
+  integer                        :: exc(0:2,2,2)
+  double precision               :: phase, phase2
+  complex*16, external     :: get_two_e_integral_complex
+
+  PROVIDE big_array_exchange_integrals_complex mo_two_e_integrals_in_map
+
+  call get_single_excitation_spin(key_i(1,1),key_j(1,1),exc(0,1,1),phase,Nint)
+  call get_single_excitation_spin(key_i(1,2),key_j(1,2),exc(0,1,2),phase2,Nint)
+  phase = phase*phase2
+  if (exc(1,1,1) == exc(1,2,2)) then
+    hij = phase * big_array_exchange_integrals_complex(exc(1,1,1),exc(1,1,2),exc(1,2,1))
+  else if (exc(1,2,1) == exc(1,1,2)) then
+    hij = phase * big_array_exchange_integrals_complex(exc(1,2,1),exc(1,1,1),exc(1,2,2))
+  else
+    hij = phase*get_two_e_integral_complex(                              &
+        exc(1,1,1),                                                  &
+        exc(1,1,2),                                                  &
+        exc(1,2,1),                                                  &
+        exc(1,2,2) ,mo_integrals_map,mo_integrals_map_2)
+  endif
+end
diff --git a/src/determinants/slater_rules_wee_mono.irp.f b/src/determinants/slater_rules_wee_mono.irp.f
index 4c1c9330..50ec4f79 100644
--- a/src/determinants/slater_rules_wee_mono.irp.f
+++ b/src/determinants/slater_rules_wee_mono.irp.f
@@ -225,7 +225,7 @@ double precision function diag_H_mat_elem_one_e(det_in,Nint)
   call bitstring_to_list_ab(det_in, occ_particle, tmp, Nint)
   do ispin = 1,2
    do i = 1, tmp(ispin)
-    diag_H_mat_elem_one_e +=  mo_one_e_integrals(occ_particle(i,ispin),occ_particle(i,ispin))
+    diag_H_mat_elem_one_e +=  mo_one_e_integrals_diag(occ_particle(i,ispin))
    enddo
   enddo
 
@@ -361,3 +361,180 @@ subroutine i_H_j_two_e(key_i,key_j,Nint,hij)
   end select
 end
 
+!==============================================================================!
+!                                                                              !
+!                                    Complex                                   !
+!                                                                              !
+!==============================================================================!
+
+subroutine i_Wee_j_single_complex(key_i,key_j,Nint,spin,hij)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns $\langle i|H|j \rangle$  where $i$ and $j$ are determinants differing by a
+  ! single excitation.
+  END_DOC
+  integer, intent(in)            :: Nint, spin
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij
+
+  integer                        :: exc(0:2,2)
+  double precision               :: phase
+
+  PROVIDE big_array_exchange_integrals_complex mo_two_e_integrals_in_map
+
+  call get_single_excitation_spin(key_i(1,spin),key_j(1,spin),exc,phase,Nint)
+  call single_excitation_wee_complex(key_i,key_j,exc(1,1),exc(1,2),spin,phase,hij)
+end
+
+
+subroutine i_H_j_mono_spin_one_e_complex(key_i,key_j,Nint,spin,hij)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns $\langle i|H|j \rangle$  where $i$ and $j$ are determinants differing by
+  ! a single excitation.
+  END_DOC
+  integer, intent(in)            :: Nint, spin
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij
+
+  integer                        :: exc(0:2,2)
+  double precision               :: phase
+
+  call get_single_excitation_spin(key_i(1,spin),key_j(1,spin),exc,phase,Nint)
+  integer :: m,p
+  m = exc(1,1)
+  p = exc(1,2)
+  hij = phase * mo_one_e_integrals_complex(m,p)
+end
+
+subroutine i_H_j_one_e_complex(key_i,key_j,Nint,hij)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns $\langle i|H|j \rangle$  where $i$ and $j$ are determinants.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij
+
+  integer :: degree,m,p
+  double precision :: diag_h_mat_elem_one_e,phase
+  integer                        :: exc(0:2,2,2)
+  call get_excitation_degree(key_i,key_j,degree,Nint)
+  hij = (0.d0,0.d0)
+  if(degree>1)then
+   return
+  endif
+  if(degree==0)then
+   hij = dcmplx(diag_h_mat_elem_one_e(key_i,N_int),0.d0)
+  else
+   call get_single_excitation(key_i,key_j,exc,phase,Nint)
+   if (exc(0,1,1) == 1) then
+     ! Mono alpha
+     m = exc(1,1,1)
+     p = exc(1,2,1)
+   else
+     ! Mono beta
+     m = exc(1,1,2)
+     p = exc(1,2,2)
+   endif
+   hij = phase * mo_one_e_integrals_complex(m,p)
+  endif
+
+end
+
+subroutine i_H_j_two_e_complex(key_i,key_j,Nint,hij)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Returns $\langle i|H|j \rangle$  where $i$ and $j$ are determinants.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  complex*16, intent(out)  :: hij
+
+  integer                        :: exc(0:2,2,2)
+  integer                        :: degree
+  complex*16               :: get_two_e_integral_complex
+  integer                        :: m,n,p,q
+  integer                        :: i,j,k
+  integer                        :: occ(Nint*bit_kind_size,2)
+  double precision               :: diag_H_mat_elem, phase,phase_2
+  integer                        :: n_occ_ab(2)
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals_complex ref_bitmask_two_e_energy
+
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  ASSERT (sum(popcnt(key_i(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(key_i(:,2))) == elec_beta_num)
+  ASSERT (sum(popcnt(key_j(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(key_j(:,2))) == elec_beta_num)
+
+  hij = (0.d0,0.d0)
+  !DIR$ FORCEINLINE
+  call get_excitation_degree(key_i,key_j,degree,Nint)
+  integer :: spin
+  select case (degree)
+    case (2)
+      call get_double_excitation(key_i,key_j,exc,phase,Nint)
+      if (exc(0,1,1) == 1) then
+        ! Mono alpha, mono beta
+        if(exc(1,1,1) == exc(1,2,2) )then
+         hij = phase * big_array_exchange_integrals_complex(exc(1,1,1),exc(1,1,2),exc(1,2,1))
+        else if (exc(1,2,1) ==exc(1,1,2))then
+         hij = phase * big_array_exchange_integrals_complex(exc(1,2,1),exc(1,1,1),exc(1,2,2))
+        else
+         hij = phase*get_two_e_integral_complex(                   &
+             exc(1,1,1),                                              &
+             exc(1,1,2),                                              &
+             exc(1,2,1),                                              &
+             exc(1,2,2) ,mo_integrals_map,mo_integrals_map_2)
+        endif
+      else if (exc(0,1,1) == 2) then
+        ! Double alpha
+        hij = phase*(get_two_e_integral_complex(                     &
+            exc(1,1,1),                                              &
+            exc(2,1,1),                                              &
+            exc(1,2,1),                                              &
+            exc(2,2,1) ,mo_integrals_map,mo_integrals_map_2) -       &
+            get_two_e_integral_complex(                              &
+            exc(1,1,1),                                              &
+            exc(2,1,1),                                              &
+            exc(2,2,1),                                              &
+            exc(1,2,1) ,mo_integrals_map,mo_integrals_map_2) )
+      else if (exc(0,1,2) == 2) then
+        ! Double beta
+        hij = phase*(get_two_e_integral_complex(                     &
+            exc(1,1,2),                                              &
+            exc(2,1,2),                                              &
+            exc(1,2,2),                                              &
+            exc(2,2,2) ,mo_integrals_map,mo_integrals_map_2) -       &
+            get_two_e_integral_complex(                              &
+            exc(1,1,2),                                              &
+            exc(2,1,2),                                              &
+            exc(2,2,2),                                              &
+            exc(1,2,2) ,mo_integrals_map,mo_integrals_map_2) )
+      endif
+    case (1)
+      call get_single_excitation(key_i,key_j,exc,phase,Nint)
+      !DIR$ FORCEINLINE
+      call bitstring_to_list_ab(key_i, occ, n_occ_ab, Nint)
+      if (exc(0,1,1) == 1) then
+        ! Mono alpha
+        m = exc(1,1,1)
+        p = exc(1,2,1)
+        spin = 1
+      else
+        ! Mono beta
+        m = exc(1,1,2)
+        p = exc(1,2,2)
+        spin = 2
+      endif
+      call single_excitation_wee_complex(key_i,key_j,m,p,spin,phase,hij)
+    case (0)
+      double precision :: diag_wee_mat_elem
+      hij = dcmplx(diag_wee_mat_elem(key_i,Nint),0.d0)
+  end select
+end
diff --git a/src/determinants/spindeterminants.ezfio_config b/src/determinants/spindeterminants.ezfio_config
index 39ccb82b..bd4b80ce 100644
--- a/src/determinants/spindeterminants.ezfio_config
+++ b/src/determinants/spindeterminants.ezfio_config
@@ -10,6 +10,7 @@ spindeterminants
   psi_coef_matrix_rows    integer (spindeterminants_n_det)
   psi_coef_matrix_columns integer (spindeterminants_n_det)
   psi_coef_matrix_values  double precision (spindeterminants_n_det,spindeterminants_n_states)
+  psi_coef_matrix_values_complex  double precision (2,spindeterminants_n_det,spindeterminants_n_states)
   n_svd_coefs     integer
   psi_svd_alpha   double precision (spindeterminants_n_det_alpha,spindeterminants_n_svd_coefs,spindeterminants_n_states)
   psi_svd_beta    double precision (spindeterminants_n_det_beta,spindeterminants_n_svd_coefs,spindeterminants_n_states)
diff --git a/src/determinants/spindeterminants.irp.f b/src/determinants/spindeterminants.irp.f
index 232c9e2b..d5421a22 100644
--- a/src/determinants/spindeterminants.irp.f
+++ b/src/determinants/spindeterminants.irp.f
@@ -307,8 +307,12 @@ integer function get_index_in_psi_det_beta_unique(key,Nint)
 
 end
 
-
 subroutine write_spindeterminants
+  !todo: modify for complex (not called anywhere?)
+  if (is_complex) then
+    print*,irp_here,' not implemented for complex'
+    stop -1
+  endif
   use bitmasks
   implicit none
   integer(8), allocatable        :: tmpdet(:,:)
@@ -349,8 +353,12 @@ subroutine write_spindeterminants
   enddo
   call ezfio_set_spindeterminants_psi_det_beta(psi_det_beta_unique)
   deallocate(tmpdet)
-
+  
+  if (is_complex) then
+    call ezfio_set_spindeterminants_psi_coef_matrix_values_complex(psi_bilinear_matrix_values_complex)
+  else
   call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
+  endif
   call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
   call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
 
@@ -395,6 +403,18 @@ end
 
   det_alpha_norm = 0.d0
   det_beta_norm  = 0.d0
+  if (is_complex) then
+    do k=1,N_det
+      i = psi_bilinear_matrix_rows(k)
+      j = psi_bilinear_matrix_columns(k)
+      f = 0.d0
+      do l=1,N_states
+        f += cdabs(psi_bilinear_matrix_values_complex(k,l)*psi_bilinear_matrix_values_complex(k,l)) * state_average_weight(l)
+      enddo
+      det_alpha_norm(i) += f
+      det_beta_norm(j)  += f
+    enddo
+  else
   do k=1,N_det
     i = psi_bilinear_matrix_rows(k)
     j = psi_bilinear_matrix_columns(k)
@@ -405,6 +425,7 @@ end
     det_alpha_norm(i) += f
     det_beta_norm(j)  += f
   enddo
+  endif
   det_alpha_norm = det_alpha_norm
   det_beta_norm = det_beta_norm
 
@@ -417,8 +438,37 @@ END_PROVIDER
 !                                                                              !
 !==============================================================================!
 
- BEGIN_PROVIDER  [ double precision, psi_bilinear_matrix_values, (N_det,N_states) ]
-&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_rows   , (N_det) ]
+BEGIN_PROVIDER  [ double precision, psi_bilinear_matrix_values, (N_det,N_states) ]
+  use bitmasks
+  PROVIDE psi_bilinear_matrix_rows
+  integer :: k,l
+  do k=1,N_det
+    do l=1,N_states
+      psi_bilinear_matrix_values(k,l) = psi_coef(k,l)
+    enddo
+  enddo
+  do l=1,N_states
+    call dset_order(psi_bilinear_matrix_values(1,l),psi_bilinear_matrix_order,N_det)
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER  [ complex*16, psi_bilinear_matrix_values_complex, (N_det,N_states) ]
+  use bitmasks
+  PROVIDE psi_bilinear_matrix_rows
+  integer :: k,l
+  do k=1,N_det
+    do l=1,N_states
+      psi_bilinear_matrix_values_complex(k,l) = psi_coef_complex(k,l)
+    enddo
+  enddo
+  do l=1,N_states
+    call cdset_order(psi_bilinear_matrix_values_complex(1,l),psi_bilinear_matrix_order,N_det)
+  enddo
+END_PROVIDER
+
+
+
+ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_rows   , (N_det) ]
 &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns, (N_det) ]
 &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order  , (N_det) ]
   use bitmasks
@@ -433,10 +483,13 @@ END_PROVIDER
   END_DOC
   integer                        :: i,j,k, l
   integer(bit_kind)              :: tmp_det(N_int,2)
-  integer, external              :: get_index_in_psi_det_sorted_bit
+!  integer, external              :: get_index_in_psi_det_sorted_bit
 
-
-  PROVIDE psi_coef_sorted_bit
+  if (is_complex) then
+    PROVIDE psi_coef_sorted_bit_complex
+  else
+    PROVIDE psi_coef_sorted_bit
+  endif
 
   integer*8, allocatable         :: to_sort(:)
   integer, external              :: get_index_in_psi_det_alpha_unique
@@ -452,9 +505,6 @@ END_PROVIDER
     ASSERT (j>0)
     ASSERT (j<=N_det_beta_unique)
 
-    do l=1,N_states
-      psi_bilinear_matrix_values(k,l) = psi_coef(k,l)
-    enddo
     psi_bilinear_matrix_rows(k) = i
     psi_bilinear_matrix_columns(k) = j
     to_sort(k) = int(N_det_alpha_unique,8) * int(j-1,8) + int(i,8)
@@ -470,11 +520,6 @@ END_PROVIDER
   !$OMP SINGLE
   call iset_order(psi_bilinear_matrix_columns,psi_bilinear_matrix_order,N_det)
   !$OMP END SINGLE
-  !$OMP DO
-  do l=1,N_states
-    call dset_order(psi_bilinear_matrix_values(1,l),psi_bilinear_matrix_order,N_det)
-  enddo
-  !$OMP END DO
   !$OMP END PARALLEL
   deallocate(to_sort)
   ASSERT (minval(psi_bilinear_matrix_rows) == 1)
@@ -539,8 +584,71 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns_loc, (N_det_beta_unique+1)
 
 END_PROVIDER
 
- BEGIN_PROVIDER  [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_states) ]
-&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows   , (N_det) ]
+BEGIN_PROVIDER  [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_states) ]
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Transpose of :c:data:`psi_bilinear_matrix`
+  !
+  ! $D_\beta^\dagger.C^\dagger.D_\alpha$
+  !
+  ! Rows are $\alpha$ determinants and columns are $\beta$, but the matrix is stored in row major
+  ! format.
+  END_DOC
+  integer                        :: k,l
+
+  PROVIDE psi_bilinear_matrix_transp_rows
+
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,l)
+  do l=1,N_states
+    !$OMP DO
+    do k=1,N_det
+      psi_bilinear_matrix_transp_values (k,l) = psi_bilinear_matrix_values (k,l)
+    enddo
+    !$OMP ENDDO NOWAIT
+  enddo
+  !$OMP END PARALLEL
+  !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(l)
+  do l=1,N_states
+    call dset_order(psi_bilinear_matrix_transp_values(1,l),psi_bilinear_matrix_transp_order,N_det)
+  enddo
+  !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+BEGIN_PROVIDER  [ complex*16, psi_bilinear_matrix_transp_values_complex, (N_det,N_states) ]
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Transpose of :c:data:`psi_bilinear_matrix`
+  !
+  ! $D_\beta^\dagger.C^\dagger.D_\alpha$
+  !
+  ! Rows are $\alpha$ determinants and columns are $\beta$, but the matrix is stored in row major
+  ! format.
+  END_DOC
+  integer                        :: k,l
+
+  PROVIDE psi_bilinear_matrix_transp_rows
+
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,l)
+  do l=1,N_states
+    !$OMP DO
+    do k=1,N_det
+      psi_bilinear_matrix_transp_values_complex (k,l) = psi_bilinear_matrix_values_complex (k,l)
+    enddo
+    !$OMP ENDDO NOWAIT
+  enddo
+  !$OMP END PARALLEL
+  !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(l)
+  do l=1,N_states
+    call cdset_order(psi_bilinear_matrix_transp_values_complex(1,l),psi_bilinear_matrix_transp_order,N_det)
+  enddo
+  !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows   , (N_det) ]
 &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_columns, (N_det) ]
 &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_order  , (N_det) ]
   use bitmasks
@@ -555,18 +663,15 @@ END_PROVIDER
   END_DOC
   integer                        :: i,j,k,l
 
-  PROVIDE psi_coef_sorted_bit
+  if (is_complex) then
+    PROVIDE psi_coef_sorted_bit_complex
+  else
+    PROVIDE psi_coef_sorted_bit
+  endif
 
   integer*8, allocatable         :: to_sort(:)
   allocate(to_sort(N_det))
   !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
-  do l=1,N_states
-    !$OMP DO
-    do k=1,N_det
-      psi_bilinear_matrix_transp_values (k,l) = psi_bilinear_matrix_values (k,l)
-    enddo
-    !$OMP ENDDO NOWAIT
-  enddo
   !$OMP DO
   do k=1,N_det
     psi_bilinear_matrix_transp_columns(k) = psi_bilinear_matrix_columns(k)
@@ -588,11 +693,6 @@ END_PROVIDER
   call i8radix_sort(to_sort, psi_bilinear_matrix_transp_order, N_det,-1)
   call iset_order(psi_bilinear_matrix_transp_rows,psi_bilinear_matrix_transp_order,N_det)
   call iset_order(psi_bilinear_matrix_transp_columns,psi_bilinear_matrix_transp_order,N_det)
-  !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(l)
-  do l=1,N_states
-    call dset_order(psi_bilinear_matrix_transp_values(1,l),psi_bilinear_matrix_transp_order,N_det)
-  enddo
-  !$OMP END PARALLEL DO
   deallocate(to_sort)
   ASSERT (minval(psi_bilinear_matrix_transp_columns) == 1)
   ASSERT (minval(psi_bilinear_matrix_transp_rows) == 1)
@@ -666,7 +766,30 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix, (N_det_alpha_unique,N_de
   enddo
 END_PROVIDER
 
+BEGIN_PROVIDER [ complex*16, psi_bilinear_matrix_complex, (N_det_alpha_unique,N_det_beta_unique,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Coefficient matrix if the wave function is expressed in a bilinear form :
+  !
+  ! $D_\alpha^\dagger.C.D_\beta$
+  END_DOC
+  integer                        :: i,j,k,istate
+  psi_bilinear_matrix_complex = (0.d0,0.d0)
+  do k=1,N_det
+    i = psi_bilinear_matrix_rows(k)
+    j = psi_bilinear_matrix_columns(k)
+    do istate=1,N_states
+      psi_bilinear_matrix_complex(i,j,istate) = psi_bilinear_matrix_values_complex(k,istate)
+    enddo
+  enddo
+END_PROVIDER
+
 subroutine create_wf_of_psi_bilinear_matrix(truncate)
+  !todo: modify for complex (not called anywhere?)
+  if (is_complex) then
+    print*,irp_here,' not implemented for complex'
+    stop -1
+  endif
   use bitmasks
   implicit none
   BEGIN_DOC
@@ -738,6 +861,11 @@ subroutine create_wf_of_psi_bilinear_matrix(truncate)
 end
 
 subroutine generate_all_alpha_beta_det_products
+  !todo: modify for complex (only used by create_wf_of_psi_bilinear_matrix?)
+  if (is_complex) then
+    print*,irp_here,' not implemented for complex'
+    stop -1
+  endif
   implicit none
   BEGIN_DOC
   ! Creates a wave function from all possible $\alpha \times \beta$ determinants
@@ -881,6 +1009,11 @@ end
 
 
 subroutine copy_psi_bilinear_to_psi(psi, isize)
+  !todo: modify for complex (not called anywhere?)
+  if (is_complex) then
+    print*,irp_here,' not implemented for complex'
+    stop -1
+  endif
   implicit none
   BEGIN_DOC
   ! Overwrites :c:data:`psi_det` and :c:data:`psi_coef` with the wave function
@@ -1317,6 +1450,11 @@ END_TEMPLATE
 
 
 subroutine wf_of_psi_bilinear_matrix(truncate)
+  !todo: modify for complex (not called anywhere?)
+  if (is_complex) then
+    print*,irp_here,' not implemented for complex'
+    stop -1
+  endif
   use bitmasks
   implicit none
   BEGIN_DOC
diff --git a/src/determinants/utils.irp.f b/src/determinants/utils.irp.f
index 3aec16f9..97258216 100644
--- a/src/determinants/utils.irp.f
+++ b/src/determinants/utils.irp.f
@@ -20,6 +20,28 @@ BEGIN_PROVIDER [ double precision, H_matrix_all_dets,(N_det,N_det) ]
  !$OMP END PARALLEL DO
 END_PROVIDER
 
+BEGIN_PROVIDER [ complex*16, h_matrix_all_dets_complex,(N_det,N_det) ]
+  use bitmasks
+ implicit none
+ BEGIN_DOC
+ ! |H| matrix on the basis of the Slater determinants defined by psi_det
+ END_DOC
+ integer :: i,j,k
+ complex*16 :: hij
+ integer :: degree(N_det),idx(0:N_det)
+ call  i_h_j_complex(psi_det(1,1,1),psi_det(1,1,1),N_int,hij)
+ !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j,hij,degree,idx,k) &
+ !$OMP SHARED (N_det, psi_det, N_int,h_matrix_all_dets_complex)
+ do i =1,N_det
+   do j = i, N_det
+    call  i_h_j_complex(psi_det(1,1,i),psi_det(1,1,j),N_int,hij)
+    H_matrix_all_dets_complex(i,j) = hij
+    H_matrix_all_dets_complex(j,i) = dconjg(hij)
+  enddo
+ enddo
+ !$OMP END PARALLEL DO
+END_PROVIDER
+
 
 BEGIN_PROVIDER [ double precision, S2_matrix_all_dets,(N_det,N_det) ]
   use bitmasks
diff --git a/src/determinants/zmq.irp.f b/src/determinants/zmq.irp.f
index 5a114533..4d66f27a 100644
--- a/src/determinants/zmq.irp.f
+++ b/src/determinants/zmq.irp.f
@@ -13,6 +13,7 @@ integer function zmq_put_psi(zmq_to_qp_run_socket,worker_id)
   integer, external              :: zmq_put_psi_det_size
   integer*8, external            :: zmq_put_psi_det
   integer*8, external            :: zmq_put_psi_coef
+  integer*8, external            :: zmq_put_psi_coef_complex
 
   zmq_put_psi = 0
   if (zmq_put_N_states(zmq_to_qp_run_socket, worker_id) == -1) then
@@ -31,11 +32,17 @@ integer function zmq_put_psi(zmq_to_qp_run_socket,worker_id)
     zmq_put_psi = -1
     return
   endif
+  if (is_complex) then
+    if (zmq_put_psi_coef_complex(zmq_to_qp_run_socket, worker_id) == -1) then
+      zmq_put_psi = -1
+      return
+    endif
+  else
   if (zmq_put_psi_coef(zmq_to_qp_run_socket, worker_id) == -1) then
     zmq_put_psi = -1
     return
   endif
-
+  endif
 end
 
 
@@ -54,6 +61,7 @@ integer function zmq_get_psi_notouch(zmq_to_qp_run_socket, worker_id)
   integer, external              :: zmq_get_psi_det_size
   integer*8, external            :: zmq_get_psi_det
   integer*8, external            :: zmq_get_psi_coef
+  integer*8, external            :: zmq_get_psi_coef_complex
 
   zmq_get_psi_notouch = 0
 
@@ -75,19 +83,34 @@ integer function zmq_get_psi_notouch(zmq_to_qp_run_socket, worker_id)
     allocate(psi_det(N_int,2,psi_det_size))
   endif
 
+  if (is_complex) then
+    if (size(psi_coef_complex,kind=8) /= psi_det_size*N_states) then
+      deallocate(psi_coef_complex)
+      allocate(psi_coef_complex(psi_det_size,N_states))
+    endif
+  else
   if (size(psi_coef,kind=8) /= psi_det_size*N_states) then
     deallocate(psi_coef)
     allocate(psi_coef(psi_det_size,N_states))
   endif
+  endif
 
   if (zmq_get_psi_det(zmq_to_qp_run_socket, worker_id) == -1_8) then
     zmq_get_psi_notouch = -1
     return
   endif
+
+  if (is_complex) then
+    if (zmq_get_psi_coef_complex(zmq_to_qp_run_socket, worker_id) == -1_8) then
+      zmq_get_psi_notouch = -1
+      return
+    endif
+  else
   if (zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id) == -1_8) then
     zmq_get_psi_notouch = -1
     return
   endif
+  endif
 
 end
 
@@ -102,8 +125,11 @@ integer function zmq_get_psi(zmq_to_qp_run_socket, worker_id)
   integer, intent(in)            :: worker_id
   integer, external :: zmq_get_psi_notouch
   zmq_get_psi = zmq_get_psi_notouch(zmq_to_qp_run_socket, worker_id)
+  if (is_complex) then
+    SOFT_TOUCH psi_det psi_coef_complex psi_det_size N_det N_states
+  else
   SOFT_TOUCH psi_det psi_coef psi_det_size N_det N_states
-
+  endif
 end
 
 
@@ -146,12 +172,20 @@ integer function zmq_put_psi_bilinear(zmq_to_qp_run_socket,worker_id)
     zmq_put_psi_bilinear = -1
     return
   endif
-
+  
+  if (is_complex) then
+    integer*8, external            :: zmq_put_psi_bilinear_matrix_values_complex
+    if (zmq_put_psi_bilinear_matrix_values_complex(zmq_to_qp_run_socket, worker_id) == -1) then
+      zmq_put_psi_bilinear = -1
+      return
+    endif
+  else
   integer*8, external            :: zmq_put_psi_bilinear_matrix_values
   if (zmq_put_psi_bilinear_matrix_values(zmq_to_qp_run_socket, worker_id) == -1) then
     zmq_put_psi_bilinear = -1
     return
   endif
+  endif
 
   integer, external            :: zmq_put_N_det_alpha_unique
   if (zmq_put_N_det_alpha_unique(zmq_to_qp_run_socket,worker_id) == -1) then
@@ -197,10 +231,17 @@ integer function zmq_get_psi_bilinear(zmq_to_qp_run_socket, worker_id)
 
   zmq_get_psi_bilinear= 0
 
+  if (is_complex) then
+    if (size(psi_bilinear_matrix_values_complex,kind=8) /= N_det*N_states) then
+      deallocate(psi_bilinear_matrix_values_complex)
+      allocate(psi_bilinear_matrix_values_complex(N_det,N_states))
+    endif
+  else
   if (size(psi_bilinear_matrix_values,kind=8) /= N_det*N_states) then
     deallocate(psi_bilinear_matrix_values)
     allocate(psi_bilinear_matrix_values(N_det,N_states))
   endif
+  endif
 
   if (size(psi_bilinear_matrix_rows,kind=8) /= N_det) then
     deallocate(psi_bilinear_matrix_rows)
@@ -216,12 +257,20 @@ integer function zmq_get_psi_bilinear(zmq_to_qp_run_socket, worker_id)
     deallocate(psi_bilinear_matrix_order)
     allocate(psi_bilinear_matrix_order(N_det))
   endif
-
+  
+  if (is_complex) then
+    integer*8, external            :: zmq_get_psi_bilinear_matrix_values_complex
+    if (zmq_get_psi_bilinear_matrix_values_complex(zmq_to_qp_run_socket, worker_id) == -1_8) then
+      zmq_get_psi_bilinear = -1
+      return
+    endif
+  else
   integer*8, external            :: zmq_get_psi_bilinear_matrix_values
   if (zmq_get_psi_bilinear_matrix_values(zmq_to_qp_run_socket, worker_id) == -1_8) then
     zmq_get_psi_bilinear = -1
     return
   endif
+  endif
 
   integer*8, external            :: zmq_get_psi_bilinear_matrix_rows
   if (zmq_get_psi_bilinear_matrix_rows(zmq_to_qp_run_socket, worker_id) == -1_8) then
@@ -266,7 +315,11 @@ integer function zmq_get_psi_bilinear(zmq_to_qp_run_socket, worker_id)
     return
   endif
 
+  if (is_complex) then
+    SOFT_TOUCH psi_bilinear_matrix_values_complex psi_bilinear_matrix_rows psi_bilinear_matrix_columns psi_bilinear_matrix_order psi_det psi_coef_complex psi_det_size N_det N_states psi_det_beta_unique psi_det_alpha_unique N_det_beta_unique N_det_alpha_unique
+  else
   SOFT_TOUCH psi_bilinear_matrix_values psi_bilinear_matrix_rows psi_bilinear_matrix_columns psi_bilinear_matrix_order psi_det psi_coef psi_det_size N_det N_states psi_det_beta_unique psi_det_alpha_unique N_det_beta_unique N_det_alpha_unique
+  endif
 
 end
 
@@ -563,6 +616,69 @@ psi_bilinear_matrix_values ;;
 
 END_TEMPLATE
 
+BEGIN_TEMPLATE
+
+integer*8 function zmq_put_$X(zmq_to_qp_run_socket,worker_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Put $X on the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer*8                      :: rc8
+  character*(256)                :: msg
+
+  zmq_put_$X = 0
+
+  integer*8 :: zmq_put_cdmatrix
+  integer   :: ni, nj
+
+  if (size($X,kind=8) <= 8388608_8) then
+    ni = size($X,kind=4)
+    nj = 1
+  else
+    ni = 8388608
+    nj = int(size($X,kind=8)/8388608_8,4) + 1
+  endif
+  rc8 = zmq_put_cdmatrix(zmq_to_qp_run_socket, 1, '$X', $X, ni, nj, size($X,kind=8) )
+  zmq_put_$X = rc8
+end
+
+integer*8 function zmq_get_$X(zmq_to_qp_run_socket,worker_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! get $X on the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer*8                      :: rc8
+  character*(256)                :: msg
+
+  zmq_get_$X = 0_8
+
+  integer*8 :: zmq_get_cdmatrix
+  integer   :: ni, nj
+
+  if (size($X,kind=8) <= 8388608_8) then
+    ni = size($X,kind=4)
+    nj = 1
+  else
+    ni = 8388608
+    nj = int(size($X,kind=8)/8388608_8,4) + 1
+  endif
+  rc8 = zmq_get_cdmatrix(zmq_to_qp_run_socket, 1, '$X', $X, ni, nj, size($X,kind=8) )
+  zmq_get_$X = rc8
+end
+
+SUBST [ X ]
+
+psi_coef_complex ;;
+psi_bilinear_matrix_values_complex ;;
+
+END_TEMPLATE
+
 
 !---------------------------------------------------------------------------
 
diff --git a/src/fci/fci.irp.f b/src/fci/fci.irp.f
index 5c747081..b92257d4 100644
--- a/src/fci/fci.irp.f
+++ b/src/fci/fci.irp.f
@@ -37,7 +37,11 @@ program fci
   END_DOC
 
   if (.not.is_zmq_slave) then
-    PROVIDE psi_det psi_coef mo_two_e_integrals_in_map
+    if (is_complex) then
+      PROVIDE psi_det psi_coef_complex mo_two_e_integrals_in_map
+    else
+      PROVIDE psi_det psi_coef mo_two_e_integrals_in_map
+    endif
 
     if (do_pt2) then
       call run_stochastic_cipsi
diff --git a/src/generators_cas/generators.irp.f b/src/generators_cas/generators.irp.f
index b2f58202..cc87a0af 100644
--- a/src/generators_cas/generators.irp.f
+++ b/src/generators_cas/generators.irp.f
@@ -82,3 +82,39 @@ BEGIN_PROVIDER [ double precision, select_max, (size_select_max) ]
   select_max = huge(1.d0)
 END_PROVIDER
 
+
+ BEGIN_PROVIDER [ complex*16, psi_coef_generators_complex, (psi_det_size,N_states) ]
+&BEGIN_PROVIDER [ complex*16, psi_coef_sorted_gen_complex, (psi_det_size,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! For Single reference wave functions, the generator is the
+  ! Hartree-Fock determinant
+  END_DOC
+  integer                        :: i, k, l, m
+  logical                        :: good
+  integer, external :: number_of_holes,number_of_particles
+  integer, allocatable :: nongen(:)
+  integer :: inongen
+
+  allocate(nongen(N_det))
+
+  inongen = 0
+  m=0
+  do i=1,N_det
+    good = ( number_of_holes(psi_det_sorted(1,1,i)) ==0).and.(number_of_particles(psi_det_sorted(1,1,i))==0 )
+    if (good) then
+      m = m+1
+      psi_coef_generators_complex(m,:) = psi_coef_sorted_complex(i,:)
+    else
+      inongen += 1
+      nongen(inongen) = i
+    endif
+  enddo
+  ASSERT (m == N_det_generators)
+
+  psi_coef_sorted_gen_complex(:N_det_generators, :) = psi_coef_generators_complex(:N_det_generators, :)
+  do i=1,inongen
+    psi_coef_sorted_gen_complex(N_det_generators+i, :) = psi_coef_sorted_complex(nongen(i),:)
+  end do
+END_PROVIDER
+
diff --git a/src/generators_full/generators.irp.f b/src/generators_full/generators.irp.f
index 7f18947f..f6a42fad 100644
--- a/src/generators_full/generators.irp.f
+++ b/src/generators_full/generators.irp.f
@@ -22,20 +22,35 @@ BEGIN_PROVIDER [ integer, N_det_generators ]
  call write_int(6,N_det_generators,'Number of generators')
 END_PROVIDER
 
- BEGIN_PROVIDER [ integer(bit_kind), psi_det_generators, (N_int,2,psi_det_size) ]
-&BEGIN_PROVIDER [ double precision, psi_coef_generators, (psi_det_size,N_states) ]
+BEGIN_PROVIDER [ integer(bit_kind), psi_det_generators, (N_int,2,psi_det_size) ]
  implicit none
  BEGIN_DOC
  ! For Single reference wave functions, the generator is the
  ! Hartree-Fock determinant
  END_DOC
  psi_det_generators(1:N_int,1:2,1:N_det) = psi_det_sorted(1:N_int,1:2,1:N_det)
- psi_coef_generators(1:N_det,1:N_states) = psi_coef_sorted(1:N_det,1:N_states)
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, psi_coef_generators, (psi_det_size,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! For Single reference wave functions, the generator is the
+ ! Hartree-Fock determinant
+ END_DOC
+ psi_coef_generators(1:N_det,1:N_states) = psi_coef_sorted(1:N_det,1:N_states)
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, psi_coef_generators_complex, (psi_det_size,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! For Single reference wave functions, the generator is the
+ ! Hartree-Fock determinant
+ END_DOC
+ psi_coef_generators_complex(1:N_det,1:N_states) = psi_coef_sorted_complex(1:N_det,1:N_states)
+END_PROVIDER
+
  BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_gen, (N_int,2,psi_det_size) ]
-&BEGIN_PROVIDER [ double precision, psi_coef_sorted_gen, (psi_det_size,N_states) ]
 &BEGIN_PROVIDER [ integer, psi_det_sorted_gen_order,     (psi_det_size)  ]
 
  implicit none
@@ -44,10 +59,26 @@ END_PROVIDER
  ! Hartree-Fock determinant
  END_DOC
  psi_det_sorted_gen = psi_det_sorted
- psi_coef_sorted_gen = psi_coef_sorted
  psi_det_sorted_gen_order = psi_det_sorted_order
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, psi_coef_sorted_gen, (psi_det_size,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! For Single reference wave functions, the generator is the
+ ! Hartree-Fock determinant
+ END_DOC
+ psi_coef_sorted_gen = psi_coef_sorted
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, psi_coef_sorted_gen_complex, (psi_det_size,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! For Single reference wave functions, the generator is the
+ ! Hartree-Fock determinant
+ END_DOC
+ psi_coef_sorted_gen_complex = psi_coef_sorted_complex
+END_PROVIDER
 
 BEGIN_PROVIDER [integer, degree_max_generators]
  implicit none
diff --git a/src/hartree_fock/hf_energy.irp.f b/src/hartree_fock/hf_energy.irp.f
index a0f9f897..5a68164f 100644
--- a/src/hartree_fock/hf_energy.irp.f
+++ b/src/hartree_fock/hf_energy.irp.f
@@ -11,24 +11,52 @@ BEGIN_PROVIDER [double precision, extra_e_contrib_density]
 
 END_PROVIDER
 
- BEGIN_PROVIDER [ double precision, HF_energy]
-&BEGIN_PROVIDER [ double precision, HF_two_electron_energy]
-&BEGIN_PROVIDER [ double precision, HF_one_electron_energy]
+ BEGIN_PROVIDER [ double precision, hf_energy]
+&BEGIN_PROVIDER [ double precision, hf_two_electron_energy]
+&BEGIN_PROVIDER [ double precision, hf_one_electron_energy]
  implicit none
  BEGIN_DOC
  ! Hartree-Fock energy containing the nuclear repulsion, and its one- and two-body components.
  END_DOC
- integer :: i,j
- HF_energy = nuclear_repulsion
- HF_two_electron_energy = 0.d0
- HF_one_electron_energy = 0.d0
- do j=1,ao_num
-   do i=1,ao_num
-    HF_two_electron_energy += 0.5d0 * ( ao_two_e_integral_alpha(i,j) * SCF_density_matrix_ao_alpha(i,j) &
-                                       +ao_two_e_integral_beta(i,j)  * SCF_density_matrix_ao_beta(i,j) )
-    HF_one_electron_energy += ao_one_e_integrals(i,j) * (SCF_density_matrix_ao_alpha(i,j) + SCF_density_matrix_ao_beta (i,j) )
-   enddo
- enddo
- HF_energy += HF_two_electron_energy + HF_one_electron_energy
+ integer :: i,j,k
+ hf_energy = nuclear_repulsion
+ hf_two_electron_energy = 0.d0
+ hf_one_electron_energy = 0.d0
+  if (is_complex) then
+    complex*16 :: hf_1e_tmp, hf_2e_tmp
+    hf_1e_tmp = (0.d0,0.d0)
+    hf_2e_tmp = (0.d0,0.d0)
+    do k=1,kpt_num
+      do j=1,ao_num_per_kpt
+        do i=1,ao_num_per_kpt
+          hf_2e_tmp += 0.5d0 * ( ao_two_e_integral_alpha_kpts(i,j,k) * scf_density_matrix_ao_alpha_kpts(j,i,k) &
+                                +ao_two_e_integral_beta_kpts(i,j,k)  * scf_density_matrix_ao_beta_kpts(j,i,k) )
+          hf_1e_tmp += ao_one_e_integrals_kpts(i,j,k) * (scf_density_matrix_ao_alpha_kpts(j,i,k) &
+                                                        + scf_density_matrix_ao_beta_kpts (j,i,k) )
+        enddo
+      enddo
+    enddo
+    if (dabs(dimag(hf_2e_tmp)).gt.1.d-10) then
+      print*,'HF_2e energy should be real:',irp_here
+      stop -1
+    else
+      hf_two_electron_energy = dble(hf_2e_tmp)
+    endif
+    if (dabs(dimag(hf_1e_tmp)).gt.1.d-10) then
+      print*,'HF_1e energy should be real:',irp_here
+      stop -1
+    else
+      hf_one_electron_energy = dble(hf_1e_tmp)
+    endif
+  else
+    do j=1,ao_num
+      do i=1,ao_num
+        hf_two_electron_energy += 0.5d0 * ( ao_two_e_integral_alpha(i,j) * scf_density_matrix_ao_alpha(i,j) &
+                                           +ao_two_e_integral_beta(i,j)  * scf_density_matrix_ao_beta(i,j) )
+        hf_one_electron_energy += ao_one_e_integrals(i,j) * (scf_density_matrix_ao_alpha(i,j) + scf_density_matrix_ao_beta (i,j) )
+      enddo
+    enddo
+  endif
+ hf_energy += hf_two_electron_energy + hf_one_electron_energy
 END_PROVIDER
 
diff --git a/src/hartree_fock/print_e_scf.irp.f b/src/hartree_fock/print_e_scf.irp.f
new file mode 100644
index 00000000..989c0b9c
--- /dev/null
+++ b/src/hartree_fock/print_e_scf.irp.f
@@ -0,0 +1,21 @@
+program print_e_scf
+  call run
+end
+
+subroutine run
+
+  use bitmasks
+  implicit none
+
+  !if (is_complex) then
+  !  call print_debug_scf_complex
+  !endif
+
+  print*,'hf 1e,2e,total energy'
+  print*,hf_one_electron_energy
+  print*,hf_two_electron_energy
+  print*,hf_energy
+  
+end
+
+
diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f
index 6ebb1b80..8e438613 100644
--- a/src/hartree_fock/scf.irp.f
+++ b/src/hartree_fock/scf.irp.f
@@ -45,19 +45,43 @@ subroutine create_guess
   END_DOC
   logical                        :: exists
   PROVIDE ezfio_filename
-  call ezfio_has_mo_basis_mo_coef(exists)
+  if (is_complex) then
+!    call ezfio_has_mo_basis_mo_coef_complex(exists)
+    call ezfio_has_mo_basis_mo_coef_kpts(exists)
+  else
+    call ezfio_has_mo_basis_mo_coef(exists)
+  endif
   if (.not.exists) then
     if (mo_guess_type == "HCore") then
-      mo_coef = ao_ortho_lowdin_coef
-      TOUCH mo_coef
-      mo_label = 'Guess'
-      call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals,     &
-          size(mo_one_e_integrals,1),                            &
-          size(mo_one_e_integrals,2),                            &
-          mo_label,1,.false.)
-      SOFT_TOUCH mo_coef mo_label
+      if (is_complex) then
+        !mo_coef_complex = ao_ortho_lowdin_coef_complex
+        mo_coef_kpts = ao_ortho_lowdin_coef_kpts
+        TOUCH mo_coef_kpts
+        mo_label = 'Guess'
+        !call mo_as_eigvectors_of_mo_matrix_complex(mo_one_e_integrals_kpts,     &
+        call mo_as_eigvectors_of_mo_matrix_kpts(mo_one_e_integrals_kpts,     &
+            size(mo_one_e_integrals_kpts,1),                            &
+            size(mo_one_e_integrals_kpts,2),                            &
+            size(mo_one_e_integrals_kpts,3),                            &
+            mo_label,1,.false.)
+        SOFT_TOUCH mo_coef_kpts mo_label
+      else
+        mo_coef = ao_ortho_lowdin_coef
+        TOUCH mo_coef
+        mo_label = 'Guess'
+        call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals,     &
+            size(mo_one_e_integrals,1),                            &
+            size(mo_one_e_integrals,2),                            &
+            mo_label,1,.false.)
+        SOFT_TOUCH mo_coef mo_label
+      endif
     else if (mo_guess_type == "Huckel") then
-      call huckel_guess
+      if (is_complex) then
+        !call huckel_guess_complex
+        call huckel_guess_kpts
+      else
+        call huckel_guess
+      endif
     else
       print *,  'Unrecognized MO guess type : '//mo_guess_type
       stop 1
@@ -77,9 +101,17 @@ subroutine run
   integer                        :: i_it, i, j, k
 
   mo_label = "Orthonormalized"
-
-  call Roothaan_Hall_SCF
+  if (is_complex) then
+    !call roothaan_hall_scf_complex
+    call roothaan_hall_scf_kpts
+  else
+    call roothaan_hall_scf
+  endif
   call ezfio_set_hartree_fock_energy(SCF_energy)
+  print*,'hf 1e,2e,total energy'
+  print*,hf_one_electron_energy
+  print*,hf_two_electron_energy
+  print*,hf_energy
 
 end
 
diff --git a/src/hartree_fock/scf_k_real.irp.f b/src/hartree_fock/scf_k_real.irp.f
new file mode 100644
index 00000000..c666b989
--- /dev/null
+++ b/src/hartree_fock/scf_k_real.irp.f
@@ -0,0 +1,92 @@
+program scf_k_real
+  BEGIN_DOC
+! 
+! The :ref:`scf` program performs *Restricted* Hartree-Fock
+! calculations (the spatial part of the |MOs| is common for alpha and beta
+! spinorbitals).
+! 
+! It performs the following actions:
+! 
+! #. Compute/Read all the one- and two-electron integrals, and store them
+!    in memory
+! #. Check in the |EZFIO| database if there is a set of |MOs|.
+!    If there is, it will read them as initial guess. Otherwise, it will
+!    create a guess.
+! #. Perform the |SCF| iterations
+! 
+! For the keywords related to the |SCF| procedure, see the ``scf_utils``
+! directory where you will find all options.
+! 
+! At each iteration, the |MOs| are saved in the |EZFIO| database. Hence,
+! if the calculation crashes for any unexpected reason, the calculation
+! can be restarted by running again the |SCF| with the same |EZFIO|
+! database.
+! 
+! To start again a fresh |SCF| calculation, the |MOs| can be reset by
+! running the :ref:`qp_reset` command.
+! 
+! The `DIIS`_ algorithm is implemented, as well as the `level-shifting`_ 
+! method. If the |SCF| does not converge, try again with a higher value of
+! :option:`level_shift`.
+! 
+! .. _DIIS: https://en.wikipedia.org/w/index.php?title=DIIS
+! .. _level-shifting: https://doi.org/10.1002/qua.560070407
+!
+  END_DOC
+  call create_guess_k_real
+  call orthonormalize_mos_k_real
+  call run_k_real
+end
+
+subroutine create_guess_k_real
+  implicit none
+  BEGIN_DOC
+!   Create a MO guess if no MOs are present in the EZFIO directory
+  END_DOC
+  logical                        :: exists
+  PROVIDE ezfio_filename
+  call ezfio_has_mo_basis_mo_coef_kpts(exists)
+  if (.not.exists) then
+    if (mo_guess_type == "HCore") then
+        !mo_coef_complex = ao_ortho_lowdin_coef_complex
+        mo_coef_kpts = ao_ortho_lowdin_coef_kpts_real
+        TOUCH mo_coef_kpts
+        mo_label = 'Guess'
+        !call mo_as_eigvectors_of_mo_matrix_complex(mo_one_e_integrals_kpts,     &
+        call mo_as_eigvectors_of_mo_matrix_kpts_real(mo_one_e_integrals_kpts_real,     &
+            size(mo_one_e_integrals_kpts_real,1),                            &
+            size(mo_one_e_integrals_kpts_real,2),                            &
+            size(mo_one_e_integrals_kpts_real,3),                            &
+            mo_label,1,.false.)
+        SOFT_TOUCH mo_coef_kpts mo_label
+    else if (mo_guess_type == "Huckel") then
+        call huckel_guess_kpts_real
+    else
+      print *,  'Unrecognized MO guess type : '//mo_guess_type
+      stop 1
+    endif
+  endif
+end
+
+subroutine run_k_real
+
+  BEGIN_DOC
+!   Run SCF calculation
+  END_DOC
+
+  use bitmasks
+  implicit none
+
+  integer                        :: i_it, i, j, k
+
+  mo_label = "Orthonormalized"
+  call roothaan_hall_scf_kpts_real
+  call ezfio_set_hartree_fock_energy(SCF_energy)
+  print*,'hf 1e,2e,total energy'
+  print*,hf_one_electron_energy
+  print*,hf_two_electron_energy
+  print*,hf_energy
+
+end
+
+
diff --git a/src/iterations/print_summary.irp.f b/src/iterations/print_summary.irp.f
index d04d8a93..0fabffc7 100644
--- a/src/iterations/print_summary.irp.f
+++ b/src/iterations/print_summary.irp.f
@@ -99,3 +99,15 @@ subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_occ_pattern_,n_st,s2_
 
 end subroutine
 
+subroutine print_debug_fci
+  implicit none
+  integer :: i 
+  do i=1,n_det
+    print'(2((F25.15),2X))',psi_coef_complex(i,1)
+    call debug_det(psi_det(1,1,i),n_int)
+  enddo
+  print*,'hamiltonian'
+  do i=1,n_det
+    print '(1000(F25.15))',h_matrix_all_dets_complex(i,:)
+  enddo
+end subroutine
diff --git a/src/mo_basis/EZFIO.cfg b/src/mo_basis/EZFIO.cfg
index 874af46a..762eb268 100644
--- a/src/mo_basis/EZFIO.cfg
+++ b/src/mo_basis/EZFIO.cfg
@@ -9,11 +9,17 @@ doc: Coefficient of the i-th |AO| on the j-th |MO|
 interface: ezfio
 size: (ao_basis.ao_num,mo_basis.mo_num)
 
-[mo_coef_imag]
+[mo_coef_complex]
 type: double precision
-doc: Imaginary part of the MO coefficient of the i-th |AO| on the j-th |MO|
+doc: Complex MO coefficient of the i-th |AO| on the j-th |MO|
 interface: ezfio
-size: (ao_basis.ao_num,mo_basis.mo_num)
+size: (2,ao_basis.ao_num,mo_basis.mo_num)
+
+[mo_coef_kpts]
+type: double precision
+doc: Complex MO coefficient of the i-th |AO| on the j-th |MO|
+interface: ezfio
+size: (2,ao_basis.ao_num_per_kpt,mo_basis.mo_num_per_kpt,nuclei.kpt_num)
 
 [mo_label]
 type: character*(64)
@@ -26,6 +32,12 @@ doc: |MO| occupation numbers
 interface: ezfio
 size: (mo_basis.mo_num)
 
+[mo_occ_kpts]
+type: double precision
+doc: |MO| occupation numbers
+interface: ezfio
+size: (mo_basis.mo_num_per_kpt,nuclei.kpt_num)
+
 [mo_class]
 type: MO_class
 doc: [ Core | Inactive | Active | Virtual | Deleted ], as defined by :ref:`qp_set_mo_class`
@@ -37,3 +49,8 @@ type: character*(32)
 doc: MD5 checksum characterizing the |AO| basis set.
 interface: ezfio
 
+[mo_num_per_kpt]
+type: integer
+doc: Number of |MOs| per kpt
+interface: ezfio
+
diff --git a/src/mo_basis/mos.irp.f b/src/mo_basis/mos.irp.f
index 73d33901..f5310696 100644
--- a/src/mo_basis/mos.irp.f
+++ b/src/mo_basis/mos.irp.f
@@ -39,6 +39,63 @@ BEGIN_PROVIDER [ integer, mo_num ]
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ integer, mo_num_per_kpt ]
+  implicit none
+  BEGIN_DOC
+  ! Number of MOs per kpt
+  END_DOC
+
+  logical                        :: has
+  PROVIDE ezfio_filename
+  if (mpi_master) then
+    call ezfio_has_mo_basis_mo_num_per_kpt(has)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer                        :: ierr
+    call MPI_BCAST( has, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read mo_num_per_kpt with MPI'
+    endif
+  IRP_ENDIF
+  if (.not.has) then
+    mo_num_per_kpt = ao_ortho_canonical_num_per_kpt_max
+  else
+    if (mpi_master) then
+      call ezfio_get_mo_basis_mo_num_per_kpt(mo_num_per_kpt)
+    endif
+    IRP_IF MPI
+      call MPI_BCAST( mo_num_per_kpt, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read mo_num_per_kpt with MPI'
+      endif
+    IRP_ENDIF
+  endif
+  call write_int(6,mo_num_per_kpt,'mo_num_per_kpt')
+  ASSERT (mo_num_per_kpt > 0)
+
+END_PROVIDER
+
+subroutine get_kpt_idx_mo(idx_full,k,i)
+  implicit none
+  BEGIN_DOC
+  ! idx_full is mo index in full range (up to mo_num)
+  ! k is index of the k-point for this mo
+  ! i is index of this mo within k-point k
+  ! this assumes that all kpts have the same number of mos
+  END_DOC
+
+  integer, intent(in) :: idx_full
+  integer, intent(out) :: i,k
+  i = mod(idx_full-1,mo_num_per_kpt)+1
+  k = (idx_full-1)/mo_num_per_kpt+1
+  ASSERT (k <= kpt_num)
+end
+
 
 BEGIN_PROVIDER [ double precision, mo_coef, (ao_num,mo_num) ]
   implicit none
@@ -93,59 +150,6 @@ BEGIN_PROVIDER [ double precision, mo_coef, (ao_num,mo_num) ]
   endif
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, mo_coef_imag, (ao_num,mo_num) ]
-  implicit none
-  BEGIN_DOC
-  ! Molecular orbital coefficients on |AO| basis set
-  !
-  ! mo_coef_imag(i,j) = coefficient of the i-th |AO| on the jth |MO|
-  !
-  ! mo_label : Label characterizing the |MOs| (local, canonical, natural, etc)
-  END_DOC
-  integer                        :: i, j
-  double precision, allocatable  :: buffer(:,:)
-  logical                        :: exists
-  PROVIDE ezfio_filename
-
-
-  if (mpi_master) then
-    ! Coefs
-    call ezfio_has_mo_basis_mo_coef_imag(exists)
-  endif
-  IRP_IF MPI_DEBUG
-    print *,  irp_here, mpi_rank
-    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
-  IRP_ENDIF
-  IRP_IF MPI
-    include 'mpif.h'
-    integer :: ierr
-    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
-    if (ierr /= MPI_SUCCESS) then
-      stop 'Unable to read mo_coef_imag with MPI'
-    endif
-  IRP_ENDIF
-
-  if (exists) then
-    if (mpi_master) then
-      call ezfio_get_mo_basis_mo_coef_imag(mo_coef_imag)
-      write(*,*) 'Read  mo_coef_imag'
-    endif
-    IRP_IF MPI
-      call MPI_BCAST( mo_coef_imag, mo_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
-      if (ierr /= MPI_SUCCESS) then
-        stop 'Unable to read mo_coef_imag with MPI'
-      endif
-    IRP_ENDIF
-  else
-    ! Orthonormalized AO basis
-    do i=1,mo_num
-      do j=1,ao_num
-        mo_coef_imag(j,i) = 0.d0
-      enddo
-    enddo
-  endif
-END_PROVIDER
-
 BEGIN_PROVIDER [ double precision, mo_coef_in_ao_ortho_basis, (ao_num, mo_num) ]
  implicit none
  BEGIN_DOC
@@ -154,7 +158,7 @@ BEGIN_PROVIDER [ double precision, mo_coef_in_ao_ortho_basis, (ao_num, mo_num) ]
  ! $C^{-1}.C_{mo}$
  END_DOC
  call dgemm('N','N',ao_num,mo_num,ao_num,1.d0,                   &
-     ao_ortho_canonical_coef_inv, size(ao_ortho_canonical_coef_inv,1),&
+     ao_ortho_cano_coef_inv, size(ao_ortho_cano_coef_inv,1),&
      mo_coef, size(mo_coef,1), 0.d0,                                 &
      mo_coef_in_ao_ortho_basis, size(mo_coef_in_ao_ortho_basis,1))
 
@@ -295,28 +299,43 @@ subroutine mix_mo_jk(j,k)
   ! by convention, the '+' |MO| is in the lowest  index (min(j,k))
   ! by convention, the '-' |MO| is in the highest index (max(j,k))
   END_DOC
-  double precision               :: array_tmp(ao_num,2),dsqrt_2
   if(j==k)then
     print*,'You want to mix two orbitals that are the same !'
     print*,'It does not make sense ... '
     print*,'Stopping ...'
     stop
   endif
-  array_tmp = 0.d0
+  double precision               :: dsqrt_2
   dsqrt_2 = 1.d0/dsqrt(2.d0)
-  do i = 1, ao_num
-    array_tmp(i,1) = dsqrt_2 * (mo_coef(i,j) + mo_coef(i,k))
-    array_tmp(i,2) = dsqrt_2 * (mo_coef(i,j) - mo_coef(i,k))
-  enddo
   i_plus = min(j,k)
   i_minus = max(j,k)
-  do i = 1, ao_num
-    mo_coef(i,i_plus) = array_tmp(i,1)
-    mo_coef(i,i_minus) = array_tmp(i,2)
-  enddo
+  if (is_complex) then
+    complex*16               :: array_tmp_c(ao_num,2)
+    array_tmp_c = (0.d0,0.d0)
+    do i = 1, ao_num
+      array_tmp_c(i,1) = dsqrt_2 * (mo_coef_complex(i,j) + mo_coef_complex(i,k))
+      array_tmp_c(i,2) = dsqrt_2 * (mo_coef_complex(i,j) - mo_coef_complex(i,k))
+    enddo
+    do i = 1, ao_num
+      mo_coef_complex(i,i_plus) = array_tmp_c(i,1)
+      mo_coef_complex(i,i_minus) = array_tmp_c(i,2)
+    enddo
+  else
+    double precision               :: array_tmp(ao_num,2)
+    array_tmp = 0.d0
+    do i = 1, ao_num
+      array_tmp(i,1) = dsqrt_2 * (mo_coef(i,j) + mo_coef(i,k))
+      array_tmp(i,2) = dsqrt_2 * (mo_coef(i,j) - mo_coef(i,k))
+    enddo
+    do i = 1, ao_num
+      mo_coef(i,i_plus) = array_tmp(i,1)
+      mo_coef(i,i_minus) = array_tmp(i,2)
+    enddo
+  endif
 
 end
 
+
 subroutine ao_ortho_cano_to_ao(A_ao,LDA_ao,A,LDA)
   implicit none
   BEGIN_DOC
@@ -333,13 +352,13 @@ subroutine ao_ortho_cano_to_ao(A_ao,LDA_ao,A,LDA)
 
   call dgemm('T','N', ao_num, ao_num, ao_num,                        &
       1.d0,                                                          &
-      ao_ortho_canonical_coef_inv, size(ao_ortho_canonical_coef_inv,1),&
+      ao_ortho_cano_coef_inv, size(ao_ortho_cano_coef_inv,1),&
       A_ao,size(A_ao,1),                                             &
       0.d0, T, size(T,1))
 
   call dgemm('N','N', ao_num, ao_num, ao_num, 1.d0,                  &
       T, size(T,1),                                                  &
-      ao_ortho_canonical_coef_inv,size(ao_ortho_canonical_coef_inv,1),&
+      ao_ortho_cano_coef_inv,size(ao_ortho_cano_coef_inv,1),&
       0.d0, A, size(A,1))
 
   deallocate(T)
diff --git a/src/mo_basis/mos_cplx.irp.f b/src/mo_basis/mos_cplx.irp.f
new file mode 100644
index 00000000..19abe30e
--- /dev/null
+++ b/src/mo_basis/mos_cplx.irp.f
@@ -0,0 +1,481 @@
+!BEGIN_PROVIDER [ integer, mo_num_per_kpt ]
+! implicit none
+! BEGIN_DOC
+! ! number of mos per kpt.
+! END_DOC
+! mo_num_per_kpt = mo_num/kpt_num
+!END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, mo_coef_complex, (ao_num,mo_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Molecular orbital coefficients on |AO| basis set
+  !
+  ! mo_coef_imag(i,j) = coefficient of the i-th |AO| on the jth |MO|
+  !
+  ! mo_label : Label characterizing the |MOs| (local, canonical, natural, etc)
+  END_DOC
+  integer                        :: i, j
+  logical                        :: exists
+  PROVIDE ezfio_filename
+
+  if (mpi_master) then
+    ! Coefs
+    call ezfio_has_mo_basis_mo_coef_complex(exists)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read mo_coef_complex with MPI'
+    endif
+  IRP_ENDIF
+  
+  if (exists) then
+    if (mpi_master) then
+      call ezfio_get_mo_basis_mo_coef_complex(mo_coef_complex)
+      write(*,*) 'Read  mo_coef_complex'
+    endif
+    IRP_IF MPI
+      call MPI_BCAST( mo_coef_complex, mo_num*ao_num, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read mo_coef_complex with MPI'
+      endif
+    IRP_ENDIF
+  else
+    ! Orthonormalized AO basis
+    do i=1,mo_num
+      do j=1,ao_num
+        mo_coef_complex(j,i) = ao_ortho_canonical_coef_complex(j,i)
+      enddo
+    enddo
+  endif
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ complex*16, mo_coef_in_ao_ortho_basis_complex, (ao_num, mo_num) ]
+ implicit none
+ BEGIN_DOC
+ ! |MO| coefficients in orthogonalized |AO| basis
+ !
+ ! $C^{-1}.C_{mo}$
+ END_DOC
+ call zgemm('N','N',ao_num,mo_num,ao_num,(1.d0,0.d0),                   &
+     ao_ortho_cano_coef_inv_cplx, size(ao_ortho_cano_coef_inv_cplx,1),&
+     mo_coef_complex, size(mo_coef_complex,1), (0.d0,0.d0),                                 &
+     mo_coef_in_ao_ortho_basis_complex, size(mo_coef_in_ao_ortho_basis_complex,1))
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, mo_coef_complex_kpts, (ao_num_per_kpt, mo_num_per_kpt, kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! nonzero blocks of |MO| coefficients
+  !
+  END_DOC
+  integer :: i,j,k, mo_shft, ao_shft
+  mo_coef_complex_kpts = (0.d0,0.d0)
+
+ ! do k=1,kpt_num
+ !   mo_shft = (k-1)*mo_num_per_kpt
+ !   ao_shft = (k-1)*ao_num_per_kpt
+ !   do i=1,mo_num_per_kpt
+ !     do j=1,ao_num_per_kpt
+ !       mo_coef_complex_kpts(j,i,k) = mo_coef_complex(j+ao_shft,i+mo_shft)
+ !     enddo
+ !   enddo
+ ! enddo
+  do k=1,kpt_num
+    do i=1,mo_num_per_kpt
+      do j=1,ao_num_per_kpt
+        mo_coef_complex_kpts(j,i,k) = mo_coef_kpts(j,i,k)
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ complex*16, mo_coef_transp_complex, (mo_num,ao_num) ]
+&BEGIN_PROVIDER [ complex*16, mo_coef_transp_complex_conjg, (mo_num,ao_num) ]
+  implicit none
+  BEGIN_DOC
+  ! |MO| coefficients on |AO| basis set
+  END_DOC
+  integer                        :: i, j
+
+  do j=1,ao_num
+    do i=1,mo_num
+      mo_coef_transp_complex(i,j) = mo_coef_complex(j,i)
+      mo_coef_transp_complex_conjg(i,j) = dconjg(mo_coef_complex(j,i))
+    enddo
+  enddo
+
+END_PROVIDER
+
+subroutine ao_to_mo_complex(A_ao,LDA_ao,A_mo,LDA_mo)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the AO basis to the MO basis
+  ! where A is complex in the AO basis
+  !
+  ! C^\dagger.A_ao.C
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  complex*16, intent(in)   :: A_ao(LDA_ao,ao_num)
+  complex*16, intent(out)  :: A_mo(LDA_mo,mo_num)
+  complex*16, allocatable  :: T(:,:)
+  
+  allocate ( T(ao_num,mo_num) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+  
+  call zgemm('N','N', ao_num, mo_num, ao_num,                    &
+      (1.d0,0.d0), A_ao,LDA_ao,                                      &
+      mo_coef_complex, size(mo_coef_complex,1),                                      &
+      (0.d0,0.d0), T, size(T,1))
+  
+  call zgemm('C','N', mo_num, mo_num, ao_num,                &
+      (1.d0,0.d0), mo_coef_complex,size(mo_coef_complex,1),                          &
+      T, ao_num,                                                     &   
+      (0.d0,0.d0), A_mo, size(A_mo,1))
+  
+  deallocate(T)
+end
+
+subroutine ao_to_mo_noconjg_complex(A_ao,LDA_ao,A_mo,LDA_mo)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the AO basis to the MO basis
+  ! where A is complex in the AO basis
+  !
+  ! C^T.A_ao.C
+  ! needed for 4idx tranform in four_idx_novvvv
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  complex*16, intent(in)   :: A_ao(LDA_ao,ao_num)
+  complex*16, intent(out)  :: A_mo(LDA_mo,mo_num)
+  complex*16, allocatable  :: T(:,:)
+  
+  allocate ( T(ao_num,mo_num) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+  
+  call zgemm('N','N', ao_num, mo_num, ao_num,                    &
+      (1.d0,0.d0), A_ao,LDA_ao,                                      &
+      mo_coef_complex, size(mo_coef_complex,1),                                      &
+      (0.d0,0.d0), T, size(T,1))
+  
+  call zgemm('T','N', mo_num, mo_num, ao_num,                &
+      (1.d0,0.d0), mo_coef_complex,size(mo_coef_complex,1),                          &
+      T, ao_num,                                                     &   
+      (0.d0,0.d0), A_mo, size(A_mo,1))
+  
+  deallocate(T)
+end
+
+
+subroutine ao_ortho_cano_to_ao_cplx(A_ao,LDA_ao,A,LDA)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the |AO| basis to the orthogonal |AO| basis
+  !
+  ! $C^{-1}.A_{ao}.C^{\dagger-1}$
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA
+  complex*16, intent(in)   :: A_ao(LDA_ao,*)
+  complex*16, intent(out)  :: A(LDA,*)
+  complex*16, allocatable  :: T(:,:)
+
+  allocate ( T(ao_num,ao_num) )
+
+  call zgemm('C','N', ao_num, ao_num, ao_num,                        &
+      (1.d0,0.d0),                                                     &
+      ao_ortho_cano_coef_inv_cplx, size(ao_ortho_cano_coef_inv_cplx,1),&
+      A_ao,size(A_ao,1),                                             &
+      (0.d0,0.d0), T, size(T,1))
+
+  call zgemm('N','N', ao_num, ao_num, ao_num, (1.d0,0.d0),             &
+      T, size(T,1),                                                  &
+      ao_ortho_cano_coef_inv_cplx,size(ao_ortho_cano_coef_inv_cplx,1),&
+      (0.d0,0.d0), A, size(A,1))
+
+  deallocate(T)
+end
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+
+BEGIN_PROVIDER [ complex*16, mo_coef_kpts, (ao_num_per_kpt, mo_num_per_kpt, kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Molecular orbital coefficients on |AO| basis set
+  !
+  ! mo_coef_kpts(i,j,k) = coefficient of the i-th |AO| on the jth |MO| in kth kpt
+  !
+  ! mo_label : Label characterizing the |MOs| (local, canonical, natural, etc)
+  END_DOC
+  integer                        :: i, j, k
+  logical                        :: exists
+  PROVIDE ezfio_filename
+
+  if (mpi_master) then
+    ! Coefs
+    call ezfio_has_mo_basis_mo_coef_kpts(exists)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read mo_coef_kpts with MPI'
+    endif
+  IRP_ENDIF
+  
+  if (exists) then
+    if (mpi_master) then
+      call ezfio_get_mo_basis_mo_coef_kpts(mo_coef_kpts)
+      write(*,*) 'Read  mo_coef_kpts'
+    endif
+    IRP_IF MPI
+      call MPI_BCAST( mo_coef_kpts, kpt_num*mo_num_per_kpt*ao_num_per_kpt, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read mo_coef_kpts with MPI'
+      endif
+    IRP_ENDIF
+  else
+    ! Orthonormalized AO basis
+
+    do k=1,kpt_num
+      do i=1,mo_num_per_kpt
+        do j=1,ao_num_per_kpt
+          mo_coef_kpts(j,i,k) = ao_ortho_canonical_coef_kpts(j,i,k)
+        enddo
+      enddo
+    enddo
+  endif
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, mo_coef_in_ao_ortho_basis_kpts, (ao_num_per_kpt, mo_num_per_kpt, kpt_num) ]
+ implicit none
+ BEGIN_DOC
+ ! |MO| coefficients in orthogonalized |AO| basis
+ !
+ ! $C^{-1}.C_{mo}$
+ END_DOC
+ integer :: k
+ do k=1,kpt_num
+
+   call zgemm('N','N',ao_num_per_kpt,mo_num_per_kpt,ao_num_per_kpt,(1.d0,0.d0),                   &
+       ao_ortho_cano_coef_inv_kpts(:,:,k), size(ao_ortho_cano_coef_inv_kpts,1),&
+       mo_coef_kpts(:,:,k), size(mo_coef_kpts,1), (0.d0,0.d0),                                 &
+       mo_coef_in_ao_ortho_basis_kpts(:,:,k), size(mo_coef_in_ao_ortho_basis_kpts,1))
+ enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, mo_coef_transp_kpts, (mo_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+&BEGIN_PROVIDER [ complex*16, mo_coef_transp_kpts_conjg, (mo_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! |MO| coefficients on |AO| basis set
+  END_DOC
+  integer                        :: i, j, k
+
+  do k=1,kpt_num
+    do j=1,ao_num_per_kpt
+      do i=1,mo_num_per_kpt
+        mo_coef_transp_kpts(i,j,k) = mo_coef_kpts(j,i,k)
+        mo_coef_transp_kpts_conjg(i,j,k) = dconjg(mo_coef_kpts(j,i,k))
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+subroutine ao_to_mo_kpts(A_ao,LDA_ao,A_mo,LDA_mo)
+  implicit none
+  !todo: check this
+  BEGIN_DOC
+  ! Transform A from the AO basis to the MO basis
+  ! where A is complex in the AO basis
+  !
+  ! C^\dagger.A_ao.C
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  complex*16, intent(in)   :: A_ao(LDA_ao,ao_num_per_kpt,kpt_num)
+  complex*16, intent(out)  :: A_mo(LDA_mo,mo_num_per_kpt,kpt_num)
+  complex*16, allocatable  :: T(:,:)
+  
+  allocate ( T(ao_num_per_kpt,mo_num_per_kpt) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+  integer :: k
+
+  do k=1,kpt_num
+    call zgemm('N','N', ao_num_per_kpt, mo_num_per_kpt, ao_num_per_kpt,   &
+        (1.d0,0.d0), A_ao(:,:,k),LDA_ao,                                      &
+        mo_coef_kpts(:,:,k), size(mo_coef_kpts,1),                                      &
+        (0.d0,0.d0), T, size(T,1))
+    
+    call zgemm('C','N', mo_num_per_kpt, mo_num_per_kpt, ao_num_per_kpt,    &
+        (1.d0,0.d0), mo_coef_kpts(:,:,k),size(mo_coef_kpts,1),             &
+        T, ao_num_per_kpt,                                                     &   
+        (0.d0,0.d0), A_mo(:,:,k), size(A_mo,1))
+  enddo
+  
+  deallocate(T)
+end
+
+subroutine ao_to_mo_noconjg_kpts(A_ao,LDA_ao,A_mo,LDA_mo)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the AO basis to the MO basis
+  ! where A is complex in the AO basis
+  !
+  ! C^T.A_ao.C
+  ! needed for 4idx tranform in four_idx_novvvv
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  complex*16, intent(in)   :: A_ao(LDA_ao,ao_num_per_kpt,kpt_num)
+  complex*16, intent(out)  :: A_mo(LDA_mo,mo_num_per_kpt,kpt_num)
+  complex*16, allocatable  :: T(:,:)
+  
+  allocate ( T(ao_num_per_kpt,mo_num_per_kpt) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+  integer :: k
+  do k=1,kpt_num
+    call zgemm('N','N', ao_num_per_kpt, mo_num_per_kpt, ao_num_per_kpt, &
+        (1.d0,0.d0), A_ao,LDA_ao,                                      &
+        mo_coef_kpts(:,:,k), size(mo_coef_kpts,1),                                      &
+        (0.d0,0.d0), T, size(T,1))
+    
+    call zgemm('T','N', mo_num_per_kpt, mo_num_per_kpt, ao_num_per_kpt, &
+        (1.d0,0.d0), mo_coef_kpts(:,:,k),size(mo_coef_kpts,1),            &
+        T, ao_num_per_kpt,                                                     &   
+        (0.d0,0.d0), A_mo(:,:,k), size(A_mo,1))
+  enddo 
+  deallocate(T)
+end
+
+
+subroutine ao_ortho_cano_to_ao_kpts(A_ao,LDA_ao,A,LDA)
+  implicit none
+  !todo: check this; no longer using assumed-size arrays
+  BEGIN_DOC
+  ! Transform A from the |AO| basis to the orthogonal |AO| basis
+  !
+  ! $C^{-1}.A_{ao}.C^{\dagger-1}$
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA
+  complex*16, intent(in)   :: A_ao(LDA_ao,ao_num_per_kpt,kpt_num)
+  complex*16, intent(out)  :: A(LDA,ao_num_per_kpt,kpt_num)
+  complex*16, allocatable  :: T(:,:)
+
+  allocate ( T(ao_num_per_kpt,ao_num_per_kpt) )
+
+  integer :: k
+  do k=1,kpt_num
+    call zgemm('C','N', ao_num_per_kpt, ao_num_per_kpt, ao_num_per_kpt,                        &
+        (1.d0,0.d0),                                                     &
+        ao_ortho_cano_coef_inv_kpts(:,:,k), size(ao_ortho_cano_coef_inv_kpts,1),&
+        A_ao(:,:,k),size(A_ao,1),                                             &
+        (0.d0,0.d0), T, size(T,1))
+
+    call zgemm('N','N', ao_num_per_kpt, ao_num_per_kpt, ao_num_per_kpt, (1.d0,0.d0),             &
+        T, size(T,1),                                                  &
+        ao_ortho_cano_coef_inv_kpts(:,:,k),size(ao_ortho_cano_coef_inv_kpts,1),&
+        (0.d0,0.d0), A(:,:,k), size(A,1))
+  enddo
+
+  deallocate(T)
+end
+
+
+!============================================!
+!                                            !
+!               elec kpts                    !
+!                                            !
+!============================================!
+
+ BEGIN_PROVIDER [ integer, elec_alpha_num_kpts, (kpt_num) ]
+&BEGIN_PROVIDER [ integer, elec_beta_num_kpts, (kpt_num) ]
+  !todo: reorder? if not integer multiple, use some list of kpts to determine filling order
+  implicit none
+ 
+  integer :: i,k,kpt
+
+  PROVIDE elec_alpha_num elec_beta_num
+
+  do k=1,kpt_num
+    elec_alpha_num_kpts(k) = 0
+    elec_beta_num_kpts(k) = 0
+  enddo
+  kpt=1
+  do i=1,elec_beta_num
+    elec_alpha_num_kpts(kpt) += 1
+    elec_beta_num_kpts(kpt) += 1
+    kpt += 1
+    if (kpt > kpt_num) then
+      kpt = 1
+    endif
+  enddo
+  do i=elec_beta_num+1,elec_alpha_num
+    elec_alpha_num_kpts(kpt) += 1
+    kpt += 1
+    if (kpt > kpt_num) then
+      kpt = 1
+    endif
+  enddo
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, mo_occ_kpts, (mo_num_per_kpt,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! |MO| occupation numbers
+  END_DOC
+  PROVIDE ezfio_filename elec_beta_num_kpts elec_alpha_num_kpts
+  if (mpi_master) then
+    logical :: exists
+    call ezfio_has_mo_basis_mo_occ_kpts(exists)
+    if (exists) then
+      call ezfio_get_mo_basis_mo_occ_kpts(mo_occ_kpts)
+    else
+      mo_occ_kpts = 0.d0
+      integer :: i,k
+      do k=1,kpt_num
+        do i=1,elec_beta_num_kpts(k)
+          mo_occ_kpts(i,k) = 2.d0
+        enddo
+        do i=elec_beta_num_kpts(k)+1,elec_alpha_num_kpts(k)
+          mo_occ_kpts(i,k) = 1.d0
+        enddo
+      enddo
+    endif
+    write(*,*) 'Read mo_occ_kpts'
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST( mo_occ_kpts, mo_num_per_kpt*kpt_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read mo_occ_kpts with MPI'
+    endif
+  IRP_ENDIF
+
+END_PROVIDER
diff --git a/src/mo_basis/utils.irp.f b/src/mo_basis/utils.irp.f
index 12c6c79d..9409447c 100644
--- a/src/mo_basis/utils.irp.f
+++ b/src/mo_basis/utils.irp.f
@@ -1,23 +1,64 @@
 subroutine save_mos
   implicit none
   double precision, allocatable  :: buffer(:,:)
-  integer                        :: i,j
-
+  complex*16, allocatable        :: buffer_c(:,:),buffer_k(:,:,:)
+  integer                        :: i,j,k,ishft,jshft
+  !TODO: change this for periodic?
+  !      save real/imag parts of mo_coef_complex
+  !      otherwise need to make sure mo_coef and mo_coef_imag
+  !      are updated whenever mo_coef_complex changes
   call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename))
   call ezfio_set_mo_basis_mo_num(mo_num)
   call ezfio_set_mo_basis_mo_label(mo_label)
   call ezfio_set_mo_basis_ao_md5(ao_md5)
-  allocate ( buffer(ao_num,mo_num) )
-  buffer = 0.d0
-  do j = 1, mo_num
-    do i = 1, ao_num
-      buffer(i,j) = mo_coef(i,j)
+  if (is_complex) then
+    allocate ( buffer_c(ao_num,mo_num))
+    allocate ( buffer_k(ao_num_per_kpt,mo_num_per_kpt,kpt_num))
+    buffer_k = (0.d0,0.d0)
+    do k=1,kpt_num
+      do j = 1, mo_num_per_kpt
+        do i = 1, ao_num_per_kpt
+          buffer_k(i,j,k) = mo_coef_kpts(i,j,k)
+          !print*,i,j,k,buffer_k(i,j,k)
+        enddo
+      enddo
     enddo
-  enddo
-  call ezfio_set_mo_basis_mo_coef(buffer)
-  call ezfio_set_mo_basis_mo_occ(mo_occ)
+    buffer_c = (0.d0,0.d0)
+    do k=1,kpt_num
+      ishft = (k-1)*ao_num_per_kpt
+      jshft = (k-1)*mo_num_per_kpt
+      do j=1,mo_num_per_kpt
+        do i=1,ao_num_per_kpt
+          buffer_c(i+ishft,j+jshft) = buffer_k(i,j,k)
+        enddo
+      enddo
+    enddo
+    call ezfio_set_mo_basis_mo_coef_kpts(buffer_k)
+    call ezfio_set_mo_basis_mo_coef_complex(buffer_c)
+
+    deallocate (buffer_k,buffer_c)
+    mo_occ = 0.d0
+    do k=1,kpt_num
+      ishft=(k-1)*mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        mo_occ(i+ishft)=mo_occ_kpts(i,k)
+      enddo
+    enddo
+    call ezfio_set_mo_basis_mo_occ_kpts(mo_occ_kpts)
+    call ezfio_set_mo_basis_mo_occ(mo_occ)
+  else
+    allocate ( buffer(ao_num,mo_num) )
+    buffer = 0.d0
+    do j = 1, mo_num
+      do i = 1, ao_num
+        buffer(i,j) = mo_coef(i,j)
+      enddo
+    enddo
+    call ezfio_set_mo_basis_mo_coef(buffer)
+    deallocate (buffer)
+    call ezfio_set_mo_basis_mo_occ(mo_occ)
+  endif
   call ezfio_set_mo_basis_mo_class(mo_class)
-  deallocate (buffer)
 
 end
 
@@ -25,27 +66,43 @@ end
 subroutine save_mos_no_occ
   implicit none
   double precision, allocatable  :: buffer(:,:)
+  complex*16, allocatable        :: buffer_c(:,:)
   integer                        :: i,j
 
   call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename))
  !call ezfio_set_mo_basis_mo_num(mo_num)
  !call ezfio_set_mo_basis_mo_label(mo_label)
  !call ezfio_set_mo_basis_ao_md5(ao_md5)
-  allocate ( buffer(ao_num,mo_num) )
-  buffer = 0.d0
-  do j = 1, mo_num
-    do i = 1, ao_num
-      buffer(i,j) = mo_coef(i,j)
+  if (is_complex) then
+    print*,irp_here, ' not implemented for kpts'
+    stop -1
+    allocate ( buffer_c(ao_num,mo_num))
+    buffer_c = (0.d0,0.d0)
+    do j = 1, mo_num
+      do i = 1, ao_num
+        buffer_c(i,j) = mo_coef_complex(i,j)
+      enddo
     enddo
-  enddo
-  call ezfio_set_mo_basis_mo_coef(buffer)
-  deallocate (buffer)
+    call ezfio_set_mo_basis_mo_coef_complex(buffer_c)
+    deallocate (buffer_c)
+  else
+    allocate ( buffer(ao_num,mo_num) )
+    buffer = 0.d0
+    do j = 1, mo_num
+      do i = 1, ao_num
+        buffer(i,j) = mo_coef(i,j)
+      enddo
+    enddo
+    call ezfio_set_mo_basis_mo_coef(buffer)
+    deallocate (buffer)
+  endif
 
 end
 
 subroutine save_mos_truncated(n)
   implicit none
   double precision, allocatable  :: buffer(:,:)
+  complex*16, allocatable        :: buffer_c(:,:)
   integer                        :: i,j,n
 
   call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename))
@@ -53,17 +110,31 @@ subroutine save_mos_truncated(n)
   call ezfio_set_mo_basis_mo_num(n)
   call ezfio_set_mo_basis_mo_label(mo_label)
   call ezfio_set_mo_basis_ao_md5(ao_md5)
-  allocate ( buffer(ao_num,n) )
-  buffer = 0.d0
-  do j = 1, n
-    do i = 1, ao_num
-      buffer(i,j) = mo_coef(i,j)
+  if (is_complex) then
+    print*,irp_here, ' not implemented for kpts'
+    stop -1
+    allocate ( buffer_c(ao_num,mo_num))
+    buffer_c = (0.d0,0.d0)
+    do j = 1, n
+      do i = 1, ao_num
+        buffer_c(i,j) = mo_coef_complex(i,j)
+      enddo
     enddo
-  enddo
-  call ezfio_set_mo_basis_mo_coef(buffer)
+    call ezfio_set_mo_basis_mo_coef_complex(buffer_c)
+    deallocate (buffer_c)
+  else
+    allocate ( buffer(ao_num,n) )
+    buffer = 0.d0
+    do j = 1, n
+      do i = 1, ao_num
+        buffer(i,j) = mo_coef(i,j)
+      enddo
+    enddo
+    call ezfio_set_mo_basis_mo_coef(buffer)
+    deallocate (buffer)
+  endif
   call ezfio_set_mo_basis_mo_occ(mo_occ)
   call ezfio_set_mo_basis_mo_class(mo_class)
-  deallocate (buffer)
 
 end
 
diff --git a/src/mo_basis/utils_cplx.irp.f b/src/mo_basis/utils_cplx.irp.f
new file mode 100644
index 00000000..4d28911d
--- /dev/null
+++ b/src/mo_basis/utils_cplx.irp.f
@@ -0,0 +1,610 @@
+subroutine mo_as_eigvectors_of_mo_matrix_complex(matrix,n,m,label,sign,output)
+  !TODO: test this
+  implicit none
+  integer,intent(in)             :: n,m, sign
+  character*(64), intent(in)     :: label
+  complex*16, intent(in)   :: matrix(n,m)
+  logical, intent(in)            :: output
+
+  integer :: i,j
+  double precision, allocatable  :: eigvalues(:)
+  complex*16, allocatable  :: mo_coef_new(:,:), R(:,:), A(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: mo_coef_new, R
+
+  call write_time(6)
+  if (m /= mo_num) then
+    print *, irp_here, ': Error : m/= mo_num'
+    stop 1
+  endif
+  allocate(A(n,m),R(n,m),mo_coef_new(ao_num,m),eigvalues(m))
+  if (sign == -1) then
+    do j=1,m
+      do i=1,n
+        A(i,j) = -matrix(i,j)
+      enddo
+    enddo
+  else
+    do j=1,m
+      do i=1,n
+        A(i,j) = matrix(i,j)
+      enddo
+    enddo
+  endif
+  mo_coef_new = mo_coef_complex
+
+  call lapack_diag_complex(eigvalues,R,A,n,m)
+  if (output) then
+    write (6,'(A)')  'MOs are now **'//trim(label)//'**'
+    write (6,'(A)') ''
+    write (6,'(A)')  'Eigenvalues'
+    write (6,'(A)') '-----------'
+    write (6,'(A)')  ''
+    write (6,'(A)') '======== ================'
+  endif
+  if (sign == -1) then
+    do i=1,m
+      eigvalues(i) = -eigvalues(i)
+    enddo
+  endif
+  if (output) then
+    do i=1,m
+      write (6,'(I8,1X,F16.10)')  i,eigvalues(i)
+    enddo
+    write (6,'(A)') '======== ================'
+    write (6,'(A)')  ''
+    !write (6,'(A)')  'Fock Matrix'
+    !write (6,'(A)') '-----------'
+    !do i=1,n
+    !  write(*,'(200(E24.15))') A(i,:)
+    !enddo
+  endif
+
+  call zgemm('N','N',ao_num,m,m,(1.d0,0.d0),mo_coef_new,size(mo_coef_new,1),R,size(R,1),(0.d0,0.d0),mo_coef_complex,size(mo_coef_complex,1))
+  deallocate(A,mo_coef_new,R,eigvalues)
+  call write_time(6)
+
+  mo_label = label
+end
+
+subroutine mo_as_svd_vectors_of_mo_matrix_complex(matrix,lda,m,n,label)
+  !TODO: test this
+  implicit none
+  integer,intent(in)             :: lda,m,n
+  character*(64), intent(in)     :: label
+  complex*16, intent(in)   :: matrix(lda,n)
+
+  integer :: i,j
+  double precision :: accu
+  double precision, allocatable  :: D(:)
+  complex*16, allocatable  :: mo_coef_new(:,:), U(:,:), A(:,:), Vt(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: mo_coef_new, U, Vt, A
+
+  call write_time(6)
+  if (m /= mo_num) then
+    print *, irp_here, ': Error : m/= mo_num'
+    stop 1
+  endif
+
+  allocate(A(lda,n),U(lda,n),mo_coef_new(ao_num,m),D(m),Vt(lda,n))
+
+  do j=1,n
+    do i=1,m
+      A(i,j) = matrix(i,j)
+    enddo
+  enddo
+  mo_coef_new = mo_coef_complex
+
+  call svd_complex(A,lda,U,lda,D,Vt,lda,m,n)
+
+  write (6,'(A)') 'MOs are now **'//trim(label)//'**'
+  write (6,'(A)')  ''
+  write (6,'(A)') 'Eigenvalues'
+  write (6,'(A)')  '-----------'
+  write (6,'(A)') ''
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  '   MO       Eigenvalue       Cumulative   '
+  write (6,'(A)')  '======== ================ ================'
+
+  accu = 0.d0
+  do i=1,m
+    accu = accu + D(i)
+    write (6,'(I8,1X,F16.10,1X,F16.10)')  i,D(i), accu
+  enddo
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  ''
+
+  call zgemm('N','N',ao_num,m,m,(1.d0,0.d0),mo_coef_new,size(mo_coef_new,1),U,size(U,1),(0.d0,0.d0),mo_coef_complex,size(mo_coef_complex,1))
+  deallocate(A,mo_coef_new,U,Vt,D)
+  call write_time(6)
+
+  mo_label = label
+end
+
+
+subroutine mo_as_svd_vectors_of_mo_matrix_eig_complex(matrix,lda,m,n,eig,label)
+  !TODO: test this
+  implicit none
+  integer,intent(in)             :: lda,m,n
+  character*(64), intent(in)     :: label
+  complex*16, intent(in)   :: matrix(lda,n)
+  double precision, intent(out)  :: eig(m)
+
+  integer :: i,j
+  double precision :: accu
+  double precision, allocatable  :: D(:)
+  complex*16, allocatable  :: mo_coef_new(:,:), U(:,:), A(:,:), Vt(:,:), work(:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: mo_coef_new, U, Vt, A
+
+  call write_time(6)
+  if (m /= mo_num) then
+    print *, irp_here, ': Error : m/= mo_num'
+    stop 1
+  endif
+
+  allocate(A(lda,n),U(lda,n),mo_coef_new(ao_num,m),D(m),Vt(lda,n))
+
+  do j=1,n
+    do i=1,m
+      A(i,j) = matrix(i,j)
+    enddo
+  enddo
+  mo_coef_new = mo_coef_complex
+
+  call svd_complex(A,lda,U,lda,D,Vt,lda,m,n)
+
+  write (6,'(A)') 'MOs are now **'//trim(label)//'**'
+  write (6,'(A)')  ''
+  write (6,'(A)') 'Eigenvalues'
+  write (6,'(A)')  '-----------'
+  write (6,'(A)') ''
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  '   MO       Eigenvalue       Cumulative   '
+  write (6,'(A)')  '======== ================ ================'
+
+  accu = 0.d0
+  do i=1,m
+    accu = accu + D(i)
+    write (6,'(I8,1X,F16.10,1X,F16.10)')  i,D(i), accu
+  enddo
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  ''
+
+  call zgemm('N','N',ao_num,m,m,(1.d0,0.d0),mo_coef_new,size(mo_coef_new,1),U,size(U,1),(0.d0,0.d0),mo_coef_complex,size(mo_coef_complex,1))
+
+  do i=1,m
+    eig(i) = D(i)
+  enddo
+
+  deallocate(A,mo_coef_new,U,Vt,D)
+  call write_time(6)
+
+  mo_label = label
+
+end
+
+
+subroutine mo_coef_new_as_svd_vectors_of_mo_matrix_eig_complex(matrix,lda,m,n,mo_coef_before,eig,mo_coef_new)
+  implicit none
+  BEGIN_DOC
+! You enter with matrix in the MO basis defined with the mo_coef_before. 
+!
+! You SVD the matrix and set the eigenvectors as mo_coef_new ordered by increasing singular values 
+  END_DOC
+  integer,intent(in)             :: lda,m,n
+  complex*16, intent(in)   :: matrix(lda,n),mo_coef_before(ao_num,m)
+  double precision, intent(out)  :: eig(m)
+  complex*16, intent(out)  :: mo_coef_new(ao_num,m)
+
+  integer :: i,j
+  double precision :: accu
+  double precision, allocatable  ::  D(:)
+  complex*16, allocatable  ::  mo_coef_tmp(:,:), U(:,:), A(:,:), Vt(:,:), work(:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, Vt, A
+
+  call write_time(6)
+  if (m /= mo_num) then
+    print *, irp_here, ': Error : m/= mo_num'
+    stop 1
+  endif
+
+  allocate(A(lda,n),U(lda,n),D(m),Vt(lda,n),mo_coef_tmp(ao_num,mo_num))
+
+  do j=1,n
+    do i=1,m
+      A(i,j) = matrix(i,j)
+    enddo
+  enddo
+  mo_coef_tmp = mo_coef_before
+
+  call svd_complex(A,lda,U,lda,D,Vt,lda,m,n)
+
+  write (6,'(A)')  ''
+  write (6,'(A)') 'Eigenvalues'
+  write (6,'(A)')  '-----------'
+  write (6,'(A)') ''
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  '   MO       Eigenvalue       Cumulative   '
+  write (6,'(A)')  '======== ================ ================'
+
+  accu = 0.d0
+  do i=1,m
+    accu = accu + D(i)
+    write (6,'(I8,1X,F16.10,1X,F16.10)')  i,D(i), accu
+  enddo
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  ''
+
+  call zgemm('N','N',ao_num,m,m,(1.d0,0.d0),mo_coef_tmp,size(mo_coef_new,1),U,size(U,1),(0.d0,0.d0),mo_coef_new,size(mo_coef_new,1))
+
+  do i=1,m
+    eig(i) = D(i)
+  enddo
+
+  deallocate(A,U,Vt,D,mo_coef_tmp)
+  call write_time(6)
+
+end
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+subroutine mo_as_eigvectors_of_mo_matrix_kpts(matrix,n,m,nk,label,sign,output)
+  !TODO: test this
+  implicit none
+  integer,intent(in)             :: n,m,nk, sign
+  character*(64), intent(in)     :: label
+  complex*16, intent(in)   :: matrix(n,m,nk)
+  logical, intent(in)            :: output
+
+  integer :: i,j,k
+  double precision, allocatable  :: eigvalues(:)
+  complex*16, allocatable  :: mo_coef_new(:,:), R(:,:), A(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: mo_coef_new, R
+
+  call write_time(6)
+  if (m /= mo_num_per_kpt) then
+    print *, irp_here, ': Error : m/= mo_num_per_kpt'
+    stop 1
+  endif
+  if (nk /= kpt_num) then
+    print *, irp_here, ': Error : nk/= kpt_num'
+    stop 1
+  endif
+  allocate(A(n,m),R(n,m),mo_coef_new(ao_num_per_kpt,m),eigvalues(m))
+  do k=1,nk
+    if (sign == -1) then
+      do j=1,m
+        do i=1,n
+          A(i,j) = -matrix(i,j,k)
+        enddo
+      enddo
+    else
+      do j=1,m
+        do i=1,n
+          A(i,j) = matrix(i,j,k)
+        enddo
+      enddo
+    endif
+    mo_coef_new = mo_coef_kpts(:,:,k)
+
+    call lapack_diag_complex(eigvalues,R,A,n,m)
+    if (sign == -1) then
+      do i=1,m
+        eigvalues(i) = -eigvalues(i)
+      enddo
+    endif
+    if (output) then
+      do i=1,m
+        write (6,'(2(I8),1X,F16.10)') k,i,eigvalues(i)
+      enddo
+      write (6,'(A)') '======== ================'
+      write (6,'(A)')  ''
+      !write (6,'(A)')  'Fock Matrix'
+      !write (6,'(A)') '-----------'
+      !do i=1,n
+      !  write(*,'(200(E24.15))') A(i,:)
+      !enddo
+    endif
+
+    call zgemm('N','N',ao_num_per_kpt,m,m,(1.d0,0.d0), &
+        mo_coef_new,size(mo_coef_new,1),R,size(R,1),(0.d0,0.d0), &
+        mo_coef_kpts(:,:,k),size(mo_coef_kpts,1))
+  enddo
+  deallocate(A,mo_coef_new,R,eigvalues)
+  call write_time(6)
+
+  mo_label = label
+    if (output) then
+      write (6,'(A)')  'MOs are now **'//trim(label)//'**'
+      write (6,'(A)') ''
+      write (6,'(A)')  'Eigenvalues'
+      write (6,'(A)') '-----------'
+      write (6,'(A)')  ''
+      write (6,'(A)') '======== ================'
+    endif
+end
+
+subroutine mo_as_eigvectors_of_mo_matrix_kpts_real(matrix,n,m,nk,label,sign,output)
+  !TODO: test this
+  implicit none
+  integer,intent(in)             :: n,m,nk, sign
+  character*(64), intent(in)     :: label
+  double precision, intent(in)   :: matrix(n,m,nk)
+  logical, intent(in)            :: output
+
+  integer :: i,j,k
+  double precision, allocatable  :: eigvalues(:)
+  !complex*16, allocatable  :: mo_coef_new(:,:)
+  double precision, allocatable  :: mo_coef_new(:,:),mo_coef_tmp(:,:),R(:,:), A(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: mo_coef_new, R
+
+  call write_time(6)
+  if (m /= mo_num_per_kpt) then
+    print *, irp_here, ': Error : m/= mo_num_per_kpt'
+    stop 1
+  endif
+  if (nk /= kpt_num) then
+    print *, irp_here, ': Error : nk/= kpt_num'
+    stop 1
+  endif
+  allocate(A(n,m),R(n,m),mo_coef_tmp(ao_num_per_kpt,m),mo_coef_new(ao_num_per_kpt,m),eigvalues(m))
+  do k=1,nk
+    if (sign == -1) then
+      do j=1,m
+        do i=1,n
+          A(i,j) = -matrix(i,j,k)
+        enddo
+      enddo
+    else
+      do j=1,m
+        do i=1,n
+          A(i,j) = matrix(i,j,k)
+        enddo
+      enddo
+    endif
+    mo_coef_new = dble(mo_coef_kpts(:,:,k))
+
+    call lapack_diag(eigvalues,R,A,n,m)
+    if (sign == -1) then
+      do i=1,m
+        eigvalues(i) = -eigvalues(i)
+      enddo
+    endif
+    if (output) then
+      do i=1,m
+        write (6,'(2(I8),1X,F16.10)') k,i,eigvalues(i)
+      enddo
+      write (6,'(A)') '======== ================'
+      write (6,'(A)')  ''
+      !write (6,'(A)')  'Fock Matrix'
+      !write (6,'(A)') '-----------'
+      !do i=1,n
+      !  write(*,'(200(E24.15))') A(i,:)
+      !enddo
+    endif
+
+    call dgemm('N','N',ao_num_per_kpt,m,m,1.d0, &
+        mo_coef_new,size(mo_coef_new,1),R,size(R,1),0.d0, &
+        mo_coef_tmp,size(mo_coef_tmp,1))
+    call zlacp2('N',ao_num_per_kpt,m,mo_coef_tmp,size(mo_coef_tmp,1), &
+                mo_coef_kpts(:,:,k),size(mo_coef_kpts,1))
+  enddo
+  deallocate(A,mo_coef_new,mo_coef_tmp,R,eigvalues)
+  call write_time(6)
+
+  mo_label = label
+    if (output) then
+      write (6,'(A)')  'MOs are now **'//trim(label)//'**'
+      write (6,'(A)') ''
+      write (6,'(A)')  'Eigenvalues'
+      write (6,'(A)') '-----------'
+      write (6,'(A)')  ''
+      write (6,'(A)') '======== ================'
+    endif
+end
+
+subroutine mo_as_svd_vectors_of_mo_matrix_kpts(matrix,lda,m,n,label)
+  !TODO: implement
+  print *, irp_here, ' not implemented for kpts'
+  stop 1
+  implicit none
+  integer,intent(in)             :: lda,m,n
+  character*(64), intent(in)     :: label
+  complex*16, intent(in)   :: matrix(lda,n)
+
+  integer :: i,j
+  double precision :: accu
+  double precision, allocatable  :: D(:)
+  complex*16, allocatable  :: mo_coef_new(:,:), U(:,:), A(:,:), Vt(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: mo_coef_new, U, Vt, A
+
+  call write_time(6)
+  if (m /= mo_num) then
+    print *, irp_here, ': Error : m/= mo_num'
+    stop 1
+  endif
+
+  allocate(A(lda,n),U(lda,n),mo_coef_new(ao_num,m),D(m),Vt(lda,n))
+
+  do j=1,n
+    do i=1,m
+      A(i,j) = matrix(i,j)
+    enddo
+  enddo
+  mo_coef_new = mo_coef_complex
+
+  call svd_complex(A,lda,U,lda,D,Vt,lda,m,n)
+
+  write (6,'(A)') 'MOs are now **'//trim(label)//'**'
+  write (6,'(A)')  ''
+  write (6,'(A)') 'Eigenvalues'
+  write (6,'(A)')  '-----------'
+  write (6,'(A)') ''
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  '   MO       Eigenvalue       Cumulative   '
+  write (6,'(A)')  '======== ================ ================'
+
+  accu = 0.d0
+  do i=1,m
+    accu = accu + D(i)
+    write (6,'(I8,1X,F16.10,1X,F16.10)')  i,D(i), accu
+  enddo
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  ''
+
+  call zgemm('N','N',ao_num,m,m,(1.d0,0.d0),mo_coef_new,size(mo_coef_new,1),U,size(U,1),(0.d0,0.d0),mo_coef_complex,size(mo_coef_complex,1))
+  deallocate(A,mo_coef_new,U,Vt,D)
+  call write_time(6)
+
+  mo_label = label
+end
+
+
+subroutine mo_as_svd_vectors_of_mo_matrix_eig_kpts(matrix,lda,m,n,nk,eig,label)
+  !TODO: implement
+  !print *, irp_here, ' not implemented for kpts'
+  !stop 1
+  implicit none
+  integer,intent(in)             :: lda,m,n,nk
+  character*(64), intent(in)     :: label
+  complex*16, intent(in)   :: matrix(lda,n,nk)
+  double precision, intent(out)  :: eig(m,nk)
+
+  integer :: i,j,k
+  double precision :: accu
+  double precision, allocatable  :: D(:)
+  complex*16, allocatable  :: mo_coef_new(:,:), U(:,:), A(:,:), Vt(:,:), work(:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: mo_coef_new, U, Vt, A
+
+  call write_time(6)
+  if (m /= mo_num_per_kpt) then
+    print *, irp_here, ': Error : m/= mo_num_per_kpt'
+    stop 1
+  endif
+
+  
+  allocate(A(lda,n),U(lda,n),mo_coef_new(ao_num_per_kpt,m),D(m),Vt(lda,n))
+
+  do k=1,nk
+    do j=1,n
+      do i=1,m
+        A(i,j) = matrix(i,j,k)
+      enddo
+    enddo
+    mo_coef_new(1:ao_num_per_kpt,1:m) = mo_coef_kpts(1:ao_num_per_kpt,1:m,k)
+
+    call svd_complex(A,lda,U,lda,D,Vt,lda,m,n)
+
+
+
+    call zgemm('N','N',ao_num_per_kpt,m,m,  &
+               (1.d0,0.d0),mo_coef_new,size(mo_coef_new,1),U,size(U,1),&
+               (0.d0,0.d0),mo_coef_kpts(1,1,k),size(mo_coef_kpts,1))
+
+    do i=1,m
+      eig(i,k) = D(i)
+    enddo
+    !do j=1,mo_num_per_kpt
+    !  do i=1,mo_num_per_kpt
+    !    print'(3(I5),2(E25.15))',i,j,k,mo_coef_kpts(i,j,k)
+    !  enddo
+    !enddo
+  enddo
+
+  deallocate(A,mo_coef_new,U,Vt,D)
+
+  write (6,'(A)') 'MOs are now **'//trim(label)//'**'
+  write (6,'(A)')  ''
+  write (6,'(A)') 'Eigenvalues '
+  write (6,'(A)')  '-----------'
+  write (6,'(A)') ''
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  '   MO       Eigenvalue       Cumulative   '
+  write (6,'(A)')  '======== ================ ================'
+ 
+  do k=1,nk
+    accu = 0.d0
+    do i=1,m
+      accu = accu + eig(i,k)
+      write (6,'(I8,1X,F16.10,1X,F16.10)')  i,eig(i,k), accu
+    enddo
+  write (6,'(A)')  '-------- ---------------- ----------------'
+  enddo
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  ''
+
+  call write_time(6)
+
+  mo_label = label
+
+end
+
+
+subroutine mo_coef_new_as_svd_vectors_of_mo_matrix_eig_kpts(matrix,lda,m,n,mo_coef_before,eig,mo_coef_new)
+  !TODO: implement
+  print *, irp_here, ' not implemented for kpts'
+  stop 1
+  implicit none
+  BEGIN_DOC
+! You enter with matrix in the MO basis defined with the mo_coef_before. 
+!
+! You SVD the matrix and set the eigenvectors as mo_coef_new ordered by increasing singular values 
+  END_DOC
+  integer,intent(in)             :: lda,m,n
+  complex*16, intent(in)   :: matrix(lda,n),mo_coef_before(ao_num,m)
+  double precision, intent(out)  :: eig(m)
+  complex*16, intent(out)  :: mo_coef_new(ao_num,m)
+
+  integer :: i,j
+  double precision :: accu
+  double precision, allocatable  ::  D(:)
+  complex*16, allocatable  ::  mo_coef_tmp(:,:), U(:,:), A(:,:), Vt(:,:), work(:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, Vt, A
+
+  call write_time(6)
+  if (m /= mo_num) then
+    print *, irp_here, ': Error : m/= mo_num'
+    stop 1
+  endif
+
+  allocate(A(lda,n),U(lda,n),D(m),Vt(lda,n),mo_coef_tmp(ao_num,mo_num))
+
+  do j=1,n
+    do i=1,m
+      A(i,j) = matrix(i,j)
+    enddo
+  enddo
+  mo_coef_tmp = mo_coef_before
+
+  call svd_complex(A,lda,U,lda,D,Vt,lda,m,n)
+
+  write (6,'(A)')  ''
+  write (6,'(A)') 'Eigenvalues'
+  write (6,'(A)')  '-----------'
+  write (6,'(A)') ''
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  '   MO       Eigenvalue       Cumulative   '
+  write (6,'(A)')  '======== ================ ================'
+
+  accu = 0.d0
+  do i=1,m
+    accu = accu + D(i)
+    write (6,'(I8,1X,F16.10,1X,F16.10)')  i,D(i), accu
+  enddo
+  write (6,'(A)')  '======== ================ ================'
+  write (6,'(A)')  ''
+
+  call zgemm('N','N',ao_num,m,m,(1.d0,0.d0),mo_coef_tmp,size(mo_coef_new,1),U,size(U,1),(0.d0,0.d0),mo_coef_new,size(mo_coef_new,1))
+
+  do i=1,m
+    eig(i) = D(i)
+  enddo
+
+  deallocate(A,U,Vt,D,mo_coef_tmp)
+  call write_time(6)
+
+end
+
diff --git a/src/mo_guess/h_core_guess_routine.irp.f b/src/mo_guess/h_core_guess_routine.irp.f
index 8fc3f6f2..1a6fd2c5 100644
--- a/src/mo_guess/h_core_guess_routine.irp.f
+++ b/src/mo_guess/h_core_guess_routine.irp.f
@@ -5,9 +5,18 @@ subroutine hcore_guess
   implicit none
   character*(64)                 :: label
   label = "Guess"
-  call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals,          &
-                                     size(mo_one_e_integrals,1),  &
-                                     size(mo_one_e_integrals,2),label,1,.false.)
-  call save_mos
-  SOFT_TOUCH mo_coef mo_label
+  if (is_complex) then
+    call mo_as_eigvectors_of_mo_matrix_complex(mo_one_e_integrals_complex,  &
+                                       size(mo_one_e_integrals_complex,1),  &
+                                       size(mo_one_e_integrals_complex,2),label,1,.false.)
+    call save_mos
+    SOFT_TOUCH mo_coef_complex mo_label
+
+  else
+    call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals,          &
+                                       size(mo_one_e_integrals,1),  &
+                                       size(mo_one_e_integrals,2),label,1,.false.)
+    call save_mos
+    SOFT_TOUCH mo_coef mo_label
+  endif
 end
diff --git a/src/mo_guess/mo_ortho_lowdin_cplx.irp.f b/src/mo_guess/mo_ortho_lowdin_cplx.irp.f
new file mode 100644
index 00000000..ced9a63a
--- /dev/null
+++ b/src/mo_guess/mo_ortho_lowdin_cplx.irp.f
@@ -0,0 +1,141 @@
+BEGIN_PROVIDER [complex*16, ao_ortho_lowdin_coef_complex, (ao_num,ao_num)]
+  implicit none
+  BEGIN_DOC
+! matrix of the coefficients of the mos generated by the
+! orthonormalization by the S^{-1/2} canonical transformation of the aos
+! ao_ortho_lowdin_coef(i,j) = coefficient of the ith ao on the jth ao_ortho_lowdin orbital
+  END_DOC
+  integer                        :: i,j,k,l
+  complex*16, allocatable  :: tmp_matrix(:,:)
+  allocate (tmp_matrix(ao_num,ao_num))
+  tmp_matrix(:,:) = (0.d0,0.d0)
+  do j=1, ao_num
+    tmp_matrix(j,j) = (1.d0,0.d0)
+  enddo
+  call ortho_lowdin_complex(ao_overlap_complex,ao_num,ao_num,tmp_matrix,ao_num,ao_num,lin_dep_cutoff)
+  do i=1, ao_num
+    do j=1, ao_num
+      ao_ortho_lowdin_coef_complex(j,i) = tmp_matrix(i,j)
+    enddo
+  enddo
+  deallocate(tmp_matrix)
+END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, ao_ortho_lowdin_overlap_complex, (ao_num,ao_num)]
+  implicit none
+  BEGIN_DOC
+! overlap matrix of the ao_ortho_lowdin
+! supposed to be the Identity
+  END_DOC
+  integer                        :: i,j,k,l
+  complex*16               :: c
+  do j=1, ao_num
+    do i=1, ao_num
+      ao_ortho_lowdin_overlap_complex(i,j) = (0.d0,0.d0)
+    enddo
+  enddo
+  do k=1, ao_num
+    do j=1, ao_num
+      c = (0.d0,0.d0)
+      do l=1, ao_num
+        c +=  dconjg(ao_ortho_lowdin_coef_complex(j,l)) * ao_overlap_complex(k,l)
+      enddo
+      do i=1, ao_num
+        ao_ortho_lowdin_overlap_complex(i,j) += ao_ortho_lowdin_coef_complex(i,k) * c
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [complex*16, ao_ortho_lowdin_coef_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+! matrix of the coefficients of the mos generated by the
+! orthonormalization by the S^{-1/2} canonical transformation of the aos
+! ao_ortho_lowdin_coef(i,j) = coefficient of the ith ao on the jth ao_ortho_lowdin orbital
+  END_DOC
+  integer                        :: i,j,k,l
+  complex*16, allocatable  :: tmp_matrix(:,:)
+  allocate (tmp_matrix(ao_num,ao_num))
+  do k=1,kpt_num
+    tmp_matrix(:,:) = (0.d0,0.d0)
+    do j=1, ao_num
+      tmp_matrix(j,j) = (1.d0,0.d0)
+    enddo
+    call ortho_lowdin_complex(ao_overlap_kpts(:,:,k),ao_num_per_kpt,ao_num_per_kpt,tmp_matrix,ao_num_per_kpt,ao_num_per_kpt,lin_dep_cutoff)
+    do i=1, ao_num_per_kpt
+      do j=1, ao_num_per_kpt
+        ao_ortho_lowdin_coef_kpts(j,i,k) = tmp_matrix(i,j)
+      enddo
+    enddo
+  enddo
+  deallocate(tmp_matrix)
+END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, ao_ortho_lowdin_overlap_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+! overlap matrix of the ao_ortho_lowdin
+! supposed to be the Identity
+  END_DOC
+  integer                        :: i,j,k,l,kk
+  complex*16               :: c
+  do kk=1,kpt_num
+    do j=1, ao_num_per_kpt
+      do i=1, ao_num_per_kpt
+        ao_ortho_lowdin_overlap_kpts(i,j,kk) = (0.d0,0.d0)
+      enddo
+    enddo
+  enddo
+  do kk=1,kpt_num
+    do k=1, ao_num_per_kpt
+      do j=1, ao_num_per_kpt
+        c = (0.d0,0.d0)
+        do l=1, ao_num_per_kpt
+          c +=  dconjg(ao_ortho_lowdin_coef_kpts(j,l,kk)) * ao_overlap_kpts(k,l,kk)
+        enddo
+        do i=1, ao_num_per_kpt
+          ao_ortho_lowdin_overlap_kpts(i,j,kk) += ao_ortho_lowdin_coef_kpts(i,k,kk) * c
+        enddo
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts_real               !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [ double precision, ao_ortho_lowdin_coef_kpts_real, (ao_num_per_kpt,ao_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+! matrix of the coefficients of the mos generated by the
+! orthonormalization by the S^{-1/2} canonical transformation of the aos
+! ao_ortho_lowdin_coef(i,j) = coefficient of the ith ao on the jth ao_ortho_lowdin orbital
+  END_DOC
+  integer                        :: i,j,k,l
+  double precision, allocatable  :: tmp_matrix(:,:)
+  allocate (tmp_matrix(ao_num,ao_num))
+  do k=1,kpt_num
+    tmp_matrix(:,:) = 0.d0
+    do j=1, ao_num
+      tmp_matrix(j,j) = 1.d0
+    enddo
+    call ortho_lowdin(ao_overlap_kpts_real(:,:,k),ao_num_per_kpt,ao_num_per_kpt,tmp_matrix,ao_num_per_kpt,ao_num_per_kpt,lin_dep_cutoff)
+    do i=1, ao_num_per_kpt
+      do j=1, ao_num_per_kpt
+        ao_ortho_lowdin_coef_kpts_real(j,i,k) = tmp_matrix(i,j)
+      enddo
+    enddo
+  enddo
+  deallocate(tmp_matrix)
+END_PROVIDER
+
diff --git a/src/mo_guess/pot_mo_ortho_cano_ints.irp.f b/src/mo_guess/pot_mo_ortho_cano_ints.irp.f
new file mode 100644
index 00000000..1b270aef
--- /dev/null
+++ b/src/mo_guess/pot_mo_ortho_cano_ints.irp.f
@@ -0,0 +1,52 @@
+BEGIN_PROVIDER [double precision, ao_ortho_cano_n_e_ints, (mo_num,mo_num)]
+ implicit none
+ integer :: i1,j1,i,j
+ double precision :: c_i1,c_j1
+
+ ao_ortho_cano_n_e_ints = 0.d0
+ !$OMP PARALLEL DO DEFAULT(none) &
+ !$OMP PRIVATE(i,j,i1,j1,c_j1,c_i1) &
+ !$OMP SHARED(mo_num,ao_num,ao_ortho_canonical_coef, &
+ !$OMP   ao_ortho_cano_n_e_ints, ao_integrals_n_e)
+ do i = 1, mo_num
+   do j = 1, mo_num
+    do i1 = 1,ao_num
+     c_i1 = ao_ortho_canonical_coef(i1,i)
+     do j1 = 1,ao_num
+       c_j1 = c_i1*ao_ortho_canonical_coef(j1,j)
+       ao_ortho_cano_n_e_ints(j,i) = ao_ortho_cano_n_e_ints(j,i) + &
+                                           c_j1 * ao_integrals_n_e(j1,i1)
+     enddo
+    enddo
+   enddo
+ enddo
+ !$OMP END PARALLEL DO
+END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, ao_ortho_cano_n_e_ints_cplx, (mo_num,mo_num)]
+!todo: kpts
+ implicit none
+ integer :: i1,j1,i,j
+ complex*16 :: c_i1,c_j1
+
+ ao_ortho_cano_n_e_ints_cplx = (0.d0,0.d0)
+ !$OMP PARALLEL DO DEFAULT(none) &
+ !$OMP PRIVATE(i,j,i1,j1,c_j1,c_i1) &
+ !$OMP SHARED(mo_num,ao_num,ao_ortho_canonical_coef_complex, &
+ !$OMP   ao_ortho_cano_n_e_ints_cplx, ao_integrals_n_e_complex)
+ do i = 1, mo_num
+   do j = 1, mo_num
+    do i1 = 1,ao_num
+     c_i1 = ao_ortho_canonical_coef_complex(i1,i)
+     do j1 = 1,ao_num
+       c_j1 = c_i1*dconjg(ao_ortho_canonical_coef_complex(j1,j))
+       ao_ortho_cano_n_e_ints_cplx(j,i) = &
+               ao_ortho_cano_n_e_ints_cplx(j,i) + &
+                c_j1 * ao_integrals_n_e_complex(j1,i1)
+     enddo
+    enddo
+   enddo
+ enddo
+ !$OMP END PARALLEL DO
+END_PROVIDER
+
diff --git a/src/mo_guess/pot_mo_ortho_canonical_ints.irp.f b/src/mo_guess/pot_mo_ortho_canonical_ints.irp.f
deleted file mode 100644
index 984d45a5..00000000
--- a/src/mo_guess/pot_mo_ortho_canonical_ints.irp.f
+++ /dev/null
@@ -1,25 +0,0 @@
-BEGIN_PROVIDER [double precision, ao_ortho_canonical_nucl_elec_integrals, (mo_num,mo_num)]
- implicit none
- integer :: i1,j1,i,j
- double precision :: c_i1,c_j1
-
- ao_ortho_canonical_nucl_elec_integrals = 0.d0
- !$OMP PARALLEL DO DEFAULT(none) &
- !$OMP PRIVATE(i,j,i1,j1,c_j1,c_i1) &
- !$OMP SHARED(mo_num,ao_num,ao_ortho_canonical_coef, &
- !$OMP   ao_ortho_canonical_nucl_elec_integrals, ao_integrals_n_e)
- do i = 1, mo_num
-   do j = 1, mo_num
-    do i1 = 1,ao_num
-     c_i1 = ao_ortho_canonical_coef(i1,i)
-     do j1 = 1,ao_num
-       c_j1 = c_i1*ao_ortho_canonical_coef(j1,j)
-       ao_ortho_canonical_nucl_elec_integrals(j,i) = ao_ortho_canonical_nucl_elec_integrals(j,i) + &
-                                           c_j1 * ao_integrals_n_e(j1,i1)
-     enddo
-    enddo
-   enddo
- enddo
- !$OMP END PARALLEL DO
-END_PROVIDER
-
diff --git a/src/mo_guess/pot_mo_ortho_lowdin_ints.irp.f b/src/mo_guess/pot_mo_ortho_lowdin_ints.irp.f
index 5a9f9978..4d513c2f 100644
--- a/src/mo_guess/pot_mo_ortho_lowdin_ints.irp.f
+++ b/src/mo_guess/pot_mo_ortho_lowdin_ints.irp.f
@@ -23,3 +23,29 @@ BEGIN_PROVIDER [double precision, ao_ortho_lowdin_nucl_elec_integrals, (mo_num,m
  !$OMP END PARALLEL DO
 END_PROVIDER
 
+BEGIN_PROVIDER [complex*16, ao_ortho_lowdin_n_e_ints_cplx, (mo_num,mo_num)]
+ implicit none
+ integer :: i1,j1,i,j
+ complex*16 :: c_i1,c_j1
+
+ ao_ortho_lowdin_n_e_ints_cplx = (0.d0,0.d0)
+ !$OMP PARALLEL DO DEFAULT(none) &
+ !$OMP PRIVATE(i,j,i1,j1,c_j1,c_i1) &
+ !$OMP SHARED(mo_num,ao_num,ao_ortho_lowdin_coef_complex, &
+ !$OMP   ao_ortho_lowdin_n_e_ints_cplx, ao_integrals_n_e_complex)
+ do i = 1, mo_num
+   do j = 1, mo_num
+    do i1 = 1,ao_num
+     c_i1 = ao_ortho_lowdin_coef_complex(i1,i)
+     do j1 = 1,ao_num
+       c_j1 = c_i1*dconjg(ao_ortho_lowdin_coef_complex(j1,j))
+       ao_ortho_lowdin_n_e_ints_cplx(j,i) = &
+         ao_ortho_lowdin_n_e_ints_cplx(j,i) + &
+                                    c_j1 * ao_integrals_n_e_complex(j1,i1)
+     enddo
+    enddo
+   enddo
+ enddo
+ !$OMP END PARALLEL DO
+END_PROVIDER
+
diff --git a/src/mo_one_e_ints/EZFIO.cfg b/src/mo_one_e_ints/EZFIO.cfg
index d58b3da1..6dcb674e 100644
--- a/src/mo_one_e_ints/EZFIO.cfg
+++ b/src/mo_one_e_ints/EZFIO.cfg
@@ -4,6 +4,18 @@ doc: Nucleus-electron integrals in |MO| basis set
 size: (mo_basis.mo_num,mo_basis.mo_num)
 interface: ezfio
 
+[mo_integrals_n_e_complex]
+type: double precision
+doc: Complex nucleus-electron integrals in |MO| basis set
+size: (2,mo_basis.mo_num,mo_basis.mo_num)
+interface: ezfio
+
+[mo_integrals_n_e_kpts]
+type: double precision
+doc: Complex nucleus-electron integrals in |MO| basis set
+size: (2,mo_basis.mo_num_per_kpt,mo_basis.mo_num_per_kpt,nuclei.kpt_num)
+interface: ezfio
+
 [io_mo_integrals_n_e]
 type: Disk_access
 doc: Read/Write |MO| electron-nucleus attraction integrals from/to disk [ Write | Read | None ]
@@ -17,12 +29,35 @@ doc: Kinetic energy integrals in |MO| basis set
 size: (mo_basis.mo_num,mo_basis.mo_num)
 interface: ezfio
 
+[mo_integrals_kinetic_complex]
+type: double precision
+doc: Complex kinetic energy integrals in |MO| basis set
+size: (2,mo_basis.mo_num,mo_basis.mo_num)
+interface: ezfio
+
+[mo_integrals_kinetic_kpts]
+type: double precision
+doc: Complex kinetic energy integrals in |MO| basis set
+size: (2,mo_basis.mo_num_per_kpt,mo_basis.mo_num_per_kpt,nuclei.kpt_num)
+interface: ezfio
+
 [io_mo_integrals_kinetic]
 type: Disk_access
 doc: Read/Write |MO| one-electron kinetic integrals from/to disk [ Write | Read | None ]
 interface: ezfio,provider,ocaml
 default: None
 
+[mo_integrals_overlap_kpts]
+type: double precision
+doc: Complex overlap integrals in |MO| basis set
+size: (2,mo_basis.mo_num_per_kpt,mo_basis.mo_num_per_kpt,nuclei.kpt_num)
+interface: ezfio
+
+[io_mo_integrals_overlap]
+type: Disk_access
+doc: Read/Write |MO| one-electron overlap integrals from/to disk [ Write | Read | None ]
+interface: ezfio,provider,ocaml
+default: None
 
 [mo_integrals_pseudo]
 type: double precision
@@ -30,18 +65,43 @@ doc: Pseudopotential integrals in |MO| basis set
 size: (mo_basis.mo_num,mo_basis.mo_num)
 interface: ezfio
 
+[mo_integrals_pseudo_complex]
+type: double precision
+doc: Complex pseudopotential integrals in |MO| basis set
+size: (2,mo_basis.mo_num,mo_basis.mo_num)
+interface: ezfio
+
+[mo_integrals_pseudo_kpts]
+type: double precision
+doc: Complex pseudopotential integrals in |MO| basis set
+size: (2,mo_basis.mo_num_per_kpt,mo_basis.mo_num_per_kpt,nuclei.kpt_num)
+interface: ezfio
+
 [io_mo_integrals_pseudo]
 type: Disk_access
 doc: Read/Write |MO| pseudopotential integrals from/to disk [ Write | Read | None ]
 interface: ezfio,provider,ocaml
 default: None
 
+
 [mo_one_e_integrals]
 type: double precision
 doc: One-electron integrals in |MO| basis set
 size: (mo_basis.mo_num,mo_basis.mo_num)
 interface: ezfio
 
+[mo_one_e_integrals_complex]
+type: double precision
+doc: Complex one-electron integrals in |MO| basis set
+size: (2,mo_basis.mo_num,mo_basis.mo_num)
+interface: ezfio
+
+[mo_one_e_integrals_kpts]
+type: double precision
+doc: Complex one-electron integrals in |MO| basis set
+size: (2,mo_basis.mo_num_per_kpt,mo_basis.mo_num_per_kpt,nuclei.kpt_num)
+interface: ezfio
+
 [io_mo_one_e_integrals]
 type: Disk_access
 doc: Read/Write |MO| one-electron integrals from/to disk [ Write | Read | None ]
diff --git a/src/mo_one_e_ints/ao_to_mo.irp.f b/src/mo_one_e_ints/ao_to_mo.irp.f
index a0d8caaa..f388119b 100644
--- a/src/mo_one_e_ints/ao_to_mo.irp.f
+++ b/src/mo_one_e_ints/ao_to_mo.irp.f
@@ -63,4 +63,3 @@ BEGIN_PROVIDER [ double precision, S_mo_coef, (ao_num, mo_num) ]
 
 END_PROVIDER
 
-
diff --git a/src/mo_one_e_ints/ao_to_mo_cplx.irp.f b/src/mo_one_e_ints/ao_to_mo_cplx.irp.f
new file mode 100644
index 00000000..875d84a9
--- /dev/null
+++ b/src/mo_one_e_ints/ao_to_mo_cplx.irp.f
@@ -0,0 +1,146 @@
+subroutine mo_to_ao_complex(A_mo,LDA_mo,A_ao,LDA_ao)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the MO basis to the AO basis
+  !
+  ! (S.C).A_mo.(S.C)t
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  complex*16, intent(in)         :: A_mo(LDA_mo,mo_num)
+  complex*16, intent(out)        :: A_ao(LDA_ao,ao_num)
+  complex*16, allocatable        :: T(:,:)
+  
+  allocate ( T(mo_num,ao_num) )
+  
+  call zgemm('N','C', mo_num, ao_num, mo_num,                &
+      (1.d0,0.d0), A_mo,size(A_mo,1),                                &
+      S_mo_coef_complex, size(S_mo_coef_complex,1),                                  &
+      (0.d0,0.d0), T, size(T,1))
+  
+  call zgemm('N','N', ao_num, ao_num, mo_num,                    &
+      (1.d0,0.d0), S_mo_coef_complex, size(S_mo_coef_complex,1),                     &
+      T, size(T,1),                                                  &
+      (0.d0,0.d0), A_ao, size(A_ao,1))
+  
+  deallocate(T)
+end
+
+subroutine mo_to_ao_no_overlap_complex(A_mo,LDA_mo,A_ao,LDA_ao)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the MO basis to the S^-1 AO basis
+  ! Useful for density matrix
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  complex*16, intent(in)         :: A_mo(LDA_mo,mo_num)
+  complex*16, intent(out)        :: A_ao(LDA_ao,ao_num)
+  complex*16, allocatable        :: T(:,:)
+  
+  allocate ( T(mo_num,ao_num) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+  
+  call zgemm('N','C', mo_num, ao_num, mo_num,                &
+      (1.d0,0.d0), A_mo,size(A_mo,1),                                &
+      mo_coef_complex, size(mo_coef_complex,1),                                      &
+      (0.d0,0.d0), T, size(T,1))
+  
+  call zgemm('N','N', ao_num, ao_num, mo_num,                    &
+      (1.d0,0.d0), mo_coef_complex,size(mo_coef_complex,1),                          &
+      T, size(T,1),                                                  &
+      (0.d0,0.d0), A_ao, size(A_ao,1))
+  
+  deallocate(T)
+end
+
+BEGIN_PROVIDER [ complex*16, S_mo_coef_complex, (ao_num, mo_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Product S.C where S is the overlap matrix in the AO basis and C the mo_coef matrix.
+  END_DOC
+
+  call zgemm('N','N',ao_num, mo_num, ao_num, (1.d0,0.d0), &
+        ao_overlap_complex, size(ao_overlap_complex,1), &
+        mo_coef_complex, size(mo_coef_complex,1), &
+        (0.d0,0.d0), &
+        S_mo_coef_complex, size(S_mo_coef_complex,1))
+
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+subroutine mo_to_ao_kpts(A_mo,LDA_mo,A_ao,LDA_ao)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the MO basis to the AO basis
+  !
+  ! (S.C).A_mo.(S.C)t
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  complex*16, intent(in)         :: A_mo(LDA_mo,mo_num_per_kpt,kpt_num)
+  complex*16, intent(out)        :: A_ao(LDA_ao,ao_num_per_kpt,kpt_num)
+  complex*16, allocatable        :: T(:,:)
+  
+  allocate ( T(mo_num_per_kpt,ao_num_per_kpt) )
+  integer :: k
+  do k=1,kpt_num
+    call zgemm('N','C', mo_num_per_kpt, ao_num_per_kpt, mo_num_per_kpt,                &
+        (1.d0,0.d0), A_mo(:,:,k),size(A_mo,1),                                &
+        S_mo_coef_kpts(:,:,k), size(S_mo_coef_kpts,1),                                  &
+        (0.d0,0.d0), T, size(T,1))
+    
+    call zgemm('N','N', ao_num_per_kpt, ao_num_per_kpt, mo_num_per_kpt,                    &
+        (1.d0,0.d0), S_mo_coef_kpts(:,:,k), size(S_mo_coef_kpts,1),                     &
+        T, size(T,1),                                                  &
+        (0.d0,0.d0), A_ao(:,:,k), size(A_ao,1))
+  enddo
+  deallocate(T)
+end
+
+subroutine mo_to_ao_no_overlap_kpts(A_mo,LDA_mo,A_ao,LDA_ao)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the MO basis to the S^-1 AO basis
+  ! Useful for density matrix
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  complex*16, intent(in)         :: A_mo(LDA_mo,mo_num_per_kpt,kpt_num)
+  complex*16, intent(out)        :: A_ao(LDA_ao,ao_num_per_kpt,kpt_num)
+  complex*16, allocatable        :: T(:,:)
+  
+  allocate ( T(mo_num_per_kpt,ao_num_per_kpt) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+  integer :: k
+  do k=1,kpt_num
+    call zgemm('N','C', mo_num_per_kpt, ao_num_per_kpt, mo_num_per_kpt,                &
+        (1.d0,0.d0), A_mo(:,:,k),size(A_mo,1),                                &
+        mo_coef_kpts(:,:,k), size(mo_coef_kpts,1),                                      &
+        (0.d0,0.d0), T, size(T,1))
+    
+    call zgemm('N','N', ao_num_per_kpt, ao_num_per_kpt, mo_num_per_kpt,                    &
+        (1.d0,0.d0), mo_coef_kpts(:,:,k),size(mo_coef_kpts,1),                          &
+        T, size(T,1),                                                  &
+        (0.d0,0.d0), A_ao(:,:,k), size(A_ao,1))
+  enddo
+  deallocate(T)
+end
+
+BEGIN_PROVIDER [ complex*16, S_mo_coef_kpts, (ao_num_per_kpt, mo_num_per_kpt, kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Product S.C where S is the overlap matrix in the AO basis and C the mo_coef matrix.
+  END_DOC
+
+  integer :: k
+  do k=1,kpt_num
+    call zgemm('N','N',ao_num_per_kpt, mo_num_per_kpt, ao_num_per_kpt, (1.d0,0.d0), &
+          ao_overlap_kpts(:,:,k), size(ao_overlap_kpts,1), &
+          mo_coef_kpts(:,:,k), size(mo_coef_kpts,1), &
+          (0.d0,0.d0), &
+          S_mo_coef_kpts(:,:,k), size(S_mo_coef_kpts,1))
+  enddo
+END_PROVIDER
+
diff --git a/src/mo_one_e_ints/kin_mo_ints.irp.f b/src/mo_one_e_ints/kin_mo_ints.irp.f
index 216628bb..e07ab690 100644
--- a/src/mo_one_e_ints/kin_mo_ints.irp.f
+++ b/src/mo_one_e_ints/kin_mo_ints.irp.f
@@ -22,3 +22,26 @@ BEGIN_PROVIDER [double precision, mo_kinetic_integrals, (mo_num,mo_num)]
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, mo_kinetic_integrals_diag,(mo_num)]
+  implicit none
+  integer                        :: i
+  BEGIN_DOC
+  ! diagonal elements of mo_kinetic_integrals or mo_kinetic_integrals_complex
+  END_DOC
+  
+  if (is_complex) then
+    integer :: k,i_shft
+    PROVIDE mo_kinetic_integrals_kpts
+    do k=1,kpt_num
+      i_shft = (k-1)*mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        mo_kinetic_integrals_diag(i+i_shft) = dble(mo_kinetic_integrals_kpts(i,i,k))
+      enddo
+    enddo
+  else
+    PROVIDE mo_kinetic_integrals
+    do i=1,mo_num
+      mo_kinetic_integrals_diag(i) = mo_kinetic_integrals(i,i)
+    enddo
+  endif
+END_PROVIDER
diff --git a/src/mo_one_e_ints/kin_mo_ints_cplx.irp.f b/src/mo_one_e_ints/kin_mo_ints_cplx.irp.f
new file mode 100644
index 00000000..dfef7801
--- /dev/null
+++ b/src/mo_one_e_ints/kin_mo_ints_cplx.irp.f
@@ -0,0 +1,60 @@
+BEGIN_PROVIDER [complex*16, mo_kinetic_integrals_complex, (mo_num,mo_num)]
+  implicit none
+  BEGIN_DOC
+  !  Kinetic energy integrals in the MO basis
+  END_DOC
+  integer :: i,j
+
+  print *, 'Providing MO kinetic integrals'
+  if (read_mo_integrals_kinetic) then
+    call ezfio_get_mo_one_e_ints_mo_integrals_kinetic_complex(mo_kinetic_integrals_complex)
+    print *,  'MO kinetic integrals read from disk'
+  else
+  print *, 'Providing MO kinetic integrals from AO kinetic integrals'
+    call ao_to_mo_complex(                                            &
+        ao_kinetic_integrals_complex,                                 &
+        size(ao_kinetic_integrals_complex,1),                         &
+        mo_kinetic_integrals_complex,                                 &
+        size(mo_kinetic_integrals_complex,1)                          &
+        )
+  endif
+  if (write_mo_integrals_kinetic) then
+    call ezfio_set_mo_one_e_ints_mo_integrals_kinetic_complex(mo_kinetic_integrals_complex)
+    print *,  'MO kinetic integrals written to disk'
+  endif
+
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [complex*16, mo_kinetic_integrals_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  !  Kinetic energy integrals in the MO basis
+  END_DOC
+  integer :: i,j
+
+  print *, 'Providing MO kinetic integrals'
+  if (read_mo_integrals_kinetic) then
+    call ezfio_get_mo_one_e_ints_mo_integrals_kinetic_kpts(mo_kinetic_integrals_kpts)
+    print *,  'MO kinetic integrals read from disk'
+  else
+  print *, 'Providing MO kinetic integrals from AO kinetic integrals'
+    call ao_to_mo_kpts(                                            &
+        ao_kinetic_integrals_kpts,                                 &
+        size(ao_kinetic_integrals_kpts,1),                         &
+        mo_kinetic_integrals_kpts,                                 &
+        size(mo_kinetic_integrals_kpts,1)                          &
+        )
+  endif
+  if (write_mo_integrals_kinetic) then
+    call ezfio_set_mo_one_e_ints_mo_integrals_kinetic_kpts(mo_kinetic_integrals_kpts)
+    print *,  'MO kinetic integrals written to disk'
+  endif
+
+END_PROVIDER
+
diff --git a/src/mo_one_e_ints/mo_one_e_ints.irp.f b/src/mo_one_e_ints/mo_one_e_ints.irp.f
index a6a614ab..7ca8620a 100644
--- a/src/mo_one_e_ints/mo_one_e_ints.irp.f
+++ b/src/mo_one_e_ints/mo_one_e_ints.irp.f
@@ -20,3 +20,27 @@ BEGIN_PROVIDER [ double precision, mo_one_e_integrals,(mo_num,mo_num)]
   ENDIF
 
 END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, mo_one_e_integrals_diag,(mo_num)]
+  implicit none
+  integer                        :: i
+  BEGIN_DOC
+  ! diagonal elements of mo_one_e_integrals or mo_one_e_integrals_complex
+  END_DOC
+  
+  if (is_complex) then
+    integer :: k,i_shft
+    PROVIDE mo_one_e_integrals_kpts
+    do k=1,kpt_num
+      i_shft = (k-1)*mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        mo_one_e_integrals_diag(i+i_shft) = dble(mo_one_e_integrals_kpts(i,i,k))
+      enddo
+    enddo
+  else
+    PROVIDE mo_one_e_integrals
+    do i=1,mo_num
+      mo_one_e_integrals_diag(i) = mo_one_e_integrals(i,i)
+    enddo
+  endif
+END_PROVIDER
diff --git a/src/mo_one_e_ints/mo_one_e_ints_cplx.irp.f b/src/mo_one_e_ints/mo_one_e_ints_cplx.irp.f
new file mode 100644
index 00000000..59088f6e
--- /dev/null
+++ b/src/mo_one_e_ints/mo_one_e_ints_cplx.irp.f
@@ -0,0 +1,78 @@
+BEGIN_PROVIDER [ complex*16, mo_one_e_integrals_complex,(mo_num,mo_num)]
+  implicit none
+  integer                        :: i,j,n,l
+  BEGIN_DOC
+  ! array of the one-electron Hamiltonian on the |MO| basis :
+  ! sum of the kinetic and nuclear electronic potentials (and pseudo potential if needed)
+  END_DOC
+  print*,'Providing the one-electron integrals'
+
+  IF (read_mo_one_e_integrals) THEN
+    call ezfio_get_mo_one_e_ints_mo_one_e_integrals_complex(mo_one_e_integrals_complex)
+  ELSE
+    mo_one_e_integrals_complex  = mo_integrals_n_e_complex + mo_kinetic_integrals_complex
+
+    IF (do_pseudo) THEN
+      mo_one_e_integrals_complex  += mo_pseudo_integrals_complex
+    ENDIF
+
+  ENDIF
+
+  IF (write_mo_one_e_integrals) THEN
+    call ezfio_set_mo_one_e_ints_mo_one_e_integrals_complex(mo_one_e_integrals_complex)
+    print *,  'MO one-e integrals written to disk'
+  ENDIF
+  print*,'Provided the one-electron integrals'
+
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [ complex*16, mo_one_e_integrals_kpts,(mo_num_per_kpt,mo_num_per_kpt,kpt_num)]
+  implicit none
+  integer                        :: i,j,n,l
+  BEGIN_DOC
+  ! array of the one-electron Hamiltonian on the |MO| basis :
+  ! sum of the kinetic and nuclear electronic potentials (and pseudo potential if needed)
+  END_DOC
+  print*,'Providing the one-electron integrals'
+
+  IF (read_mo_one_e_integrals) THEN
+    call ezfio_get_mo_one_e_ints_mo_one_e_integrals_kpts(mo_one_e_integrals_kpts)
+  ELSE
+    mo_one_e_integrals_kpts  = mo_integrals_n_e_kpts + mo_kinetic_integrals_kpts
+
+    IF (do_pseudo) THEN
+      mo_one_e_integrals_kpts  += mo_pseudo_integrals_kpts
+    ENDIF
+
+  ENDIF
+
+  IF (write_mo_one_e_integrals) THEN
+    call ezfio_set_mo_one_e_ints_mo_one_e_integrals_kpts(mo_one_e_integrals_kpts)
+    print *,  'MO one-e integrals written to disk'
+  ENDIF
+  print*,'Provided the one-electron integrals'
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, mo_one_e_integrals_kpts_real,(mo_num_per_kpt,mo_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! array of the one-electron Hamiltonian on the |MO| basis :
+  ! sum of the kinetic and nuclear electronic potentials (and pseudo potential if needed)
+  END_DOC
+
+  integer :: i,j,k
+  do k=1,kpt_num
+    do j=1,mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        mo_one_e_integrals_kpts_real(i,j,k) = dble(mo_one_e_integrals_kpts(i,j,k))
+      enddo
+    enddo
+  enddo
+END_PROVIDER
diff --git a/src/mo_one_e_ints/mo_overlap.irp.f b/src/mo_one_e_ints/mo_overlap.irp.f
index 4ce83fcd..9d31bddb 100644
--- a/src/mo_one_e_ints/mo_overlap.irp.f
+++ b/src/mo_one_e_ints/mo_overlap.irp.f
@@ -37,3 +37,109 @@ BEGIN_PROVIDER [ double precision, mo_overlap,(mo_num,mo_num) ]
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ complex*16, mo_overlap_complex,(mo_num,mo_num) ]
+  implicit none
+  BEGIN_DOC
+! Provider to check that the MOs are indeed orthonormal.
+  END_DOC
+  integer :: i,j,n,l
+  integer :: lmax
+
+
+  lmax = (ao_num/4) * 4
+  !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(NONE) &
+  !$OMP  PRIVATE(i,j,n,l) &
+  !$OMP  SHARED(mo_overlap_complex,mo_coef_complex,ao_overlap_complex, &
+  !$OMP    mo_num,ao_num,lmax)
+  do j=1,mo_num
+   do i= 1,mo_num
+    mo_overlap_complex(i,j) = (0.d0,0.d0)
+    do n = 1, lmax,4
+     do l = 1, ao_num
+      mo_overlap_complex(i,j) = mo_overlap_complex(i,j) + dconjg(mo_coef_complex(l,i)) * &
+           ( mo_coef_complex(n  ,j) * ao_overlap_complex(l,n  )  &
+           + mo_coef_complex(n+1,j) * ao_overlap_complex(l,n+1)  &
+           + mo_coef_complex(n+2,j) * ao_overlap_complex(l,n+2)  &
+           + mo_coef_complex(n+3,j) * ao_overlap_complex(l,n+3)  )
+     enddo
+    enddo
+    do n = lmax+1, ao_num
+     do l = 1, ao_num
+      mo_overlap_complex(i,j) = mo_overlap_complex(i,j) + mo_coef_complex(n,j) * dconjg(mo_coef_complex(l,i)) * ao_overlap_complex(l,n)
+     enddo
+    enddo
+   enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, mo_overlap_kpts,(mo_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+! Provider to check that the MOs are indeed orthonormal.
+  END_DOC
+  integer :: i,j,n,l,k
+  integer :: lmax
+
+  print *, 'Providing MO overlap integrals'
+  if (read_mo_integrals_overlap) then
+    call ezfio_get_mo_one_e_ints_mo_integrals_overlap_kpts(mo_overlap_kpts)
+    print *,  'MO overlap integrals read from disk'
+  else
+  print *, 'Providing MO overlap integrals from AO overlap integrals'
+  !  call ao_to_mo_kpts(                                            &
+  !      ao_kinetic_integrals_kpts,                                 &
+  !      size(ao_kinetic_integrals_kpts,1),                         &
+  !      mo_kinetic_integrals_kpts,                                 &
+  !      size(mo_kinetic_integrals_kpts,1)                          &
+  !      )
+  !endif
+  
+
+  lmax = (ao_num_per_kpt/4) * 4
+  !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(NONE) &
+  !$OMP  PRIVATE(i,j,n,l,k) &
+  !$OMP  SHARED(mo_overlap_kpts,mo_coef_kpts,ao_overlap_kpts, &
+  !$OMP    mo_num_per_kpt,ao_num_per_kpt,lmax,kpt_num)
+  do k=1,kpt_num
+    do j=1,mo_num_per_kpt
+      do i= 1,mo_num_per_kpt
+        mo_overlap_kpts(i,j,k) = (0.d0,0.d0)
+        do n = 1, lmax,4
+          do l = 1, ao_num_per_kpt
+            mo_overlap_kpts(i,j,k) = mo_overlap_kpts(i,j,k) + dconjg(mo_coef_kpts(l,i,k)) * &
+                 ( mo_coef_kpts(n  ,j,k) * ao_overlap_kpts(l,n  ,k)  &
+                 + mo_coef_kpts(n+1,j,k) * ao_overlap_kpts(l,n+1,k)  &
+                 + mo_coef_kpts(n+2,j,k) * ao_overlap_kpts(l,n+2,k)  &
+                 + mo_coef_kpts(n+3,j,k) * ao_overlap_kpts(l,n+3,k)  )
+          enddo
+        enddo
+        do n = lmax+1, ao_num_per_kpt
+          do l = 1, ao_num_per_kpt
+            mo_overlap_kpts(i,j,k) = mo_overlap_kpts(i,j,k) + mo_coef_kpts(n,j,k) * &
+              dconjg(mo_coef_kpts(l,i,k)) * ao_overlap_kpts(l,n,k)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+  endif
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, mo_overlap_kpts_real, (mo_num_per_kpt, mo_num_per_kpt, kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Overlap for complex MOs
+  END_DOC
+  integer :: i,j,k
+  do k=1,kpt_num
+    do j=1,mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        mo_overlap_kpts_real(i,j,k) = dble(mo_overlap_kpts(i,j,k))
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
diff --git a/src/mo_one_e_ints/orthonormalize.irp.f b/src/mo_one_e_ints/orthonormalize.irp.f
index 3a5d5488..dd6ee8ee 100644
--- a/src/mo_one_e_ints/orthonormalize.irp.f
+++ b/src/mo_one_e_ints/orthonormalize.irp.f
@@ -1,11 +1,41 @@
 subroutine orthonormalize_mos
   implicit none
-  integer :: m,p,s
-  m = size(mo_coef,1)
-  p = size(mo_overlap,1)
-  call ortho_lowdin(mo_overlap,p,mo_num,mo_coef,m,ao_num,lin_dep_cutoff)
-  mo_label = 'Orthonormalized'
-  SOFT_TOUCH mo_coef mo_label
+  integer :: m,p,s,k
+  if (is_complex) then
+    do k=1,kpt_num
+      m = size(mo_coef_kpts,1)
+      p = size(mo_overlap_kpts,1)
+      call ortho_lowdin_complex(mo_overlap_kpts(1,1,k),p,mo_num_per_kpt,mo_coef_kpts(1,1,k),m,ao_num_per_kpt,lin_dep_cutoff)
+    enddo
+    mo_label = 'Orthonormalized'
+    SOFT_TOUCH mo_coef_kpts mo_label
+  else
+    m = size(mo_coef,1)
+    p = size(mo_overlap,1)
+    call ortho_lowdin(mo_overlap,p,mo_num,mo_coef,m,ao_num,lin_dep_cutoff)
+    mo_label = 'Orthonormalized'
+    SOFT_TOUCH mo_coef mo_label
+  endif
+end
+
+
+subroutine orthonormalize_mos_k_real
+  implicit none
+  integer :: m,p,s,k
+  double precision, allocatable :: mo_coef_tmp(:,:)
+  
+  allocate(mo_coef_tmp(ao_num_per_kpt,mo_num_per_kpt))
+  do k=1,kpt_num
+    m = size(mo_coef_kpts,1)
+    p = size(mo_overlap_kpts,1)
+    mo_coef_tmp = dble(mo_coef_kpts(:,:,k))
+    call ortho_lowdin(mo_overlap_kpts_real(1,1,k),p,mo_num_per_kpt,mo_coef_tmp,m,ao_num_per_kpt,lin_dep_cutoff)
+    call zlacp2('X',ao_num_per_kpt,mo_num_per_kpt,mo_coef_tmp,size(mo_coef_tmp,1), &
+           mo_coef_kpts(1,1,k),size(mo_coef_kpts,1))
+  enddo
+  deallocate(mo_coef_tmp)
+  mo_label = 'Orthonormalized'
+  SOFT_TOUCH mo_coef_kpts mo_label
 end
 
 
diff --git a/src/mo_one_e_ints/pot_mo_ints.irp.f b/src/mo_one_e_ints/pot_mo_ints.irp.f
index 5cf4febd..c6fc53a5 100644
--- a/src/mo_one_e_ints/pot_mo_ints.irp.f
+++ b/src/mo_one_e_ints/pot_mo_ints.irp.f
@@ -44,3 +44,26 @@ BEGIN_PROVIDER [double precision, mo_integrals_n_e_per_atom, (mo_num,mo_num,nucl
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, mo_integrals_n_e_diag,(mo_num)]
+  implicit none
+  integer                        :: i
+  BEGIN_DOC
+  ! diagonal elements of mo_integrals_n_e or mo_integrals_n_e_complex
+  END_DOC
+  
+  if (is_complex) then
+    integer :: k,i_shft
+    PROVIDE mo_integrals_n_e_kpts
+    do k=1,kpt_num
+      i_shft = (k-1)*mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        mo_integrals_n_e_diag(i+i_shft) = dble(mo_integrals_n_e_kpts(i,i,k))
+      enddo
+    enddo
+  else
+    PROVIDE mo_integrals_n_e
+    do i=1,mo_num
+      mo_integrals_n_e_diag(i) = mo_integrals_n_e(i,i)
+    enddo
+  endif
+END_PROVIDER
diff --git a/src/mo_one_e_ints/pot_mo_ints_cplx.irp.f b/src/mo_one_e_ints/pot_mo_ints_cplx.irp.f
new file mode 100644
index 00000000..f472a8ff
--- /dev/null
+++ b/src/mo_one_e_ints/pot_mo_ints_cplx.irp.f
@@ -0,0 +1,59 @@
+BEGIN_PROVIDER [complex*16, mo_integrals_n_e_complex, (mo_num,mo_num)]
+  implicit none
+  BEGIN_DOC
+  !  Kinetic energy integrals in the MO basis
+  END_DOC
+  integer :: i,j
+  
+  print *, 'Providing MO N-e integrals'
+  if (read_mo_integrals_n_e) then
+    call ezfio_get_mo_one_e_ints_mo_integrals_n_e_complex(mo_integrals_n_e_complex)
+    print *,  'MO N-e integrals read from disk'
+  else
+  print *, 'Providing MO N-e integrals from AO N-e integrals'
+    call ao_to_mo_complex(                                            &
+        ao_integrals_n_e_complex,                                 &
+        size(ao_integrals_n_e_complex,1),                         &
+        mo_integrals_n_e_complex,                                 &
+        size(mo_integrals_n_e_complex,1)                          &
+        )
+  endif
+  if (write_mo_integrals_n_e) then
+    call ezfio_set_mo_one_e_ints_mo_integrals_n_e_complex(mo_integrals_n_e_complex)
+    print *,  'MO N-e integrals written to disk'
+  endif
+
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [complex*16, mo_integrals_n_e_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  !  Kinetic energy integrals in the MO basis
+  END_DOC
+  integer :: i,j
+  
+  print *, 'Providing MO N-e integrals'
+  if (read_mo_integrals_n_e) then
+    call ezfio_get_mo_one_e_ints_mo_integrals_n_e_kpts(mo_integrals_n_e_kpts)
+    print *,  'MO N-e integrals read from disk'
+  else
+  print *, 'Providing MO N-e integrals from AO N-e integrals'
+    call ao_to_mo_kpts(                                            &
+        ao_integrals_n_e_kpts,                                 &
+        size(ao_integrals_n_e_kpts,1),                         &
+        mo_integrals_n_e_kpts,                                 &
+        size(mo_integrals_n_e_kpts,1)                          &
+        )
+  endif
+  if (write_mo_integrals_n_e) then
+    call ezfio_set_mo_one_e_ints_mo_integrals_n_e_kpts(mo_integrals_n_e_kpts)
+    print *,  'MO N-e integrals written to disk'
+  endif
+
+END_PROVIDER
diff --git a/src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f b/src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f
index 179b33ed..504d8c02 100644
--- a/src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f
+++ b/src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f
@@ -25,4 +25,27 @@ BEGIN_PROVIDER [double precision, mo_pseudo_integrals, (mo_num,mo_num)]
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, mo_pseudo_integrals_diag,(mo_num)]
+  implicit none
+  integer                        :: i
+  BEGIN_DOC
+  ! diagonal elements of mo_pseudo_integrals or mo_pseudo_integrals_complex
+  END_DOC
+  
+  if (is_complex) then
+    integer :: k,i_shft
+    PROVIDE mo_pseudo_integrals_kpts
+    do k=1,kpt_num
+      i_shft = (k-1)*mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        mo_pseudo_integrals_diag(i+i_shft) = dble(mo_pseudo_integrals_kpts(i,i,k))
+      enddo
+    enddo
+  else
+    PROVIDE mo_pseudo_integrals
+    do i=1,mo_num
+      mo_pseudo_integrals_diag(i) = mo_pseudo_integrals(i,i)
+    enddo
+  endif
+END_PROVIDER
 
diff --git a/src/mo_one_e_ints/pot_mo_pseudo_ints_cplx.irp.f b/src/mo_one_e_ints/pot_mo_pseudo_ints_cplx.irp.f
new file mode 100644
index 00000000..ca71a995
--- /dev/null
+++ b/src/mo_one_e_ints/pot_mo_pseudo_ints_cplx.irp.f
@@ -0,0 +1,59 @@
+BEGIN_PROVIDER [complex*16, mo_pseudo_integrals_complex, (mo_num,mo_num)]
+  implicit none
+  BEGIN_DOC
+  ! Pseudopotential integrals in |MO| basis
+  END_DOC
+  integer :: i,j
+
+  if (read_mo_integrals_pseudo) then
+    call ezfio_get_mo_one_e_ints_mo_integrals_pseudo_complex(mo_pseudo_integrals_complex)
+    print *,  'MO pseudopotential integrals read from disk'
+  else if (do_pseudo) then
+    call ao_to_mo_complex(                                            &
+        ao_pseudo_integrals_complex,                                 &
+        size(ao_pseudo_integrals_complex,1),                         &
+        mo_pseudo_integrals_complex,                                 &
+        size(mo_pseudo_integrals_complex,1)                          &
+        )
+  else
+    mo_pseudo_integrals_complex = (0.d0,0.d0)
+  endif
+  if (write_mo_integrals_pseudo) then
+    call ezfio_set_mo_one_e_ints_mo_integrals_pseudo_complex(mo_pseudo_integrals_complex)
+    print *,  'MO pseudopotential integrals written to disk'
+  endif
+
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [complex*16, mo_pseudo_integrals_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Pseudopotential integrals in |MO| basis
+  END_DOC
+  integer :: i,j
+
+  if (read_mo_integrals_pseudo) then
+    call ezfio_get_mo_one_e_ints_mo_integrals_pseudo_kpts(mo_pseudo_integrals_kpts)
+    print *,  'MO pseudopotential integrals read from disk'
+  else if (do_pseudo) then
+    call ao_to_mo_kpts(                                            &
+        ao_pseudo_integrals_kpts,                                 &
+        size(ao_pseudo_integrals_kpts,1),                         &
+        mo_pseudo_integrals_kpts,                                 &
+        size(mo_pseudo_integrals_kpts,1)                          &
+        )
+  else
+    mo_pseudo_integrals_kpts = (0.d0,0.d0)
+  endif
+  if (write_mo_integrals_pseudo) then
+    call ezfio_set_mo_one_e_ints_mo_integrals_pseudo_kpts(mo_pseudo_integrals_kpts)
+    print *,  'MO pseudopotential integrals written to disk'
+  endif
+
+END_PROVIDER
diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg
index ea47c51c..c708792f 100644
--- a/src/mo_two_e_ints/EZFIO.cfg
+++ b/src/mo_two_e_ints/EZFIO.cfg
@@ -17,3 +17,15 @@ doc: If `True`, computes all integrals except for the integrals having 3 or 4 vi
 interface: ezfio,provider,ocaml
 default: false
 
+[io_df_mo_integrals]
+type: Disk_access
+doc: Read/Write df |MO| integrals from/to disk [ Write | Read | None ] 
+interface: ezfio,provider,ocaml
+default: None
+
+[df_mo_integrals_complex]
+type: double precision
+doc: Complex df integrals over MOs
+size: (2,mo_basis.mo_num_per_kpt,mo_basis.mo_num_per_kpt,ao_two_e_ints.df_num,nuclei.kpt_pair_num)
+interface: ezfio
+
diff --git a/src/mo_two_e_ints/core_quantities.irp.f b/src/mo_two_e_ints/core_quantities.irp.f
index 1cc50cb1..773561f0 100644
--- a/src/mo_two_e_ints/core_quantities.irp.f
+++ b/src/mo_two_e_ints/core_quantities.irp.f
@@ -7,7 +7,7 @@ BEGIN_PROVIDER [double precision, core_energy]
  core_energy = 0.d0
  do i = 1, n_core_orb
   j = list_core(i)
-  core_energy += 2.d0 * mo_one_e_integrals(j,j) + mo_two_e_integrals_jj(j,j)
+  core_energy += 2.d0 * mo_one_e_integrals_diag(j) + mo_two_e_integrals_jj(j,j)
   do k = i+1, n_core_orb
    l = list_core(k)
    core_energy += 2.d0 * (2.d0 * mo_two_e_integrals_jj(j,l) - mo_two_e_integrals_jj_exchange(j,l))
@@ -36,3 +36,25 @@ BEGIN_PROVIDER [double precision, core_fock_operator, (mo_num,mo_num)]
   enddo
  enddo
 END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, core_fock_operator_complex, (mo_num,mo_num)]
+ implicit none
+ integer :: i,j,k,l,m,n
+ complex*16 :: get_two_e_integral_complex
+ BEGIN_DOC
+! this is the contribution to the Fock operator from the core electrons
+ END_DOC
+ core_fock_operator_complex = (0.d0,0.d0)
+ do i = 1, n_act_orb
+  j = list_act(i)
+  do k = 1, n_act_orb
+   l = list_act(k)
+   do m = 1, n_core_orb
+    n = list_core(m)
+    core_fock_operator_complex(j,l) += 2.d0 * &
+                    get_two_e_integral_complex(j,n,l,n,mo_integrals_map,mo_integrals_map_2) - &
+                    get_two_e_integral_complex(j,n,n,l,mo_integrals_map,mo_integrals_map_2)
+   enddo
+  enddo
+ enddo
+END_PROVIDER
diff --git a/src/mo_two_e_ints/df_mo_ints.irp.f b/src/mo_two_e_ints/df_mo_ints.irp.f
new file mode 100644
index 00000000..eba3b3da
--- /dev/null
+++ b/src/mo_two_e_ints/df_mo_ints.irp.f
@@ -0,0 +1,732 @@
+BEGIN_PROVIDER [complex*16, df_mo_integrals_complex, (mo_num_per_kpt,mo_num_per_kpt,df_num,kpt_pair_num)]
+  implicit none
+  BEGIN_DOC
+  !  df MO integrals
+  END_DOC
+  integer :: i,j,k,l
+
+  if (read_df_mo_integrals) then
+    call ezfio_get_mo_two_e_ints_df_mo_integrals_complex(df_mo_integrals_complex)
+    print *,  'df MO integrals read from disk'
+  else
+    call df_mo_from_df_ao(df_mo_integrals_complex,df_ao_integrals_complex,mo_num_per_kpt,ao_num_per_kpt,df_num,kpt_pair_num)
+  endif
+
+  if (write_df_mo_integrals) then
+    call ezfio_set_mo_two_e_ints_df_mo_integrals_complex(df_mo_integrals_complex)
+    print *,  'df MO integrals written to disk'
+  endif
+
+END_PROVIDER
+
+subroutine mo_map_fill_from_df_dot
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! fill mo bielec integral map using 3-index df integrals
+  END_DOC
+
+  integer :: i,k,j,l,mu
+  integer :: ki,kk,kj,kl
+  integer :: ii,ik,ij,il
+  integer :: kikk2,kjkl2,jl2,ik2
+  integer :: i_mo,j_mo,i_df
+
+  complex*16,allocatable :: ints_ik(:,:,:), ints_jl(:,:,:)
+
+  complex*16 :: integral,mjl,mik
+  integer                        :: n_integrals_1, n_integrals_2
+  integer                        :: size_buffer
+  integer(key_kind),allocatable  :: buffer_i_1(:), buffer_i_2(:)
+  real(integral_kind),allocatable :: buffer_values_1(:), buffer_values_2(:)
+  double precision               :: tmp_re,tmp_im
+  integer                        :: mo_num_kpt_2
+
+  double precision               :: cpu_1, cpu_2, wall_1, wall_2, wall_0
+  double precision               :: map_mb
+
+  logical :: use_map1
+  integer(key_kind) :: idx_tmp
+  double precision :: sign
+  !complex*16, external :: zdotc
+  complex*16, external :: zdotu
+
+  mo_num_kpt_2 = mo_num_per_kpt * mo_num_per_kpt
+
+  size_buffer = min(mo_num_per_kpt*mo_num_per_kpt*mo_num_per_kpt,16000000)
+  print*, 'Providing the mo_bielec integrals from 3-index df integrals'
+  call write_time(6)
+!  call ezfio_set_integrals_bielec_disk_access_mo_integrals('Write')
+!  TOUCH read_mo_integrals read_ao_integrals write_mo_integrals write_ao_integrals 
+
+  call wall_time(wall_1)
+  call cpu_time(cpu_1)
+
+  allocate( ints_jl(df_num,mo_num_per_kpt,mo_num_per_kpt))
+  allocate( ints_ik(df_num,mo_num_per_kpt,mo_num_per_kpt))
+
+  wall_0 = wall_1
+  do kl=1, kpt_num
+    do kj=1, kl
+      call idx2_tri_int(kj,kl,kjkl2)
+      if (kj < kl) then
+        do i_mo=1,mo_num_per_kpt
+          do j_mo=1,mo_num_per_kpt
+            do i_df=1,df_num
+              ints_jl(i_df,i_mo,j_mo) = dconjg(df_mo_integrals_complex(j_mo,i_mo,i_df,kjkl2))
+            enddo
+          enddo
+        enddo
+      else
+        do i_mo=1,mo_num_per_kpt
+          do j_mo=1,mo_num_per_kpt
+            do i_df=1,df_num
+              ints_jl(i_df,i_mo,j_mo) = df_mo_integrals_complex(i_mo,j_mo,i_df,kjkl2)
+            enddo
+          enddo
+        enddo
+      endif
+
+      do kk=1,kl
+        ki=kconserv(kl,kk,kj)
+        if (ki>kl) cycle
+        call idx2_tri_int(ki,kk,kikk2)
+        if (ki < kk) then
+          do i_mo=1,mo_num_per_kpt
+            do j_mo=1,mo_num_per_kpt
+              do i_df=1,df_num
+                ints_ik(i_df,i_mo,j_mo) = dconjg(df_mo_integrals_complex(j_mo,i_mo,i_df,kikk2))
+              enddo
+            enddo
+          enddo
+!          ints_ik = conjg(reshape(df_mo_integral_array(:,:,:,kikk2),(/mo_num_per_kpt,mo_num_per_kpt,df_num/),order=(/2,1,3/)))
+        else
+          do i_mo=1,mo_num_per_kpt
+            do j_mo=1,mo_num_per_kpt
+              do i_df=1,df_num
+                ints_ik(i_df,i_mo,j_mo) = df_mo_integrals_complex(i_mo,j_mo,i_df,kikk2)
+              enddo
+            enddo
+          enddo
+        endif
+
+        !$OMP PARALLEL PRIVATE(i,k,j,l,ii,ik,ij,il,jl2,ik2, &
+            !$OMP  mu, mik, mjl, &
+            !$OMP  n_integrals_1, buffer_i_1, buffer_values_1, &
+            !$OMP  n_integrals_2, buffer_i_2, buffer_values_2, &
+            !$OMP  idx_tmp, tmp_re, tmp_im, integral,sign,use_map1) &
+            !$OMP  DEFAULT(NONE)  &
+            !$OMP  SHARED(size_buffer, kpt_num, df_num, mo_num_per_kpt, mo_num_kpt_2, &
+            !$OMP  kl,kj,kjkl2,ints_jl, &
+            !$OMP  ki,kk,kikk2,ints_ik, &
+            !$OMP  kconserv, df_mo_integrals_complex, mo_integrals_threshold, &
+            !$OMP  mo_integrals_map, mo_integrals_map_2)
+
+        allocate( &
+          buffer_i_1(size_buffer), &
+          buffer_i_2(size_buffer), &
+          buffer_values_1(size_buffer), &
+          buffer_values_2(size_buffer) &
+        )
+      
+        n_integrals_1=0
+        n_integrals_2=0
+        !$OMP DO SCHEDULE(guided)
+        do il=1,mo_num_per_kpt
+          l=il+(kl-1)*mo_num_per_kpt
+          do ij=1,mo_num_per_kpt
+            j=ij+(kj-1)*mo_num_per_kpt
+            if (j>l) exit
+            call idx2_tri_int(j,l,jl2)
+            do ik=1,mo_num_per_kpt
+              k=ik+(kk-1)*mo_num_per_kpt
+              if (k>l) exit
+              do ii=1,mo_num_per_kpt
+                i=ii+(ki-1)*mo_num_per_kpt
+                if ((j==l) .and. (i>k)) exit
+                call idx2_tri_int(i,k,ik2)
+                if (ik2 > jl2) exit
+                !integral = zdotc(df_num,ints_jl(1,ij,il),1,ints_ik(1,ii,ik),1)
+                integral = zdotu(df_num,ints_jl(1,ij,il),1,ints_ik(1,ii,ik),1)
+!                print*,i,k,j,l,real(integral),imag(integral)
+                if (cdabs(integral) < mo_integrals_threshold) then
+                  cycle
+                endif
+                call ao_two_e_integral_complex_map_idx_sign(i,j,k,l,use_map1,idx_tmp,sign)
+                tmp_re = dble(integral)
+                tmp_im = dimag(integral)
+                if (use_map1) then
+                  n_integrals_1 += 1
+                  buffer_i_1(n_integrals_1)=idx_tmp
+                  buffer_values_1(n_integrals_1)=tmp_re
+                  if (sign.ne.0.d0) then
+                    n_integrals_1 += 1
+                    buffer_i_1(n_integrals_1)=idx_tmp+1
+                    buffer_values_1(n_integrals_1)=tmp_im*sign
+                  endif 
+                  if (n_integrals_1 >= size(buffer_i_1)-1) then
+                    call map_append(mo_integrals_map, buffer_i_1, buffer_values_1, n_integrals_1)
+                    !call insert_into_mo_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1,mo_integrals_threshold)
+                    n_integrals_1 = 0
+                  endif
+                else
+                  n_integrals_2 += 1
+                  buffer_i_2(n_integrals_2)=idx_tmp
+                  buffer_values_2(n_integrals_2)=tmp_re
+                  if (sign.ne.0.d0) then
+                    n_integrals_2 += 1
+                    buffer_i_2(n_integrals_2)=idx_tmp+1
+                    buffer_values_2(n_integrals_2)=tmp_im*sign
+                  endif
+                  if (n_integrals_2 >= size(buffer_i_2)-1) then
+                    call map_append(mo_integrals_map_2, buffer_i_2, buffer_values_2, n_integrals_2)
+                    !call insert_into_mo_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2,mo_integrals_threshold)
+                    n_integrals_2 = 0
+                  endif
+                endif
+
+              enddo !ii
+            enddo !ik
+          enddo !ij
+        enddo !il
+        !$OMP END DO NOWAIT
+
+        if (n_integrals_1 > 0) then
+          call map_append(mo_integrals_map, buffer_i_1, buffer_values_1, n_integrals_1)
+          !call insert_into_mo_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1,mo_integrals_threshold)
+        endif
+        if (n_integrals_2 > 0) then
+          call map_append(mo_integrals_map_2, buffer_i_2, buffer_values_2, n_integrals_2)
+          !call insert_into_mo_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2,mo_integrals_threshold)
+        endif
+        deallocate( &
+          buffer_i_1, &
+          buffer_i_2, &
+          buffer_values_1, &
+          buffer_values_2 &
+          )
+        !$OMP END PARALLEL
+      enddo !kk
+    enddo !kj
+    call wall_time(wall_2)
+    if (wall_2 - wall_0 > 1.d0) then
+      wall_0 = wall_2
+      print*, 100.*float(kl)/float(kpt_num), '% in ',               &
+          wall_2-wall_1,'s',map_mb(mo_integrals_map),'+',map_mb(mo_integrals_map_2),'MB'
+    endif
+
+  enddo !kl
+  deallocate( ints_jl,ints_ik )
+
+  call map_sort(mo_integrals_map)
+  call map_unique(mo_integrals_map)
+  call map_sort(mo_integrals_map_2)
+  call map_unique(mo_integrals_map_2)
+  !call map_merge(mo_integrals_map)
+  !call map_merge(mo_integrals_map_2)
+
+  !!call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints_complex_1',mo_integrals_map)
+  !!call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints_complex_2',mo_integrals_map_2)
+  !!call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read')
+  
+  call wall_time(wall_2)
+  call cpu_time(cpu_2)
+
+  integer*8                      :: get_mo_map_size, mo_map_size
+  mo_map_size = get_mo_map_size()
+  
+  print*,'MO integrals provided:'
+  print*,' Size of MO map           ', map_mb(mo_integrals_map),'+',map_mb(mo_integrals_map_2),'MB'
+  print*,' Number of MO integrals: ',  mo_map_size
+  print*,' cpu  time :',cpu_2 - cpu_1, 's'
+  print*,' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')'
+  
+end subroutine mo_map_fill_from_df_dot
+
+subroutine mo_map_fill_from_df_single
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! fill mo bielec integral map using 3-index df integrals
+  END_DOC
+
+  integer :: i,k,j,l,mu
+  integer :: ki,kk,kj,kl
+  integer :: ii,ik,ij,il
+  integer :: kikk2,kjkl2,jl2,ik2
+  integer :: i_mo,j_mo,i_df
+
+  complex*16,allocatable :: ints_ik(:,:,:), ints_jl(:,:,:)
+
+  complex*16 :: integral,mjl,mik
+  integer                        :: n_integrals_1, n_integrals_2
+  integer                        :: size_buffer
+  integer(key_kind),allocatable  :: buffer_i_1(:), buffer_i_2(:)
+  real(integral_kind),allocatable :: buffer_values_1(:), buffer_values_2(:)
+  double precision               :: tmp_re,tmp_im
+  integer                        :: mo_num_kpt_2
+
+  double precision               :: cpu_1, cpu_2, wall_1, wall_2, wall_0
+  double precision               :: map_mb
+
+  logical :: use_map1
+  integer(key_kind) :: idx_tmp
+  double precision :: sign
+
+  mo_num_kpt_2 = mo_num_per_kpt * mo_num_per_kpt
+
+  size_buffer = min(mo_num_per_kpt*mo_num_per_kpt*mo_num_per_kpt,16000000)
+  print*, 'Providing the mo_bielec integrals from 3-index df integrals'
+  call write_time(6)
+!  call ezfio_set_integrals_bielec_disk_access_mo_integrals('Write')
+!  TOUCH read_mo_integrals read_ao_integrals write_mo_integrals write_ao_integrals 
+
+  call wall_time(wall_1)
+  call cpu_time(cpu_1)
+
+  allocate( ints_jl(mo_num_per_kpt,mo_num_per_kpt,df_num))
+  allocate( ints_ik(mo_num_per_kpt,mo_num_per_kpt,df_num))
+
+  wall_0 = wall_1
+  do kl=1, kpt_num
+    do kj=1, kl
+      call idx2_tri_int(kj,kl,kjkl2)
+      if (kj < kl) then
+        do i_mo=1,mo_num_per_kpt
+          do j_mo=1,mo_num_per_kpt
+            do i_df=1,df_num
+              ints_jl(i_mo,j_mo,i_df) = dconjg(df_mo_integrals_complex(j_mo,i_mo,i_df,kjkl2))
+            enddo
+          enddo
+        enddo
+      else
+        ints_jl = df_mo_integrals_complex(:,:,:,kjkl2)
+      endif
+
+      do kk=1,kl
+        ki=kconserv(kl,kk,kj)
+        if (ki>kl) cycle
+        call idx2_tri_int(ki,kk,kikk2)
+        if (ki < kk) then
+          do i_mo=1,mo_num_per_kpt
+            do j_mo=1,mo_num_per_kpt
+              do i_df=1,df_num
+                ints_ik(i_mo,j_mo,i_df) = dconjg(df_mo_integrals_complex(j_mo,i_mo,i_df,kikk2))
+              enddo
+            enddo
+          enddo
+!          ints_ik = conjg(reshape(df_mo_integral_array(:,:,:,kikk2),(/mo_num_per_kpt,mo_num_per_kpt,df_num/),order=(/2,1,3/)))
+        else
+          ints_ik = df_mo_integrals_complex(:,:,:,kikk2)
+        endif
+
+        !$OMP PARALLEL PRIVATE(i,k,j,l,ii,ik,ij,il,jl2,ik2, &
+            !$OMP  mu, mik, mjl, &
+            !$OMP  n_integrals_1, buffer_i_1, buffer_values_1, &
+            !$OMP  n_integrals_2, buffer_i_2, buffer_values_2, &
+            !$OMP  idx_tmp, tmp_re, tmp_im, integral,sign,use_map1) &
+            !$OMP  DEFAULT(NONE)  &
+            !$OMP  SHARED(size_buffer, kpt_num, df_num, mo_num_per_kpt, mo_num_kpt_2, &
+            !$OMP  kl,kj,kjkl2,ints_jl, &
+            !$OMP  ki,kk,kikk2,ints_ik, &
+            !$OMP  kconserv, df_mo_integrals_complex, mo_integrals_threshold, &
+            !$OMP  mo_integrals_map, mo_integrals_map_2)
+
+        allocate( &
+          buffer_i_1(size_buffer), &
+          buffer_i_2(size_buffer), &
+          buffer_values_1(size_buffer), &
+          buffer_values_2(size_buffer) &
+        )
+      
+        n_integrals_1=0
+        n_integrals_2=0
+        !$OMP DO SCHEDULE(guided)
+        do mu=1,df_num
+          do il=1,mo_num_per_kpt
+            l=il+(kl-1)*mo_num_per_kpt
+            do ij=1,mo_num_per_kpt
+              j=ij+(kj-1)*mo_num_per_kpt
+              if (j>l) exit
+              call idx2_tri_int(j,l,jl2)
+              mjl = ints_jl(ij,il,mu)
+              if (mjl.eq.(0.d0,0.d0)) cycle
+              do ik=1,mo_num_per_kpt
+                k=ik+(kk-1)*mo_num_per_kpt
+                if (k>l) exit
+                do ii=1,mo_num_per_kpt
+                  i=ii+(ki-1)*mo_num_per_kpt
+                  if ((j==l) .and. (i>k)) exit
+                  call idx2_tri_int(i,k,ik2)
+                  if (ik2 > jl2) exit
+                  mik = ints_ik(ii,ik,mu)
+                  integral = mik * dconjg(mjl)
+!                  print*,i,k,j,l,real(integral),imag(integral)
+                  if (cdabs(integral) < mo_integrals_threshold) then
+                    cycle
+                  endif
+                  call ao_two_e_integral_complex_map_idx_sign(i,j,k,l,use_map1,idx_tmp,sign)
+                  tmp_re = dble(integral)
+                  tmp_im = dimag(integral)
+                  if (use_map1) then
+                    n_integrals_1 += 1
+                    buffer_i_1(n_integrals_1)=idx_tmp
+                    buffer_values_1(n_integrals_1)=tmp_re
+                    if (sign.ne.0.d0) then
+                      n_integrals_1 += 1
+                      buffer_i_1(n_integrals_1)=idx_tmp+1
+                      buffer_values_1(n_integrals_1)=tmp_im*sign
+                    endif 
+                    if (n_integrals_1 >= size(buffer_i_1)-1) then
+                      !call map_append(mo_integrals_map, buffer_i_1, buffer_values_1, n_integrals_1)
+                      call insert_into_mo_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1,mo_integrals_threshold)
+                      n_integrals_1 = 0
+                    endif
+                  else
+                    n_integrals_2 += 1
+                    buffer_i_2(n_integrals_2)=idx_tmp
+                    buffer_values_2(n_integrals_2)=tmp_re
+                    if (sign.ne.0.d0) then
+                      n_integrals_2 += 1
+                      buffer_i_2(n_integrals_2)=idx_tmp+1
+                      buffer_values_2(n_integrals_2)=tmp_im*sign
+                    endif
+                    if (n_integrals_2 >= size(buffer_i_2)-1) then
+                      !call map_append(mo_integrals_map_2, buffer_i_2, buffer_values_2, n_integrals_2)
+                      call insert_into_mo_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2,mo_integrals_threshold)
+                      n_integrals_2 = 0
+                    endif
+                  endif
+
+                enddo !ii
+              enddo !ik
+            enddo !ij
+          enddo !il
+        enddo !mu
+        !$OMP END DO NOWAIT
+
+        if (n_integrals_1 > 0) then
+          !call map_append(mo_integrals_map, buffer_i_1, buffer_values_1, n_integrals_1)
+          call insert_into_mo_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1,mo_integrals_threshold)
+        endif
+        if (n_integrals_2 > 0) then
+          !call map_append(mo_integrals_map_2, buffer_i_2, buffer_values_2, n_integrals_2)
+          call insert_into_mo_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2,mo_integrals_threshold)
+        endif
+        deallocate( &
+          buffer_i_1, &
+          buffer_i_2, &
+          buffer_values_1, &
+          buffer_values_2 &
+          )
+        !$OMP END PARALLEL
+      enddo !kk
+    enddo !kj
+    call wall_time(wall_2)
+    if (wall_2 - wall_0 > 1.d0) then
+      wall_0 = wall_2
+      print*, 100.*float(kl)/float(kpt_num), '% in ',               &
+          wall_2-wall_1,'s',map_mb(mo_integrals_map),'+',map_mb(mo_integrals_map_2),'MB'
+    endif
+
+  enddo !kl
+  deallocate( ints_jl,ints_ik )
+
+  !call map_sort(mo_integrals_map)
+  !call map_unique(mo_integrals_map)
+  !call map_sort(mo_integrals_map_2)
+  !call map_unique(mo_integrals_map_2)
+  call map_merge(mo_integrals_map)
+  call map_merge(mo_integrals_map_2)
+  !!call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints_complex_1',mo_integrals_map)
+  !!call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints_complex_2',mo_integrals_map_2)
+  !!call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read')
+  
+  call wall_time(wall_2)
+  call cpu_time(cpu_2)
+
+  integer*8                      :: get_mo_map_size, mo_map_size
+  mo_map_size = get_mo_map_size()
+  
+  print*,'MO integrals provided:'
+  print*,' Size of MO map           ', map_mb(mo_integrals_map),'+',map_mb(mo_integrals_map_2),'MB'
+  print*,' Number of MO integrals: ',  mo_map_size
+  print*,' cpu  time :',cpu_2 - cpu_1, 's'
+  print*,' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')'
+  
+end subroutine mo_map_fill_from_df_single
+
+subroutine mo_map_fill_from_df
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! fill mo bielec integral map using 3-index df integrals
+  END_DOC
+
+  integer :: i,k,j,l
+  integer :: ki,kk,kj,kl
+  integer :: ii,ik,ij,il
+  integer :: kikk2,kjkl2,jl2,ik2
+  integer :: i_mo,j_mo,i_df
+
+  complex*16,allocatable :: ints_ik(:,:,:), ints_jl(:,:,:), ints_ikjl(:,:,:,:)
+
+  complex*16 :: integral
+  integer                        :: n_integrals_1, n_integrals_2
+  integer                        :: size_buffer
+  integer(key_kind),allocatable  :: buffer_i_1(:), buffer_i_2(:)
+  real(integral_kind),allocatable :: buffer_values_1(:), buffer_values_2(:)
+  double precision               :: tmp_re,tmp_im
+  integer                        :: mo_num_kpt_2
+
+  double precision               :: cpu_1, cpu_2, wall_1, wall_2, wall_0
+  double precision               :: map_mb
+
+  logical :: use_map1
+  integer(keY_kind) :: idx_tmp
+  double precision :: sign
+
+  mo_num_kpt_2 = mo_num_per_kpt * mo_num_per_kpt
+
+  size_buffer = min(mo_num_per_kpt*mo_num_per_kpt*mo_num_per_kpt,16000000)
+  print*, 'Providing the mo_bielec integrals from 3-index df integrals'
+  call write_time(6)
+!  call ezfio_set_integrals_bielec_disk_access_mo_integrals('Write')
+!  TOUCH read_mo_integrals read_ao_integrals write_mo_integrals write_ao_integrals 
+
+  call wall_time(wall_1)
+  call cpu_time(cpu_1)
+
+  allocate( ints_jl(mo_num_per_kpt,mo_num_per_kpt,df_num))
+
+  wall_0 = wall_1
+  do kl=1, kpt_num
+    do kj=1, kl
+      call idx2_tri_int(kj,kl,kjkl2)
+      if (kj < kl) then
+        do i_mo=1,mo_num_per_kpt
+          do j_mo=1,mo_num_per_kpt
+            do i_df=1,df_num
+              ints_jl(i_mo,j_mo,i_df) = dconjg(df_mo_integrals_complex(j_mo,i_mo,i_df,kjkl2))
+            enddo
+          enddo
+        enddo
+      else
+        ints_jl = df_mo_integrals_complex(:,:,:,kjkl2)
+      endif
+
+  !$OMP PARALLEL PRIVATE(i,k,j,l,ki,kk,ii,ik,ij,il,kikk2,jl2,ik2, &
+      !$OMP  ints_ik, ints_ikjl, i_mo, j_mo, i_df, &
+      !$OMP  n_integrals_1, buffer_i_1, buffer_values_1, &
+      !$OMP  n_integrals_2, buffer_i_2, buffer_values_2, &
+      !$OMP  idx_tmp, tmp_re, tmp_im, integral,sign,use_map1) &
+      !$OMP  DEFAULT(NONE)  &
+      !$OMP  SHARED(size_buffer, kpt_num, df_num, mo_num_per_kpt, mo_num_kpt_2, &
+      !$OMP  kl,kj,kjkl2,ints_jl, & 
+      !$OMP  kconserv, df_mo_integrals_complex, mo_integrals_threshold, mo_integrals_map, mo_integrals_map_2)
+  
+  allocate( &
+    ints_ik(mo_num_per_kpt,mo_num_per_kpt,df_num), &
+    ints_ikjl(mo_num_per_kpt,mo_num_per_kpt,mo_num_per_kpt,mo_num_per_kpt), &
+    buffer_i_1(size_buffer), &
+    buffer_i_2(size_buffer), &
+    buffer_values_1(size_buffer), &
+    buffer_values_2(size_buffer) &
+  )
+
+  !$OMP DO SCHEDULE(guided)
+      do kk=1,kl
+        ki=kconserv(kl,kk,kj)
+        if (ki>kl) cycle
+      !  if ((kl == kj) .and. (ki > kk)) cycle
+        call idx2_tri_int(ki,kk,kikk2)
+      !  if (kikk2 > kjkl2) cycle
+        if (ki < kk) then
+          do i_mo=1,mo_num_per_kpt
+            do j_mo=1,mo_num_per_kpt
+              do i_df=1,df_num
+                ints_ik(i_mo,j_mo,i_df) = dconjg(df_mo_integrals_complex(j_mo,i_mo,i_df,kikk2))
+              enddo
+            enddo
+          enddo
+!          ints_ik = conjg(reshape(df_mo_integral_array(:,:,:,kikk2),(/mo_num_per_kpt,mo_num_per_kpt,df_num/),order=(/2,1,3/)))
+        else
+          ints_ik = df_mo_integrals_complex(:,:,:,kikk2)
+        endif
+
+        call zgemm('N','T', mo_num_kpt_2, mo_num_kpt_2, df_num, &
+               (1.d0,0.d0), ints_ik, mo_num_kpt_2, &
+               ints_jl, mo_num_kpt_2, &
+               (0.d0,0.d0), ints_ikjl, mo_num_kpt_2)
+
+        n_integrals_1=0
+        n_integrals_2=0
+        do il=1,mo_num_per_kpt
+          l=il+(kl-1)*mo_num_per_kpt
+          do ij=1,mo_num_per_kpt
+            j=ij+(kj-1)*mo_num_per_kpt
+            if (j>l) exit
+            call idx2_tri_int(j,l,jl2)
+            do ik=1,mo_num_per_kpt
+              k=ik+(kk-1)*mo_num_per_kpt
+              if (k>l) exit
+              do ii=1,mo_num_per_kpt
+                i=ii+(ki-1)*mo_num_per_kpt
+                if ((j==l) .and. (i>k)) exit
+                call idx2_tri_int(i,k,ik2)
+                if (ik2 > jl2) exit
+                integral = ints_ikjl(ii,ik,ij,il)
+!                print*,i,k,j,l,real(integral),imag(integral)
+                if (cdabs(integral) < mo_integrals_threshold) then
+                  cycle
+                endif
+                call ao_two_e_integral_complex_map_idx_sign(i,j,k,l,use_map1,idx_tmp,sign)
+                tmp_re = dble(integral)
+                tmp_im = dimag(integral)
+                if (use_map1) then
+                  n_integrals_1 += 1
+                  buffer_i_1(n_integrals_1)=idx_tmp
+                  buffer_values_1(n_integrals_1)=tmp_re
+                  if (sign.ne.0.d0) then 
+                    n_integrals_1 += 1
+                    buffer_i_1(n_integrals_1)=idx_tmp+1
+                    buffer_values_1(n_integrals_1)=tmp_im*sign
+                  endif 
+                  if (n_integrals_1 >= size(buffer_i_1)-1) then
+                    call map_append(mo_integrals_map, buffer_i_1, buffer_values_1, n_integrals_1)
+                    !call insert_into_ao_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1)
+                    n_integrals_1 = 0
+                  endif
+                else
+                  n_integrals_2 += 1
+                  buffer_i_2(n_integrals_2)=idx_tmp
+                  buffer_values_2(n_integrals_2)=tmp_re
+                  if (sign.ne.0.d0) then
+                    n_integrals_2 += 1
+                    buffer_i_2(n_integrals_2)=idx_tmp+1
+                    buffer_values_2(n_integrals_2)=tmp_im*sign
+                  endif 
+                  if (n_integrals_2 >= size(buffer_i_2)-1) then
+                    call map_append(mo_integrals_map_2, buffer_i_2, buffer_values_2, n_integrals_2)
+                    !call insert_into_ao_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2)
+                    n_integrals_2 = 0
+                  endif
+                endif
+
+              enddo !ii
+            enddo !ik
+          enddo !ij
+        enddo !il
+
+        if (n_integrals_1 > 0) then
+          call map_append(mo_integrals_map, buffer_i_1, buffer_values_1, n_integrals_1)
+          !call insert_into_ao_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1)
+        endif
+        if (n_integrals_2 > 0) then
+          call map_append(mo_integrals_map_2, buffer_i_2, buffer_values_2, n_integrals_2)
+          !call insert_into_ao_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2)
+        endif
+      enddo !kk
+  !$OMP END DO NOWAIT
+  deallocate( &
+    ints_ik, &
+    ints_ikjl, &
+    buffer_i_1, &
+    buffer_i_2, &
+    buffer_values_1, &
+    buffer_values_2 &
+    )
+  !$OMP END PARALLEL
+    enddo !kj
+    call wall_time(wall_2)
+    if (wall_2 - wall_0 > 1.d0) then
+      wall_0 = wall_2
+      print*, 100.*float(kl)/float(kpt_num), '% in ',               &
+          wall_2-wall_1,'s',map_mb(mo_integrals_map),'+',map_mb(mo_integrals_map_2),'MB'
+    endif
+
+  enddo !kl
+  deallocate( ints_jl ) 
+
+  call map_sort(mo_integrals_map)
+  call map_unique(mo_integrals_map)
+  call map_sort(mo_integrals_map_2)
+  call map_unique(mo_integrals_map_2)
+  !call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints_complex_1',mo_integrals_map)
+  !call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints_complex_2',mo_integrals_map_2)
+  !call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read')
+  
+  call wall_time(wall_2)
+  call cpu_time(cpu_2)
+
+  integer*8                      :: get_mo_map_size, mo_map_size
+  mo_map_size = get_mo_map_size()
+  
+  print*,'MO integrals provided:'
+  print*,' Size of MO map           ', map_mb(mo_integrals_map),'+',map_mb(mo_integrals_map_2),'MB'
+  print*,' Number of MO integrals: ',  mo_map_size
+  print*,' cpu  time :',cpu_2 - cpu_1, 's'
+  print*,' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')'
+  
+end subroutine mo_map_fill_from_df
+
+subroutine df_mo_from_df_ao(df_mo,df_ao,n_mo,n_ao,n_df,n_k_pairs)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! create 3-idx mo ints from 3-idx ao ints
+  END_DOC
+  integer,intent(in) :: n_mo,n_ao,n_df,n_k_pairs
+  complex*16,intent(out) :: df_mo(n_mo,n_mo,n_df,n_k_pairs)
+  complex*16,intent(in)  :: df_ao(n_ao,n_ao,n_df,n_k_pairs)
+  integer :: kl,kj,kjkl2,mu,p,q
+  complex*16,allocatable :: coef_l(:,:), coef_j(:,:), ints_jl(:,:), ints_tmp(:,:)
+  double precision :: wall_1,wall_2,cpu_1,cpu_2
+
+  print*,'providing 3-index MO integrals from 3-index AO integrals'
+
+  call wall_time(wall_1)
+  call cpu_time(cpu_1)
+  allocate( &
+              coef_l(n_ao,n_mo),&
+              coef_j(n_ao,n_mo),&
+             ints_jl(n_ao,n_ao),&
+            ints_tmp(n_mo,n_ao)&
+          )
+
+  do kl=1, kpt_num
+    coef_l = mo_coef_complex_kpts(:,:,kl)
+    do kj=1, kl
+      coef_j = mo_coef_complex_kpts(:,:,kj)
+      kjkl2 = kj+shiftr(kl*kl-kl,1)
+      do mu=1, df_num
+        ints_jl = df_ao(:,:,mu,kjkl2)
+        call zgemm('C','N',n_mo,n_ao,n_ao, &
+              (1.d0,0.d0), coef_l, n_ao, &
+              ints_jl, n_ao, &
+              (0.d0,0.d0), ints_tmp, n_mo)
+
+        call zgemm('N','N',n_mo,n_mo,n_ao, &
+              (1.d0,0.d0), ints_tmp, n_mo, &
+              coef_j, n_ao, &
+              (0.d0,0.d0), df_mo(:,:,mu,kjkl2), n_mo)
+      enddo
+    enddo
+    call wall_time(wall_2)
+    print*,100.*float(kl*(kl+1))/(2.*n_k_pairs), '% in ', &
+                wall_2-wall_1, 's'
+  enddo
+
+  deallocate( &
+            coef_l, &
+            coef_j, &
+            ints_jl, &
+            ints_tmp &
+          )
+  call wall_time(wall_2)
+  call cpu_time(cpu_2)
+  print*,' 3-idx MO provided'
+  print*,'  cpu  time:',cpu_2-cpu_1,'s'
+  print*,'  wall time:',wall_2-wall_1,'s  ( x ',(cpu_2-cpu_1)/(wall_2-wall_1),')'
+
+end subroutine df_mo_from_df_ao
diff --git a/src/mo_two_e_ints/four_idx_novvvv_cplx.irp.f b/src/mo_two_e_ints/four_idx_novvvv_cplx.irp.f
new file mode 100644
index 00000000..0f129256
--- /dev/null
+++ b/src/mo_two_e_ints/four_idx_novvvv_cplx.irp.f
@@ -0,0 +1,247 @@
+BEGIN_PROVIDER [ complex*16, mo_coef_novirt_complex, (ao_num,n_core_inact_act_orb) ]
+ implicit none
+ BEGIN_DOC
+ ! MO coefficients without virtual MOs
+ END_DOC
+ integer :: j,jj
+
+ do j=1,n_core_inact_act_orb
+   jj = list_core_inact_act(j)
+   mo_coef_novirt_complex(:,j) = mo_coef_complex(:,jj)
+ enddo
+
+END_PROVIDER
+
+subroutine ao_to_mo_novirt_complex(A_ao,LDA_ao,A_mo,LDA_mo)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the |AO| basis to the |MO| basis excluding virtuals
+  !
+  ! $C^\dagger.A_{ao}.C$
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  complex*16, intent(in)   :: A_ao(LDA_ao,ao_num)
+  complex*16, intent(out)  :: A_mo(LDA_mo,n_core_inact_act_orb)
+  complex*16, allocatable  :: T(:,:)
+
+  allocate ( T(ao_num,n_core_inact_act_orb) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+
+  call zgemm('N','N', ao_num, n_core_inact_act_orb, ao_num,          &
+      (1.d0,0.d0), A_ao,LDA_ao,                                             &
+      mo_coef_novirt_complex, size(mo_coef_novirt_complex,1),                        &
+      (0.d0,0.d0), T, size(T,1))
+  
+  call zgemm('C','N', n_core_inact_act_orb, n_core_inact_act_orb, ao_num,&
+      (1.d0,0.d0), mo_coef_novirt_complex,size(mo_coef_novirt_complex,1),  &
+      T, ao_num,                                                     &
+      (0.d0,0.d0), A_mo, size(A_mo,1))
+  
+  deallocate(T)
+end
+
+subroutine ao_to_mo_novirt_conjg_complex(A_ao,LDA_ao,A_mo,LDA_mo)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the |AO| basis to the |MO| basis excluding virtuals
+  !
+  ! $C^\dagger.A_{ao}.C^*$
+  ! half-transformed ints as handled by four_idx_novvvv need to use this
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  complex*16, intent(in)   :: A_ao(LDA_ao,ao_num)
+  complex*16, intent(out)  :: A_mo(LDA_mo,n_core_inact_act_orb)
+  complex*16, allocatable  :: T(:,:)
+
+  allocate ( T(ao_num,n_core_inact_act_orb) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+
+  call zgemm('N','N', ao_num, n_core_inact_act_orb, ao_num,          &
+      (1.d0,0.d0), A_ao,LDA_ao,                                             &
+      dconjg(mo_coef_novirt_complex), size(mo_coef_novirt_complex,1),                        &
+      (0.d0,0.d0), T, size(T,1))
+  
+  call zgemm('C','N', n_core_inact_act_orb, n_core_inact_act_orb, ao_num,&
+      (1.d0,0.d0), mo_coef_novirt_complex,size(mo_coef_novirt_complex,1),  &
+      T, ao_num,                                                     &
+      (0.d0,0.d0), A_mo, size(A_mo,1))
+  
+  deallocate(T)
+end
+
+
+subroutine four_idx_novvvv_complex
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Retransform MO integrals for next CAS-SCF step
+  END_DOC
+  integer                        :: i,j,k,l,n_integrals1,n_integrals2
+  logical                        :: use_map1
+  complex*16, allocatable        :: f(:,:,:), f2(:,:,:), d(:,:), T(:,:,:,:), T2(:,:,:,:)
+  complex*16, external           :: get_ao_two_e_integral_complex
+  integer(key_kind), allocatable :: idx1(:),idx2(:)
+  complex(integral_kind), allocatable :: values1(:),values2(:)
+  double precision               :: sign_tmp
+  integer(key_kind)              :: idx_tmp
+
+  integer                        :: p,q,r,s
+  allocate( T(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) , &
+            T2(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) )
+  
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+  !$OMP SHARED(mo_num,ao_num,T,n_core_inact_act_orb, &
+  !$OMP   mo_integrals_threshold,mo_integrals_map,           &
+  !$OMP   mo_integrals_map_2,ao_integrals_map_2,           &
+  !$OMP   list_core_inact_act,T2,ao_integrals_map)            &
+  !$OMP PRIVATE(i,j,k,l,p,q,r,s,idx1,idx2,values1,values2,n_integrals1, &
+  !$OMP   n_integrals2,use_map1,idx_tmp,sign_tmp,              &
+  !$OMP   f,f2,d)
+  allocate(f(ao_num,ao_num,ao_num), f2(ao_num,ao_num,ao_num), d(mo_num,mo_num), &
+           idx1(2*mo_num*mo_num), values1(2*mo_num*mo_num), &
+           idx2(2*mo_num*mo_num), values2(2*mo_num*mo_num) )
+  
+  ! <aa|vv>
+  !$OMP DO
+  do s=1,ao_num
+    do r=1,ao_num
+      do q=1,ao_num
+        do p=1,r
+          f (p,q,r) = get_ao_two_e_integral_complex(p,q,r,s,ao_integrals_map,ao_integrals_map_2)
+          f (r,q,p) = get_ao_two_e_integral_complex(r,q,p,s,ao_integrals_map,ao_integrals_map_2)
+        enddo
+      enddo
+    enddo
+    do r=1,ao_num
+      do q=1,ao_num
+        do p=1,ao_num
+          f2(p,q,r) = f(p,r,q)
+        enddo
+      enddo
+    enddo
+    ! f (p,q,r) = <pq|rs>
+    ! f2(p,q,r) = <pr|qs>
+
+    do r=1,ao_num
+      call ao_to_mo_novirt_conjg_complex(f (1,1,r),size(f ,1),T (1,1,r,s),size(T,1))
+      call ao_to_mo_novirt_complex(f2(1,1,r),size(f2,1),T2(1,1,r,s),size(T,1))
+    enddo
+    ! T (i,j,p,q) = <ij|rs>
+    ! T2(i,j,p,q) = <ir|js>
+
+  enddo
+  !$OMP END DO 
+
+  !$OMP DO
+  do j=1,n_core_inact_act_orb
+    do i=1,n_core_inact_act_orb
+      do s=1,ao_num
+        do r=1,ao_num
+          f (r,s,1) = T (i,j,r,s)
+          f2(r,s,1) = T2(i,j,r,s)
+        enddo
+      enddo
+      call ao_to_mo_noconjg_complex(f ,size(f ,1),d,size(d,1))
+      n_integrals1 = 0
+      n_integrals2 = 0
+      do l=1,mo_num
+        do k=1,mo_num
+          call ao_two_e_integral_complex_map_idx_sign(list_core_inact_act(i),list_core_inact_act(j),k,l,use_map1,idx_tmp,sign_tmp)
+          if (use_map1) then
+            n_integrals1+=1
+            values1(n_integrals1) = dble(d(k,l))
+            idx1(n_integrals1) = idx_tmp
+            if (sign_tmp /= 0.d0) then ! should always be true, but might change in the future
+              n_integrals1+=1
+              values1(n_integrals1) = sign_tmp*dimag(d(k,l))
+              idx1(n_integrals1) = idx_tmp+1
+            endif
+          else
+            n_integrals2+=1
+            values2(n_integrals2) = dble(d(k,l))
+            idx2(n_integrals2) = idx_tmp
+            if (sign_tmp /= 0.d0) then
+              n_integrals2+=1
+              values2(n_integrals2) = sign_tmp*dimag(d(k,l))
+              idx2(n_integrals2) = idx_tmp+1
+            endif
+          endif
+        enddo
+      enddo
+      call map_append(mo_integrals_map, idx1, values1, n_integrals1)
+      call map_append(mo_integrals_map_2, idx2, values2, n_integrals2)
+
+      call ao_to_mo(f2,size(f2,1),d,size(d,1))
+      n_integrals1 = 0
+      n_integrals2 = 0
+      do l=1,mo_num
+        do k=1,mo_num
+          call ao_two_e_integral_complex_map_idx_sign(list_core_inact_act(i),k,list_core_inact_act(j),l,use_map1,idx_tmp,sign_tmp)
+          if (use_map1) then
+            n_integrals1+=1
+            values1(n_integrals1) = dble(d(k,l))
+            idx1(n_integrals1) = idx_tmp
+            if (sign_tmp /= 0.d0) then ! should always be true, but might change in the future
+              n_integrals1+=1
+              values1(n_integrals1) = sign_tmp*dimag(d(k,l))
+              idx1(n_integrals1) = idx_tmp+1
+            endif
+          else
+            n_integrals2+=1
+            values2(n_integrals2) = dble(d(k,l))
+            idx2(n_integrals2) = idx_tmp
+            if (sign_tmp /= 0.d0) then
+              n_integrals2+=1
+              values2(n_integrals2) = sign_tmp*dimag(d(k,l))
+              idx2(n_integrals2) = idx_tmp+1
+            endif
+          endif
+        enddo
+      enddo
+      call map_append(mo_integrals_map, idx1, values1, n_integrals1)
+      call map_append(mo_integrals_map_2, idx2, values2, n_integrals2)
+    enddo
+  enddo
+  !$OMP END DO
+  deallocate(f,f2,d,idx1,idx2,values1,values2)
+  
+  !$OMP END PARALLEL
+
+  deallocate(T,T2)
+  
+  
+  call map_sort(mo_integrals_map)
+  call map_unique(mo_integrals_map)
+  call map_shrink(mo_integrals_map,real(mo_integrals_threshold,integral_kind))
+
+  call map_sort(mo_integrals_map_2)
+  call map_unique(mo_integrals_map_2)
+  call map_shrink(mo_integrals_map_2,real(mo_integrals_threshold,integral_kind))
+  
+end
+
+subroutine four_idx_novvvv2_complex
+  use bitmasks
+  implicit none
+  integer                        :: i
+  integer(bit_kind)              :: mask_ijkl(N_int,4)
+
+    print*, '<ix|ix>'
+    do i = 1,N_int
+      mask_ijkl(i,1) =  core_inact_act_bitmask_4(i,1)
+      mask_ijkl(i,2) =  full_ijkl_bitmask_4(i,1)
+      mask_ijkl(i,3) =  core_inact_act_bitmask_4(i,1)
+      mask_ijkl(i,4) =  full_ijkl_bitmask_4(i,1)
+    enddo
+    call add_integrals_to_map_complex(mask_ijkl)
+
+    print*, '<ii|vv>'
+    do i = 1,N_int
+      mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1)
+      mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1)
+      mask_ijkl(i,3) = virt_bitmask(i,1)
+      mask_ijkl(i,4) = virt_bitmask(i,1)
+    enddo
+    call add_integrals_to_map_complex(mask_ijkl)
+
+end
diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f
index 73e31182..438f4102 100644
--- a/src/mo_two_e_ints/integrals_3_index.irp.f
+++ b/src/mo_two_e_ints/integrals_3_index.irp.f
@@ -25,3 +25,70 @@
 
 END_PROVIDER
 
+ BEGIN_PROVIDER [complex*16, big_array_coulomb_integrals_complex, (mo_num,mo_num, mo_num)]
+&BEGIN_PROVIDER [complex*16, big_array_exchange_integrals_complex,(mo_num,mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! big_array_coulomb_integrals(j,i,k)  = <ij|kj> = (ik|jj)
+ ! big_array_exchange_integrals(j,i,k) = <ij|jk> = (ij|jk)
+ !    for both of these, i and k must be from same kpt for integral to be nonzero
+ ! TODO: only loop over half, and assign two elements:
+ !        b_a_coul_int(j,i,k) = b_a_coul_int(j,k,i)*
+ !        b_a_exch_int(j,i,k) = b_a_exch_int(j,k,i)*
+ END_DOC
+ integer :: i,j,k,l
+ complex*16 :: get_two_e_integral_complex
+ complex*16 :: integral
+
+ do k = 1, mo_num
+  do i = 1, mo_num
+   do j = 1, mo_num
+     l = j
+     integral = get_two_e_integral_complex(i,j,k,l,mo_integrals_map,mo_integrals_map_2)
+     big_array_coulomb_integrals_complex(j,i,k) = integral
+     l = j
+     integral = get_two_e_integral_complex(i,j,l,k,mo_integrals_map,mo_integrals_map_2)
+     big_array_exchange_integrals_complex(j,i,k) = integral
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [complex*16, big_array_coulomb_integrals_kpts, (mo_num_per_kpt,kpt_num,mo_num_per_kpt, mo_num_per_kpt,kpt_num)]
+&BEGIN_PROVIDER [complex*16, big_array_exchange_integrals_kpts,(mo_num_per_kpt,kpt_num,mo_num_per_kpt, mo_num_per_kpt,kpt_num)]
+ implicit none
+ BEGIN_DOC
+ ! big_array_coulomb_integrals(j,kj,i,k,ki)  = <ij|kj> = (ik|jj)
+ ! big_array_exchange_integrals(j,kj,i,k,ki) = <ij|jk> = (ij|jk)
+ !    for both of these, i and k must be from same kpt for integral to be nonzero
+ ! TODO: only loop over half, and assign two elements:
+ !        b_a_coul_int(j,i,k) = b_a_coul_int(j,k,i)*
+ !        b_a_exch_int(j,i,k) = b_a_exch_int(j,k,i)*
+ END_DOC
+ integer :: i,j,k,l
+ integer :: ki,kj,kk,kl
+ complex*16 :: get_two_e_integral_kpts
+ complex*16 :: integral
+ 
+ do ki = 1,kpt_num
+  kk=ki
+  do k = 1, mo_num_per_kpt
+   do i = 1, mo_num_per_kpt
+    do kj = 1,kpt_num
+     kl=kj
+     do j = 1, mo_num_per_kpt
+       l = j
+       integral = get_two_e_integral_kpts(i,j,k,l,ki,kj,kk,kl,mo_integrals_map,mo_integrals_map_2)
+       big_array_coulomb_integrals_kpts(j,kj,i,k,ki) = integral
+       l = j
+       integral = get_two_e_integral_kpts(i,j,l,k,ki,kj,kl,kk,mo_integrals_map,mo_integrals_map_2)
+       big_array_exchange_integrals_kpts(j,kj,i,k,ki) = integral
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER
+
diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f
index 661add2e..3b251f9f 100644
--- a/src/mo_two_e_ints/map_integrals.irp.f
+++ b/src/mo_two_e_ints/map_integrals.irp.f
@@ -4,6 +4,7 @@ use map_module
 !! ======
 
 BEGIN_PROVIDER [ type(map_type), mo_integrals_map ]
+&BEGIN_PROVIDER [ type(map_type), mo_integrals_map_2 ]
   implicit none
   BEGIN_DOC
   ! MO integrals
@@ -11,9 +12,17 @@ BEGIN_PROVIDER [ type(map_type), mo_integrals_map ]
   integer(key_kind)              :: key_max
   integer(map_size_kind)         :: sze
   call two_e_integrals_index(mo_num,mo_num,mo_num,mo_num,key_max)
-  sze = key_max
-  call map_init(mo_integrals_map,sze)
-  print*, 'MO map initialized: ', sze
+  if (is_complex) then
+    sze = key_max*2
+    call map_init(mo_integrals_map,sze)
+    call map_init(mo_integrals_map_2,sze)
+    print*,  'MO maps initialized (complex): ', 2*sze
+  else
+    sze = key_max
+    call map_init(mo_integrals_map,sze)
+    call map_init(mo_integrals_map_2,1_map_size_kind)
+    print*, 'MO map initialized: ', sze
+  endif
 END_PROVIDER
 
 subroutine insert_into_mo_integrals_map(n_integrals,                 &
@@ -85,7 +94,6 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:128_8*128_8*128_8*12
 
 END_PROVIDER
 
-
 double precision function get_two_e_integral(i,j,k,l,map)
   use map_module
   implicit none
@@ -118,7 +126,6 @@ double precision function get_two_e_integral(i,j,k,l,map)
   endif
 end
 
-
 double precision function mo_two_e_integral(i,j,k,l)
   implicit none
   BEGIN_DOC
@@ -272,13 +279,15 @@ subroutine get_mo_two_e_integrals_exch_ii(k,l,sze,out_val,map)
 
 end
 
-
 integer*8 function get_mo_map_size()
   implicit none
   BEGIN_DOC
   ! Return the number of elements in the MO map
   END_DOC
   get_mo_map_size = mo_integrals_map % n_elements
+  if (is_complex) then
+    get_mo_map_size += mo_integrals_map_2 % n_elements
+  endif
 end
 
 
diff --git a/src/mo_two_e_ints/map_integrals_cplx.irp.f b/src/mo_two_e_ints/map_integrals_cplx.irp.f
new file mode 100644
index 00000000..5be8fd3c
--- /dev/null
+++ b/src/mo_two_e_ints/map_integrals_cplx.irp.f
@@ -0,0 +1,586 @@
+use map_module
+
+subroutine insert_into_mo_integrals_map_2(n_integrals,               &
+      buffer_i, buffer_values, thr)
+  use map_module
+  implicit none
+
+  BEGIN_DOC
+  ! Create new entry into MO map, or accumulate in an existing entry
+  END_DOC
+
+  integer, intent(in)                :: n_integrals
+  integer(key_kind), intent(inout)   :: buffer_i(n_integrals)
+  real(integral_kind), intent(inout) :: buffer_values(n_integrals)
+  real(integral_kind), intent(in)    :: thr
+  call map_update(mo_integrals_map_2, buffer_i, buffer_values, n_integrals, thr)
+end
+
+BEGIN_PROVIDER [ complex*16, mo_integrals_cache_complex, (0_8:128_8*128_8*128_8*128_8) ]
+ implicit none
+ BEGIN_DOC
+ ! Cache of MO integrals for fast access
+ END_DOC
+ PROVIDE mo_two_e_integrals_in_map
+ integer*8                      :: i,j,k,l
+ integer*4                      :: i4,j4,k4,l4
+ integer*8                      :: ii
+ integer(key_kind)              :: idx
+ complex(integral_kind)            :: integral
+ complex*16 :: get_two_e_integral_complex_simple
+ FREE ao_integrals_cache
+ !$OMP PARALLEL DO PRIVATE (i,j,k,l,i4,j4,k4,l4,idx,ii,integral)
+ do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8
+   l4 = int(l,4)
+   do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8
+     k4 = int(k,4)
+     do j=mo_integrals_cache_min_8,mo_integrals_cache_max_8
+       j4 = int(j,4)
+       do i=mo_integrals_cache_min_8,mo_integrals_cache_max_8
+         i4 = int(i,4)
+         !DIR$ FORCEINLINE
+         integral = get_two_e_integral_complex_simple(i4,j4,k4,l4,&
+                    mo_integrals_map,mo_integrals_map_2)
+         ii = l-mo_integrals_cache_min_8
+         ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8)
+         ii = ior( shiftl(ii,7), j-mo_integrals_cache_min_8)
+         ii = ior( shiftl(ii,7), i-mo_integrals_cache_min_8)
+         mo_integrals_cache_complex(ii) = integral
+       enddo
+     enddo
+   enddo
+ enddo
+ !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+
+complex*16 function get_two_e_integral_complex_simple(i,j,k,l,map,map2) result(result)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Gets one MO bi-electronic integral from the MO map
+  ! reuse ao map/idx/sign function
+  END_DOC
+  integer, intent(in)            :: i,j,k,l
+  integer(key_kind)              :: idx
+  real(integral_kind)            :: tmp_re, tmp_im
+  type(map_type), intent(inout)  :: map,map2
+  complex(integral_kind)         :: tmp
+  logical                        :: use_map1
+  double precision               :: sign
+  PROVIDE mo_two_e_integrals_in_map
+  call ao_two_e_integral_complex_map_idx_sign(i,j,k,l,use_map1,idx,sign)
+  if (use_map1) then
+    call map_get(map,idx,tmp_re)
+    call map_get(map,idx+1,tmp_im)
+    tmp_im *= sign
+  else
+    call map_get(map2,idx,tmp_re)
+    if (sign/=0.d0) then
+      call map_get(map2,idx+1,tmp_im)
+      tmp_im *= sign
+    else
+      tmp_im=0.d0
+    endif
+  endif
+  tmp = dcmplx(tmp_re,tmp_im)
+  result = tmp
+end
+
+complex*16 function get_two_e_integral_complex(i,j,k,l,map,map2)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Returns one integral <ij|kl> in the MO basis
+  ! TODO: finish this
+  END_DOC
+  integer, intent(in)            :: i,j,k,l
+  integer(key_kind)              :: idx
+  integer                        :: ii
+  integer*8                      :: ii_8
+  type(map_type), intent(inout)  :: map,map2
+  complex(integral_kind)         :: tmp
+  complex(integral_kind)         :: get_two_e_integral_complex_simple
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache_complex
+  ii = l-mo_integrals_cache_min
+  ii = ior(ii, k-mo_integrals_cache_min)
+  ii = ior(ii, j-mo_integrals_cache_min)
+  ii = ior(ii, i-mo_integrals_cache_min)
+  if (iand(ii, -128) /= 0) then
+    tmp = get_two_e_integral_complex_simple(i,j,k,l,map,map2)
+  else
+    ii_8 = int(l,8)-mo_integrals_cache_min_8
+    ii_8 = ior( shiftl(ii_8,7), int(k,8)-mo_integrals_cache_min_8)
+    ii_8 = ior( shiftl(ii_8,7), int(j,8)-mo_integrals_cache_min_8)
+    ii_8 = ior( shiftl(ii_8,7), int(i,8)-mo_integrals_cache_min_8)
+    tmp = mo_integrals_cache_complex(ii_8)
+  endif
+  get_two_e_integral_complex = tmp
+end
+
+complex*16 function get_two_e_integral_kpts(i,j,k,l,ki,kj,kk,kl,map,map2)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Returns one integral <ij|kl> in the MO basis
+  ! TODO: finish this
+  END_DOC
+  integer, intent(in)            :: i,j,k,l
+  integer, intent(in)            :: ki,kj,kk,kl
+  type(map_type), intent(inout)  :: map,map2
+  complex*16                     :: get_two_e_integral_complex
+  complex*16 :: tmp
+  tmp = get_two_e_integral_complex( i + mo_num_per_kpt*(ki-1), &
+                                    j + mo_num_per_kpt*(kj-1), &
+                                    k + mo_num_per_kpt*(kk-1), &
+                                    l + mo_num_per_kpt*(kl-1), map,map2)
+  get_two_e_integral_kpts = tmp
+end
+
+complex*16 function mo_two_e_integral_complex(i,j,k,l)
+  implicit none
+  BEGIN_DOC
+  ! Returns one integral <ij|kl> in the MO basis
+  END_DOC
+  integer, intent(in)            :: i,j,k,l
+  complex*16                     :: get_two_e_integral_complex
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache_complex
+  PROVIDE mo_two_e_integrals_in_map
+  !DIR$ FORCEINLINE
+  mo_two_e_integral_complex = get_two_e_integral_complex(i,j,k,l,mo_integrals_map,mo_integrals_map_2)
+  return
+end
+
+complex*16 function mo_two_e_integral_kpts(i,j,k,l,ki,kj,kk,kl)
+  implicit none
+  BEGIN_DOC
+  ! Returns one integral <ij|kl> in the MO basis
+  END_DOC
+  integer, intent(in)            :: i,j,k,l
+  integer, intent(in)            :: ki,kj,kk,kl
+  complex*16                     :: get_two_e_integral_complex
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache_complex
+  PROVIDE mo_two_e_integrals_in_map
+  !DIR$ FORCEINLINE
+  mo_two_e_integral_kpts = get_two_e_integral_complex( &
+                                    i + mo_num_per_kpt*(ki-1), &
+                                    j + mo_num_per_kpt*(kj-1), &
+                                    k + mo_num_per_kpt*(kk-1), &
+                                    l + mo_num_per_kpt*(kl-1),mo_integrals_map,mo_integrals_map_2)
+  return
+end
+
+subroutine get_mo_two_e_integrals_complex(j,k,l,sze,out_val,map,map2)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Returns multiple integrals <ij|kl> in the MO basis, all
+  ! i for j,k,l fixed.
+  END_DOC
+  integer, intent(in)            :: j,k,l, sze
+  complex*16, intent(out)        :: out_val(sze)
+  type(map_type), intent(inout)  :: map,map2
+  integer                        :: i
+  complex*16, external           :: get_two_e_integral_complex_simple
+
+  integer                        :: ii, ii0
+  integer*8                      :: ii_8, ii0_8
+  complex(integral_kind)         :: tmp
+  integer(key_kind)              :: i1, idx
+  integer(key_kind)              :: p,q,r,s,i2
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache_complex
+
+!DEBUG
+!  do i=1,sze
+!    out_val(i) = get_two_e_integral_complex(i,j,k,l,map,map2)
+!  enddo
+!  return
+!DEBUG
+
+  ii0 = l-mo_integrals_cache_min
+  ii0 = ior(ii0, k-mo_integrals_cache_min)
+  ii0 = ior(ii0, j-mo_integrals_cache_min)
+
+  ii0_8 = int(l,8)-mo_integrals_cache_min_8
+  ii0_8 = ior( shiftl(ii0_8,7), int(k,8)-mo_integrals_cache_min_8)
+  ii0_8 = ior( shiftl(ii0_8,7), int(j,8)-mo_integrals_cache_min_8)
+
+  do i=1,sze
+    ii = ior(ii0, i-mo_integrals_cache_min)
+    if (iand(ii, -128) == 0) then
+      ii_8 = ior( shiftl(ii0_8,7), int(i,8)-mo_integrals_cache_min_8)
+      out_val(i) = mo_integrals_cache_complex(ii_8)
+    else
+      out_val(i) = get_two_e_integral_complex_simple(i,j,k,l,map,map2)
+    endif
+  enddo
+end
+
+subroutine get_mo_two_e_integrals_kpts(j,ij,kj,k,ik,kk,l,il,kl,sze,out_val,map,map2)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Returns multiple integrals <ij|kl> in the MO basis, all
+  ! i for j,k,l fixed.
+  END_DOC
+  integer, intent(in)            :: j,k,l, ij,ik,il, kj,kk,kl, sze
+  complex*16, intent(out)        :: out_val(sze)
+  type(map_type), intent(inout)  :: map,map2
+  integer                        :: i
+  complex*16, external           :: get_two_e_integral_complex_simple
+  complex*16, external           :: mo_two_e_integral_kpts
+
+  integer :: ki,imin0
+  integer                        :: ii, ii0
+  integer*8                      :: ii_8, ii0_8
+  complex(integral_kind)         :: tmp
+  integer(key_kind)              :: i1, idx
+  integer(key_kind)              :: p,q,r,s,i2
+  PROVIDE mo_two_e_integrals_in_map mo_integrals_cache_complex
+
+!DEBUG
+!  do i=1,sze
+!    out_val(i) = get_two_e_integral_complex(i,j,k,l,map,map2)
+!  enddo
+!  return
+!DEBUG
+
+  ki = kconserv(kk,kl,kj)
+  imin0 = (ki-1)*mo_num_per_kpt
+  ii0 = l-mo_integrals_cache_min
+  ii0 = ior(ii0, k-mo_integrals_cache_min)
+  ii0 = ior(ii0, j-mo_integrals_cache_min)
+
+  ii0_8 = int(l,8)-mo_integrals_cache_min_8
+  ii0_8 = ior( shiftl(ii0_8,7), int(k,8)-mo_integrals_cache_min_8)
+  ii0_8 = ior( shiftl(ii0_8,7), int(j,8)-mo_integrals_cache_min_8)
+
+  do i=1,sze
+    ii = ior(ii0, i+imin0-mo_integrals_cache_min)
+    if (iand(ii, -128) == 0) then
+      ii_8 = ior( shiftl(ii0_8,7), int(i+imin0,8)-mo_integrals_cache_min_8)
+      out_val(i) = mo_integrals_cache_complex(ii_8)
+    else
+      out_val(i) = get_two_e_integral_complex_simple(i+imin0,j,k,l,map,map2)
+    endif
+  enddo
+end
+
+!subroutine get_mo_two_e_integrals_ij_complex(k,l,sze,out_array,map)
+!  use map_module
+!  implicit none
+!  BEGIN_DOC
+!  ! Returns multiple integrals <ij|kl> in the MO basis, all
+!  ! i(1)j(2) 1/r12 k(1)l(2)
+!  ! i, j for k,l fixed.
+!  END_DOC
+!  integer, intent(in)            :: k,l, sze
+!  double precision, intent(out)  :: out_array(sze,sze)
+!  type(map_type), intent(inout)  :: map
+!  integer                        :: i,j,kk,ll,m
+!  integer(key_kind),allocatable  :: hash(:)
+!  integer  ,allocatable          :: pairs(:,:), iorder(:)
+!  real(integral_kind), allocatable :: tmp_val(:)
+!
+!  PROVIDE mo_two_e_integrals_in_map
+!  allocate (hash(sze*sze), pairs(2,sze*sze),iorder(sze*sze), &
+!  tmp_val(sze*sze))
+!
+!  kk=0
+!  out_array = 0.d0
+!  do j=1,sze
+!   do i=1,sze
+!    kk += 1
+!    !DIR$ FORCEINLINE
+!    call two_e_integrals_index(i,j,k,l,hash(kk))
+!    pairs(1,kk) = i
+!    pairs(2,kk) = j
+!    iorder(kk) = kk
+!   enddo
+!  enddo
+!
+!  logical :: integral_is_in_map
+!  if (key_kind == 8) then
+!    call i8radix_sort(hash,iorder,kk,-1)
+!  else if (key_kind == 4) then
+!    call iradix_sort(hash,iorder,kk,-1)
+!  else if (key_kind == 2) then
+!    call i2radix_sort(hash,iorder,kk,-1)
+!  endif
+!
+!  call map_get_many(mo_integrals_map, hash, tmp_val, kk)
+!
+!  do ll=1,kk
+!    m = iorder(ll)
+!    i=pairs(1,m)
+!    j=pairs(2,m)
+!    out_array(i,j) = tmp_val(ll)
+!  enddo
+!
+!  deallocate(pairs,hash,iorder,tmp_val)
+!end
+
+!subroutine get_mo_two_e_integrals_i1j1_complex(k,l,sze,out_array,map)
+!  use map_module
+!  implicit none
+!  BEGIN_DOC
+!  ! Returns multiple integrals <ik|jl> in the MO basis, all
+!  ! i(1)j(1) 1/r12 k(2)l(2)
+!  ! i, j for k,l fixed.
+!  END_DOC
+!  integer, intent(in)            :: k,l, sze
+!  double precision, intent(out)  :: out_array(sze,sze)
+!  type(map_type), intent(inout)  :: map
+!  integer                        :: i,j,kk,ll,m
+!  integer(key_kind),allocatable  :: hash(:)
+!  integer  ,allocatable          :: pairs(:,:), iorder(:)
+!  real(integral_kind), allocatable :: tmp_val(:)
+!
+!  PROVIDE mo_two_e_integrals_in_map
+!  allocate (hash(sze*sze), pairs(2,sze*sze),iorder(sze*sze), &
+!  tmp_val(sze*sze))
+!
+!  kk=0
+!  out_array = 0.d0
+!  do j=1,sze
+!   do i=1,sze
+!    kk += 1
+!    !DIR$ FORCEINLINE
+!    call two_e_integrals_index(i,k,j,l,hash(kk))
+!    pairs(1,kk) = i
+!    pairs(2,kk) = j
+!    iorder(kk) = kk
+!   enddo
+!  enddo
+!
+!  logical :: integral_is_in_map
+!  if (key_kind == 8) then
+!    call i8radix_sort(hash,iorder,kk,-1)
+!  else if (key_kind == 4) then
+!    call iradix_sort(hash,iorder,kk,-1)
+!  else if (key_kind == 2) then
+!    call i2radix_sort(hash,iorder,kk,-1)
+!  endif
+!
+!  call map_get_many(mo_integrals_map, hash, tmp_val, kk)
+!
+!  do ll=1,kk
+!    m = iorder(ll)
+!    i=pairs(1,m)
+!    j=pairs(2,m)
+!    out_array(i,j) = tmp_val(ll)
+!  enddo
+!
+!  deallocate(pairs,hash,iorder,tmp_val)
+!end
+
+subroutine get_mo_two_e_integrals_coulomb_ii_complex(k,l,sze,out_val,map,map2)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Returns multiple integrals <ki|li>
+  ! k(1)i(2) 1/r12 l(1)i(2) :: out_val(i1)
+  ! for k,l fixed.
+  ! real and in map2 if k==l
+  ! complex and in map1 otherwise
+  ! take conjugate if k>l
+  ! TODO: determine best way to structure code 
+  !       to account for single/double integral_kind, real/complex, and +/- imag part
+  END_DOC
+  integer, intent(in)            :: k,l, sze
+  complex*16, intent(out)  :: out_val(sze)
+  type(map_type), intent(inout)  :: map,map2
+  integer                        :: i
+  integer(key_kind)              :: hash(sze),hash_re(sze),hash_im(sze)
+  real(integral_kind)            :: tmp_re(sze),tmp_im(sze)
+  double precision               :: out_re(sze),out_im(sze)
+  double precision :: sign
+  PROVIDE mo_two_e_integrals_in_map
+
+  if (k.eq.l) then ! real, call other function
+    call get_mo_two_e_integrals_coulomb_ijij_complex(k,sze,out_re,map2)
+    do i=1,sze
+      out_val(i) = dcmplx(out_re(i),0.d0)
+    enddo
+  else ! complex
+    if (k.gt.l) then
+      sign = -1.d0
+    else
+      sign = 1.d0
+    endif
+
+    do i=1,sze
+      !DIR$ FORCEINLINE
+      call two_e_integrals_index(k,i,l,i,hash(i))
+      !hash_im(i) = hash(i)*2
+      hash_im(i) = shiftl(hash(i),1)
+      hash_re(i) = hash_im(i)-1
+    enddo
+
+    if (integral_kind == 8) then
+      call map_get_many(map, hash_re, out_re, sze)
+      call map_get_many(map, hash_im, out_im, sze)
+      do i=1,sze
+        out_val(i) = dcmplx(out_re(i),sign*out_im(i))
+      enddo
+    else
+      call map_get_many(map, hash_re, tmp_re, sze)
+      call map_get_many(map, hash_im, tmp_im, sze)
+      ! Conversion to double complex
+      do i=1,sze
+        out_val(i) = dcmplx(tmp_re(i),sign*tmp_im(i))
+      enddo
+    endif
+  endif
+end
+
+subroutine get_mo_two_e_integrals_coulomb_ijij_complex(j,sze,out_val,map2)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Returns multiple integrals <ij|ij>
+  ! i*(1)j*(2) 1/r12 i(1)j(2) :: out_val(i)
+  ! for j fixed.
+  ! always in map2, always real
+  END_DOC
+  integer, intent(in)            :: j, sze
+  double precision, intent(out)  :: out_val(sze)
+  type(map_type), intent(inout)  :: map2
+  integer                        :: i
+  integer(key_kind)              :: hash(sze),hash_re(sze)
+  real(integral_kind)            :: tmp_re(sze)
+  PROVIDE mo_two_e_integrals_in_map
+
+  do i=1,sze
+    !DIR$ FORCEINLINE
+    call two_e_integrals_index(i,j,i,j,hash(i))
+    !hash_re(i) = hash(i)*2 - 1
+    hash_re(i) = shiftl(hash(i),1) - 1
+  enddo
+
+  if (integral_kind == 8) then
+    call map_get_many(map2, hash_re, out_val, sze)
+  else
+    call map_get_many(map2, hash_re, tmp_re, sze)
+    ! Conversion to double complex
+    do i=1,sze
+      out_val(i) = dble(tmp_re(i))
+    enddo
+  endif
+end
+
+subroutine get_mo_two_e_integrals_exch_ii_complex(k,l,sze,out_val,map,map2)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Returns multiple integrals <ki|il>
+  ! k*(1)i*(2) 1/r12 i(1)l(2) :: out_val(i1)
+  ! for k,l fixed.
+  ! 
+  ! if k<l, then:
+  ! i < k        map2 +
+  ! k <= i <= l  map1 +
+  ! l < i        map2 -
+  !
+  ! if l<k, then same maps as above, but take complex conjugate
+  END_DOC
+  integer, intent(in)            :: k,l, sze
+  complex*16, intent(out)  :: out_val(sze)
+  type(map_type), intent(inout)  :: map,map2
+  integer                        :: i,klmin,klmax
+  integer(key_kind)              :: hash(sze),hash_re(sze),hash_im(sze)
+  real(integral_kind)            :: tmp_re(sze),tmp_im(sze)
+  double precision               :: out_re(sze),out_im(sze)
+  double precision :: sign,sign2(sze)
+
+  PROVIDE mo_two_e_integrals_in_map
+
+  
+  if (k.eq.l) then ! real, call other function
+    call get_mo_two_e_integrals_exch_ijji_complex(k,sze,out_re,map,map2)
+    do i=1,sze
+      out_val(i) = dcmplx(out_re(i),0.d0)
+    enddo
+  else ! complex
+    if (k.gt.l) then
+      sign = -1.d0
+    else
+      sign = 1.d0
+    endif
+    klmin = min(k,l) ! put these in conditional above?
+    klmax = max(k,l)
+    sign2(1:klmax) = 1.d0
+    sign2(klmax+1:sze) = -1.d0
+    do i=1,sze
+      !DIR$ FORCEINLINE
+      call two_e_integrals_index(k,i,i,l,hash(i))
+      !hash_im(i) = hash(i)*2
+      hash_im(i) = shiftl(hash(i),1)
+      hash_re(i) = hash_im(i)-1
+    enddo
+
+    if (integral_kind == 8) then
+      call map_get_many(map2, hash_re(1:klmin-1),   out_re(1:klmin-1),   klmin-1)
+      call map_get_many(map2, hash_im(1:klmin-1),   out_im(1:klmin-1),   klmin-1)
+      call map_get_many(map,  hash_re(klmin:klmax), out_re(klmin:klmax), klmax-klmin+1)
+      call map_get_many(map,  hash_im(klmin:klmax), out_im(klmin:klmax), klmax-klmin+1)
+      if (klmax.lt.sze) then
+        call map_get_many(map2, hash_re(klmax+1:sze), out_re(klmax+1:sze), sze-klmax)
+        call map_get_many(map2, hash_im(klmax+1:sze), out_im(klmax+1:sze), sze-klmax)
+      endif
+      do i=1,sze
+        out_val(i) = dcmplx(out_re(i),sign*sign2(i)*out_im(i))
+      enddo
+    else
+      call map_get_many(map2, hash_re(1:klmin-1),   tmp_re(1:klmin-1),   klmin-1)
+      call map_get_many(map2, hash_im(1:klmin-1),   tmp_im(1:klmin-1),   klmin-1)
+      call map_get_many(map,  hash_re(klmin:klmax), tmp_re(klmin:klmax), klmax-klmin+1)
+      call map_get_many(map,  hash_im(klmin:klmax), tmp_im(klmin:klmax), klmax-klmin+1)
+      if (klmax.lt.sze) then
+        call map_get_many(map2, hash_re(klmax+1:sze), tmp_re(klmax+1:sze), sze-klmax)
+        call map_get_many(map2, hash_im(klmax+1:sze), tmp_im(klmax+1:sze), sze-klmax)
+      endif
+      ! Conversion to double complex
+      do i=1,sze
+        out_val(i) = dcmplx(tmp_re(i),sign*sign2(i)*tmp_im(i))
+      enddo
+    endif
+  endif
+end
+
+subroutine get_mo_two_e_integrals_exch_ijji_complex(j,sze,out_val,map,map2)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Returns multiple integrals <ij|ji>
+  ! i*(1)j*(2) 1/r12 j(1)i(2) :: out_val(i)
+  ! for j fixed.
+  ! always real, always in map2 
+  END_DOC
+  integer, intent(in)            :: j, sze
+  double precision, intent(out)  :: out_val(sze)
+  type(map_type), intent(inout)  :: map,map2
+  integer                        :: i
+  integer(key_kind)              :: hash(sze),hash_re(sze)
+  real(integral_kind)            :: tmp_val(sze)
+  PROVIDE mo_two_e_integrals_in_map
+
+  do i=1,sze
+    !DIR$ FORCEINLINE
+    call two_e_integrals_index(i,j,j,i,hash(i))
+    !hash_re(i) = 2*hash(i) - 1
+    hash_re(i) = shiftl(hash(i),1) - 1
+  enddo
+
+  if (integral_kind == 8) then
+    call map_get_many(map2, hash_re, out_val, sze)
+  else
+    call map_get_many(map2, hash_re, tmp_val, sze)
+    ! Conversion to double precision
+    do i=1,sze
+      out_val(i) = dble(tmp_val(i))
+    enddo
+  endif
+end
+
diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f
index b926d688..e2447d6b 100644
--- a/src/mo_two_e_ints/mo_bi_integrals.irp.f
+++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f
@@ -28,9 +28,66 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
   integer(bit_kind)              :: mask_ijkl(N_int,4)
   integer(bit_kind)              :: mask_ijk(N_int,3)
   double precision               :: cpu_1, cpu_2, wall_1, wall_2
+  integer*8                      :: get_mo_map_size, mo_map_size
+  double precision, external     :: map_mb
 
   PROVIDE mo_class
 
+  if (is_complex) then
+    mo_two_e_integrals_in_map = .True.
+    if (read_mo_two_e_integrals) then
+      print*,'Reading the MO integrals'
+      call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints_complex_1',mo_integrals_map)
+      call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints_complex_2',mo_integrals_map_2)
+      print*, 'MO integrals provided (periodic)'
+      return
+    else if (read_df_mo_integrals.or.read_df_ao_integrals) then
+      PROVIDE df_mo_integrals_complex
+      !call mo_map_fill_from_df
+      !call mo_map_fill_from_df_single
+      call mo_map_fill_from_df_dot
+      return
+    else
+      PROVIDE ao_two_e_integrals_in_map
+    endif
+    
+    print *,  ''
+    print *,  'AO -> MO integrals transformation (periodic)'
+    print *,  '---------------------------------'
+    print *,  ''
+  
+    call wall_time(wall_1)
+    call cpu_time(cpu_1)
+  
+    if(no_vvvv_integrals)then
+      print*,'not implemented for periodic',irp_here
+      stop -1
+      call four_idx_novvvv_complex
+    else
+      print*,'not implemented for periodic',irp_here
+      stop -1
+      call add_integrals_to_map_complex(full_ijkl_bitmask_4)
+    endif
+  
+    call wall_time(wall_2)
+    call cpu_time(cpu_2)
+  
+    mo_map_size = get_mo_map_size()
+  
+    print*,'Molecular integrals provided:'
+    print*,' Size of MO map 1         ', map_mb(mo_integrals_map) ,'MB'
+    print*,' Size of MO map 2         ', map_mb(mo_integrals_map_2) ,'MB'
+    print*,' Number of MO integrals: ',  mo_map_size
+    print*,' cpu  time :',cpu_2 - cpu_1, 's'
+    print*,' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')'
+  
+    if (write_mo_two_e_integrals.and.mpi_master) then
+      call ezfio_set_work_empty(.False.)
+      call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints_complex_1',mo_integrals_map)
+      call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints_complex_2',mo_integrals_map_2)
+      call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read')
+    endif
+  else
   mo_two_e_integrals_in_map = .True.
   if (read_mo_two_e_integrals) then
     print*,'Reading the MO integrals'
@@ -58,10 +115,8 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
   call wall_time(wall_2)
   call cpu_time(cpu_2)
 
-  integer*8                      :: get_mo_map_size, mo_map_size
   mo_map_size = get_mo_map_size()
 
-  double precision, external     :: map_mb
   print*,'Molecular integrals provided:'
   print*,' Size of MO map           ', map_mb(mo_integrals_map) ,'MB'
   print*,' Number of MO integrals: ',  mo_map_size
@@ -73,6 +128,7 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
     call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map)
     call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read')
   endif
+  endif
 
 END_PROVIDER
 
@@ -929,13 +985,94 @@ end
   ! mo_two_e_integrals_jj_exchange_from_ao(i,j) = J_ij
   ! mo_two_e_integrals_jj_anti_from_ao(i,j) = J_ij - K_ij
   END_DOC
-
+  
   integer                        :: i,j,p,q,r,s
   double precision               :: c
-  real(integral_kind)            :: integral
   integer                        :: n, pp
-  real(integral_kind), allocatable :: int_value(:)
   integer, allocatable           :: int_idx(:)
+  if (is_complex) then
+    complex(integral_kind)            :: integral2
+    complex(integral_kind), allocatable :: int_value2(:)
+    complex*16 :: cz
+
+    complex*16, allocatable  :: iqrs2(:,:), iqsr2(:,:), iqis2(:), iqri2(:)
+    PROVIDE ao_two_e_integrals_in_map mo_coef_complex
+    mo_two_e_integral_jj_from_ao = 0.d0
+    mo_two_e_integrals_jj_exchange_from_ao = 0.d0
+    !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: iqrs2, iqsr2
+
+
+    !$OMP PARALLEL DEFAULT(NONE)                                       &
+        !$OMP PRIVATE (i,j,p,q,r,s,integral2,c,n,pp,int_value2,int_idx,  &
+        !$OMP  iqrs2, iqsr2,iqri2,iqis2,cz)                                   &
+        !$OMP SHARED(mo_num,mo_coef_transp_complex,mo_coef_transp_complex_conjg,ao_num,    &
+        !$OMP  ao_integrals_threshold)             &
+        !$OMP REDUCTION(+:mo_two_e_integral_jj_from_ao,mo_two_e_integrals_jj_exchange_from_ao)
+
+    allocate( int_value2(ao_num), int_idx(ao_num),                      &
+        iqrs2(mo_num,ao_num), iqis2(mo_num), iqri2(mo_num),   &
+        iqsr2(mo_num,ao_num) )
+
+    !$OMP DO SCHEDULE (guided)
+    do s=1,ao_num
+      do q=1,ao_num
+
+        do j=1,ao_num
+          do i=1,mo_num
+            iqrs2(i,j) = (0.d0,0.d0)
+            iqsr2(i,j) = (0.d0,0.d0)
+          enddo
+        enddo
+
+
+          do r=1,ao_num
+            call get_ao_two_e_integrals_non_zero_complex(q,r,s,ao_num,int_value2,int_idx,n)
+            do pp=1,n
+              p = int_idx(pp)
+              integral2 = int_value2(pp)
+              if (cdabs(integral2) > ao_integrals_threshold) then
+                do i=1,mo_num
+                  iqrs2(i,r) += mo_coef_transp_complex_conjg(i,p) * integral2
+                enddo
+              endif
+            enddo
+            call get_ao_two_e_integrals_non_zero_complex(q,s,r,ao_num,int_value2,int_idx,n)
+            do pp=1,n
+              p = int_idx(pp)
+              integral2 = int_value2(pp)
+              if (cdabs(integral2) > ao_integrals_threshold) then
+                do i=1,mo_num
+                  iqsr2(i,r) += mo_coef_transp_complex_conjg(i,p) * integral2
+                enddo
+              endif
+            enddo
+          enddo
+        iqis2 = (0.d0,0.d0)
+        iqri2 = (0.d0,0.d0)
+        do r=1,ao_num
+          do i=1,mo_num
+            iqis2(i) += mo_coef_transp_complex(i,r) * iqrs2(i,r)
+            iqri2(i) += mo_coef_transp_complex(i,r) * iqsr2(i,r)
+          enddo
+        enddo
+        do i=1,mo_num
+          do j=1,mo_num
+            cz = mo_coef_transp_complex_conjg(j,q)*mo_coef_transp_complex(j,s)
+            mo_two_e_integral_jj_from_ao(j,i) += dble(cz * iqis2(i))
+            mo_two_e_integrals_jj_exchange_from_ao(j,i) += dble(cz * iqri2(i))
+          enddo
+        enddo
+
+      enddo
+    enddo
+    !$OMP END DO NOWAIT
+    deallocate(iqrs2,iqsr2,int_value2,int_idx)
+    !$OMP END PARALLEL
+
+
+  else
+  real(integral_kind)            :: integral
+  real(integral_kind), allocatable :: int_value(:)
 
   double precision, allocatable  :: iqrs(:,:), iqsr(:,:), iqis(:), iqri(:)
 
@@ -1040,7 +1177,7 @@ end
   !$OMP END DO NOWAIT
   deallocate(iqrs,iqsr,int_value,int_idx)
   !$OMP END PARALLEL
-
+  endif
   mo_two_e_integrals_jj_anti_from_ao = mo_two_e_integral_jj_from_ao - mo_two_e_integrals_jj_exchange_from_ao
 
 
@@ -1060,11 +1197,100 @@ END_PROVIDER
   integer                        :: i,j,p,q,r,s
   integer                        :: i0,j0
   double precision               :: c
-  real(integral_kind)            :: integral
   integer                        :: n, pp
-  real(integral_kind), allocatable :: int_value(:)
   integer, allocatable           :: int_idx(:)
 
+  if (is_complex) then
+    complex*16                     :: cz
+    complex(integral_kind)            :: integral2
+    complex(integral_kind), allocatable :: int_value2(:)
+    complex*16, allocatable  :: iqrs2(:,:), iqsr2(:,:), iqis2(:), iqri2(:)
+
+    PROVIDE ao_two_e_integrals_in_map mo_coef_complex
+
+    mo_two_e_integrals_vv_from_ao = 0.d0
+    mo_two_e_integrals_vv_exchange_from_ao = 0.d0
+
+    !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: iqrs2, iqsr2
+
+
+    !$OMP PARALLEL DEFAULT(NONE)                                            &
+        !$OMP PRIVATE (i0,j0,i,j,p,q,r,s,integral2,c,n,pp,int_value2,int_idx, &
+        !$OMP  iqrs2, iqsr2,iqri2,iqis2,cz)                                   &
+        !$OMP SHARED(n_virt_orb,mo_num,list_virt,mo_coef_transp_complex,ao_num, &
+        !$OMP  mo_coef_transp_complex_conjg,                  &
+        !$OMP  ao_integrals_threshold,do_direct_integrals)                  &
+        !$OMP REDUCTION(+:mo_two_e_integrals_vv_from_ao,mo_two_e_integrals_vv_exchange_from_ao)
+
+    allocate( int_value2(ao_num), int_idx(ao_num),                      &
+        iqrs2(mo_num,ao_num), iqis2(mo_num), iqri2(mo_num),&
+        iqsr2(mo_num,ao_num) )
+
+    !$OMP DO SCHEDULE (guided)
+    do s=1,ao_num
+      do q=1,ao_num
+
+        do j=1,ao_num
+          do i0=1,n_virt_orb
+            i = list_virt(i0)
+            iqrs2(i,j) = (0.d0,0.d0)
+            iqsr2(i,j) = (0.d0,0.d0)
+          enddo
+        enddo
+
+
+          do r=1,ao_num
+            call get_ao_two_e_integrals_non_zero_complex(q,r,s,ao_num,int_value2,int_idx,n)
+            do pp=1,n
+              p = int_idx(pp)
+              integral2 = int_value2(pp)
+              if (cdabs(integral2) > ao_integrals_threshold) then
+                do i0=1,n_virt_orb
+                  i =list_virt(i0)
+                  iqrs2(i,r) += mo_coef_transp_complex_conjg(i,p) * integral2
+                enddo
+              endif
+            enddo
+            call get_ao_two_e_integrals_non_zero_complex(q,s,r,ao_num,int_value2,int_idx,n)
+            do pp=1,n
+              p = int_idx(pp)
+              integral2 = int_value2(pp)
+              if (cdabs(integral2) > ao_integrals_threshold) then
+                do i0=1,n_virt_orb
+                  i = list_virt(i0)
+                  iqsr2(i,r) += mo_coef_transp_complex_conjg(i,p) * integral2
+                enddo
+              endif
+            enddo
+          enddo
+
+        iqis2 = (0.d0,0.d0)
+        iqri2 = (0.d0,0.d0)
+        do r=1,ao_num
+          do i0=1,n_virt_orb
+            i = list_virt(i0)
+            iqis2(i) += mo_coef_transp_complex(i,r) * iqrs2(i,r)
+            iqri2(i) += mo_coef_transp_complex(i,r) * iqsr2(i,r)
+          enddo
+        enddo
+        do i0=1,n_virt_orb
+          i= list_virt(i0)
+          do j0=1,n_virt_orb
+            j = list_virt(j0)
+            cz = mo_coef_transp_complex_conjg(j,q)*mo_coef_transp_complex(j,s)
+            mo_two_e_integrals_vv_from_ao(j,i) += dble(cz * iqis2(i))
+            mo_two_e_integrals_vv_exchange_from_ao(j,i) += dble(cz * iqri2(i))
+          enddo
+        enddo
+
+      enddo
+    enddo
+    !$OMP END DO NOWAIT
+    deallocate(iqrs2,iqsr2,iqis2,iqri2,int_value2,int_idx)
+    !$OMP END PARALLEL
+  else
+  real(integral_kind)            :: integral
+  real(integral_kind), allocatable :: int_value(:)
   double precision, allocatable  :: iqrs(:,:), iqsr(:,:), iqis(:), iqri(:)
 
   if (.not.do_direct_integrals) then
@@ -1176,6 +1402,7 @@ END_PROVIDER
   !$OMP END DO NOWAIT
   deallocate(iqrs,iqsr,int_value,int_idx)
   !$OMP END PARALLEL
+  endif
 
   mo_two_e_integrals_vv_anti_from_ao = mo_two_e_integrals_vv_from_ao - mo_two_e_integrals_vv_exchange_from_ao
   ! print*, '**********'
@@ -1205,7 +1432,18 @@ END_PROVIDER
   PROVIDE mo_two_e_integrals_in_map
   mo_two_e_integrals_jj = 0.d0
   mo_two_e_integrals_jj_exchange = 0.d0
-
+  if (is_complex) then
+    complex*16 :: get_two_e_integral_complex
+    do j=1,mo_num
+      do i=1,mo_num
+        mo_two_e_integrals_jj(i,j) = dble(get_two_e_integral_complex(i,j,i,j,&
+                                       mo_integrals_map,mo_integrals_map_2))
+        mo_two_e_integrals_jj_exchange(i,j) = dble(get_two_e_integral_complex(i,j,j,i,&
+                                                mo_integrals_map,mo_integrals_map_2))
+        mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j)
+      enddo
+    enddo
+  else
   do j=1,mo_num
     do i=1,mo_num
       mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map)
@@ -1213,6 +1451,7 @@ END_PROVIDER
       mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j)
     enddo
   enddo
+  endif
 
 END_PROVIDER
 
@@ -1223,7 +1462,8 @@ subroutine clear_mo_map
   ! Frees the memory of the MO map
   END_DOC
   call map_deinit(mo_integrals_map)
-  FREE mo_integrals_map mo_two_e_integrals_jj mo_two_e_integrals_jj_anti
+  call map_deinit(mo_integrals_map_2)
+  FREE mo_integrals_map mo_integrals_map_2 mo_two_e_integrals_jj mo_two_e_integrals_jj_anti
   FREE mo_two_e_integrals_jj_exchange mo_two_e_integrals_in_map
 end
 
diff --git a/src/mo_two_e_ints/mo_bi_integrals_cplx.irp.f b/src/mo_two_e_ints/mo_bi_integrals_cplx.irp.f
new file mode 100644
index 00000000..8b96c498
--- /dev/null
+++ b/src/mo_two_e_ints/mo_bi_integrals_cplx.irp.f
@@ -0,0 +1,865 @@
+
+subroutine double_allowed_mo_kpts(h1,h2,p1,p2,is_allowed)
+  implicit none
+  integer, intent(in) :: h1,h2,p1,p2
+  logical, intent(out) :: is_allowed
+  integer :: kh1,kh2,kp1,kp2
+
+  kh1 = (h1-1)/mo_num_per_kpt+1
+  kh2 = (h2-1)/mo_num_per_kpt+1
+  kp1 = (p1-1)/mo_num_per_kpt+1
+  kp2 = (p2-1)/mo_num_per_kpt+1
+  call double_allowed_kpts(kh1,kh2,kp1,kp2,is_allowed)
+end subroutine
+
+subroutine add_integrals_to_map_complex(mask_ijkl)
+  use map_module
+  use bitmasks
+  implicit none
+
+  BEGIN_DOC
+  ! Adds integrals to tha MO map according to some bitmask
+  END_DOC
+
+  integer(bit_kind), intent(in)  :: mask_ijkl(N_int,4)
+
+  integer                        :: i,j,k,l
+  integer                        :: i0,j0,k0,l0
+  double precision               :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0
+
+  integer, allocatable           :: list_ijkl(:,:)
+  integer                        :: n_i, n_j, n_k, n_l
+  integer, allocatable           :: two_e_tmp_0_idx(:)
+  real(integral_kind), allocatable :: two_e_tmp_0(:,:)
+  double precision, allocatable  :: two_e_tmp_1(:)
+  double precision, allocatable  :: two_e_tmp_2(:,:)
+  double precision, allocatable  :: two_e_tmp_3(:,:,:)
+  !DIR$ ATTRIBUTES ALIGN : 64    :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3
+
+  integer                        :: n_integrals
+  integer                        :: size_buffer
+  integer(key_kind),allocatable  :: buffer_i(:)
+  real(integral_kind),allocatable :: buffer_value(:)
+  double precision, external     :: map_mb
+
+  integer                        :: i1,j1,k1,l1, ii1, kmax, thread_num
+  integer                        :: i2,i3,i4
+  double precision,parameter     :: thr_coef = 1.d-10
+
+  print*,'not implemented for complex',irp_here
+  stop -1
+!  PROVIDE ao_two_e_integrals_in_map  mo_coef
+!
+!  !Get list of MOs for i,j,k and l
+!  !-------------------------------
+!
+!  allocate(list_ijkl(mo_num,4))
+!  call bitstring_to_list( mask_ijkl(1,1), list_ijkl(1,1), n_i, N_int )
+!  call bitstring_to_list( mask_ijkl(1,2), list_ijkl(1,2), n_j, N_int )
+!  call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int )
+!  call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int )
+!  j = 0
+!  do i = 1, N_int
+!    j += popcnt(mask_ijkl(i,1))
+!  enddo
+!  if(j==0)then
+!    return
+!  endif
+!
+!  j = 0
+!  do i = 1, N_int
+!    j += popcnt(mask_ijkl(i,2))
+!  enddo
+!  if(j==0)then
+!    return
+!  endif
+!
+!  j = 0
+!  do i = 1, N_int
+!    j += popcnt(mask_ijkl(i,3))
+!  enddo
+!  if(j==0)then
+!    return
+!  endif
+!
+!  j = 0
+!  do i = 1, N_int
+!    j += popcnt(mask_ijkl(i,4))
+!  enddo
+!  if(j==0)then
+!    return
+!  endif
+!
+!  size_buffer = min(ao_num*ao_num*ao_num,16000000)
+!  print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
+!      ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
+!
+!  double precision               :: accu_bis
+!  accu_bis = 0.d0
+!  call wall_time(wall_1)
+!
+!  !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax,   &
+!      !$OMP  two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
+!      !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
+!      !$OMP  wall_0,thread_num,accu_bis)                             &
+!      !$OMP  DEFAULT(NONE)                                           &
+!      !$OMP  SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l,   &
+!      !$OMP  mo_coef_transp,                                         &
+!      !$OMP  mo_coef_transp_is_built, list_ijkl,                     &
+!      !$OMP  mo_coef_is_built, wall_1,                               &
+!      !$OMP  mo_coef,mo_integrals_threshold,mo_integrals_map)
+!  n_integrals = 0
+!  wall_0 = wall_1
+!  allocate(two_e_tmp_3(mo_num, n_j, n_k),                 &
+!      two_e_tmp_1(mo_num),                                &
+!      two_e_tmp_0(ao_num,ao_num),                                   &
+!      two_e_tmp_0_idx(ao_num),                                      &
+!      two_e_tmp_2(mo_num, n_j),                           &
+!      buffer_i(size_buffer),                                         &
+!      buffer_value(size_buffer) )
+!
+!  thread_num = 0
+!  !$  thread_num = omp_get_thread_num()
+!  !$OMP DO SCHEDULE(guided)
+!  do l1 = 1,ao_num
+!    two_e_tmp_3 = 0.d0
+!    do k1 = 1,ao_num
+!      two_e_tmp_2 = 0.d0
+!      do j1 = 1,ao_num
+!        call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1))
+!        ! call compute_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1))
+!      enddo
+!      do j1 = 1,ao_num
+!        kmax = 0
+!        do i1 = 1,ao_num
+!          c = two_e_tmp_0(i1,j1)
+!          if (c == 0.d0) then
+!            cycle
+!          endif
+!          kmax += 1
+!          two_e_tmp_0(kmax,j1) = c
+!          two_e_tmp_0_idx(kmax) = i1
+!        enddo
+!
+!        if (kmax==0) then
+!          cycle
+!        endif
+!
+!        two_e_tmp_1 = 0.d0
+!        ii1=1
+!        do ii1 = 1,kmax-4,4
+!          i1 = two_e_tmp_0_idx(ii1)
+!          i2 = two_e_tmp_0_idx(ii1+1)
+!          i3 = two_e_tmp_0_idx(ii1+2)
+!          i4 = two_e_tmp_0_idx(ii1+3)
+!          do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!            two_e_tmp_1(i)  =  two_e_tmp_1(i) +                    &
+!                mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) +        &
+!                mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) +      &
+!                mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) +      &
+!                mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1)
+!          enddo ! i
+!        enddo  ! ii1
+!
+!        i2 = ii1
+!        do ii1 = i2,kmax
+!          i1 = two_e_tmp_0_idx(ii1)
+!          do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!            two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1)
+!          enddo ! i
+!        enddo  ! ii1
+!        c = 0.d0
+!
+!        do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!          c = max(c,abs(two_e_tmp_1(i)))
+!          if (c>mo_integrals_threshold) exit
+!        enddo
+!        if ( c < mo_integrals_threshold ) then
+!          cycle
+!        endif
+!
+!        do j0 = 1, n_j
+!          j = list_ijkl(j0,2)
+!          c = mo_coef_transp(j,j1)
+!          if (abs(c) < thr_coef) then
+!            cycle
+!          endif
+!          do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!            two_e_tmp_2(i,j0)  = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i)
+!          enddo ! i
+!        enddo  ! j
+!      enddo !j1
+!      if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then
+!        cycle
+!      endif
+!
+!
+!      do k0 = 1, n_k
+!        k = list_ijkl(k0,3)
+!        c = mo_coef_transp(k,k1)
+!        if (abs(c) < thr_coef) then
+!          cycle
+!        endif
+!
+!        do j0 = 1, n_j
+!          j = list_ijkl(j0,2)
+!          do i = list_ijkl(1,1), k
+!            two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0)
+!          enddo!i
+!        enddo !j
+!
+!      enddo  !k
+!    enddo   !k1
+!
+!
+!
+!    do l0 = 1,n_l
+!      l = list_ijkl(l0,4)
+!      c = mo_coef_transp(l,l1)
+!      if (abs(c) < thr_coef) then
+!        cycle
+!      endif
+!      j1 = shiftr((l*l-l),1)
+!      do j0 = 1, n_j
+!        j = list_ijkl(j0,2)
+!        if (j > l)  then
+!          exit
+!        endif
+!        j1 += 1
+!        do k0 = 1, n_k
+!          k = list_ijkl(k0,3)
+!          i1 = shiftr((k*k-k),1)
+!          if (i1<=j1) then
+!            continue
+!          else
+!            exit
+!          endif
+!          two_e_tmp_1 = 0.d0
+!          do i0 = 1, n_i
+!            i = list_ijkl(i0,1)
+!            if (i>k) then
+!              exit
+!            endif
+!            two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0)
+!            !           i1+=1
+!          enddo
+!
+!          do i0 = 1, n_i
+!            i = list_ijkl(i0,1)
+!            if(i> min(k,j1-i1+list_ijkl(1,1)-1))then
+!              exit
+!            endif
+!            if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then
+!              cycle
+!            endif
+!            n_integrals += 1
+!            buffer_value(n_integrals) = two_e_tmp_1(i)
+!            !DIR$ FORCEINLINE
+!            call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals))
+!            if (n_integrals == size_buffer) then
+!              call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
+!                  real(mo_integrals_threshold,integral_kind))
+!              n_integrals = 0
+!            endif
+!          enddo
+!        enddo
+!      enddo
+!    enddo
+!
+!    call wall_time(wall_2)
+!    if (thread_num == 0) then
+!      if (wall_2 - wall_0 > 1.d0) then
+!        wall_0 = wall_2
+!        print*, 100.*float(l1)/float(ao_num), '% in ',               &
+!            wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB'
+!      endif
+!    endif
+!  enddo
+!  !$OMP END DO NOWAIT
+!  deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
+!
+!  integer                        :: index_needed
+!
+!  call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
+!      real(mo_integrals_threshold,integral_kind))
+!  deallocate(buffer_i, buffer_value)
+!  !$OMP END PARALLEL
+!  call map_merge(mo_integrals_map)
+!
+!  call wall_time(wall_2)
+!  call cpu_time(cpu_2)
+!  integer*8                      :: get_mo_map_size, mo_map_size
+!  mo_map_size = get_mo_map_size()
+!
+!  deallocate(list_ijkl)
+
+
+end
+
+
+subroutine add_integrals_to_map_three_indices_complex(mask_ijk)
+  use map_module
+  use bitmasks
+  implicit none
+
+  BEGIN_DOC
+  ! Adds integrals to tha MO map according to some bitmask
+  END_DOC
+
+  integer(bit_kind), intent(in)  :: mask_ijk(N_int,3)
+
+  integer                        :: i,j,k,l
+  integer                        :: i0,j0,k0,l0
+  double precision               :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0
+
+  integer, allocatable           :: list_ijkl(:,:)
+  integer                        :: n_i, n_j, n_k
+  integer                        :: m
+  integer, allocatable           :: two_e_tmp_0_idx(:)
+  real(integral_kind), allocatable :: two_e_tmp_0(:,:)
+  double precision, allocatable  :: two_e_tmp_1(:)
+  double precision, allocatable  :: two_e_tmp_2(:,:)
+  double precision, allocatable  :: two_e_tmp_3(:,:,:)
+  !DIR$ ATTRIBUTES ALIGN : 64    :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3
+
+  integer                        :: n_integrals
+  integer                        :: size_buffer
+  integer(key_kind),allocatable  :: buffer_i(:)
+  real(integral_kind),allocatable :: buffer_value(:)
+  double precision               :: map_mb
+
+  integer                        :: i1,j1,k1,l1, ii1, kmax, thread_num
+  integer                        :: i2,i3,i4
+  double precision,parameter     :: thr_coef = 1.d-10
+
+  print*,'not implemented for complex',irp_here
+  stop -1
+!  PROVIDE ao_two_e_integrals_in_map  mo_coef
+!
+!  !Get list of MOs for i,j,k and l
+!  !-------------------------------
+!
+!  allocate(list_ijkl(mo_num,4))
+!  call bitstring_to_list( mask_ijk(1,1), list_ijkl(1,1), n_i, N_int )
+!  call bitstring_to_list( mask_ijk(1,2), list_ijkl(1,2), n_j, N_int )
+!  call bitstring_to_list( mask_ijk(1,3), list_ijkl(1,3), n_k, N_int )
+!  j = 0
+!  do i = 1, N_int
+!    j += popcnt(mask_ijk(i,1))
+!  enddo
+!  if(j==0)then
+!    return
+!  endif
+!
+!  j = 0
+!  do i = 1, N_int
+!    j += popcnt(mask_ijk(i,2))
+!  enddo
+!  if(j==0)then
+!    return
+!  endif
+!
+!  j = 0
+!  do i = 1, N_int
+!    j += popcnt(mask_ijk(i,3))
+!  enddo
+!  if(j==0)then
+!    return
+!  endif
+!
+!  size_buffer = min(ao_num*ao_num*ao_num,16000000)
+!  print*, 'Providing the molecular integrals '
+!  print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
+!      ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
+!
+!  call wall_time(wall_1)
+!  call cpu_time(cpu_1)
+!  double precision               :: accu_bis
+!  accu_bis = 0.d0
+!  !$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
+!      !$OMP  two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
+!      !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
+!      !$OMP  wall_0,thread_num,accu_bis)                             &
+!      !$OMP  DEFAULT(NONE)                                           &
+!      !$OMP  SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,       &
+!      !$OMP  mo_coef_transp,                                         &
+!      !$OMP  mo_coef_transp_is_built, list_ijkl,                     &
+!      !$OMP  mo_coef_is_built, wall_1,                               &
+!      !$OMP  mo_coef,mo_integrals_threshold,mo_integrals_map)
+!  n_integrals = 0
+!  wall_0 = wall_1
+!  allocate(two_e_tmp_3(mo_num, n_j, n_k),                 &
+!      two_e_tmp_1(mo_num),                                &
+!      two_e_tmp_0(ao_num,ao_num),                             &
+!      two_e_tmp_0_idx(ao_num),                                &
+!      two_e_tmp_2(mo_num, n_j),                           &
+!      buffer_i(size_buffer),                                   &
+!      buffer_value(size_buffer) )
+!
+!  thread_num = 0
+!  !$  thread_num = omp_get_thread_num()
+!  !$OMP DO SCHEDULE(guided)
+!  do l1 = 1,ao_num
+!    two_e_tmp_3 = 0.d0
+!    do k1 = 1,ao_num
+!      two_e_tmp_2 = 0.d0
+!      do j1 = 1,ao_num
+!        call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1))
+!      enddo
+!      do j1 = 1,ao_num
+!        kmax = 0
+!        do i1 = 1,ao_num
+!          c = two_e_tmp_0(i1,j1)
+!          if (c == 0.d0) then
+!            cycle
+!          endif
+!          kmax += 1
+!          two_e_tmp_0(kmax,j1) = c
+!          two_e_tmp_0_idx(kmax) = i1
+!        enddo
+!
+!        if (kmax==0) then
+!          cycle
+!        endif
+!
+!        two_e_tmp_1 = 0.d0
+!        ii1=1
+!        do ii1 = 1,kmax-4,4
+!          i1 = two_e_tmp_0_idx(ii1)
+!          i2 = two_e_tmp_0_idx(ii1+1)
+!          i3 = two_e_tmp_0_idx(ii1+2)
+!          i4 = two_e_tmp_0_idx(ii1+3)
+!          do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!            two_e_tmp_1(i)  =  two_e_tmp_1(i) +                    &
+!                mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) +        &
+!                mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) +      &
+!                mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) +      &
+!                mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1)
+!          enddo ! i
+!        enddo  ! ii1
+!
+!        i2 = ii1
+!        do ii1 = i2,kmax
+!          i1 = two_e_tmp_0_idx(ii1)
+!          do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!            two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1)
+!          enddo ! i
+!        enddo  ! ii1
+!        c = 0.d0
+!
+!        do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!          c = max(c,abs(two_e_tmp_1(i)))
+!          if (c>mo_integrals_threshold) exit
+!        enddo
+!        if ( c < mo_integrals_threshold ) then
+!          cycle
+!        endif
+!
+!        do j0 = 1, n_j
+!          j = list_ijkl(j0,2)
+!          c = mo_coef_transp(j,j1)
+!          if (abs(c) < thr_coef) then
+!            cycle
+!          endif
+!          do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!            two_e_tmp_2(i,j0)  = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i)
+!          enddo ! i
+!        enddo  ! j
+!      enddo !j1
+!      if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then
+!        cycle
+!      endif
+!
+!
+!      do k0 = 1, n_k
+!        k = list_ijkl(k0,3)
+!        c = mo_coef_transp(k,k1)
+!        if (abs(c) < thr_coef) then
+!          cycle
+!        endif
+!
+!        do j0 = 1, n_j
+!          j = list_ijkl(j0,2)
+!          do i = list_ijkl(1,1), k
+!            two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0)
+!          enddo!i
+!        enddo !j
+!
+!      enddo  !k
+!    enddo   !k1
+!
+!
+!
+!    do l0 = 1,n_j
+!      l = list_ijkl(l0,2)
+!      c = mo_coef_transp(l,l1)
+!      if (abs(c) < thr_coef) then
+!        cycle
+!      endif
+!      do k0 = 1, n_k
+!        k = list_ijkl(k0,3)
+!        i1 = shiftr((k*k-k),1)
+!        two_e_tmp_1 = 0.d0
+!        j0 = l0
+!        j = list_ijkl(j0,2)
+!        do i0 = 1, n_i
+!          i = list_ijkl(i0,1)
+!          if (i>k) then
+!            exit
+!          endif
+!          two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0)
+!        enddo
+!
+!        do i0 = 1, n_i
+!          i = list_ijkl(i0,1)
+!          if (i>k) then !min(k,j1-i1)
+!            exit
+!          endif
+!          if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then
+!            cycle
+!          endif
+!          n_integrals += 1
+!          buffer_value(n_integrals) = two_e_tmp_1(i)
+!          if(i==k .and. j==l .and. i.ne.j)then
+!            buffer_value(n_integrals) = buffer_value(n_integrals) *0.5d0
+!          endif
+!          !DIR$ FORCEINLINE
+!          call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals))
+!          if (n_integrals == size_buffer) then
+!            call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
+!                real(mo_integrals_threshold,integral_kind))
+!            n_integrals = 0
+!          endif
+!        enddo
+!      enddo
+!    enddo
+!
+!    do l0 = 1,n_j
+!      l = list_ijkl(l0,2)
+!      c = mo_coef_transp(l,l1)
+!      if (abs(c) < thr_coef) then
+!        cycle
+!      endif
+!      do k0 = 1, n_k
+!        k = list_ijkl(k0,3)
+!        i1 = shiftr((k*k-k),1)
+!        two_e_tmp_1 = 0.d0
+!        j0 = k0
+!        j = list_ijkl(k0,2)
+!        i0 = l0
+!        i = list_ijkl(i0,2)
+!        if (k==l) then
+!          cycle
+!        endif
+!        two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0)
+!
+!        n_integrals += 1
+!        buffer_value(n_integrals) = two_e_tmp_1(i)
+!        !DIR$ FORCEINLINE
+!        call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals))
+!        if (n_integrals == size_buffer) then
+!          call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
+!              real(mo_integrals_threshold,integral_kind))
+!          n_integrals = 0
+!        endif
+!      enddo
+!    enddo
+!
+!    call wall_time(wall_2)
+!    if (thread_num == 0) then
+!      if (wall_2 - wall_0 > 1.d0) then
+!        wall_0 = wall_2
+!        print*, 100.*float(l1)/float(ao_num), '% in ',               &
+!            wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB'
+!      endif
+!    endif
+!  enddo
+!  !$OMP END DO NOWAIT
+!  deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
+!
+!  integer                        :: index_needed
+!
+!  call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
+!      real(mo_integrals_threshold,integral_kind))
+!  deallocate(buffer_i, buffer_value)
+!  !$OMP END PARALLEL
+!  call map_merge(mo_integrals_map)
+!
+!  call wall_time(wall_2)
+!  call cpu_time(cpu_2)
+!  integer*8                      :: get_mo_map_size, mo_map_size
+!  mo_map_size = get_mo_map_size()
+!
+!  deallocate(list_ijkl)
+!
+!
+!  print*,'Molecular integrals provided:'
+!  print*,' Size of MO map           ', map_mb(mo_integrals_map) ,'MB'
+!  print*,' Number of MO integrals: ',  mo_map_size
+!  print*,' cpu  time :',cpu_2 - cpu_1, 's'
+!  print*,' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')'
+
+end
+
+
+subroutine add_integrals_to_map_no_exit_34_complex(mask_ijkl)
+  use map_module
+  use bitmasks
+  implicit none
+
+  BEGIN_DOC
+  ! Adds integrals to tha MO map according to some bitmask
+  END_DOC
+
+  integer(bit_kind), intent(in)  :: mask_ijkl(N_int,4)
+
+  integer                        :: i,j,k,l
+  integer                        :: i0,j0,k0,l0
+  double precision               :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0
+
+  integer, allocatable           :: list_ijkl(:,:)
+  integer                        :: n_i, n_j, n_k, n_l
+  integer, allocatable           :: two_e_tmp_0_idx(:)
+  real(integral_kind), allocatable :: two_e_tmp_0(:,:)
+  double precision, allocatable  :: two_e_tmp_1(:)
+  double precision, allocatable  :: two_e_tmp_2(:,:)
+  double precision, allocatable  :: two_e_tmp_3(:,:,:)
+  !DIR$ ATTRIBUTES ALIGN : 64    :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3
+
+  integer                        :: n_integrals
+  integer                        :: size_buffer
+  integer(key_kind),allocatable  :: buffer_i(:)
+  real(integral_kind),allocatable :: buffer_value(:)
+  double precision               :: map_mb
+
+  integer                        :: i1,j1,k1,l1, ii1, kmax, thread_num
+  integer                        :: i2,i3,i4
+  double precision,parameter     :: thr_coef = 1.d-10
+
+  print*,'not implemented for complex',irp_here
+  stop -1
+!  PROVIDE ao_two_e_integrals_in_map  mo_coef
+!
+!  !Get list of MOs for i,j,k and l
+!  !-------------------------------
+!
+!  allocate(list_ijkl(mo_num,4))
+!  call bitstring_to_list( mask_ijkl(1,1), list_ijkl(1,1), n_i, N_int )
+!  call bitstring_to_list( mask_ijkl(1,2), list_ijkl(1,2), n_j, N_int )
+!  call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int )
+!  call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int )
+!
+!  size_buffer = min(ao_num*ao_num*ao_num,16000000)
+!  print*, 'Providing the molecular integrals '
+!  print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
+!      ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
+!
+!  call wall_time(wall_1)
+!  call cpu_time(cpu_1)
+!
+!  !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax,   &
+!      !$OMP  two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
+!      !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
+!      !$OMP  wall_0,thread_num)                                      &
+!      !$OMP  DEFAULT(NONE)                                           &
+!      !$OMP  SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l,   &
+!      !$OMP  mo_coef_transp,                                         &
+!      !$OMP  mo_coef_transp_is_built, list_ijkl,                     &
+!      !$OMP  mo_coef_is_built, wall_1,                               &
+!      !$OMP  mo_coef,mo_integrals_threshold,mo_integrals_map)
+!  n_integrals = 0
+!  wall_0 = wall_1
+!  allocate(two_e_tmp_3(mo_num, n_j, n_k),                 &
+!      two_e_tmp_1(mo_num),                                &
+!      two_e_tmp_0(ao_num,ao_num),                                   &
+!      two_e_tmp_0_idx(ao_num),                                      &
+!      two_e_tmp_2(mo_num, n_j),                           &
+!      buffer_i(size_buffer),                                         &
+!      buffer_value(size_buffer) )
+!
+!  thread_num = 0
+!  !$  thread_num = omp_get_thread_num()
+!  !$OMP DO SCHEDULE(guided)
+!  do l1 = 1,ao_num
+!    !IRP_IF COARRAY
+!    !    if (mod(l1-this_image(),num_images()) /= 0 ) then
+!    !      cycle
+!    !    endif
+!    !IRP_ENDIF
+!    two_e_tmp_3 = 0.d0
+!    do k1 = 1,ao_num
+!      two_e_tmp_2 = 0.d0
+!      do j1 = 1,ao_num
+!        call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1))
+!        ! call compute_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1))
+!      enddo
+!      do j1 = 1,ao_num
+!        kmax = 0
+!        do i1 = 1,ao_num
+!          c = two_e_tmp_0(i1,j1)
+!          if (c == 0.d0) then
+!            cycle
+!          endif
+!          kmax += 1
+!          two_e_tmp_0(kmax,j1) = c
+!          two_e_tmp_0_idx(kmax) = i1
+!        enddo
+!
+!        if (kmax==0) then
+!          cycle
+!        endif
+!
+!        two_e_tmp_1 = 0.d0
+!        ii1=1
+!        do ii1 = 1,kmax-4,4
+!          i1 = two_e_tmp_0_idx(ii1)
+!          i2 = two_e_tmp_0_idx(ii1+1)
+!          i3 = two_e_tmp_0_idx(ii1+2)
+!          i4 = two_e_tmp_0_idx(ii1+3)
+!          do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!            two_e_tmp_1(i)  =  two_e_tmp_1(i) +                    &
+!                mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) +        &
+!                mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) +      &
+!                mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) +      &
+!                mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1)
+!          enddo ! i
+!        enddo  ! ii1
+!
+!        i2 = ii1
+!        do ii1 = i2,kmax
+!          i1 = two_e_tmp_0_idx(ii1)
+!          do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!            two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1)
+!          enddo ! i
+!        enddo  ! ii1
+!        c = 0.d0
+!
+!        do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!          c = max(c,abs(two_e_tmp_1(i)))
+!          if (c>mo_integrals_threshold) exit
+!        enddo
+!        if ( c < mo_integrals_threshold ) then
+!          cycle
+!        endif
+!
+!        do j0 = 1, n_j
+!          j = list_ijkl(j0,2)
+!          c = mo_coef_transp(j,j1)
+!          if (abs(c) < thr_coef) then
+!            cycle
+!          endif
+!          do i = list_ijkl(1,1), list_ijkl(n_i,1)
+!            two_e_tmp_2(i,j0)  = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i)
+!          enddo ! i
+!        enddo  ! j
+!      enddo !j1
+!      if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then
+!        cycle
+!      endif
+!
+!
+!      do k0 = 1, n_k
+!        k = list_ijkl(k0,3)
+!        c = mo_coef_transp(k,k1)
+!        if (abs(c) < thr_coef) then
+!          cycle
+!        endif
+!
+!        do j0 = 1, n_j
+!          j = list_ijkl(j0,2)
+!          do i = list_ijkl(1,1), k
+!            two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0)
+!          enddo!i
+!        enddo !j
+!
+!      enddo  !k
+!    enddo   !k1
+!
+!
+!
+!    do l0 = 1,n_l
+!      l = list_ijkl(l0,4)
+!      c = mo_coef_transp(l,l1)
+!      if (abs(c) < thr_coef) then
+!        cycle
+!      endif
+!      j1 = shiftr((l*l-l),1)
+!      do j0 = 1, n_j
+!        j = list_ijkl(j0,2)
+!        if (j > l)  then
+!          exit
+!        endif
+!        j1 += 1
+!        do k0 = 1, n_k
+!          k = list_ijkl(k0,3)
+!          i1 = shiftr((k*k-k),1)
+!          two_e_tmp_1 = 0.d0
+!          do i0 = 1, n_i
+!            i = list_ijkl(i0,1)
+!            if (i>k) then
+!              exit
+!            endif
+!            two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0)
+!          enddo
+!
+!          do i0 = 1, n_i
+!            i = list_ijkl(i0,1)
+!            if(i> k)then
+!              exit
+!            endif
+!
+!            if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then
+!              cycle
+!            endif
+!            n_integrals += 1
+!            buffer_value(n_integrals) = two_e_tmp_1(i)
+!            !DIR$ FORCEINLINE
+!            call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals))
+!            if (n_integrals == size_buffer) then
+!              call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
+!                  real(mo_integrals_threshold,integral_kind))
+!              n_integrals = 0
+!            endif
+!          enddo
+!        enddo
+!      enddo
+!    enddo
+!
+!    call wall_time(wall_2)
+!    if (thread_num == 0) then
+!      if (wall_2 - wall_0 > 1.d0) then
+!        wall_0 = wall_2
+!        print*, 100.*float(l1)/float(ao_num), '% in ',               &
+!            wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB'
+!      endif
+!    endif
+!  enddo
+!  !$OMP END DO NOWAIT
+!  deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
+!
+!  call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
+!      real(mo_integrals_threshold,integral_kind))
+!  deallocate(buffer_i, buffer_value)
+!  !$OMP END PARALLEL
+!  !IRP_IF COARRAY
+!  !  print*, 'Communicating the map'
+!  !  call communicate_mo_integrals()
+!  !IRP_ENDIF
+!  call map_merge(mo_integrals_map)
+!
+!  call wall_time(wall_2)
+!  call cpu_time(cpu_2)
+!  integer*8                      :: get_mo_map_size, mo_map_size
+!  mo_map_size = get_mo_map_size()
+!
+!  deallocate(list_ijkl)
+!
+!
+!  print*,'Molecular integrals provided:'
+!  print*,' Size of MO map           ', map_mb(mo_integrals_map) ,'MB'
+!  print*,' Number of MO integrals: ',  mo_map_size
+!  print*,' cpu  time :',cpu_2 - cpu_1, 's'
+!  print*,' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')'
+
+
+end
diff --git a/src/mpi/mpi.irp.f b/src/mpi/mpi.irp.f
index d947f1b9..41f303ea 100644
--- a/src/mpi/mpi.irp.f
+++ b/src/mpi/mpi.irp.f
@@ -93,6 +93,7 @@ SUBST [ double, type, 8, DOUBLE_PRECISION ]
 double    ; double precision  ; 8 ; DOUBLE_PRECISION ;;
 integer   ; integer           ; 4  ; INTEGER ;;
 integer8  ; integer*8         ; 8  ; INTEGER8 ;;
+complex_double  ; complex*16  ; 16 ; DOUBLE_COMPLEX ;;
 
 END_TEMPLATE
 
diff --git a/src/nuclei/EZFIO.cfg b/src/nuclei/EZFIO.cfg
index 34c27c46..b4599b72 100644
--- a/src/nuclei/EZFIO.cfg
+++ b/src/nuclei/EZFIO.cfg
@@ -32,8 +32,31 @@ doc: Nuclear repulsion (Computed automaticaly or Read in the |EZFIO|)
 type:double precision
 interface: ezfio
 
-[is_periodic]
+[is_complex]
 type: logical
 doc: If true, the calculation uses periodic boundary conditions
 interface: ezfio, provider, ocaml
 default: false
+
+[io_kconserv]
+doc: Read/Write kconserv array from/to disk [ Write | Read | None ]
+type: Disk_access
+interface: ezfio,provider,ocaml
+default: None
+
+[kpt_num]
+doc: Number of k-points
+type: integer
+interface: ezfio, provider, ocaml
+default: 1
+
+[kpt_pair_num]
+doc: Number of k-point pairs
+type: integer
+interface: ezfio
+
+[kconserv]
+type: integer
+doc: array containing information about k-point symmetry
+size: (nuclei.kpt_num,nuclei.kpt_num,nuclei.kpt_num)
+interface: ezfio
diff --git a/src/nuclei/kconserv_cplx.irp.f b/src/nuclei/kconserv_cplx.irp.f
new file mode 100644
index 00000000..616ba779
--- /dev/null
+++ b/src/nuclei/kconserv_cplx.irp.f
@@ -0,0 +1,40 @@
+BEGIN_PROVIDER [integer, kpt_pair_num]
+  implicit none
+  kpt_pair_num = shiftr(kpt_num*kpt_num+kpt_num,1)
+END_PROVIDER
+
+BEGIN_PROVIDER [integer, kconserv, (kpt_num,kpt_num,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  ! Information about k-point symmetry
+  !
+  ! for k-points I,J,K: kconserv(I,J,K) gives L such that
+  ! k_I + k_J = k_K + k_L
+  ! two-electron integrals of the form <ij|kx>
+  ! (where i,j,k have momentum k_I, k_J, k_K)
+  ! will only be nonzero if x has momentum k_L (as described above)
+  !
+  END_DOC
+  integer                        :: i,j,k,l
+
+  if (read_kconserv) then
+    call ezfio_get_nuclei_kconserv(kconserv)
+    print *,  'kconserv read from disk'
+  else
+    print*,'kconserv must be provided'
+    stop -1
+  endif
+  if (write_kconserv) then
+    call ezfio_set_nuclei_kconserv(kconserv)
+    print *,  'kconserv written to disk'
+  endif
+END_PROVIDER
+
+subroutine double_allowed_kpts(kh1,kh2,kp1,kp2,is_allowed)
+  implicit none
+  integer, intent(in) :: kh1,kh2,kp1,kp2
+  logical, intent(out) :: is_allowed
+
+  is_allowed = (kconserv(kh1,kh2,kp1) == kp2)
+end subroutine
+
diff --git a/src/scf_utils/diagonalize_fock_cplx.irp.f b/src/scf_utils/diagonalize_fock_cplx.irp.f
new file mode 100644
index 00000000..d8bb1b6e
--- /dev/null
+++ b/src/scf_utils/diagonalize_fock_cplx.irp.f
@@ -0,0 +1,182 @@
+BEGIN_PROVIDER [ complex*16, eigenvectors_Fock_matrix_mo_complex, (ao_num,mo_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Eigenvectors of the Fock matrix in the |MO| basis obtained with level shift.
+   END_DOC
+
+   integer                        :: i,j
+   integer                        :: n
+   complex*16, allocatable :: F(:,:)
+   double precision, allocatable  :: diag(:)
+
+
+   allocate( F(mo_num,mo_num) )
+   allocate (diag(mo_num) )
+
+   do j=1,mo_num
+     do i=1,mo_num
+       F(i,j) = fock_matrix_mo_complex(i,j)
+     enddo
+   enddo
+
+   if(frozen_orb_scf)then
+     integer                        :: iorb,jorb
+     do i = 1, n_core_orb
+      iorb = list_core(i)
+      do j = 1, n_act_orb
+       jorb = list_act(j)
+       F(iorb,jorb) = (0.d0,0.d0)
+       F(jorb,iorb) = (0.d0,0.d0)
+      enddo
+     enddo
+   endif
+
+   ! Insert level shift here
+   do i = elec_beta_num+1, elec_alpha_num
+     F(i,i) += 0.5d0*level_shift
+   enddo
+
+   do i = elec_alpha_num+1, mo_num
+     F(i,i) += level_shift
+   enddo
+
+   n = mo_num
+   call lapack_diagd_diag_in_place_complex(diag,F,n,n)
+
+   call zgemm('N','N',ao_num,mo_num,mo_num, (1.d0,0.d0),            &
+       mo_coef_complex, size(mo_coef_complex,1), F, size(F,1),                       &
+       (0.d0,0.d0), eigenvectors_Fock_matrix_mo_complex, size(eigenvectors_Fock_matrix_mo_complex,1))
+   deallocate(F, diag)
+
+
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+BEGIN_PROVIDER [ complex*16, eigenvectors_Fock_matrix_mo_kpts, (ao_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Eigenvectors of the Fock matrix in the |MO| basis obtained with level shift.
+  END_DOC
+
+  integer                        :: i,j,k
+  integer                        :: n
+  complex*16, allocatable :: F(:,:)
+  double precision, allocatable  :: diag(:)
+
+
+  allocate( F(mo_num_per_kpt,mo_num_per_kpt) )
+  allocate (diag(mo_num_per_kpt) )
+
+  do k=1,kpt_num 
+    do j=1,mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        !F(i,j) = fock_matrix_mo_complex(i,j)
+        F(i,j) = fock_matrix_mo_kpts(i,j,k)
+      enddo
+    enddo
+  
+    if(frozen_orb_scf)then
+      integer                        :: iorb,jorb
+      !todo: core/act per kpt
+      do i = 1, n_core_orb
+       iorb = list_core(i)
+       do j = 1, n_act_orb
+        jorb = list_act(j)
+        F(iorb,jorb) = (0.d0,0.d0)
+        F(jorb,iorb) = (0.d0,0.d0)
+       enddo
+      enddo
+    endif
+  
+    ! Insert level shift here
+    !todo: elec per kpt
+    do i = elec_beta_num_kpts(k)+1, elec_alpha_num_kpts(k)
+      F(i,i) += 0.5d0*level_shift
+    enddo
+  
+    do i = elec_alpha_num_kpts(k)+1, mo_num_per_kpt
+      F(i,i) += level_shift
+    enddo
+  
+    n = mo_num_per_kpt
+    call lapack_diagd_diag_in_place_complex(diag,F,n,n)
+  
+    call zgemm('N','N',ao_num_per_kpt,mo_num_per_kpt,mo_num_per_kpt, (1.d0,0.d0),            &
+        mo_coef_kpts(:,:,k), size(mo_coef_kpts,1), F, size(F,1),                       &
+        (0.d0,0.d0), eigenvectors_Fock_matrix_mo_kpts(:,:,k), size(eigenvectors_Fock_matrix_mo_kpts,1))
+  enddo
+  deallocate(F, diag)
+
+
+END_PROVIDER
+BEGIN_PROVIDER [ complex*16, eigenvectors_Fock_matrix_mo_kpts_real, (ao_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Eigenvectors of the Fock matrix in the |MO| basis obtained with level shift.
+  END_DOC
+
+  integer                        :: i,j,k
+  integer                        :: n
+  !complex*16, allocatable :: F(:,:)
+  double precision, allocatable :: F(:,:)
+  double precision, allocatable  :: diag(:), mo_coef_tmp(:,:), eigvecs_tmp(:,:)
+
+  allocate( F(mo_num_per_kpt,mo_num_per_kpt) )
+  allocate (diag(mo_num_per_kpt) )
+  allocate (mo_coef_tmp(ao_num_per_kpt,mo_num_per_kpt) )
+  allocate (eigvecs_tmp(ao_num_per_kpt,mo_num_per_kpt) )
+
+  do k=1,kpt_num 
+    do j=1,mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        !F(i,j) = fock_matrix_mo_complex(i,j)
+        F(i,j) = dble(fock_matrix_mo_kpts(i,j,k))
+      enddo
+    enddo
+  
+    if(frozen_orb_scf)then
+      integer                        :: iorb,jorb
+      !todo: core/act per kpt
+      do i = 1, n_core_orb
+       iorb = list_core(i)
+       do j = 1, n_act_orb
+        jorb = list_act(j)
+        F(iorb,jorb) = 0.d0
+        F(jorb,iorb) = 0.d0
+       enddo
+      enddo
+    endif
+  
+    ! Insert level shift here
+    !todo: elec per kpt
+    do i = elec_beta_num_kpts(k)+1, elec_alpha_num_kpts(k)
+      F(i,i) += 0.5d0*level_shift
+    enddo
+  
+    do i = elec_alpha_num_kpts(k)+1, mo_num_per_kpt
+      F(i,i) += level_shift
+    enddo
+  
+    n = mo_num_per_kpt
+    call lapack_diagd_diag_in_place(diag,F,n,n)
+    
+    mo_coef_tmp = dble(mo_coef_kpts(:,:,k))
+    call dgemm('N','N',ao_num_per_kpt,mo_num_per_kpt,mo_num_per_kpt, 1.d0,            &
+        mo_coef_tmp, size(mo_coef_tmp,1), F, size(F,1),                       &
+        0.d0, eigvecs_tmp, size(eigvecs_tmp,1))
+
+    call zlacp2('X',ao_num_per_kpt,mo_num_per_kpt,eigvecs_tmp,size(eigvecs_tmp,1), &
+        eigenvectors_fock_matrix_mo_kpts_real(:,:,k), size(eigenvectors_Fock_matrix_mo_kpts_real,1))
+
+!    call zgemm('N','N',ao_num_per_kpt,mo_num_per_kpt,mo_num_per_kpt, (1.d0,0.d0),            &
+!        mo_coef_kpts(:,:,k), size(mo_coef_kpts,1), F, size(F,1),                       &
+!        (0.d0,0.d0), eigenvectors_Fock_matrix_mo_kpts(:,:,k), size(eigenvectors_Fock_matrix_mo_kpts,1))
+  enddo
+  deallocate(F, diag,mo_coef_tmp,eigvecs_tmp)
+
+
+END_PROVIDER
diff --git a/src/scf_utils/diis_cplx.irp.f b/src/scf_utils/diis_cplx.irp.f
new file mode 100644
index 00000000..601b9b97
--- /dev/null
+++ b/src/scf_utils/diis_cplx.irp.f
@@ -0,0 +1,294 @@
+
+BEGIN_PROVIDER [complex*16, FPS_SPF_Matrix_AO_complex, (AO_num, AO_num)]
+  implicit none
+  BEGIN_DOC
+  !   Commutator FPS - SPF
+  END_DOC
+  complex*16, allocatable  :: scratch(:,:)
+  allocate(                                                          &
+      scratch(AO_num, AO_num)                                  &
+      )
+
+  ! Compute FP
+
+  call zgemm('N','N',AO_num,AO_num,AO_num,                           &
+      (1.d0,0.d0),                                                          &
+      Fock_Matrix_AO_complex,Size(Fock_Matrix_AO_complex,1),                &
+      SCF_Density_Matrix_AO_complex,Size(SCF_Density_Matrix_AO_complex,1),  &
+      (0.d0,0.d0),                                                          &
+      scratch,Size(scratch,1))
+
+  ! Compute FPS
+
+  call zgemm('N','N',AO_num,AO_num,AO_num,                           &
+      (1.d0,0.d0),                                                   &
+      scratch,Size(scratch,1),                                       &
+      AO_Overlap_complex,Size(AO_Overlap_complex,1),                 &
+      (0.d0,0.d0),                                                  &
+      FPS_SPF_Matrix_AO_complex,Size(FPS_SPF_Matrix_AO_complex,1))
+
+  ! Compute SP
+
+  call zgemm('N','N',AO_num,AO_num,AO_num,                           &
+      (1.d0,0.d0),                                                          &
+      AO_Overlap_complex,Size(AO_Overlap_complex,1),                        &
+      SCF_Density_Matrix_AO_complex,Size(SCF_Density_Matrix_AO_complex,1),  &
+      (0.d0,0.d0),                                                          &
+      scratch,Size(scratch,1))
+
+  ! Compute FPS - SPF
+
+  call zgemm('N','N',AO_num,AO_num,AO_num,                           &
+      (-1.d0,0.d0),                                                  &
+      scratch,Size(scratch,1),                                       &
+      Fock_Matrix_AO_complex,Size(Fock_Matrix_AO_complex,1),         &
+      (1.d0,0.d0),                                                   &
+      FPS_SPF_Matrix_AO_complex,Size(FPS_SPF_Matrix_AO_complex,1))
+
+END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, FPS_SPF_Matrix_MO_complex, (mo_num, mo_num)]
+  implicit none
+  begin_doc
+!   Commutator FPS - SPF in MO basis
+  end_doc
+  call ao_to_mo_complex(FPS_SPF_Matrix_AO_complex, size(FPS_SPF_Matrix_AO_complex,1), &
+     FPS_SPF_Matrix_MO_complex, size(FPS_SPF_Matrix_MO_complex,1))
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ double precision, eigenvalues_Fock_matrix_AO_complex, (AO_num) ]
+&BEGIN_PROVIDER [ complex*16, eigenvectors_Fock_matrix_AO_complex, (AO_num,AO_num) ]
+   !TODO: finish this provider; write provider for S_half_inv_complex
+   BEGIN_DOC
+   ! Eigenvalues and eigenvectors of the Fock matrix over the AO basis
+   END_DOC
+
+   implicit none
+
+   double precision, allocatable  :: rwork(:)
+   integer                        :: lwork,info,lrwork
+   complex*16, allocatable        :: scratch(:,:),Xt(:,:),work(:)
+   integer                        :: i,j
+
+   allocate(                                                         &
+       scratch(AO_num,AO_num),                                 &
+       Xt(AO_num,AO_num)                                             &
+       )
+
+! Calculate Xt
+
+  do i=1,AO_num
+    do j=1,AO_num
+      Xt(i,j) = dconjg(S_half_inv_complex(j,i))
+    enddo
+  enddo
+
+! Calculate Fock matrix in orthogonal basis: F' = Xt.F.X
+
+  call zgemm('N','N',AO_num,AO_num,AO_num,     &
+       (1.d0,0.d0),                                   &
+       Fock_matrix_AO_complex,size(Fock_matrix_AO_complex,1),  &
+       S_half_inv_complex,size(s_half_inv_complex,1),        &
+       (0.d0,0.d0),                                   &
+       eigenvectors_Fock_matrix_AO_complex, &
+       size(eigenvectors_Fock_matrix_AO_complex,1))
+
+  call zgemm('N','N',AO_num,AO_num,AO_num,                              &
+       (1.d0,0.d0),                                                            &
+       Xt,size(Xt,1),                                                   &
+       eigenvectors_Fock_matrix_AO_complex, &
+       size(eigenvectors_Fock_matrix_AO_complex,1), &
+       (0.d0,0.d0),                                                            &
+       scratch,size(scratch,1))
+
+! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues
+   lrwork = 3*ao_num - 2
+   allocate(rwork(lrwork), work(1))
+   lwork = -1
+
+   call zheev('V','U',ao_num, &
+        scratch,size(scratch,1), &
+        eigenvalues_Fock_matrix_AO_complex, &
+        work,lwork,rwork,info)
+
+   lwork = int(work(1))
+   deallocate(work)
+   allocate(work(lwork))
+
+   call zheev('V','U',ao_num, &
+        scratch,size(scratch,1), &
+        eigenvalues_Fock_matrix_AO_complex, &
+        work,lwork,rwork,info)
+
+   if(info /= 0) then
+     print *,  irp_here//' failed : ', info
+     stop 1
+   endif
+   
+   deallocate(work,rwork)
+! Back-transform eigenvectors: C =X.C'
+
+  call zgemm('N','N',AO_num,AO_num,AO_num,     &
+       (1.d0,0.d0),                                   &
+       S_half_inv_complex,size(S_half_inv_complex,1),        &
+       scratch,size(scratch,1),                &
+       (0.d0,0.d0),                                   &
+       eigenvectors_Fock_matrix_AO_complex, &
+       size(eigenvectors_Fock_matrix_AO_complex,1))
+
+  deallocate(scratch)
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [complex*16, FPS_SPF_Matrix_AO_kpts, (AO_num_per_kpt, AO_num_per_kpt,kpt_num)]
+  implicit none
+  BEGIN_DOC
+  !   Commutator FPS - SPF
+  END_DOC
+  complex*16, allocatable  :: scratch(:,:)
+  integer :: k
+  allocate(                                                          &
+      scratch(ao_num_per_kpt, ao_num_per_kpt)                                  &
+      )
+
+  do k=1,kpt_num
+
+    ! Compute FP
+
+    call zgemm('N','N',AO_num_per_kpt,AO_num_per_kpt,AO_num_per_kpt,                           &
+        (1.d0,0.d0),                                                          &
+        Fock_Matrix_AO_kpts(1,1,k),Size(Fock_Matrix_AO_kpts,1),                &
+        scf_density_matrix_ao_kpts(1,1,k),Size(SCF_Density_Matrix_AO_kpts,1),  &
+        (0.d0,0.d0),                                                          &
+        scratch,Size(scratch,1))
+
+    ! Compute FPS
+
+    call zgemm('N','N',AO_num_per_kpt,AO_num_per_kpt,AO_num_per_kpt,                           &
+        (1.d0,0.d0),                                                   &
+        scratch,Size(scratch,1),                                       &
+        AO_Overlap_kpts(1,1,k),Size(AO_Overlap_kpts,1),                 &
+        (0.d0,0.d0),                                                  &
+        FPS_SPF_Matrix_AO_kpts(1,1,k),Size(FPS_SPF_Matrix_AO_kpts,1))
+
+    ! Compute SP
+
+    call zgemm('N','N',AO_num_per_kpt,AO_num_per_kpt,AO_num_per_kpt,                           &
+        (1.d0,0.d0),                                                          &
+        AO_Overlap_kpts(1,1,k),Size(AO_Overlap_kpts,1),                        &
+        SCF_Density_Matrix_AO_kpts(1,1,k),Size(SCF_Density_Matrix_AO_kpts,1),  &
+        (0.d0,0.d0),                                                          &
+        scratch,Size(scratch,1))
+
+    ! Compute FPS - SPF
+
+    call zgemm('N','N',AO_num_per_kpt,AO_num_per_kpt,AO_num_per_kpt,                           &
+        (-1.d0,0.d0),                                                  &
+        scratch,Size(scratch,1),                                       &
+        Fock_Matrix_AO_kpts(1,1,k),Size(Fock_Matrix_AO_kpts,1),         &
+        (1.d0,0.d0),                                                   &
+        FPS_SPF_Matrix_AO_kpts(1,1,k),Size(FPS_SPF_Matrix_AO_kpts,1))
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [complex*16, FPS_SPF_Matrix_MO_kpts, (mo_num_per_kpt, mo_num_per_kpt,kpt_num)]
+  implicit none
+  begin_doc
+!   Commutator FPS - SPF in MO basis
+  end_doc
+  call ao_to_mo_kpts(FPS_SPF_Matrix_AO_kpts, size(FPS_SPF_Matrix_AO_kpts,1), &
+     FPS_SPF_Matrix_MO_kpts, size(FPS_SPF_Matrix_MO_kpts,1))
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ double precision, eigenvalues_fock_matrix_ao_kpts, (ao_num_per_kpt,kpt_num) ]
+&BEGIN_PROVIDER [ complex*16, eigenvectors_fock_matrix_ao_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+   !TODO: finish this provider; write provider for S_half_inv_complex
+   BEGIN_DOC
+   ! Eigenvalues and eigenvectors of the Fock matrix over the AO basis
+   END_DOC
+
+   implicit none
+
+   double precision, allocatable  :: rwork(:)
+   integer                        :: lwork,info,lrwork
+   complex*16, allocatable        :: scratch(:,:),Xt(:,:),work(:)
+   integer                        :: i,j,k
+
+
+   allocate(                                                         &
+       scratch(ao_num_per_kpt,ao_num_per_kpt),                                 &
+       Xt(ao_num_per_kpt,ao_num_per_kpt)                                             &
+       )
+
+  do k=1,kpt_num
+  ! Calculate Xt
+  
+    do i=1,ao_num_per_kpt
+      do j=1,ao_num_per_kpt
+!        Xt(i,j) = dconjg(s_half_inv_complex(j,i,k))
+        Xt(i,j) = dconjg(S_half_inv_kpts(j,i,k))
+      enddo
+    enddo
+  
+  ! Calculate Fock matrix in orthogonal basis: F' = Xt.F.X
+  
+    call zgemm('N','N',ao_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt,     &
+         (1.d0,0.d0),                                   &
+         fock_matrix_ao_kpts(1,1,k),size(fock_matrix_ao_kpts,1),  &
+         s_half_inv_kpts(1,1,k),size(s_half_inv_kpts,1),        &
+         (0.d0,0.d0),                                   &
+         eigenvectors_fock_matrix_ao_kpts(1,1,k), &
+         size(eigenvectors_fock_matrix_ao_kpts,1))
+  
+    call zgemm('N','N',ao_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt,                              &
+         (1.d0,0.d0),                                                            &
+         Xt,size(Xt,1),                                                   &
+         eigenvectors_fock_matrix_ao_kpts(1,1,k), &
+         size(eigenvectors_fock_matrix_ao_kpts,1), &
+         (0.d0,0.d0),                                                            &
+         scratch,size(scratch,1))
+  
+  ! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues
+     lrwork = 3*ao_num_per_kpt - 2
+     allocate(rwork(lrwork), work(1))
+     lwork = -1
+  
+     call zheev('V','U',ao_num_per_kpt, &
+          scratch,size(scratch,1), &
+          eigenvalues_fock_matrix_ao_kpts(1,k), &
+          work,lwork,rwork,info)
+  
+     lwork = int(work(1))
+     deallocate(work)
+     allocate(work(lwork))
+  
+     call zheev('V','U',ao_num_per_kpt, &
+          scratch,size(scratch,1), &
+          eigenvalues_fock_matrix_ao_kpts(1,k), &
+          work,lwork,rwork,info)
+  
+     if(info /= 0) then
+       print *,  irp_here//' failed : ', info
+       stop 1
+     endif
+     
+     deallocate(work,rwork)
+  ! Back-transform eigenvectors: C =X.C'
+  
+    call zgemm('N','N',ao_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt,     &
+         (1.d0,0.d0),                                   &
+         s_half_inv_kpts(1,1,k),size(s_half_inv_kpts,1),        &
+         scratch,size(scratch,1),                &
+         (0.d0,0.d0),                                   &
+         eigenvectors_fock_matrix_ao_kpts(1,1,k), &
+         size(eigenvectors_fock_matrix_ao_kpts,1))
+  enddo
+  deallocate(scratch)
+END_PROVIDER
diff --git a/src/scf_utils/fock_matrix.irp.f b/src/scf_utils/fock_matrix.irp.f
index fc9eaadd..efd64be1 100644
--- a/src/scf_utils/fock_matrix.irp.f
+++ b/src/scf_utils/fock_matrix.irp.f
@@ -101,21 +101,29 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_mo_alpha, (mo_num,mo_num) ]
    BEGIN_DOC
    ! Fock matrix on the MO basis
    END_DOC
-   call ao_to_mo(Fock_matrix_ao_alpha,size(Fock_matrix_ao_alpha,1), &
+   if (is_complex) then
+     print*,'error',irp_here
+     stop -1
+   else
+     call ao_to_mo(Fock_matrix_ao_alpha,size(Fock_matrix_ao_alpha,1), &
                  Fock_matrix_mo_alpha,size(Fock_matrix_mo_alpha,1))
+   endif
 END_PROVIDER
 
-
 BEGIN_PROVIDER [ double precision, Fock_matrix_mo_beta, (mo_num,mo_num) ]
    implicit none
    BEGIN_DOC
    ! Fock matrix on the MO basis
    END_DOC
-   call ao_to_mo(Fock_matrix_ao_beta,size(Fock_matrix_ao_beta,1), &
+   if (is_complex) then
+     print*,'error',irp_here
+     stop -1
+   else
+     call ao_to_mo(Fock_matrix_ao_beta,size(Fock_matrix_ao_beta,1), &
                  Fock_matrix_mo_beta,size(Fock_matrix_mo_beta,1))
+   endif
 END_PROVIDER
 
-
 BEGIN_PROVIDER [ double precision, Fock_matrix_ao, (ao_num, ao_num) ]
  implicit none
  BEGIN_DOC
@@ -142,7 +150,6 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_ao, (ao_num, ao_num) ]
  endif
 END_PROVIDER
 
-
 BEGIN_PROVIDER [ double precision, SCF_energy ]
  implicit none
  BEGIN_DOC
@@ -150,14 +157,30 @@ BEGIN_PROVIDER [ double precision, SCF_energy ]
  END_DOC
  SCF_energy = nuclear_repulsion
 
- integer                        :: i,j
- do j=1,ao_num
-   do i=1,ao_num
-     SCF_energy += 0.5d0 * (                                          &
-         (ao_one_e_integrals(i,j) + Fock_matrix_ao_alpha(i,j) ) *  SCF_density_matrix_ao_alpha(i,j) +&
-         (ao_one_e_integrals(i,j) + Fock_matrix_ao_beta (i,j) ) *  SCF_density_matrix_ao_beta (i,j) )
+ integer                        :: i,j,k
+ if (is_complex) then
+   complex*16 :: scf_e_tmp
+   scf_e_tmp = dcmplx(SCF_energy,0.d0)
+   do k=1,kpt_num
+     do j=1,ao_num_per_kpt
+       do i=1,ao_num_per_kpt
+         scf_e_tmp += 0.5d0 * (                                          &
+             (ao_one_e_integrals_kpts(i,j,k) + Fock_matrix_ao_alpha_kpts(i,j,k) ) *  SCF_density_matrix_ao_alpha_kpts(j,i,k) +&
+             (ao_one_e_integrals_kpts(i,j,k) + Fock_matrix_ao_beta_kpts (i,j,k) ) *  SCF_density_matrix_ao_beta_kpts (j,i,k) )
+       enddo
+     enddo
    enddo
- enddo
+   !TODO: add check for imaginary part? (should be zero)
+   SCF_energy = dble(scf_e_tmp)
+ else
+   do j=1,ao_num
+     do i=1,ao_num
+       SCF_energy += 0.5d0 * (                                          &
+           (ao_one_e_integrals(i,j) + Fock_matrix_ao_alpha(i,j) ) *  SCF_density_matrix_ao_alpha(i,j) +&
+           (ao_one_e_integrals(i,j) + Fock_matrix_ao_beta (i,j) ) *  SCF_density_matrix_ao_beta (i,j) )
+     enddo
+   enddo
+ endif
  SCF_energy += extra_e_contrib_density
 
 END_PROVIDER
diff --git a/src/scf_utils/fock_matrix_cplx.irp.f b/src/scf_utils/fock_matrix_cplx.irp.f
new file mode 100644
index 00000000..e2ada6fc
--- /dev/null
+++ b/src/scf_utils/fock_matrix_cplx.irp.f
@@ -0,0 +1,822 @@
+ BEGIN_PROVIDER [ complex*16, Fock_matrix_mo_complex, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ double precision, Fock_matrix_diag_mo_complex, (mo_num)]
+   implicit none
+   BEGIN_DOC
+   ! Fock matrix on the MO basis.
+   ! For open shells, the ROHF Fock Matrix is ::
+   !
+   !       |   F-K    |  F + K/2  |    F     |
+   !       |---------------------------------|
+   !       | F + K/2  |     F     |  F - K/2 |
+   !       |---------------------------------|
+   !       |    F     |  F - K/2  |  F + K   |
+   !
+   !
+   ! F = 1/2 (Fa + Fb)
+   !
+   ! K = Fb - Fa
+   !
+   END_DOC
+   integer                        :: i,j,n
+   if (elec_alpha_num == elec_beta_num) then
+     Fock_matrix_mo_complex = Fock_matrix_mo_alpha_complex
+   else
+
+     do j=1,elec_beta_num
+       ! F-K
+       do i=1,elec_beta_num !CC
+         Fock_matrix_mo_complex(i,j) = 0.5d0*(Fock_matrix_mo_alpha_complex(i,j)+Fock_matrix_mo_beta_complex(i,j))&
+             - (Fock_matrix_mo_beta_complex(i,j) - Fock_matrix_mo_alpha_complex(i,j))
+       enddo
+       ! F+K/2
+       do i=elec_beta_num+1,elec_alpha_num  !CA
+         Fock_matrix_mo_complex(i,j) = 0.5d0*(Fock_matrix_mo_alpha_complex(i,j)+Fock_matrix_mo_beta_complex(i,j))&
+             + 0.5d0*(Fock_matrix_mo_beta_complex(i,j) - Fock_matrix_mo_alpha_complex(i,j))
+       enddo
+       ! F
+       do i=elec_alpha_num+1, mo_num !CV
+         Fock_matrix_mo_complex(i,j) = 0.5d0*(Fock_matrix_mo_alpha_complex(i,j)+Fock_matrix_mo_beta_complex(i,j))
+       enddo
+     enddo
+
+     do j=elec_beta_num+1,elec_alpha_num
+       ! F+K/2
+       do i=1,elec_beta_num !AC
+         Fock_matrix_mo_complex(i,j) = 0.5d0*(Fock_matrix_mo_alpha_complex(i,j)+Fock_matrix_mo_beta_complex(i,j))&
+             + 0.5d0*(Fock_matrix_mo_beta_complex(i,j) - Fock_matrix_mo_alpha_complex(i,j))
+       enddo
+       ! F
+       do i=elec_beta_num+1,elec_alpha_num !AA
+         Fock_matrix_mo_complex(i,j) = 0.5d0*(Fock_matrix_mo_alpha_complex(i,j)+Fock_matrix_mo_beta_complex(i,j))
+       enddo
+       ! F-K/2
+       do i=elec_alpha_num+1, mo_num !AV
+         Fock_matrix_mo_complex(i,j) = 0.5d0*(Fock_matrix_mo_alpha_complex(i,j)+Fock_matrix_mo_beta_complex(i,j))&
+             - 0.5d0*(Fock_matrix_mo_beta_complex(i,j) - Fock_matrix_mo_alpha_complex(i,j))
+       enddo
+     enddo
+
+     do j=elec_alpha_num+1, mo_num
+       ! F
+       do i=1,elec_beta_num !VC
+         Fock_matrix_mo_complex(i,j) = 0.5d0*(Fock_matrix_mo_alpha_complex(i,j)+Fock_matrix_mo_beta_complex(i,j))
+       enddo
+       ! F-K/2
+       do i=elec_beta_num+1,elec_alpha_num !VA
+         Fock_matrix_mo_complex(i,j) = 0.5d0*(Fock_matrix_mo_alpha_complex(i,j)+Fock_matrix_mo_beta_complex(i,j))&
+             - 0.5d0*(Fock_matrix_mo_beta_complex(i,j) - Fock_matrix_mo_alpha_complex(i,j))
+       enddo
+       ! F+K
+       do i=elec_alpha_num+1,mo_num !VV
+         Fock_matrix_mo_complex(i,j) = 0.5d0*(Fock_matrix_mo_alpha_complex(i,j)+Fock_matrix_mo_beta_complex(i,j)) &
+             + (Fock_matrix_mo_beta_complex(i,j) - Fock_matrix_mo_alpha_complex(i,j))
+       enddo
+     enddo
+
+   endif
+
+   do i = 1, mo_num
+     Fock_matrix_diag_mo_complex(i) = dble(Fock_matrix_mo_complex(i,i))
+     if (dabs(dimag(Fock_matrix_mo_complex(i,i))) .gt. 1.0d-12) then
+       !stop 'diagonal elements of Fock matrix should be real'
+       print *, 'diagonal elements of Fock matrix should be real',i,Fock_matrix_mo_complex(i,i)
+       !stop -1
+     endif
+   enddo
+
+
+   if(frozen_orb_scf)then
+     integer                        :: iorb,jorb
+     do i = 1, n_core_orb
+      iorb = list_core(i)
+      do j = 1, n_act_orb
+       jorb = list_act(j)
+       Fock_matrix_mo_complex(iorb,jorb) = (0.d0,0.d0)
+       Fock_matrix_mo_complex(jorb,iorb) = (0.d0,0.d0)
+      enddo
+     enddo
+   endif
+
+END_PROVIDER
+
+
+
+BEGIN_PROVIDER [ complex*16, Fock_matrix_mo_alpha_complex, (mo_num,mo_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Fock matrix on the MO basis
+   END_DOC
+   call ao_to_mo_complex(Fock_matrix_ao_alpha_complex,size(Fock_matrix_ao_alpha_complex,1), &
+                 Fock_matrix_mo_alpha_complex,size(Fock_matrix_mo_alpha_complex,1))
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, Fock_matrix_mo_beta_complex, (mo_num,mo_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Fock matrix on the MO basis
+   END_DOC
+   call ao_to_mo_complex(Fock_matrix_ao_beta_complex,size(Fock_matrix_ao_beta_complex,1), &
+                 Fock_matrix_mo_beta_complex,size(Fock_matrix_mo_beta_complex,1))
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ complex*16, Fock_matrix_ao_complex, (ao_num, ao_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Fock matrix in AO basis set
+ END_DOC
+
+ if(frozen_orb_scf)then
+   call mo_to_ao_complex(Fock_matrix_mo_complex,size(Fock_matrix_mo_complex,1),              &
+       Fock_matrix_ao_complex,size(Fock_matrix_ao_complex,1))
+ else
+   if ( (elec_alpha_num == elec_beta_num).and.                       &
+         (level_shift == 0.) )                                       &
+         then
+     integer                        :: i,j
+     do j=1,ao_num
+       do i=1,ao_num
+         Fock_matrix_ao_complex(i,j) = Fock_matrix_ao_alpha_complex(i,j)
+       enddo
+     enddo
+   else
+     call mo_to_ao_complex(Fock_matrix_mo_complex,size(Fock_matrix_mo_complex,1),            &
+         Fock_matrix_ao_complex,size(Fock_matrix_ao_complex,1))
+   endif
+ endif
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ complex*16, ao_two_e_integral_alpha_complex, (ao_num, ao_num) ]
+&BEGIN_PROVIDER [ complex*16, ao_two_e_integral_beta_complex ,  (ao_num, ao_num) ]
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Alpha and Beta Fock matrices in AO basis set
+  END_DOC
+  !TODO: finish implementing this: see complex qp1 (different mapping)
+ 
+  integer                        :: i,j,k,l,k1,r,s
+  integer                        :: i0,j0,k0,l0
+  integer*8                      :: p,q
+  complex*16               :: integral, c0
+  complex*16, allocatable  :: ao_two_e_integral_alpha_tmp(:,:)
+  complex*16, allocatable  :: ao_two_e_integral_beta_tmp(:,:)
+ 
+  ao_two_e_integral_alpha_complex = (0.d0,0.d0)
+  ao_two_e_integral_beta_complex  = (0.d0,0.d0)
+  PROVIDE ao_two_e_integrals_in_map
+ 
+  integer(omp_lock_kind) :: lck(ao_num)
+  integer(map_size_kind)     :: i8
+  integer                        :: ii(4), jj(4), kk(4), ll(4), k2
+  integer(cache_map_size_kind)   :: n_elements_max, n_elements
+  integer(key_kind), allocatable :: keys(:)
+  double precision, allocatable  :: values(:)
+  complex*16, parameter    :: i_sign(4) = (/(0.d0,1.d0),(0.d0,1.d0),(0.d0,-1.d0),(0.d0,-1.d0)/)
+  integer(key_kind) :: key1
+ 
+  !$OMP PARALLEL DEFAULT(NONE)                                      &
+      !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max, &
+      !$OMP  n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, &
+      !$OMP  c0,key1)&
+      !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha_complex, &
+      !$OMP  SCF_density_matrix_ao_beta_complex, &
+      !$OMP  ao_integrals_map, ao_two_e_integral_alpha_complex, ao_two_e_integral_beta_complex)
+ 
+  call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max)
+  allocate(keys(n_elements_max), values(n_elements_max))
+  allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), &
+           ao_two_e_integral_beta_tmp(ao_num,ao_num))
+  ao_two_e_integral_alpha_tmp = (0.d0,0.d0)
+  ao_two_e_integral_beta_tmp  = (0.d0,0.d0)
+ 
+  !$OMP DO SCHEDULE(static,1)
+  do i8=0_8,ao_integrals_map%map_size
+    n_elements = n_elements_max
+    call get_cache_map(ao_integrals_map,i8,keys,values,n_elements)
+    do k1=1,n_elements
+      ! get original key
+      ! reverse of 2*key (imag part) and 2*key-1 (real part)
+      key1 = shiftr(keys(k1)+1,1)
+
+      call two_e_integrals_index_reverse_complex_1(ii,jj,kk,ll,key1)
+      ! i<=k, j<=l, ik<=jl
+      ! ijkl, jilk, klij*, lkji*
+
+      if (shiftl(key1,1)==keys(k1)) then !imaginary part (even)
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          integral = i_sign(k2)*values(k1) !for klij and lkji, take complex conjugate
+
+          !G_a(i,k) += D_{ab}(l,j)*(<ij|kl>)
+          !G_b(i,k) += D_{ab}(l,j)*(<ij|kl>)
+          !G_a(i,l) -= D_a   (k,j)*(<ij|kl>)
+          !G_b(i,l) -= D_b   (k,j)*(<ij|kl>)
+
+          c0 = (scf_density_matrix_ao_alpha_complex(l,j)+scf_density_matrix_ao_beta_complex(l,j)) * integral
+
+          ao_two_e_integral_alpha_tmp(i,k) += c0
+          ao_two_e_integral_beta_tmp (i,k) += c0
+
+          ao_two_e_integral_alpha_tmp(i,l) -= SCF_density_matrix_ao_alpha_complex(k,j) * integral
+          ao_two_e_integral_beta_tmp (i,l) -= scf_density_matrix_ao_beta_complex (k,j) * integral
+        enddo
+      else ! real part
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          integral = values(k1)
+
+          c0 = (scf_density_matrix_ao_alpha_complex(l,j)+scf_density_matrix_ao_beta_complex(l,j)) * integral
+
+          ao_two_e_integral_alpha_tmp(i,k) += c0
+          ao_two_e_integral_beta_tmp (i,k) += c0
+
+          ao_two_e_integral_alpha_tmp(i,l) -= SCF_density_matrix_ao_alpha_complex(k,j) * integral
+          ao_two_e_integral_beta_tmp (i,l) -= scf_density_matrix_ao_beta_complex (k,j) * integral
+        enddo
+      endif
+    enddo
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP CRITICAL
+  ao_two_e_integral_alpha_complex += ao_two_e_integral_alpha_tmp
+  ao_two_e_integral_beta_complex  += ao_two_e_integral_beta_tmp
+  !$OMP END CRITICAL
+  deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)
+  !$OMP END PARALLEL
+
+ 
+  !$OMP PARALLEL DEFAULT(NONE)                                      &
+      !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max, &
+      !$OMP  n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, &
+      !$OMP  c0,key1)&
+      !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha_complex, &
+      !$OMP   SCF_density_matrix_ao_beta_complex, &
+      !$OMP  ao_integrals_map_2, ao_two_e_integral_alpha_complex, ao_two_e_integral_beta_complex)
+ 
+  call get_cache_map_n_elements_max(ao_integrals_map_2,n_elements_max)
+  allocate(keys(n_elements_max), values(n_elements_max))
+  allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), &
+           ao_two_e_integral_beta_tmp(ao_num,ao_num))
+  ao_two_e_integral_alpha_tmp = (0.d0,0.d0)
+  ao_two_e_integral_beta_tmp  = (0.d0,0.d0)
+ 
+  !$OMP DO SCHEDULE(static,1)
+  do i8=0_8,ao_integrals_map_2%map_size
+    n_elements = n_elements_max
+    call get_cache_map(ao_integrals_map_2,i8,keys,values,n_elements)
+    do k1=1,n_elements
+      ! get original key
+      ! reverse of 2*key (imag part) and 2*key-1 (real part)
+      key1 = shiftr(keys(k1)+1,1)
+
+      call two_e_integrals_index_reverse_complex_2(ii,jj,kk,ll,key1)
+      ! i>=k, j<=l, ik<=jl
+      ! ijkl, jilk, klij*, lkji*
+      if (shiftl(key1,1)==keys(k1)) then !imaginary part
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          integral = i_sign(k2)*values(k1) ! for klij and lkji, take conjugate
+
+          !G_a(i,k) += D_{ab}(l,j)*(<ij|kl>)
+          !G_b(i,k) += D_{ab}(l,j)*(<ij|kl>)
+          !G_a(i,l) -= D_a   (k,j)*(<ij|kl>)
+          !G_b(i,l) -= D_b   (k,j)*(<ij|kl>)
+
+          c0 = (scf_density_matrix_ao_alpha_complex(l,j)+scf_density_matrix_ao_beta_complex(l,j)) * integral
+
+          ao_two_e_integral_alpha_tmp(i,k) += c0
+          ao_two_e_integral_beta_tmp (i,k) += c0
+
+          ao_two_e_integral_alpha_tmp(i,l) -= SCF_density_matrix_ao_alpha_complex(k,j) * integral
+          ao_two_e_integral_beta_tmp (i,l) -= scf_density_matrix_ao_beta_complex (k,j) * integral
+        enddo
+      else ! real part
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          integral = values(k1)
+
+          c0 = (scf_density_matrix_ao_alpha_complex(l,j)+scf_density_matrix_ao_beta_complex(l,j)) * integral
+
+          ao_two_e_integral_alpha_tmp(i,k) += c0
+          ao_two_e_integral_beta_tmp (i,k) += c0
+
+          ao_two_e_integral_alpha_tmp(i,l) -= SCF_density_matrix_ao_alpha_complex(k,j) * integral
+          ao_two_e_integral_beta_tmp (i,l) -= scf_density_matrix_ao_beta_complex (k,j) * integral
+        enddo
+      endif
+    enddo
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP CRITICAL
+  ao_two_e_integral_alpha_complex += ao_two_e_integral_alpha_tmp
+  ao_two_e_integral_beta_complex  += ao_two_e_integral_beta_tmp
+  !$OMP END CRITICAL
+  deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)
+  !$OMP END PARALLEL
+
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, Fock_matrix_ao_alpha_complex, (ao_num, ao_num) ]
+&BEGIN_PROVIDER [ complex*16, Fock_matrix_ao_beta_complex,  (ao_num, ao_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Alpha Fock matrix in AO basis set
+ END_DOC
+
+ integer                        :: i,j
+ do j=1,ao_num
+   do i=1,ao_num
+     Fock_matrix_ao_alpha_complex(i,j) = ao_one_e_integrals_complex(i,j) + ao_two_e_integral_alpha_complex(i,j)
+     Fock_matrix_ao_beta_complex (i,j) = ao_one_e_integrals_complex(i,j) + ao_two_e_integral_beta_complex (i,j)
+   enddo
+ enddo
+
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts_real               !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_mo_kpts_real, (mo_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+  implicit none
+  integer :: i,j,k
+  do k=1,kpt_num
+    do j=1,mo_num_per_kpt
+      do i=1,mo_num_per_kpt
+        fock_matrix_mo_kpts_real(i,j,k) = dble(fock_matrix_mo_kpts(i,j,k))
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+ BEGIN_PROVIDER [ complex*16, Fock_matrix_mo_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+&BEGIN_PROVIDER [ double precision, Fock_matrix_diag_mo_kpts, (mo_num_per_kpt,kpt_num)]
+   implicit none
+   BEGIN_DOC
+   ! Fock matrix on the MO basis.
+   ! For open shells, the ROHF Fock Matrix is ::
+   !
+   !       |   F-K    |  F + K/2  |    F     |
+   !       |---------------------------------|
+   !       | F + K/2  |     F     |  F - K/2 |
+   !       |---------------------------------|
+   !       |    F     |  F - K/2  |  F + K   |
+   !
+   !
+   ! F = 1/2 (Fa + Fb)
+   !
+   ! K = Fb - Fa
+   !
+   END_DOC
+   integer                        :: i,j,n,k
+   !todo: fix for kpts? (okay for simple cases)
+   if (elec_alpha_num == elec_beta_num) then
+     Fock_matrix_mo_kpts = Fock_matrix_mo_alpha_kpts
+   else
+     do k=1,kpt_num
+       do j=1,elec_beta_num_kpts(k)
+         ! F-K
+         do i=1,elec_beta_num_kpts(k) !CC
+           Fock_matrix_mo_kpts(i,j,k) = 0.5d0*(Fock_matrix_mo_alpha_kpts(i,j,k)+Fock_matrix_mo_beta_kpts(i,j,k))&
+               - (Fock_matrix_mo_beta_kpts(i,j,k) - Fock_matrix_mo_alpha_kpts(i,j,k))
+         enddo
+         ! F+K/2
+         do i=elec_beta_num_kpts(k)+1,elec_alpha_num_kpts(k)  !CA
+           Fock_matrix_mo_kpts(i,j,k) = 0.5d0*(Fock_matrix_mo_alpha_kpts(i,j,k)+Fock_matrix_mo_beta_kpts(i,j,k))&
+               + 0.5d0*(Fock_matrix_mo_beta_kpts(i,j,k) - Fock_matrix_mo_alpha_kpts(i,j,k))
+         enddo
+         ! F
+         do i=elec_alpha_num_kpts(k)+1, mo_num_per_kpt !CV
+           Fock_matrix_mo_kpts(i,j,k) = 0.5d0*(Fock_matrix_mo_alpha_kpts(i,j,k)+Fock_matrix_mo_beta_kpts(i,j,k))
+         enddo
+       enddo
+
+       do j=elec_beta_num_kpts(k)+1,elec_alpha_num_kpts(k)
+         ! F+K/2
+         do i=1,elec_beta_num_kpts(k) !AC
+           Fock_matrix_mo_kpts(i,j,k) = 0.5d0*(Fock_matrix_mo_alpha_kpts(i,j,k)+Fock_matrix_mo_beta_kpts(i,j,k))&
+               + 0.5d0*(Fock_matrix_mo_beta_kpts(i,j,k) - Fock_matrix_mo_alpha_kpts(i,j,k))
+         enddo
+         ! F
+         do i=elec_beta_num_kpts(k)+1,elec_alpha_num_kpts(k) !AA
+           Fock_matrix_mo_kpts(i,j,k) = 0.5d0*(Fock_matrix_mo_alpha_kpts(i,j,k)+Fock_matrix_mo_beta_kpts(i,j,k))
+         enddo
+         ! F-K/2
+         do i=elec_alpha_num_kpts(k)+1, mo_num_per_kpt !AV
+           Fock_matrix_mo_kpts(i,j,k) = 0.5d0*(Fock_matrix_mo_alpha_kpts(i,j,k)+Fock_matrix_mo_beta_kpts(i,j,k))&
+               - 0.5d0*(Fock_matrix_mo_beta_kpts(i,j,k) - Fock_matrix_mo_alpha_kpts(i,j,k))
+         enddo
+       enddo
+
+       do j=elec_alpha_num_kpts(k)+1, mo_num_per_kpt
+         ! F
+         do i=1,elec_beta_num_kpts(k) !VC
+           Fock_matrix_mo_kpts(i,j,k) = 0.5d0*(Fock_matrix_mo_alpha_kpts(i,j,k)+Fock_matrix_mo_beta_kpts(i,j,k))
+         enddo
+         ! F-K/2
+         do i=elec_beta_num_kpts(k)+1,elec_alpha_num_kpts(k) !VA
+           Fock_matrix_mo_kpts(i,j,k) = 0.5d0*(Fock_matrix_mo_alpha_kpts(i,j,k)+Fock_matrix_mo_beta_kpts(i,j,k))&
+               - 0.5d0*(Fock_matrix_mo_beta_kpts(i,j,k) - Fock_matrix_mo_alpha_kpts(i,j,k))
+         enddo
+         ! F+K
+         do i=elec_alpha_num_kpts(k)+1,mo_num_per_kpt !VV
+           Fock_matrix_mo_kpts(i,j,k) = 0.5d0*(Fock_matrix_mo_alpha_kpts(i,j,k)+Fock_matrix_mo_beta_kpts(i,j,k)) &
+               + (Fock_matrix_mo_beta_kpts(i,j,k) - Fock_matrix_mo_alpha_kpts(i,j,k))
+         enddo
+       enddo
+     enddo
+
+   endif
+   do k=1,kpt_num
+     do i = 1, mo_num_per_kpt
+       Fock_matrix_diag_mo_kpts(i,k) = dble(Fock_matrix_mo_kpts(i,i,k))
+       if (dabs(dimag(Fock_matrix_mo_kpts(i,i,k))) .gt. 1.0d-12) then
+         !stop 'diagonal elements of Fock matrix should be real'
+         print *, 'diagonal elements of Fock matrix should be real',i,Fock_matrix_mo_kpts(i,i,k)
+         !stop -1
+       endif
+     enddo
+   enddo
+
+
+   if(frozen_orb_scf)then
+     integer                        :: iorb,jorb
+     do k=1,kpt_num
+       ! for tags: list_core, n_core_orb, n_act_orb, list_act
+       do i = 1, n_core_orb_kpts(k)
+         iorb = list_core_kpts(i,k)
+         do j = 1, n_act_orb_kpts(k)
+           jorb = list_act_kpts(j,k)
+           fock_matrix_mo_kpts(iorb,jorb,k) = (0.d0,0.d0)
+           fock_matrix_mo_kpts(jorb,iorb,k) = (0.d0,0.d0)
+         enddo
+       enddo
+     enddo
+   endif
+
+END_PROVIDER
+
+
+
+BEGIN_PROVIDER [ complex*16, Fock_matrix_mo_alpha_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Fock matrix on the MO basis
+   END_DOC
+   call ao_to_mo_kpts(Fock_matrix_ao_alpha_kpts,size(Fock_matrix_ao_alpha_kpts,1), &
+                 Fock_matrix_mo_alpha_kpts,size(Fock_matrix_mo_alpha_kpts,1))
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, Fock_matrix_mo_beta_kpts, (mo_num_per_kpt,mo_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Fock matrix on the MO basis
+   END_DOC
+   call ao_to_mo_kpts(Fock_matrix_ao_beta_kpts,size(Fock_matrix_ao_beta_kpts,1), &
+                 Fock_matrix_mo_beta_kpts,size(Fock_matrix_mo_beta_kpts,1))
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ complex*16, Fock_matrix_ao_kpts, (ao_num_per_kpt, ao_num_per_kpt,kpt_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Fock matrix in AO basis set
+ END_DOC
+
+ if(frozen_orb_scf)then
+   call mo_to_ao_kpts(Fock_matrix_mo_kpts,size(Fock_matrix_mo_kpts,1),              &
+       Fock_matrix_ao_kpts,size(Fock_matrix_ao_kpts,1))
+ else
+   integer :: k
+   do k=1,kpt_num
+     if ( (elec_alpha_num_kpts(k) == elec_beta_num_kpts(k)).and.                       &
+           (level_shift == 0.) )                                       &
+           then
+       integer                        :: i,j
+       do j=1,ao_num_per_kpt
+         do i=1,ao_num_per_kpt
+           Fock_matrix_ao_kpts(i,j,k) = Fock_matrix_ao_alpha_kpts(i,j,k)
+         enddo
+       enddo
+     else
+       !call mo_to_ao_complex(Fock_matrix_mo_kpts,size(Fock_matrix_mo_kpts,1),            &
+       call mo_to_ao_kpts(Fock_matrix_mo_kpts,size(Fock_matrix_mo_kpts,1),            &
+           Fock_matrix_ao_kpts,size(Fock_matrix_ao_kpts,1))
+     endif
+   enddo
+ endif
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ complex*16, ao_two_e_integral_alpha_kpts, (ao_num_per_kpt, ao_num_per_kpt, kpt_num) ]
+&BEGIN_PROVIDER [ complex*16, ao_two_e_integral_beta_kpts ,  (ao_num_per_kpt, ao_num_per_kpt, kpt_num) ]
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Alpha and Beta Fock matrices in AO basis set
+  END_DOC
+  !TODO: finish implementing this: see complex qp1 (different mapping)
+ 
+  integer                        :: i,j,k,l,k1,r,s
+  integer                        :: i0,j0,k0,l0
+  integer*8                      :: p,q
+  complex*16               :: integral, c0
+  complex*16, allocatable  :: ao_two_e_integral_alpha_tmp(:,:,:)
+  complex*16, allocatable  :: ao_two_e_integral_beta_tmp(:,:,:)
+ 
+  ao_two_e_integral_alpha_kpts = (0.d0,0.d0)
+  ao_two_e_integral_beta_kpts  = (0.d0,0.d0)
+  PROVIDE ao_two_e_integrals_in_map scf_density_matrix_ao_alpha_kpts scf_density_matrix_ao_beta_kpts
+ 
+  integer(omp_lock_kind) :: lck(ao_num)
+  integer(map_size_kind)     :: i8
+  integer                        :: ii(4), jj(4), kk(4), ll(4), k2
+  integer(cache_map_size_kind)   :: n_elements_max, n_elements
+  integer(key_kind), allocatable :: keys(:)
+  double precision, allocatable  :: values(:)
+  complex*16, parameter    :: i_sign(4) = (/(0.d0,1.d0),(0.d0,1.d0),(0.d0,-1.d0),(0.d0,-1.d0)/)
+  integer(key_kind) :: key1
+  integer :: kpt_i,kpt_j,kpt_k,kpt_l,idx_i,idx_j,idx_k,idx_l
+ 
+  !$OMP PARALLEL DEFAULT(NONE)                                      &
+      !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max, &
+      !$OMP  n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, &
+      !$OMP  kpt_i,kpt_j,kpt_k,kpt_l,idx_i,idx_j,idx_k,idx_l, &
+      !$OMP  c0,key1)&
+      !$OMP SHARED(ao_num_per_kpt,SCF_density_matrix_ao_alpha_kpts, kpt_num, irp_here, &
+      !$OMP  SCF_density_matrix_ao_beta_kpts, &
+      !$OMP  ao_integrals_map, ao_two_e_integral_alpha_kpts, ao_two_e_integral_beta_kpts)
+ 
+  call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max)
+  allocate(keys(n_elements_max), values(n_elements_max))
+  allocate(ao_two_e_integral_alpha_tmp(ao_num_per_kpt,ao_num_per_kpt,kpt_num), &
+           ao_two_e_integral_beta_tmp(ao_num_per_kpt,ao_num_per_kpt,kpt_num))
+  ao_two_e_integral_alpha_tmp = (0.d0,0.d0)
+  ao_two_e_integral_beta_tmp  = (0.d0,0.d0)
+ 
+  !$OMP DO SCHEDULE(static,1)
+  do i8=0_8,ao_integrals_map%map_size
+    n_elements = n_elements_max
+    call get_cache_map(ao_integrals_map,i8,keys,values,n_elements)
+    do k1=1,n_elements
+      ! get original key
+      ! reverse of 2*key (imag part) and 2*key-1 (real part)
+      key1 = shiftr(keys(k1)+1,1)
+
+      call two_e_integrals_index_reverse_complex_1(ii,jj,kk,ll,key1)
+      ! i<=k, j<=l, ik<=jl
+      ! ijkl, jilk, klij*, lkji*
+
+      if (shiftl(key1,1)==keys(k1)) then !imaginary part (even)
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          call get_kpt_idx_ao(i,kpt_i,idx_i)
+          call get_kpt_idx_ao(j,kpt_j,idx_j)
+          call get_kpt_idx_ao(k,kpt_k,idx_k)
+          call get_kpt_idx_ao(l,kpt_l,idx_l)
+          integral = i_sign(k2)*values(k1) !for klij and lkji, take complex conjugate
+
+          !G_a(i,k) += D_{ab}(l,j)*(<ij|kl>)
+          !G_b(i,k) += D_{ab}(l,j)*(<ij|kl>)
+          !G_a(i,l) -= D_a   (k,j)*(<ij|kl>)
+          !G_b(i,l) -= D_b   (k,j)*(<ij|kl>)
+
+          if (kpt_l.eq.kpt_j) then
+            c0 = (scf_density_matrix_ao_alpha_kpts(idx_l,idx_j,kpt_j)+scf_density_matrix_ao_beta_kpts(idx_l,idx_j,kpt_j))*integral
+            if(kpt_i.ne.kpt_k) then
+              print*,'problem in ',irp_here,' ikjl: ',kpt_i,kpt_k,kpt_j,kpt_l
+              stop 1
+            endif
+            ao_two_e_integral_alpha_tmp(idx_i,idx_k,kpt_i) += c0
+            ao_two_e_integral_beta_tmp (idx_i,idx_k,kpt_i) += c0
+          endif
+
+          if (kpt_l.eq.kpt_i) then
+            if(kpt_j.ne.kpt_k) then
+              print*,'problem in ',irp_here,' ikjl: ',kpt_i,kpt_k,kpt_j,kpt_l
+              stop 1
+            endif
+            ao_two_e_integral_alpha_tmp(idx_i,idx_l,kpt_i) -= SCF_density_matrix_ao_alpha_kpts(idx_k,idx_j,kpt_j) * integral
+            ao_two_e_integral_beta_tmp (idx_i,idx_l,kpt_i) -= scf_density_matrix_ao_beta_kpts (idx_k,idx_j,kpt_j) * integral
+          endif
+        enddo
+      else ! real part
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          call get_kpt_idx_ao(i,kpt_i,idx_i)
+          call get_kpt_idx_ao(j,kpt_j,idx_j)
+          call get_kpt_idx_ao(k,kpt_k,idx_k)
+          call get_kpt_idx_ao(l,kpt_l,idx_l)
+          integral = values(k1)
+
+          if (kpt_l.eq.kpt_j) then
+            c0 = (scf_density_matrix_ao_alpha_kpts(idx_l,idx_j,kpt_j)+scf_density_matrix_ao_beta_kpts(idx_l,idx_j,kpt_j))*integral
+            if(kpt_i.ne.kpt_k) then
+              print*,'problem in ',irp_here,' ikjl: ',kpt_i,kpt_k,kpt_j,kpt_l
+              stop 1
+            endif
+            ao_two_e_integral_alpha_tmp(idx_i,idx_k,kpt_i) += c0
+            ao_two_e_integral_beta_tmp (idx_i,idx_k,kpt_i) += c0
+          endif
+
+          if (kpt_l.eq.kpt_i) then
+            if(kpt_j.ne.kpt_k) then
+              print*,'problem in ',irp_here,' ikjl: ',kpt_i,kpt_k,kpt_j,kpt_l
+              stop 1
+            endif
+            ao_two_e_integral_alpha_tmp(idx_i,idx_l,kpt_i) -= SCF_density_matrix_ao_alpha_kpts(idx_k,idx_j,kpt_j) * integral
+            ao_two_e_integral_beta_tmp (idx_i,idx_l,kpt_i) -= scf_density_matrix_ao_beta_kpts (idx_k,idx_j,kpt_j) * integral
+          endif
+        enddo
+      endif
+    enddo
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP CRITICAL
+  ao_two_e_integral_alpha_kpts += ao_two_e_integral_alpha_tmp
+  ao_two_e_integral_beta_kpts  += ao_two_e_integral_beta_tmp
+  !$OMP END CRITICAL
+  deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)
+  !$OMP END PARALLEL
+
+ 
+  !$OMP PARALLEL DEFAULT(NONE)                                      &
+      !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max, &
+      !$OMP  n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, &
+      !$OMP  kpt_i,kpt_j,kpt_k,kpt_l,idx_i,idx_j,idx_k,idx_l, &
+      !$OMP  c0,key1)&
+      !$OMP SHARED(ao_num_per_kpt,SCF_density_matrix_ao_alpha_kpts,kpt_num, irp_here, &
+      !$OMP   SCF_density_matrix_ao_beta_kpts, &
+      !$OMP  ao_integrals_map_2, ao_two_e_integral_alpha_kpts, ao_two_e_integral_beta_kpts)
+ 
+  call get_cache_map_n_elements_max(ao_integrals_map_2,n_elements_max)
+  allocate(keys(n_elements_max), values(n_elements_max))
+  allocate(ao_two_e_integral_alpha_tmp(ao_num_per_kpt,ao_num_per_kpt,kpt_num), &
+           ao_two_e_integral_beta_tmp(ao_num_per_kpt,ao_num_per_kpt,kpt_num))
+  ao_two_e_integral_alpha_tmp = (0.d0,0.d0)
+  ao_two_e_integral_beta_tmp  = (0.d0,0.d0)
+ 
+  !$OMP DO SCHEDULE(static,1)
+  do i8=0_8,ao_integrals_map_2%map_size
+    n_elements = n_elements_max
+    call get_cache_map(ao_integrals_map_2,i8,keys,values,n_elements)
+    do k1=1,n_elements
+      ! get original key
+      ! reverse of 2*key (imag part) and 2*key-1 (real part)
+      key1 = shiftr(keys(k1)+1,1)
+
+      call two_e_integrals_index_reverse_complex_2(ii,jj,kk,ll,key1)
+      ! i>=k, j<=l, ik<=jl
+      ! ijkl, jilk, klij*, lkji*
+      if (shiftl(key1,1)==keys(k1)) then !imaginary part
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          call get_kpt_idx_ao(i,kpt_i,idx_i)
+          call get_kpt_idx_ao(j,kpt_j,idx_j)
+          call get_kpt_idx_ao(k,kpt_k,idx_k)
+          call get_kpt_idx_ao(l,kpt_l,idx_l)
+          integral = i_sign(k2)*values(k1) ! for klij and lkji, take conjugate
+
+          !G_a(i,k) += D_{ab}(l,j)*(<ij|kl>)
+          !G_b(i,k) += D_{ab}(l,j)*(<ij|kl>)
+          !G_a(i,l) -= D_a   (k,j)*(<ij|kl>)
+          !G_b(i,l) -= D_b   (k,j)*(<ij|kl>)
+
+          if (kpt_l.eq.kpt_j) then
+            c0 = (scf_density_matrix_ao_alpha_kpts(idx_l,idx_j,kpt_j)+scf_density_matrix_ao_beta_kpts(idx_l,idx_j,kpt_j))*integral
+            if(kpt_i.ne.kpt_k) then
+              print*,'problem in ',irp_here,' ikjl: ',kpt_i,kpt_k,kpt_j,kpt_l
+              stop 1
+            endif
+            ao_two_e_integral_alpha_tmp(idx_i,idx_k,kpt_i) += c0
+            ao_two_e_integral_beta_tmp (idx_i,idx_k,kpt_i) += c0
+          endif
+
+          if (kpt_l.eq.kpt_i) then
+            if(kpt_j.ne.kpt_k) then
+              print*,'problem in ',irp_here,' ikjl: ',kpt_i,kpt_k,kpt_j,kpt_l
+              stop 1
+            endif
+            ao_two_e_integral_alpha_tmp(idx_i,idx_l,kpt_i) -= SCF_density_matrix_ao_alpha_kpts(idx_k,idx_j,kpt_j) * integral
+            ao_two_e_integral_beta_tmp (idx_i,idx_l,kpt_i) -= scf_density_matrix_ao_beta_kpts (idx_k,idx_j,kpt_j) * integral
+          endif
+        enddo
+      else ! real part
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          call get_kpt_idx_ao(i,kpt_i,idx_i)
+          call get_kpt_idx_ao(j,kpt_j,idx_j)
+          call get_kpt_idx_ao(k,kpt_k,idx_k)
+          call get_kpt_idx_ao(l,kpt_l,idx_l)
+          integral = values(k1)
+
+          if (kpt_l.eq.kpt_j) then
+            c0 = (scf_density_matrix_ao_alpha_kpts(idx_l,idx_j,kpt_j)+scf_density_matrix_ao_beta_kpts(idx_l,idx_j,kpt_j))*integral
+            if(kpt_i.ne.kpt_k) then
+              print*,'problem in ',irp_here,' ikjl: ',kpt_i,kpt_k,kpt_j,kpt_l
+              stop 1
+            endif
+            ao_two_e_integral_alpha_tmp(idx_i,idx_k,kpt_i) += c0
+            ao_two_e_integral_beta_tmp (idx_i,idx_k,kpt_i) += c0
+          endif
+
+          if (kpt_l.eq.kpt_i) then
+            if(kpt_j.ne.kpt_k) then
+              print*,'problem in ',irp_here,' ikjl: ',kpt_i,kpt_k,kpt_j,kpt_l
+              stop 1
+            endif
+            ao_two_e_integral_alpha_tmp(idx_i,idx_l,kpt_i) -= SCF_density_matrix_ao_alpha_kpts(idx_k,idx_j,kpt_j) * integral
+            ao_two_e_integral_beta_tmp (idx_i,idx_l,kpt_i) -= scf_density_matrix_ao_beta_kpts (idx_k,idx_j,kpt_j) * integral
+          endif
+        enddo
+      endif
+    enddo
+  enddo
+  !$OMP END DO NOWAIT
+  !$OMP CRITICAL
+  ao_two_e_integral_alpha_kpts += ao_two_e_integral_alpha_tmp
+  ao_two_e_integral_beta_kpts  += ao_two_e_integral_beta_tmp
+  !$OMP END CRITICAL
+  deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)
+  !$OMP END PARALLEL
+
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ complex*16, Fock_matrix_ao_alpha_kpts, (ao_num_per_kpt, ao_num_per_kpt, kpt_num) ]
+&BEGIN_PROVIDER [ complex*16, Fock_matrix_ao_beta_kpts,  (ao_num_per_kpt, ao_num_per_kpt, kpt_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Alpha Fock matrix in AO basis set
+ END_DOC
+
+ integer                        :: i,j,k
+ do k=1,kpt_num
+   do j=1,ao_num_per_kpt
+     do i=1,ao_num_per_kpt
+       Fock_matrix_ao_alpha_kpts(i,j,k) = ao_one_e_integrals_kpts(i,j,k) + ao_two_e_integral_alpha_kpts(i,j,k)
+       Fock_matrix_ao_beta_kpts (i,j,k) = ao_one_e_integrals_kpts(i,j,k) + ao_two_e_integral_beta_kpts (i,j,k)
+     enddo
+   enddo
+ enddo
+
+END_PROVIDER
diff --git a/src/scf_utils/huckel_cplx.irp.f b/src/scf_utils/huckel_cplx.irp.f
new file mode 100644
index 00000000..346999df
--- /dev/null
+++ b/src/scf_utils/huckel_cplx.irp.f
@@ -0,0 +1,140 @@
+subroutine huckel_guess_complex
+  implicit none
+  BEGIN_DOC
+! Build the MOs using the extended Huckel model
+  END_DOC
+  integer                        :: i,j
+  double precision               :: accu
+  double precision               :: c
+  character*(64)                 :: label
+  complex*16, allocatable  :: A(:,:)
+  label = "Guess"
+  c = 0.5d0 * 1.75d0
+
+  allocate (A(ao_num, ao_num))
+  A = 0.d0
+  do j=1,ao_num
+    do i=1,ao_num
+      A(i,j) = c * ao_overlap_complex(i,j) * (ao_one_e_integrals_diag_complex(i) + ao_one_e_integrals_diag_complex(j))
+    enddo
+    A(j,j) = ao_one_e_integrals_diag_complex(j) + dble(ao_two_e_integral_alpha_complex(j,j))
+    if (dabs(dimag(ao_two_e_integral_alpha_complex(j,j))) .gt. 1.0d-10) then
+      stop 'diagonal elements of ao_two_e_integral_alpha should be real'
+    endif
+  enddo
+
+!  Fock_matrix_ao_alpha(1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num)
+!  Fock_matrix_ao_beta (1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num)
+  call zlacpy('X', ao_num, ao_num, A, size(A,1), &
+         Fock_matrix_ao_alpha_complex, size(Fock_matrix_ao_alpha_complex,1))
+  call zlacpy('X', ao_num, ao_num, A, size(A,1), &
+         Fock_matrix_ao_beta_complex,  size(Fock_matrix_ao_beta_complex, 1))
+  
+
+!  TOUCH mo_coef
+
+  TOUCH Fock_matrix_ao_alpha_complex Fock_matrix_ao_beta_complex
+  mo_coef_complex = eigenvectors_fock_matrix_mo_complex
+  SOFT_TOUCH mo_coef_complex
+  call save_mos
+  deallocate(A)
+
+end
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+subroutine huckel_guess_kpts
+  implicit none
+  BEGIN_DOC
+! Build the MOs using the extended Huckel model
+  END_DOC
+  integer                        :: i,j,k
+  double precision               :: accu
+  double precision               :: c
+  character*(64)                 :: label
+  complex*16, allocatable  :: A(:,:)
+  label = "Guess"
+  c = 0.5d0 * 1.75d0
+
+  allocate (A(ao_num, ao_num))
+  do k=1,kpt_num
+    A = (0.d0,0.d0)
+    do j=1,ao_num_per_kpt
+      do i=1,ao_num_per_kpt
+        A(i,j) = c * ao_overlap_kpts(i,j,k) * (ao_one_e_integrals_diag_kpts(i,k) + ao_one_e_integrals_diag_kpts(j,k))
+      enddo
+      A(j,j) = ao_one_e_integrals_diag_kpts(j,k) + dble(ao_two_e_integral_alpha_kpts(j,j,k))
+      if (dabs(dimag(ao_two_e_integral_alpha_kpts(j,j,k))) .gt. 1.0d-10) then
+        stop 'diagonal elements of ao_two_e_integral_alpha should be real'
+      endif
+    enddo
+
+!    Fock_matrix_ao_alpha(1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num)
+!    Fock_matrix_ao_beta (1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num)
+    call zlacpy('X', ao_num_per_kpt, ao_num_per_kpt, A, size(A,1), &
+           Fock_matrix_ao_alpha_kpts(:,:,k), size(Fock_matrix_ao_alpha_kpts,1))
+    call zlacpy('X', ao_num_per_kpt, ao_num_per_kpt, A, size(A,1), &
+           Fock_matrix_ao_beta_kpts(:,:,k),  size(Fock_matrix_ao_beta_kpts, 1))
+  enddo  
+
+!  TOUCH mo_coef
+
+  !TOUCH fock_matrix_ao_alpha_complex fock_matrix_ao_beta_kpts
+  TOUCH fock_matrix_ao_alpha_kpts fock_matrix_ao_beta_kpts
+  mo_coef_kpts = eigenvectors_fock_matrix_mo_kpts
+  SOFT_TOUCH mo_coef_kpts
+  call save_mos
+  deallocate(A)
+
+end
+subroutine huckel_guess_kpts_real
+  implicit none
+  BEGIN_DOC
+! Build the MOs using the extended Huckel model
+  END_DOC
+  integer                        :: i,j,k
+  double precision               :: accu
+  double precision               :: c
+  character*(64)                 :: label
+  !complex*16, allocatable  :: A(:,:)
+  double precision, allocatable  :: A(:,:)
+  label = "Guess"
+  c = 0.5d0 * 1.75d0
+
+  allocate (A(ao_num_per_kpt, ao_num_per_kpt))
+  do k=1,kpt_num
+    A = 0.d0
+    do j=1,ao_num_per_kpt
+      do i=1,ao_num_per_kpt
+        A(i,j) = c * ao_overlap_kpts_real(i,j,k) * (ao_one_e_integrals_diag_kpts(i,k) + ao_one_e_integrals_diag_kpts(j,k))
+      enddo
+      A(j,j) = ao_one_e_integrals_diag_kpts(j,k) + dble(ao_two_e_integral_alpha_kpts(j,j,k))
+      if (dabs(dimag(ao_two_e_integral_alpha_kpts(j,j,k))) .gt. 1.0d-10) then
+        stop 'diagonal elements of ao_two_e_integral_alpha should be real'
+      endif
+    enddo
+
+!    Fock_matrix_ao_alpha(1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num)
+!    Fock_matrix_ao_beta (1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num)
+    call zlacp2('X', ao_num_per_kpt, ao_num_per_kpt, A, size(A,1), &
+           Fock_matrix_ao_alpha_kpts(:,:,k), size(Fock_matrix_ao_alpha_kpts,1))
+    call zlacp2('X', ao_num_per_kpt, ao_num_per_kpt, A, size(A,1), &
+           Fock_matrix_ao_beta_kpts(:,:,k),  size(Fock_matrix_ao_beta_kpts, 1))
+    !call zlacpy('X', ao_num_per_kpt, ao_num_per_kpt, A, size(A,1), &
+    !       Fock_matrix_ao_alpha_kpts(:,:,k), size(Fock_matrix_ao_alpha_kpts,1))
+    !call zlacpy('X', ao_num_per_kpt, ao_num_per_kpt, A, size(A,1), &
+    !       Fock_matrix_ao_beta_kpts(:,:,k),  size(Fock_matrix_ao_beta_kpts, 1))
+  enddo  
+
+!  TOUCH mo_coef
+
+  !TOUCH fock_matrix_ao_alpha_complex fock_matrix_ao_beta_kpts
+  TOUCH fock_matrix_ao_alpha_kpts fock_matrix_ao_beta_kpts
+  mo_coef_kpts = eigenvectors_fock_matrix_mo_kpts_real
+  SOFT_TOUCH mo_coef_kpts
+  call save_mos
+  deallocate(A)
+
+end
diff --git a/src/scf_utils/print_debug_scf_cplx.irp.f b/src/scf_utils/print_debug_scf_cplx.irp.f
new file mode 100644
index 00000000..65a047c3
--- /dev/null
+++ b/src/scf_utils/print_debug_scf_cplx.irp.f
@@ -0,0 +1,54 @@
+subroutine print_debug_scf_complex
+  implicit none
+  BEGIN_DOC
+! Build the MOs using the extended Huckel model
+  END_DOC
+  integer                        :: i,j
+  
+  write(*,'(A)') 'mo_coef_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') mo_coef_complex(i,:)
+  enddo
+  write(*,'(A)') 'scf_density_matrix_ao_alpha_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') scf_density_matrix_ao_alpha_complex(i,:)
+  enddo
+  write(*,'(A)') 'ao_one_e_integrals_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') ao_one_e_integrals_complex(i,:)
+  enddo
+  write(*,'(A)') 'ao_two_e_integral_alpha_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') ao_two_e_integral_alpha_complex(i,:)
+  enddo
+  write(*,'(A)') 'fock_matrix_ao_alpha_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') fock_matrix_ao_alpha_complex(i,:)
+  enddo
+  write(*,'(A)') 'ao_overlap_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') ao_overlap_complex(i,:)
+  enddo
+  write(*,'(A)') 'scf_density_matrix_ao_beta_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') scf_density_matrix_ao_beta_complex(i,:)
+  enddo
+  write(*,'(A)') 'ao_two_e_integral_beta_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') ao_two_e_integral_beta_complex(i,:)
+  enddo
+  write(*,'(A)') 'fock_matrix_ao_beta_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') fock_matrix_ao_beta_complex(i,:)
+  enddo
+
+end
diff --git a/src/scf_utils/roothaan_hall_scf_cplx.irp.f b/src/scf_utils/roothaan_hall_scf_cplx.irp.f
new file mode 100644
index 00000000..64c3b16f
--- /dev/null
+++ b/src/scf_utils/roothaan_hall_scf_cplx.irp.f
@@ -0,0 +1,844 @@
+subroutine Roothaan_Hall_SCF_complex
+
+BEGIN_DOC
+! Roothaan-Hall algorithm for SCF Hartree-Fock calculation
+END_DOC
+
+  implicit none
+
+  double precision               :: energy_SCF,energy_SCF_previous,Delta_energy_SCF
+  double precision               :: max_error_DIIS,max_error_DIIS_alpha,max_error_DIIS_beta
+  complex*16, allocatable  :: Fock_matrix_DIIS(:,:,:),error_matrix_DIIS(:,:,:)
+
+  integer                        :: iteration_SCF,dim_DIIS,index_dim_DIIS
+
+  integer                        :: i,j
+  logical, external              :: qp_stop
+  complex*16, allocatable :: mo_coef_save(:,:)
+
+  PROVIDE ao_md5 mo_occ level_shift
+
+  allocate(mo_coef_save(ao_num,mo_num),                          &
+      Fock_matrix_DIIS (ao_num,ao_num,max_dim_DIIS),                 &
+      error_matrix_DIIS(ao_num,ao_num,max_dim_DIIS)                  &
+      )
+
+  call write_time(6)
+
+  print*,'Energy of the guess = ',SCF_energy
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'Energy  ', 'Energy diff  ',  'DIIS error  ', 'Level shift   '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+
+! Initialize energies and density matrices
+  energy_SCF_previous = SCF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  dim_DIIS            = 0
+  max_error_DIIS      = 1.d0
+
+
+!
+! Start of main SCF loop
+!
+  PROVIDE FPS_SPF_matrix_AO_complex Fock_matrix_AO_complex 
+
+  do while ( &
+    ( (max_error_DIIS > threshold_DIIS_nonzero) .or. &
+      (dabs(Delta_energy_SCF) > thresh_SCF) &
+    ) .and. (iteration_SCF < n_it_SCF_max) )
+
+! Increment cycle number
+
+    iteration_SCF += 1
+    if(frozen_orb_scf)then
+     call initialize_mo_coef_begin_iteration
+    endif
+
+! Current size of the DIIS space
+
+    dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
+
+    if (scf_algorithm == 'DIIS') then
+
+      ! Store Fock and error matrices at each iteration
+      do j=1,ao_num
+        do i=1,ao_num
+          index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
+          Fock_matrix_DIIS (i,j,index_dim_DIIS) = Fock_matrix_AO_complex(i,j)
+          error_matrix_DIIS(i,j,index_dim_DIIS) = FPS_SPF_matrix_AO_complex(i,j)
+        enddo
+      enddo
+
+      ! Compute the extrapolated Fock matrix
+
+      call extrapolate_Fock_matrix_complex(                            &
+          error_matrix_DIIS,Fock_matrix_DIIS,                          &
+          Fock_matrix_AO_complex,size(Fock_matrix_AO_complex,1),       &
+          iteration_SCF,dim_DIIS                                       &
+          )
+
+      Fock_matrix_AO_alpha_complex = Fock_matrix_AO_complex*0.5d0
+      Fock_matrix_AO_beta_complex  = Fock_matrix_AO_complex*0.5d0
+      TOUCH Fock_matrix_AO_alpha_complex Fock_matrix_AO_beta_complex
+
+    endif
+
+    mo_coef_complex = eigenvectors_fock_matrix_mo_complex
+    if(frozen_orb_scf)then
+     call reorder_core_orb
+     call initialize_mo_coef_begin_iteration
+    endif
+
+    TOUCH mo_coef_complex
+
+!   Calculate error vectors
+
+    max_error_DIIS = maxval(cdabs(FPS_SPF_Matrix_MO_complex))
+
+!   SCF energy
+!    call print_debug_scf_complex
+    energy_SCF = scf_energy
+    Delta_Energy_SCF = energy_SCF - energy_SCF_previous
+    if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then
+      Fock_matrix_AO_complex(1:ao_num,1:ao_num) = Fock_matrix_DIIS (1:ao_num,1:ao_num,index_dim_DIIS)
+      Fock_matrix_AO_alpha_complex = Fock_matrix_AO_complex*0.5d0
+      Fock_matrix_AO_beta_complex  = Fock_matrix_AO_complex*0.5d0
+      TOUCH Fock_matrix_AO_alpha_complex Fock_matrix_AO_beta_complex
+    endif
+
+    double precision :: level_shift_save
+    level_shift_save = level_shift
+    mo_coef_save(1:ao_num,1:mo_num) = mo_coef_complex(1:ao_num,1:mo_num)
+    do while (Delta_energy_SCF > 0.d0)
+      mo_coef_complex(1:ao_num,1:mo_num) = mo_coef_save
+      if (level_shift <= .1d0) then
+        level_shift = 1.d0
+      else
+        level_shift = level_shift * 3.0d0
+      endif
+      TOUCH mo_coef_complex level_shift
+      mo_coef_complex(1:ao_num,1:mo_num) = eigenvectors_fock_matrix_mo_complex(1:ao_num,1:mo_num)
+      if(frozen_orb_scf)then
+        call reorder_core_orb
+        call initialize_mo_coef_begin_iteration
+      endif
+      TOUCH mo_coef_complex
+      Delta_Energy_SCF = SCF_energy - energy_SCF_previous
+      energy_SCF = SCF_energy
+      if (level_shift-level_shift_save > 40.d0) then
+        level_shift = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift
+        exit
+      endif
+      dim_DIIS=0
+    enddo
+    level_shift = level_shift * 0.5d0
+    SOFT_TOUCH level_shift
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_scf, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS
+
+    if (Delta_energy_SCF < 0.d0) then
+      call save_mos
+    endif
+    if (qp_stop()) exit
+
+  enddo
+
+ if (iteration_SCF < n_it_SCF_max) then
+   mo_label = "Canonical"
+ endif
+!
+! End of Main SCF loop
+!
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,*)
+
+  if(.not.frozen_orb_scf)then
+   call mo_as_eigvectors_of_mo_matrix_complex(Fock_matrix_mo_complex,size(Fock_matrix_mo_complex,1),size(Fock_matrix_mo_complex,2),mo_label,1,.true.)
+   call save_mos
+  endif
+
+  call write_double(6, Energy_SCF, 'SCF energy')
+
+  call write_time(6)
+
+end
+
+subroutine extrapolate_Fock_matrix_complex(       &
+           error_matrix_DIIS,Fock_matrix_DIIS,    &
+           Fock_matrix_AO_,size_Fock_matrix_AO,   &
+           iteration_SCF,dim_DIIS                 &
+           )
+
+BEGIN_DOC
+! Compute the extrapolated Fock matrix using the DIIS procedure
+END_DOC
+
+  implicit none
+
+  complex*16,intent(in)   :: Fock_matrix_DIIS(ao_num,ao_num,*),error_matrix_DIIS(ao_num,ao_num,*)
+  integer,intent(in)            :: iteration_SCF, size_Fock_matrix_AO
+  complex*16,intent(inout):: Fock_matrix_AO_(size_Fock_matrix_AO,ao_num)
+  integer,intent(inout)         :: dim_DIIS
+
+  double precision,allocatable  :: B_matrix_DIIS(:,:),X_vector_DIIS(:)
+  double precision,allocatable  :: C_vector_DIIS(:)
+  double precision              :: accum_im, thr_im
+  complex*16,allocatable  :: scratch(:,:)
+  integer                       :: i,j,k,i_DIIS,j_DIIS
+  thr_im = 1.0d-10
+  allocate(                               &
+    B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), &
+    X_vector_DIIS(dim_DIIS+1),            &
+    C_vector_DIIS(dim_DIIS+1),            &
+    scratch(ao_num,ao_num)                &
+  )
+
+! Compute the matrices B and X
+  do j=1,dim_DIIS
+    do i=1,dim_DIIS
+
+      j_DIIS = mod(iteration_SCF-j,max_dim_DIIS)+1
+      i_DIIS = mod(iteration_SCF-i,max_dim_DIIS)+1
+
+! Compute product of two errors vectors
+
+      call zgemm('N','N',ao_num,ao_num,ao_num,                      &
+           (1.d0,0.d0),                                             &
+           error_matrix_DIIS(1,1,i_DIIS),size(error_matrix_DIIS,1), &
+           error_matrix_DIIS(1,1,j_DIIS),size(error_matrix_DIIS,1), &
+           (0.d0,0.d0),                                             &
+           scratch,size(scratch,1))
+
+! Compute Trace
+
+      B_matrix_DIIS(i,j) = 0.d0
+      accum_im = 0.d0
+      do k=1,ao_num
+        B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + dble(scratch(k,k))
+        accum_im = accum_im + dimag(scratch(k,k))
+      enddo
+      if (dabs(accum_im) .gt. thr_im) then
+        !stop 'problem with imaginary parts in DIIS B_matrix?'
+        print*, 'problem with imaginary parts in DIIS B_matrix?',accum_im
+      endif
+    enddo
+  enddo
+  deallocate(scratch)
+! Pad B matrix and build the X matrix
+
+  do i=1,dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+    C_vector_DIIS(i) = 0.d0
+  enddo
+  B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1) = 0.d0
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+
+! Solve the linear system C = B.X
+
+  integer              :: info
+  integer,allocatable  :: ipiv(:)
+
+  allocate(          &
+    ipiv(dim_DIIS+1) &
+  )
+
+  double precision, allocatable :: AF(:,:),scratch_d1(:)
+  allocate (AF(dim_DIIS+1,dim_DIIS+1),scratch_d1(1))
+  double precision :: rcond, ferr, berr
+  integer :: iwork(dim_DIIS+1), lwork
+
+  call dsysvx('N','U',dim_DIIS+1,1,      &
+    B_matrix_DIIS,size(B_matrix_DIIS,1), &
+    AF, size(AF,1),                      &
+    ipiv,                                &
+    C_vector_DIIS,size(C_vector_DIIS,1), &
+    X_vector_DIIS,size(X_vector_DIIS,1), &
+    rcond,                               &
+    ferr,                                &
+    berr,                                &
+    scratch_d1,-1,                       &
+    iwork,                               &
+    info                                 &
+  )
+  lwork = int(scratch_d1(1))
+  deallocate(scratch_d1)
+  allocate(scratch_d1(lwork))
+
+  call dsysvx('N','U',dim_DIIS+1,1,      &
+    B_matrix_DIIS,size(B_matrix_DIIS,1), &
+    AF, size(AF,1),                      &
+    ipiv,                                &
+    C_vector_DIIS,size(C_vector_DIIS,1), &
+    X_vector_DIIS,size(X_vector_DIIS,1), &
+    rcond,                               &
+    ferr,                                &
+    berr,                                &
+    scratch_d1,size(scratch_d1),         &
+    iwork,                               &
+    info                                 &
+  )
+  deallocate(scratch_d1,ipiv)
+
+ if(info < 0) then
+   stop 'bug in DIIS'
+ endif
+
+ if (rcond > 1.d-12) then
+
+  ! Compute extrapolated Fock matrix
+
+
+      !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num > 200)
+      do j=1,ao_num
+        do i=1,ao_num
+          Fock_matrix_AO_(i,j) = (0.d0,0.d0)
+        enddo
+        do k=1,dim_DIIS
+          do i=1,ao_num
+            Fock_matrix_AO_(i,j) = Fock_matrix_AO_(i,j) +            &
+                X_vector_DIIS(k)*Fock_matrix_DIIS(i,j,dim_DIIS-k+1)
+          enddo
+        enddo
+      enddo
+      !$OMP END PARALLEL DO
+
+  else
+    dim_DIIS = 0
+  endif
+
+end
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+subroutine Roothaan_Hall_SCF_kpts
+
+BEGIN_DOC
+! Roothaan-Hall algorithm for SCF Hartree-Fock calculation
+END_DOC
+
+  implicit none
+
+  double precision               :: energy_SCF,energy_SCF_previous,Delta_energy_SCF
+  double precision               :: max_error_DIIS,max_error_DIIS_alpha,max_error_DIIS_beta
+  complex*16, allocatable  :: Fock_matrix_DIIS(:,:,:,:),error_matrix_DIIS(:,:,:,:)
+
+  integer                        :: iteration_SCF,dim_DIIS,index_dim_DIIS
+
+  integer                        :: i,j,k,kk
+  logical, external              :: qp_stop
+  complex*16, allocatable :: mo_coef_save(:,:,:)
+
+  PROVIDE ao_md5 mo_occ_kpts level_shift
+
+  allocate(mo_coef_save(ao_num_per_kpt,mo_num_per_kpt,kpt_num),                          &
+      Fock_matrix_DIIS (ao_num_per_kpt,ao_num_per_kpt,max_dim_DIIS,kpt_num),                 &
+      error_matrix_DIIS(ao_num_per_kpt,ao_num_per_kpt,max_dim_DIIS,kpt_num)                  &
+      )
+      !todo: add kpt_num dim to diis mats? (3 or 4)
+  call write_time(6)
+
+  print*,'Energy of the guess = ',scf_energy
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'Energy  ', 'Energy diff  ',  'DIIS error  ', 'Level shift   '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+
+! Initialize energies and density matrices
+  energy_SCF_previous = SCF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  dim_DIIS            = 0
+  max_error_DIIS      = 1.d0
+
+
+!
+! Start of main SCF loop
+!
+  !PROVIDE fps_spf_matrix_ao_complex fock_matrix_ao_complex 
+  PROVIDE fps_spf_matrix_ao_kpts fock_matrix_ao_kpts 
+
+  do while ( &
+    ( (max_error_DIIS > threshold_DIIS_nonzero) .or. &
+      (dabs(Delta_energy_SCF) > thresh_SCF) &
+    ) .and. (iteration_SCF < n_it_SCF_max) )
+
+! Increment cycle number
+
+    iteration_SCF += 1
+    if(frozen_orb_scf)then
+     call initialize_mo_coef_begin_iteration
+    endif
+
+! Current size of the DIIS space
+
+    dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
+
+    if (scf_algorithm == 'DIIS') then
+
+      do kk=1,kpt_num
+        ! Store Fock and error matrices at each iteration
+        do j=1,ao_num_per_kpt
+          do i=1,ao_num_per_kpt
+            index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
+            Fock_matrix_DIIS (i,j,index_dim_DIIS,kk) = fock_matrix_ao_kpts(i,j,kk)
+            error_matrix_DIIS(i,j,index_dim_DIIS,kk) = fps_spf_matrix_ao_kpts(i,j,kk)
+          enddo
+        enddo
+
+        ! Compute the extrapolated Fock matrix
+
+        call extrapolate_fock_matrix_kpts(                            &
+            error_matrix_DIIS(1,1,1,kk),Fock_matrix_DIIS(1,1,1,kk),        &
+            Fock_matrix_AO_kpts(1,1,kk),size(Fock_matrix_AO_kpts,1),       &
+            iteration_SCF,dim_DIIS                                       &
+            )
+      enddo
+      Fock_matrix_AO_alpha_kpts = Fock_matrix_AO_kpts*0.5d0
+      Fock_matrix_AO_beta_kpts  = Fock_matrix_AO_kpts*0.5d0
+      TOUCH Fock_matrix_AO_alpha_kpts Fock_matrix_AO_beta_kpts
+
+    endif
+
+    mo_coef_kpts = eigenvectors_fock_matrix_mo_kpts
+    if(frozen_orb_scf)then
+     call reorder_core_orb
+     call initialize_mo_coef_begin_iteration
+    endif
+
+    TOUCH mo_coef_kpts
+
+!   Calculate error vectors
+
+    max_error_DIIS = maxval(cdabs(FPS_SPF_Matrix_MO_kpts))
+
+!   SCF energy
+!    call print_debug_scf_complex
+    energy_SCF = scf_energy
+    Delta_Energy_SCF = energy_SCF - energy_SCF_previous
+    if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then
+      do kk=1,kpt_num
+        Fock_matrix_AO_kpts(1:ao_num_per_kpt,1:ao_num_per_kpt,kk) = &
+            Fock_matrix_DIIS (1:ao_num_per_kpt,1:ao_num_per_kpt,index_dim_DIIS,kk)
+      enddo
+      Fock_matrix_AO_alpha_kpts = Fock_matrix_AO_kpts*0.5d0
+      Fock_matrix_AO_beta_kpts  = Fock_matrix_AO_kpts*0.5d0
+      TOUCH Fock_matrix_AO_alpha_kpts Fock_matrix_AO_beta_kpts
+    endif
+
+    double precision :: level_shift_save
+    level_shift_save = level_shift
+    mo_coef_save(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num) = mo_coef_kpts(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num)
+    do while (Delta_energy_SCF > 0.d0)
+      mo_coef_kpts(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num) = mo_coef_save
+      if (level_shift <= .1d0) then
+        level_shift = 1.d0
+      else
+        level_shift = level_shift * 3.0d0
+      endif
+      TOUCH mo_coef_kpts level_shift
+      mo_coef_kpts(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num) = &
+          eigenvectors_fock_matrix_mo_kpts(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num)
+      if(frozen_orb_scf)then
+        call reorder_core_orb
+        call initialize_mo_coef_begin_iteration
+      endif
+      TOUCH mo_coef_kpts
+      Delta_Energy_SCF = SCF_energy - energy_SCF_previous
+      energy_SCF = SCF_energy
+      if (level_shift-level_shift_save > 40.d0) then
+        level_shift = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift
+        exit
+      endif
+      dim_DIIS=0
+    enddo
+    level_shift = level_shift * 0.5d0
+    SOFT_TOUCH level_shift
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_scf, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS
+
+    if (Delta_energy_SCF < 0.d0) then
+      call save_mos
+    endif
+    if (qp_stop()) exit
+
+  enddo
+
+ if (iteration_SCF < n_it_SCF_max) then
+   mo_label = "Canonical"
+ endif
+!
+! End of Main SCF loop
+!
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,*)
+
+  if(.not.frozen_orb_scf)then
+   call mo_as_eigvectors_of_mo_matrix_kpts(Fock_matrix_mo_kpts,size(Fock_matrix_mo_kpts,1),size(Fock_matrix_mo_kpts,2),size(Fock_matrix_mo_kpts,3),mo_label,1,.true.)
+   call save_mos
+  endif
+
+  call write_double(6, Energy_SCF, 'SCF energy')
+
+  call write_time(6)
+
+end
+
+subroutine extrapolate_Fock_matrix_kpts(       &
+           error_matrix_DIIS,Fock_matrix_DIIS,    &
+           Fock_matrix_AO_,size_Fock_matrix_AO,   &
+           iteration_SCF,dim_DIIS                 &
+           )
+
+BEGIN_DOC
+! Compute the extrapolated Fock matrix using the DIIS procedure
+END_DOC
+
+  implicit none
+
+  complex*16,intent(in)   :: Fock_matrix_DIIS(ao_num_per_kpt,ao_num_per_kpt,*),error_matrix_DIIS(ao_num_per_kpt,ao_num_per_kpt,*)
+  integer,intent(in)            :: iteration_SCF, size_Fock_matrix_AO
+  complex*16,intent(inout):: Fock_matrix_AO_(size_Fock_matrix_AO,ao_num_per_kpt)
+  integer,intent(inout)         :: dim_DIIS
+
+  double precision,allocatable  :: B_matrix_DIIS(:,:),X_vector_DIIS(:)
+  double precision,allocatable  :: C_vector_DIIS(:)
+  double precision              :: accum_im, thr_im
+  complex*16,allocatable  :: scratch(:,:)
+  integer                       :: i,j,k,i_DIIS,j_DIIS
+  thr_im = 1.0d-10
+  allocate(                               &
+    B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), &
+    X_vector_DIIS(dim_DIIS+1),            &
+    C_vector_DIIS(dim_DIIS+1),            &
+    scratch(ao_num,ao_num)                &
+  )
+
+! Compute the matrices B and X
+  do j=1,dim_DIIS
+    do i=1,dim_DIIS
+
+      j_DIIS = mod(iteration_SCF-j,max_dim_DIIS)+1
+      i_DIIS = mod(iteration_SCF-i,max_dim_DIIS)+1
+
+! Compute product of two errors vectors
+
+      call zgemm('N','N',ao_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt,                      &
+           (1.d0,0.d0),                                             &
+           error_matrix_DIIS(1,1,i_DIIS),size(error_matrix_DIIS,1), &
+           error_matrix_DIIS(1,1,j_DIIS),size(error_matrix_DIIS,1), &
+           (0.d0,0.d0),                                             &
+           scratch,size(scratch,1))
+
+! Compute Trace
+
+      B_matrix_DIIS(i,j) = 0.d0
+      accum_im = 0.d0
+      do k=1,ao_num_per_kpt
+        B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + dble(scratch(k,k))
+        accum_im = accum_im + dimag(scratch(k,k))
+      enddo
+      if (dabs(accum_im) .gt. thr_im) then
+        !stop 'problem with imaginary parts in DIIS B_matrix?'
+        print*, 'problem with imaginary parts in DIIS B_matrix?',accum_im
+      endif
+    enddo
+  enddo
+  deallocate(scratch)
+! Pad B matrix and build the X matrix
+
+  do i=1,dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+    C_vector_DIIS(i) = 0.d0
+  enddo
+  B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1) = 0.d0
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+
+! Solve the linear system C = B.X
+
+  integer              :: info
+  integer,allocatable  :: ipiv(:)
+
+  allocate(          &
+    ipiv(dim_DIIS+1) &
+  )
+
+  double precision, allocatable :: AF(:,:),scratch_d1(:)
+  allocate (AF(dim_DIIS+1,dim_DIIS+1),scratch_d1(1))
+  double precision :: rcond, ferr, berr
+  integer :: iwork(dim_DIIS+1), lwork
+
+  call dsysvx('N','U',dim_DIIS+1,1,      &
+    B_matrix_DIIS,size(B_matrix_DIIS,1), &
+    AF, size(AF,1),                      &
+    ipiv,                                &
+    C_vector_DIIS,size(C_vector_DIIS,1), &
+    X_vector_DIIS,size(X_vector_DIIS,1), &
+    rcond,                               &
+    ferr,                                &
+    berr,                                &
+    scratch_d1,-1,                       &
+    iwork,                               &
+    info                                 &
+  )
+  lwork = int(scratch_d1(1))
+  deallocate(scratch_d1)
+  allocate(scratch_d1(lwork))
+
+  call dsysvx('N','U',dim_DIIS+1,1,      &
+    B_matrix_DIIS,size(B_matrix_DIIS,1), &
+    AF, size(AF,1),                      &
+    ipiv,                                &
+    C_vector_DIIS,size(C_vector_DIIS,1), &
+    X_vector_DIIS,size(X_vector_DIIS,1), &
+    rcond,                               &
+    ferr,                                &
+    berr,                                &
+    scratch_d1,size(scratch_d1),         &
+    iwork,                               &
+    info                                 &
+  )
+  deallocate(scratch_d1,ipiv)
+
+ if(info < 0) then
+   stop 'bug in DIIS'
+ endif
+
+ if (rcond > 1.d-12) then
+
+  ! Compute extrapolated Fock matrix
+
+
+      !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num_per_kpt > 200)
+      do j=1,ao_num_per_kpt
+        do i=1,ao_num_per_kpt
+          Fock_matrix_AO_(i,j) = (0.d0,0.d0)
+        enddo
+        do k=1,dim_DIIS
+          do i=1,ao_num_per_kpt
+            Fock_matrix_AO_(i,j) = Fock_matrix_AO_(i,j) +            &
+                X_vector_DIIS(k)*Fock_matrix_DIIS(i,j,dim_DIIS-k+1)
+          enddo
+        enddo
+      enddo
+      !$OMP END PARALLEL DO
+
+  else
+    dim_DIIS = 0
+  endif
+
+end
+
+!============================================!
+!                                            !
+!                    kpts_real               !
+!                                            !
+!============================================!
+
+subroutine Roothaan_Hall_SCF_kpts_real
+
+BEGIN_DOC
+! Roothaan-Hall algorithm for SCF Hartree-Fock calculation
+END_DOC
+
+  implicit none
+
+  double precision               :: energy_SCF,energy_SCF_previous,Delta_energy_SCF
+  double precision               :: max_error_DIIS,max_error_DIIS_alpha,max_error_DIIS_beta
+  complex*16, allocatable  :: Fock_matrix_DIIS(:,:,:,:),error_matrix_DIIS(:,:,:,:)
+
+  integer                        :: iteration_SCF,dim_DIIS,index_dim_DIIS
+
+  integer                        :: i,j,k,kk
+  logical, external              :: qp_stop
+  complex*16, allocatable :: mo_coef_save(:,:,:)
+
+  PROVIDE ao_md5 mo_occ_kpts level_shift
+
+  allocate(mo_coef_save(ao_num_per_kpt,mo_num_per_kpt,kpt_num),                          &
+      Fock_matrix_DIIS (ao_num_per_kpt,ao_num_per_kpt,max_dim_DIIS,kpt_num),                 &
+      error_matrix_DIIS(ao_num_per_kpt,ao_num_per_kpt,max_dim_DIIS,kpt_num)                  &
+      )
+      !todo: add kpt_num dim to diis mats? (3 or 4)
+  call write_time(6)
+
+  print*,'Energy of the guess = ',scf_energy
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'Energy  ', 'Energy diff  ',  'DIIS error  ', 'Level shift   '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+
+! Initialize energies and density matrices
+  energy_SCF_previous = SCF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  dim_DIIS            = 0
+  max_error_DIIS      = 1.d0
+
+
+!
+! Start of main SCF loop
+!
+  !PROVIDE fps_spf_matrix_ao_complex fock_matrix_ao_complex 
+  PROVIDE fps_spf_matrix_ao_kpts fock_matrix_ao_kpts 
+
+  do while ( &
+    ( (max_error_DIIS > threshold_DIIS_nonzero) .or. &
+      (dabs(Delta_energy_SCF) > thresh_SCF) &
+    ) .and. (iteration_SCF < n_it_SCF_max) )
+
+! Increment cycle number
+
+    iteration_SCF += 1
+    if(frozen_orb_scf)then
+     call initialize_mo_coef_begin_iteration
+    endif
+
+! Current size of the DIIS space
+
+    dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
+
+    if (scf_algorithm == 'DIIS') then
+
+      do kk=1,kpt_num
+        ! Store Fock and error matrices at each iteration
+        do j=1,ao_num_per_kpt
+          do i=1,ao_num_per_kpt
+            index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
+            Fock_matrix_DIIS (i,j,index_dim_DIIS,kk) = fock_matrix_ao_kpts(i,j,kk)
+            error_matrix_DIIS(i,j,index_dim_DIIS,kk) = fps_spf_matrix_ao_kpts(i,j,kk)
+          enddo
+        enddo
+
+        ! Compute the extrapolated Fock matrix
+
+        call extrapolate_fock_matrix_kpts(                            &
+            error_matrix_DIIS(1,1,1,kk),Fock_matrix_DIIS(1,1,1,kk),        &
+            Fock_matrix_AO_kpts(1,1,kk),size(Fock_matrix_AO_kpts,1),       &
+            iteration_SCF,dim_DIIS                                       &
+            )
+      enddo
+      Fock_matrix_AO_alpha_kpts = Fock_matrix_AO_kpts*0.5d0
+      Fock_matrix_AO_beta_kpts  = Fock_matrix_AO_kpts*0.5d0
+      TOUCH Fock_matrix_AO_alpha_kpts Fock_matrix_AO_beta_kpts
+
+    endif
+
+    mo_coef_kpts = eigenvectors_fock_matrix_mo_kpts
+    if(frozen_orb_scf)then
+     call reorder_core_orb
+     call initialize_mo_coef_begin_iteration
+    endif
+
+    TOUCH mo_coef_kpts
+
+!   Calculate error vectors
+
+    max_error_DIIS = maxval(cdabs(FPS_SPF_Matrix_MO_kpts))
+
+!   SCF energy
+!    call print_debug_scf_complex
+    energy_SCF = scf_energy
+    Delta_Energy_SCF = energy_SCF - energy_SCF_previous
+    if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then
+      do kk=1,kpt_num
+        Fock_matrix_AO_kpts(1:ao_num_per_kpt,1:ao_num_per_kpt,kk) = &
+            Fock_matrix_DIIS (1:ao_num_per_kpt,1:ao_num_per_kpt,index_dim_DIIS,kk)
+      enddo
+      Fock_matrix_AO_alpha_kpts = Fock_matrix_AO_kpts*0.5d0
+      Fock_matrix_AO_beta_kpts  = Fock_matrix_AO_kpts*0.5d0
+      TOUCH fock_matrix_ao_alpha_kpts Fock_matrix_AO_beta_kpts
+    endif
+
+    double precision :: level_shift_save
+    level_shift_save = level_shift
+    mo_coef_save(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num) = mo_coef_kpts(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num)
+    do while (Delta_energy_SCF > 0.d0)
+      mo_coef_kpts(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num) = mo_coef_save
+      if (level_shift <= .1d0) then
+        level_shift = 1.d0
+      else
+        level_shift = level_shift * 3.0d0
+      endif
+      TOUCH mo_coef_kpts level_shift
+      mo_coef_kpts(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num) = &
+          eigenvectors_fock_matrix_mo_kpts_real(1:ao_num_per_kpt,1:mo_num_per_kpt,1:kpt_num)
+      if(frozen_orb_scf)then
+        call reorder_core_orb
+        call initialize_mo_coef_begin_iteration
+      endif
+      TOUCH mo_coef_kpts
+      Delta_Energy_SCF = SCF_energy - energy_SCF_previous
+      energy_SCF = SCF_energy
+      if (level_shift-level_shift_save > 40.d0) then
+        level_shift = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift
+        exit
+      endif
+      dim_DIIS=0
+    enddo
+    level_shift = level_shift * 0.5d0
+    SOFT_TOUCH level_shift
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_scf, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS
+
+    if (Delta_energy_SCF < 0.d0) then
+      call save_mos
+    endif
+    if (qp_stop()) exit
+
+  enddo
+
+ if (iteration_SCF < n_it_SCF_max) then
+   mo_label = "Canonical"
+ endif
+!
+! End of Main SCF loop
+!
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,*)
+
+  if(.not.frozen_orb_scf)then
+   call mo_as_eigvectors_of_mo_matrix_kpts_real(fock_matrix_mo_kpts_real,size(Fock_matrix_mo_kpts_real,1),size(Fock_matrix_mo_kpts_real,2),size(Fock_matrix_mo_kpts_real,3),mo_label,1,.true.)
+   call save_mos
+  endif
+
+  call write_double(6, Energy_SCF, 'SCF energy')
+
+  call write_time(6)
+
+end
+
diff --git a/src/scf_utils/scf_density_matrix_ao_cplx.irp.f b/src/scf_utils/scf_density_matrix_ao_cplx.irp.f
new file mode 100644
index 00000000..9726690c
--- /dev/null
+++ b/src/scf_utils/scf_density_matrix_ao_cplx.irp.f
@@ -0,0 +1,126 @@
+BEGIN_PROVIDER [ complex*16, scf_density_matrix_ao_alpha_complex, (ao_num,ao_num) ]
+   implicit none
+   BEGIN_DOC
+   ! $C.C^t$ over $\alpha$ MOs
+   END_DOC
+   
+   complex*16, allocatable :: mo_coef_alpha_tmp(:,:)
+   integer :: occ(N_int*bit_kind_size)
+   integer :: na, i
+
+   call bitstring_to_list(hf_bitmask(1,1), occ, na, n_int)
+   allocate(mo_coef_alpha_tmp(ao_num,na))
+   do i=1,na
+     mo_coef_alpha_tmp(:,i) = mo_coef_complex(:,occ(i))
+   enddo
+
+
+   call zgemm('N','C',ao_num,ao_num,elec_alpha_num,(1.d0,0.d0), &
+        mo_coef_alpha_tmp, size(mo_coef_alpha_tmp,1), &
+        mo_coef_alpha_tmp, size(mo_coef_alpha_tmp,1), (0.d0,0.d0), &
+        scf_density_matrix_ao_alpha_complex, size(scf_density_matrix_ao_alpha_complex,1))
+
+   deallocate(mo_coef_alpha_tmp)
+   !call zgemm('N','C',ao_num,ao_num,elec_alpha_num,(1.d0,0.d0), &
+   !     mo_coef_complex, size(mo_coef_complex,1), &
+   !     mo_coef_complex, size(mo_coef_complex,1), (0.d0,0.d0), &
+   !     scf_density_matrix_ao_alpha_complex, size(scf_density_matrix_ao_alpha_complex,1))
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, scf_density_matrix_ao_beta_complex,  (ao_num,ao_num) ]
+   implicit none
+   BEGIN_DOC
+   ! $C.C^t$ over $\beta$ MOs
+   END_DOC
+
+   complex*16, allocatable :: mo_coef_beta_tmp(:,:)
+   integer :: occ(N_int*bit_kind_size)
+   integer :: nb, i
+
+   call bitstring_to_list(hf_bitmask(1,2), occ, nb, n_int)
+   allocate(mo_coef_beta_tmp(ao_num,nb))
+   do i=1,nb
+     mo_coef_beta_tmp(:,i) = mo_coef_complex(:,occ(i))
+   enddo
+
+
+   call zgemm('N','C',ao_num,ao_num,elec_beta_num,(1.d0,0.d0), &
+        mo_coef_beta_tmp, size(mo_coef_beta_tmp,1), &
+        mo_coef_beta_tmp, size(mo_coef_beta_tmp,1), (0.d0,0.d0), &
+        scf_density_matrix_ao_beta_complex, size(scf_density_matrix_ao_beta_complex,1))
+
+   deallocate(mo_coef_beta_tmp)
+   !call zgemm('N','C',ao_num,ao_num,elec_beta_num,(1.d0,0.d0), &
+   !     mo_coef_complex, size(mo_coef_complex,1), &
+   !     mo_coef_complex, size(mo_coef_complex,1), (0.d0,0.d0), &
+   !     scf_density_matrix_ao_beta_complex, size(scf_density_matrix_ao_beta_complex,1))
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, scf_density_matrix_ao_complex, (ao_num,ao_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Sum of $\alpha$ and $\beta$ density matrices
+   END_DOC
+   ASSERT (size(scf_density_matrix_ao_complex,1) == size(scf_density_matrix_ao_alpha_complex,1))
+   if (elec_alpha_num== elec_beta_num) then
+     scf_density_matrix_ao_complex = scf_density_matrix_ao_alpha_complex + scf_density_matrix_ao_alpha_complex
+   else
+     ASSERT (size(scf_density_matrix_ao_complex,1) == size(scf_density_matrix_ao_beta_complex ,1))
+     scf_density_matrix_ao_complex = scf_density_matrix_ao_alpha_complex + scf_density_matrix_ao_beta_complex
+   endif
+
+END_PROVIDER
+
+!============================================!
+!                                            !
+!                    kpts                    !
+!                                            !
+!============================================!
+
+BEGIN_PROVIDER [ complex*16, scf_density_matrix_ao_alpha_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! $C.C^t$ over $\alpha$ MOs
+   END_DOC
+   
+   integer :: k
+   do k=1,kpt_num
+     call zgemm('N','C',ao_num_per_kpt,ao_num_per_kpt,elec_alpha_num_kpts(k),(1.d0,0.d0), &
+          mo_coef_kpts(1,1,k), size(mo_coef_kpts,1), &
+          mo_coef_kpts(1,1,k), size(mo_coef_kpts,1), (0.d0,0.d0), &
+          scf_density_matrix_ao_alpha_kpts(1,1,k), size(scf_density_matrix_ao_alpha_kpts,1))
+   enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, scf_density_matrix_ao_beta_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! $C.C^t$ over $\beta$ MOs
+   END_DOC
+   
+   integer :: k
+   do k=1,kpt_num
+     call zgemm('N','C',ao_num_per_kpt,ao_num_per_kpt,elec_beta_num_kpts(k),(1.d0,0.d0), &
+          mo_coef_kpts(1,1,k), size(mo_coef_kpts,1), &
+          mo_coef_kpts(1,1,k), size(mo_coef_kpts,1), (0.d0,0.d0), &
+          scf_density_matrix_ao_beta_kpts(1,1,k), size(scf_density_matrix_ao_beta_kpts,1))
+   enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ complex*16, scf_density_matrix_ao_kpts, (ao_num_per_kpt,ao_num_per_kpt,kpt_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Sum of $\alpha$ and $\beta$ density matrices
+   END_DOC
+   ASSERT (size(scf_density_matrix_ao_kpts,1) == size(scf_density_matrix_ao_alpha_kpts,1))
+   if (elec_alpha_num== elec_beta_num) then
+     scf_density_matrix_ao_kpts = scf_density_matrix_ao_alpha_kpts + scf_density_matrix_ao_alpha_kpts
+   else
+     ASSERT (size(scf_density_matrix_ao_kpts,1) == size(scf_density_matrix_ao_beta_kpts ,1))
+     scf_density_matrix_ao_kpts = scf_density_matrix_ao_alpha_kpts + scf_density_matrix_ao_beta_kpts
+   endif
+
+END_PROVIDER
+
diff --git a/src/selectors_full/selectors.irp.f b/src/selectors_full/selectors.irp.f
index 0531f731..20edb6b2 100644
--- a/src/selectors_full/selectors.irp.f
+++ b/src/selectors_full/selectors.irp.f
@@ -30,8 +30,7 @@ BEGIN_PROVIDER [ integer, N_det_selectors]
   call write_int(6,N_det_selectors,'Number of selectors')
 END_PROVIDER
 
- BEGIN_PROVIDER [ integer(bit_kind), psi_selectors, (N_int,2,psi_selectors_size) ]
-&BEGIN_PROVIDER [ double precision, psi_selectors_coef, (psi_selectors_size,N_states) ]
+BEGIN_PROVIDER [ integer(bit_kind), psi_selectors, (N_int,2,psi_selectors_size) ]
   implicit none
   BEGIN_DOC
   ! Determinants on which we apply <i|H|psi> for perturbation.
@@ -44,6 +43,16 @@ END_PROVIDER
       psi_selectors(k,2,i) = psi_det_sorted(k,2,i)
     enddo
   enddo
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, psi_selectors_coef, (psi_selectors_size,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Determinants on which we apply <i|H|psi> for perturbation.
+  END_DOC
+  integer                        :: i,k
+
   do k=1,N_states
     do i=1,N_det_selectors
       psi_selectors_coef(i,k) = psi_coef_sorted(i,k)
@@ -52,4 +61,19 @@ END_PROVIDER
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ complex*16, psi_selectors_coef_complex, (psi_selectors_size,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Determinants on which we apply <i|H|psi> for perturbation.
+  END_DOC
+  integer                        :: i,k
+
+  do k=1,N_states
+    do i=1,N_det_selectors
+      psi_selectors_coef_complex(i,k) = psi_coef_sorted_complex(i,k)
+    enddo
+  enddo
+
+END_PROVIDER
+
 
diff --git a/src/selectors_utils/selectors.irp.f b/src/selectors_utils/selectors.irp.f
index 92366d1d..4460979c 100644
--- a/src/selectors_utils/selectors.irp.f
+++ b/src/selectors_utils/selectors.irp.f
@@ -32,3 +32,17 @@ BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size
 END_PROVIDER
 
 
+BEGIN_PROVIDER [ complex*16, psi_selectors_coef_transp_complex, (N_states,psi_selectors_size) ]
+  implicit none
+  BEGIN_DOC
+  ! Transposed psi_selectors
+  END_DOC
+  integer                        :: i,k
+
+  do i=1,N_det_selectors
+    do k=1,N_states
+      psi_selectors_coef_transp_complex(k,i) = psi_selectors_coef_complex(i,k)
+    enddo
+  enddo
+END_PROVIDER
+
diff --git a/src/single_ref_method/generators.irp.f b/src/single_ref_method/generators.irp.f
index ce71f996..860f357a 100644
--- a/src/single_ref_method/generators.irp.f
+++ b/src/single_ref_method/generators.irp.f
@@ -25,6 +25,7 @@ END_PROVIDER
    psi_det_generators(i,2,1) = HF_bitmask(i,2)
  enddo
 
+ ! Search for HF determinant
  do j=1,N_det
    call get_excitation_degree(HF_bitmask,psi_det(1,1,j),degree,N_int)
    if (degree == 0) then
@@ -55,4 +56,25 @@ BEGIN_PROVIDER [ integer, size_select_max ]
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ complex*16, psi_coef_generators_complex, (psi_det_size,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Complex variant of psi_coef_generators
+ END_DOC
+
+ integer :: i,j,k
+ integer :: degree
+
+ ! Search for HF determinant
+ do j=1,N_det
+   call get_excitation_degree(HF_bitmask,psi_det(1,1,j),degree,N_int)
+   if (degree == 0) then
+     k = j
+     exit
+   endif
+ end do
+
+ psi_coef_generators_complex(1,:) = psi_coef_generators_complex(j,:)
+
+END_PROVIDER
 
diff --git a/src/tools/diagonalize_h.irp.f b/src/tools/diagonalize_h.irp.f
index c9ae2033..ee9531e9 100644
--- a/src/tools/diagonalize_h.irp.f
+++ b/src/tools/diagonalize_h.irp.f
@@ -17,7 +17,11 @@ end
 
 subroutine routine
  implicit none
- call diagonalize_CI
+ call diagonalize_ci
  print*,'N_det = ',N_det
- call save_wavefunction_general(N_det,N_states,psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted)
+ if (is_complex) then
+  call save_wavefunction_general_complex(N_det,N_states,psi_det_sorted,size(psi_coef_sorted_complex,1),psi_coef_sorted_complex)
+ else
+  call save_wavefunction_general(N_det,N_states,psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted)
+ endif
 end
diff --git a/src/tools/fcidump.irp.f b/src/tools/fcidump.irp.f
index bf4d07fb..de878dc6 100644
--- a/src/tools/fcidump.irp.f
+++ b/src/tools/fcidump.irp.f
@@ -18,6 +18,97 @@ program fcidump
 !   electrons
 !
   END_DOC
+  if (is_complex) then
+    call fcidump_complex
+  else
+    call fcidump_real
+  endif
+end
+
+subroutine fcidump_complex
+  implicit none
+  character*(128) :: output
+  integer :: i_unit_output,getUnitAndOpen
+  output=trim(ezfio_filename)//'.FCIDUMP'
+  i_unit_output = getUnitAndOpen(output,'w')
+
+  integer :: i,j,k,l
+  integer :: i1,j1,k1,l1
+  integer :: i2,j2,k2,l2,ik2,jl2
+  integer :: ki,kj,kk,kl
+  integer :: ii,ij,ik,il
+  integer*8 :: m
+  character*(2), allocatable :: A(:)
+
+  write(i_unit_output,*) '&FCI NORB=', n_act_orb, ', NELEC=', elec_num-n_core_orb*2, &
+   ', MS2=', (elec_alpha_num-elec_beta_num), ','
+  allocate (A(n_act_orb))
+  A = '1,'
+  write(i_unit_output,*) 'ORBSYM=', (A(i), i=1,n_act_orb)
+  write(i_unit_output,*) 'ISYM=0,'
+  write(i_unit_output,*) '/'
+  deallocate(A)
+
+  integer(key_kind), allocatable :: keys(:)
+  double precision, allocatable  :: values(:)
+  integer(cache_map_size_kind)   :: n_elements, n_elements_max
+  PROVIDE mo_two_e_integrals_in_map
+
+  complex*16 :: get_two_e_integral_complex, integral
+
+  do kl=1,kpt_num
+    do kj=1,kl
+      do kk=1,kl
+        ki=kconserv(kl,kk,kj)
+        if (ki>kl) cycle
+        do l1=1,n_act_orb_kpts(kl)
+          il=list_act_kpts(l1,kl)
+          l = (kl-1)*mo_num_per_kpt + il
+          do j1=1,n_act_orb_kpts(kj)
+            ij=list_act_kpts(j1,kj)
+            j = (kj-1)*mo_num_per_kpt + ij
+            if (j>l) exit
+            call idx2_tri_int(j,l,jl2)
+            do k1=1,n_act_orb_kpts(kk)
+              ik=list_act_kpts(k1,kk)
+              k = (kk-1)*mo_num_per_kpt + ik
+              if (k>l) exit
+              do i1=1,n_act_orb_kpts(ki)
+                ii=list_act_kpts(i1,ki)
+                i = (ki-1)*mo_num_per_kpt + ii
+                if ((j==l) .and. (i>k)) exit
+                call idx2_tri_int(i,k,ik2)
+                if (ik2 > jl2) exit
+                integral = get_two_e_integral_complex(i,j,k,l,mo_integrals_map,mo_integrals_map_2)
+                if (cdabs(integral) > mo_integrals_threshold) then
+                  write(i_unit_output,'(2(E25.15,X),4(I6,X))') dble(integral), dimag(integral),i,k,j,l
+                endif
+              enddo
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  do kj=1,kpt_num
+    do j1=1,n_act_orb_kpts(kj)
+      ij = list_act_kpts(j1,kj)
+      j = (kj-1)*mo_num_per_kpt + ij
+      do i1=j1,n_act_orb_kpts(kj)
+        ii = list_act_kpts(i1,kj)
+        i = (kj-1)*mo_num_per_kpt + ii
+        integral = mo_one_e_integrals_kpts(ii,ij,kj) + core_fock_operator_complex(i,j)
+        if (cdabs(integral) > mo_integrals_threshold) then
+          write(i_unit_output,'(2(E25.15,X),4(I6,X))') dble(integral),dimag(integral), i,j,0,0
+        endif
+      enddo
+    enddo
+  enddo
+  write(i_unit_output,*) core_energy, 0, 0, 0, 0
+end
+subroutine fcidump_real
+  implicit none
   character*(128) :: output
   integer :: i_unit_output,getUnitAndOpen
   output=trim(ezfio_filename)//'.FCIDUMP'
diff --git a/src/tools/print_energy.irp.f b/src/tools/print_energy.irp.f
index f78dffc8..9e263409 100644
--- a/src/tools/print_energy.irp.f
+++ b/src/tools/print_energy.irp.f
@@ -9,7 +9,11 @@ program print_energy
  read_wf = .True.
  touch read_wf
  PROVIDE N_states
- call run
+ if (is_complex) then
+  call run_complex
+ else
+  call run
+ endif
 end
 
 subroutine run
@@ -35,3 +39,27 @@ subroutine run
    print *, E(i)/norm(i)
  enddo
 end
+
+subroutine run_complex
+  implicit none
+  integer :: i,j
+  complex*16 :: i_h_psi_array(n_states)
+  double precision :: e(n_states)
+  double precision :: norm(n_states)
+ 
+  e(1:n_states) = nuclear_repulsion
+  norm(1:n_states) = 0.d0
+  do i=1,n_det
+   call i_H_psi_complex(psi_det(1,1,i), psi_det, psi_coef_complex, N_int, N_det, &
+                size(psi_coef_complex,1), N_states, i_H_psi_array)
+    do j=1,n_states
+      norm(j) += cdabs(psi_coef_complex(i,j))**2
+      E(j) += dble(i_h_psi_array(j) * dconjg(psi_coef_complex(i,j)))
+    enddo
+  enddo
+ 
+  print *, 'Energy:'
+  do i=1,N_states
+    print *, E(i)/norm(i)
+  enddo
+end
diff --git a/src/tools/print_hamiltonian.irp.f b/src/tools/print_hamiltonian.irp.f
index 207161dd..183fd502 100644
--- a/src/tools/print_hamiltonian.irp.f
+++ b/src/tools/print_hamiltonian.irp.f
@@ -9,7 +9,11 @@ program print_hamiltonian
  ! psi_coef_sorted are the wave function stored in the |EZFIO| directory.
  read_wf = .True.
  touch read_wf
- call run
+ if (is_complex) then
+  call run_complex
+ else
+  call run
+ endif
 end
 
 subroutine run
@@ -27,3 +31,42 @@ subroutine run
  enddo
 
 end
+
+subroutine run_complex
+ implicit none
+ integer :: i, j
+ complex*16 :: hij
+ double precision :: s2
+
+ print*,'i,j,Hij'
+ do j=1,N_det
+   do i=1,N_det
+     call i_h_j_complex(psi_det(1,1,i), psi_det(1,1,j), N_int, hij)
+     if (cdabs(hij) > 1.d-20) then
+       print *, i, j, dble(hij), dimag(hij)
+     endif
+   enddo
+ enddo
+ print*,'i,j,S2ij'
+ do j=1,N_det
+   do i=1,N_det
+     call get_s2(psi_det(1,1,i), psi_det(1,1,j), N_int, s2)
+     if (dabs(s2) > 1.d-20) then
+       print *, i, j, s2
+     endif
+   enddo
+ enddo
+!  use bitmasks
+  integer                        :: degree
+
+ print*,'i,j,degij'
+ do j=1,N_det
+   do i=1,N_det
+     call get_excitation_degree(psi_det(1,1,i), psi_det(1,1,j), degree, N_int)
+     if (degree.le.2) then
+       print *, i, j, degree
+     endif
+   enddo
+ enddo
+
+end
diff --git a/src/tools/save_natorb.irp.f b/src/tools/save_natorb.irp.f
index f6331d13..9a046787 100644
--- a/src/tools/save_natorb.irp.f
+++ b/src/tools/save_natorb.irp.f
@@ -17,6 +17,7 @@ program save_natorb
   call save_natural_mos
   call save_ref_determinant
   call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('None')
+  call ezfio_set_mo_two_e_ints_io_df_mo_integrals('None')
   call ezfio_set_mo_one_e_ints_io_mo_one_e_integrals('None')
   call ezfio_set_mo_one_e_ints_io_mo_integrals_kinetic('None')
   call ezfio_set_mo_one_e_ints_io_mo_integrals_n_e('None')
diff --git a/src/tools/swap_mos.irp.f b/src/tools/swap_mos.irp.f
index bba9cb34..44b9cc50 100644
--- a/src/tools/swap_mos.irp.f
+++ b/src/tools/swap_mos.irp.f
@@ -7,11 +7,20 @@ program swap_mos
   double precision               :: x
   print *,  'MOs to swap?'
   read(*,*) i1, i2
-  do i=1,ao_num
-    x = mo_coef(i,i1)
-    mo_coef(i,i1) = mo_coef(i,i2)
-    mo_coef(i,i2) = x
-  enddo
+  if (is_complex) then
+    complex*16 :: xc
+    do i=1,ao_num
+      xc = mo_coef_complex(i,i1)
+      mo_coef_complex(i,i1) = mo_coef_complex(i,i2)
+      mo_coef_complex(i,i2) = xc
+    enddo
+  else
+    do i=1,ao_num
+      x = mo_coef(i,i1)
+      mo_coef(i,i1) = mo_coef(i,i2)
+      mo_coef(i,i2) = x
+    enddo
+  endif
   call save_mos
   
 end
diff --git a/src/utils/constants.include.F b/src/utils/constants.include.F
index 7399b4a6..bad68054 100644
--- a/src/utils/constants.include.F
+++ b/src/utils/constants.include.F
@@ -7,6 +7,7 @@ double precision, parameter :: sqpi =  dsqrt(dacos(-1.d0))
 double precision, parameter :: pi_5_2 =  34.9868366552d0
 double precision, parameter :: dfour_pi =  4.d0*dacos(-1.d0)
 double precision, parameter :: dtwo_pi =  2.d0*dacos(-1.d0)
+double precision, parameter :: inv_pi =  1.d0/dacos(-1.d0)
 double precision, parameter :: inv_sq_pi =  1.d0/dsqrt(dacos(-1.d0))
 double precision, parameter :: inv_sq_pi_2 = 0.5d0/dsqrt(dacos(-1.d0))
 double precision, parameter :: thresh = 1.d-15
diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index a8dea97a..42ccfe0b 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -164,7 +164,7 @@ subroutine ortho_canonical_complex(overlap,LDA,N,C,LDC,m,cutoff)
 end
 
 
-subroutine ortho_qr_complex(A,LDA,m,n)
+subroutine ortho_qr_complex_old(A,LDA,m,n)
   implicit none
   BEGIN_DOC
   ! Orthogonalization using Q.R factorization
@@ -196,6 +196,61 @@ subroutine ortho_qr_complex(A,LDA,m,n)
   deallocate(WORK,jpvt,tau)
 end
 
+subroutine ortho_qr_complex(A,LDA,m,n)
+  implicit none
+  BEGIN_DOC
+  ! Orthogonalization using Q.R factorization
+  !   
+  ! A : matrix to orthogonalize
+  !
+  ! LDA : leftmost dimension of A
+  !
+  ! n : Number of columns? of A
+  !
+  ! m : Number of rows? of A
+  !
+  END_DOC
+  integer, intent(in)            :: m,n, LDA
+  complex*16, intent(inout) :: A(LDA,n)
+  
+  integer                        :: lwork, info
+  complex*16, allocatable  :: tau(:), work(:)
+
+  allocate(tau(n), work(1))
+  lwork=-1
+  call  zgeqrf( m, n, A, LDA, tau, work, lwork, info )
+  lwork=int(work(1))
+  deallocate(work)
+  if (info.ne.0) then
+    print*,irp_here,' The ',-info,' argument to zgeqrf had an illegal value'
+    stop 1
+  endif
+  allocate(work(lwork))
+  call  zgeqrf(m, n, A, LDA, tau, work, lwork, info )
+  deallocate(work)
+  if (info.ne.0) then
+    print*,irp_here,' The ',-info,' argument to zgeqrf had an illegal value'
+    stop 2
+  endif
+
+  lwork=-1
+  allocate(work(1))
+  call  zungqr(m, n, n, A, LDA, tau, work, lwork, info)
+  lwork=int(work(1))
+  deallocate(work)
+  if (info.ne.0) then
+    print*,irp_here,' The ',-info,' argument to zgeqrf had an illegal value'
+    stop 3
+  endif
+  allocate(work(lwork))
+  call  zungqr(m, n, n, A, LDA, tau, work, lwork, info)
+  deallocate(work,tau)
+  if (info.ne.0) then
+    print*,irp_here,' The ',-info,' argument to zgeqrf had an illegal value'
+    stop 4
+  endif
+end
+
 subroutine ortho_qr_unblocked_complex(A,LDA,m,n)
   implicit none
   BEGIN_DOC
@@ -205,25 +260,29 @@ subroutine ortho_qr_unblocked_complex(A,LDA,m,n)
   !
   ! LDA : leftmost dimension of A
   !
-  ! n : Number of rows of A
+  ! n : Number of columns of A
   !
-  ! m : Number of columns of A
+  ! m : Number of rows of A
   !
   END_DOC
   integer, intent(in)            :: m,n, LDA
-  double precision, intent(inout) :: A(LDA,n)
+  complex*16, intent(inout)      :: A(LDA,n)
 
   integer                        :: info
-  integer, allocatable           :: jpvt(:)
-  double precision, allocatable  :: tau(:), work(:)
+  complex*16, allocatable        :: tau(:), work(:)
 
-  print *, irp_here, ': TO DO'
-  stop -1
-
-!  allocate (jpvt(n), tau(n), work(n))
-!  call  dgeqr2( m, n, A, LDA, TAU, WORK, INFO )
-!  call dorg2r(m, n, n, A, LDA, tau, WORK, INFO)
-!  deallocate(WORK,jpvt,tau)
+  allocate(tau(n),work(n))
+  call zgeqr2(m,n,A,LDA,tau,work,info)
+  if (info.ne.0) then
+    print*,irp_here,' The ',-info,' argument to zgeqr2 had an illegal value'
+    stop 1
+  endif
+  call zung2r(m,n,n,A,LDA,tau,work,info)
+  if (info.ne.0) then
+    print*,irp_here,' The ',-info,' argument to zung2r had an illegal value'
+    stop 2
+  endif
+  deallocate(work,tau)
 end
 
 subroutine ortho_lowdin_complex(overlap,LDA,N,C,LDC,m,cutoff)
@@ -1218,3 +1277,77 @@ subroutine lapack_diag(eigvalues,eigvectors,H,nmax,n)
   deallocate(A,eigenvalues)
 end
 
+subroutine lapack_diagd_diag_in_place(eigvalues,eigvectors,nmax,n)
+  implicit none
+  BEGIN_DOC
+  ! Diagonalize matrix H(complex)
+  !
+  ! H is untouched between input and ouptut
+  !
+  ! eigevalues(i) = ith lowest eigenvalue of the H matrix
+  !
+  ! eigvectors(i,j) = <i|psi_j> where i is the basis function and psi_j is the j th eigenvector
+  !
+  END_DOC
+  integer, intent(in)            :: n,nmax
+  double precision, intent(out)  :: eigvectors(nmax,n)
+!  complex*16, intent(inout)        :: eigvectors(nmax,n)
+  double precision, intent(out)  :: eigvalues(n)
+!  double precision, intent(in)   :: H(nmax,n)
+  double precision,allocatable         :: work(:)
+  integer         ,allocatable   :: iwork(:)
+!  complex*16,allocatable         :: A(:,:)
+  integer                        :: lwork, info, i,j,l,k, liwork
+
+! print*,'Diagonalization by jacobi'
+! print*,'n = ',n
+
+  lwork = 2*n*n + 6*n + 1
+  liwork = 5*n + 3
+  allocate (work(lwork),iwork(liwork))
+
+  lwork = -1
+  liwork = -1
+  ! get optimal work size
+  call DSYEVD( 'V', 'U', n, eigvectors, nmax, eigvalues, work, lwork, &
+    iwork, liwork, info )
+  if (info < 0) then
+    print *, irp_here, ': DSYEVD: the ',-info,'-th argument had an illegal value'
+    stop 2
+  endif
+  lwork  = int( real(work(1)))
+  liwork = iwork(1)
+  deallocate (work,iwork)
+
+  allocate (work(lwork),iwork(liwork))
+  call DSYEVD( 'V', 'U', n, eigvectors, nmax, eigvalues, work, lwork, &
+    iwork, liwork, info )
+  deallocate(work,iwork)
+
+
+  if (info < 0) then
+    print *, irp_here, ': DSYEVD: the ',-info,'-th argument had an illegal value'
+    stop 2
+  else if( info > 0  ) then
+     write(*,*)'DSYEVD Failed; calling DSYEV'
+     lwork = 3*n - 1
+     allocate(work(lwork))
+     lwork = -1
+     call DSYEV('V','L',n,eigvectors,nmax,eigvalues,work,lwork,info)
+     if (info < 0) then
+       print *, irp_here, ': DSYEV: the ',-info,'-th argument had an illegal value'
+       stop 2
+     endif
+     lwork = int(work(1))
+     deallocate(work)
+     allocate(work(lwork))
+     call DSYEV('V','L',n,eigvectors,nmax,eigvalues,work,lwork,info)
+     if (info /= 0 ) then
+       write(*,*)'DSYEV Failed'
+       stop 1
+     endif
+    deallocate(work)
+  end if
+
+end
+
diff --git a/src/utils/map_module.f90 b/src/utils/map_module.f90
index 98e73470..e41ee8d0 100644
--- a/src/utils/map_module.f90
+++ b/src/utils/map_module.f90
@@ -531,13 +531,30 @@ subroutine map_get(map, key, value)
   real(integral_kind), intent(out) :: value
   integer(map_size_kind)         :: idx_cache
   integer(cache_map_size_kind)   :: idx
-
+  idx=1
   ! index in tha pointers array
   idx_cache = shiftr(key,map_shift)
   !DIR$ FORCEINLINE
   call cache_map_get_interval(map%map(idx_cache), key, value, 1, map%map(idx_cache)%n_elements,idx)
 end
 
+subroutine map_get_2(map, key, value1, value2)
+  use map_module
+  implicit none
+  type (map_type), intent(inout) :: map
+  integer(key_kind), intent(in)  :: key
+  real(integral_kind), intent(out) :: value1, value2
+  integer(map_size_kind)         :: idx_cache
+  integer(cache_map_size_kind)   :: idx
+
+  idx=1
+  ! index in tha pointers array
+  idx_cache = shiftr(key,map_shift)
+  !DIR$ FORCEINLINE
+  call cache_map_get_interval(map%map(idx_cache), key, value1, 1, map%map(idx_cache)%n_elements,idx)
+  call cache_map_get_interval(map%map(idx_cache), key+1, value2, idx+1, idx+2, idx)
+end
+
 subroutine cache_map_get_interval(map, key, value, ibegin, iend, idx)
   use map_module
   implicit none
diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f
index 2a655eed..ccde5752 100644
--- a/src/utils/sort.irp.f
+++ b/src/utils/sort.irp.f
@@ -346,6 +346,7 @@ SUBST [ X, type ]
  i ; integer ;;
  i8; integer*8 ;;
  i2; integer*2 ;;
+ cd; complex*16 ;;
 END_TEMPLATE
 
 
@@ -381,6 +382,16 @@ BEGIN_TEMPLATE
 
  end subroutine insertion_$Xsort_big
 
+SUBST [ X, type ]
+   ; real ;;
+ d ; double precision ;;
+ i ; integer ;;
+ i8; integer*8 ;;
+ i2; integer*2 ;;
+END_TEMPLATE
+
+BEGIN_TEMPLATE
+
  subroutine $Xset_order_big(x,iorder,isize)
   implicit none
   BEGIN_DOC
@@ -408,6 +419,7 @@ BEGIN_TEMPLATE
 SUBST [ X, type ]
    ; real ;;
  d ; double precision ;;
+ cd; complex*16 ;;
  i ; integer ;;
  i8; integer*8 ;;
  i2; integer*2 ;;
diff --git a/src/utils/transpose.irp.f b/src/utils/transpose.irp.f
index 7c86f458..ddffb172 100644
--- a/src/utils/transpose.irp.f
+++ b/src/utils/transpose.irp.f
@@ -84,3 +84,100 @@ recursive subroutine dtranspose(A,LDA,B,LDB,d1,d2)
 
 end
 
+
+!DIR$ attributes forceinline :: cdtranspose
+recursive subroutine cdtranspose(A,LDA,B,LDB,d1,d2)
+ implicit none
+ BEGIN_DOC
+! Transpose input matrix A into output matrix B
+! don't take complex conjugate
+ END_DOC
+ integer, intent(in) :: d1, d2, LDA, LDB
+ complex*16, intent(in) :: A(LDA,d2)
+ complex*16, intent(out) :: B(LDB,d1)
+
+
+! do j=1,d1
+!   do i=1,d2
+!    B(i,j  ) = A(j  ,i)
+!   enddo
+! enddo
+! return
+
+ integer :: i,j,k
+ if ( d2 < 32 ) then
+   do j=1,d1
+     !DIR$ LOOP COUNT (16)
+     do i=1,d2
+      B(i,j  ) = A(j  ,i)
+     enddo
+    enddo
+   return
+ else if (d1 > d2) then
+ !DIR$ forceinline 
+   k=d1/2
+ !DIR$ forceinline recursive
+   call cdtranspose(A(1,1),LDA,B(1,1),LDB,k,d2)
+ !DIR$ forceinline recursive
+   call cdtranspose(A(k+1,1),LDA,B(1,k+1),LDB,d1-k,d2)
+   return
+ else
+ !DIR$ forceinline
+   k=d2/2
+ !DIR$ forceinline recursive
+   call cdtranspose(A(1,k+1),LDA,B(k+1,1),LDB,d1,d2-k)
+ !DIR$ forceinline recursive
+   call cdtranspose(A(1,1),LDA,B(1,1),LDB,d1,k)
+   return
+ endif
+
+end
+
+!DIR$ attributes forceinline :: cdadjoint
+recursive subroutine cdadjoint(A,LDA,B,LDB,d1,d2)
+ implicit none
+ BEGIN_DOC
+! Transpose input matrix A into output matrix B
+! and take complex conjugate
+ END_DOC
+ integer, intent(in) :: d1, d2, LDA, LDB
+ complex*16, intent(in) :: A(LDA,d2)
+ complex*16, intent(out) :: B(LDB,d1)
+
+
+! do j=1,d1
+!   do i=1,d2
+!    B(i,j  ) = A(j  ,i)
+!   enddo
+! enddo
+! return
+
+ integer :: i,j,k
+ if ( d2 < 32 ) then
+   do j=1,d1
+     !DIR$ LOOP COUNT (16)
+     do i=1,d2
+      B(i,j  ) = conjg(A(j  ,i))
+     enddo
+    enddo
+   return
+ else if (d1 > d2) then
+ !DIR$ forceinline 
+   k=d1/2
+ !DIR$ forceinline recursive
+   call cdadjoint(A(1,1),LDA,B(1,1),LDB,k,d2)
+ !DIR$ forceinline recursive
+   call cdadjoint(A(k+1,1),LDA,B(1,k+1),LDB,d1-k,d2)
+   return
+ else
+ !DIR$ forceinline
+   k=d2/2
+ !DIR$ forceinline recursive
+   call cdadjoint(A(1,k+1),LDA,B(k+1,1),LDB,d1,d2-k)
+ !DIR$ forceinline recursive
+   call cdadjoint(A(1,1),LDA,B(1,1),LDB,d1,k)
+   return
+ endif
+
+end
+
diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f
index 1b01a1ec..95abb9ab 100644
--- a/src/utils/util.irp.f
+++ b/src/utils/util.irp.f
@@ -317,6 +317,35 @@ double precision function u_dot_v(u,v,sze)
 
 end
 
+complex*16 function u_dot_v_complex(u,v,sze)
+  implicit none
+  BEGIN_DOC
+  ! Compute u^H . v
+  END_DOC
+  integer, intent(in)            :: sze
+  complex*16, intent(in)   :: u(sze),v(sze)
+  complex*16, external     :: zdotc
+  
+  !DIR$ FORCEINLINE
+  u_dot_v_complex = zdotc(sze,u,1,v,1)
+  
+end
+
+complex*16 function u_dot_v_complex_noconj(u,v,sze)
+  implicit none
+  BEGIN_DOC
+  ! Compute u^T . v (don't take complex conjugate of elements of u)
+  ! use this if u is already stored as <u| (rather than as |u>)
+  END_DOC
+  integer, intent(in)            :: sze
+  complex*16, intent(in)   :: u(sze),v(sze)
+  complex*16, external     :: zdotu
+  
+  !DIR$ FORCEINLINE
+  u_dot_v_complex_noconj = zdotu(sze,u,1,v,1)
+  
+end
+
 double precision function u_dot_u(u,sze)
   implicit none
   BEGIN_DOC
@@ -331,6 +360,20 @@ double precision function u_dot_u(u,sze)
 
 end
 
+double precision function u_dot_u_complex(u,sze)
+  implicit none
+  BEGIN_DOC
+  ! Compute <u|u>
+  END_DOC
+  integer, intent(in)            :: sze
+  complex*16, intent(in)   :: u(sze)
+  complex*16, external     :: zdotc
+  
+  !DIR$ FORCEINLINE
+  u_dot_u_complex = real(zdotc(sze,u,1,u,1))
+  
+end
+
 subroutine normalize(u,sze)
   implicit none
   BEGIN_DOC
@@ -353,6 +396,28 @@ subroutine normalize(u,sze)
   endif
 end
 
+subroutine normalize_complex(u,sze)
+  implicit none
+  BEGIN_DOC
+  ! Normalizes vector u
+  END_DOC
+  integer, intent(in)            :: sze
+  complex*16, intent(inout):: u(sze)
+  double precision               :: d
+  double precision, external     :: dznrm2
+  integer                        :: i
+  
+  !DIR$ FORCEINLINE
+  d = dznrm2(sze,u,1)
+  if (d /= 0.d0) then
+    d = 1.d0/d
+  endif
+  if (d /= 1.d0) then
+    !DIR$ FORCEINLINE
+    call zdscal(sze,d,u,1)
+  endif
+end
+
 double precision function approx_dble(a,n)
   implicit none
   integer, intent(in) :: n
diff --git a/src/utils_complex/Gen_Ezfio_from_integral_complex_3idx.sh b/src/utils_complex/Gen_Ezfio_from_integral_complex_3idx.sh
new file mode 100755
index 00000000..94895f18
--- /dev/null
+++ b/src/utils_complex/Gen_Ezfio_from_integral_complex_3idx.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+ezfio=$1
+h5file=$2
+# Create the integral
+echo 'Create Integral'
+
+echo 'Create EZFIO'
+#read nel nmo natom <<< $(cat param) 
+#read e_nucl <<< $(cat e_nuc)
+#read nao <<< $(cat num_ao)
+#read nkpts <<< $(cat kpt_num)
+#read ndf <<< $(cat num_df)
+##./create_ezfio_complex_4idx.py $ezfio $nel $natom $nmo $e_nucl $nao $nkpts
+./create_ezfio_complex_3idx.py $ezfio $h5file #$nel $natom $nmo $e_nucl $nao $nkpts $ndf
+#Handle the orbital consitensy check
+qp_edit -c $ezfio &> /dev/null
+#cp $ezfio/{ao,mo}_basis/ao_md5 
+
+#qp_run import_ao_2e_complex $ezfio 
+#qp_run dump_ao_2e_from_df $ezfio
+#Read the integral
+#echo 'Read Integral'
+
+
+################################################
+##  using AO mono, 4-idx from pyscf           ##
+################################################
+#qp_run import_integrals_ao_periodic $ezfio 
+
+
+################################################
+##  using AO mono, 3-idx, mo coef from pyscf  ##
+################################################
+
+#qp_run read_ao_mono_complex $ezfio 
+#qp_run read_kconserv $ezfio
+#qp_run read_ao_df_complex $ezfio
+#qp_run read_mo_coef_complex $ezfio    #start from converged pyscf MOs
+#
+#qp_run save_mo_df_to_disk $ezfio
+#qp_run save_mo_bielec_to_disk $ezfio
+
+#qp_run mo_from_ao_orth $ezfio        #use canonical orthonormalized AOs as initial MO guess
+#qp_run print_H_matrix_restart $ezfio > hmat.out
+
+
+###############################################################
+##  using AO mono, full 4-idx AO bielec, mo coef from pyscf  ##
+###############################################################
+
+#qp_run read_ao_mono_complex $ezfio 
+#qp_run read_kconserv $ezfio
+#qp_run read_ao_eri_chunk_complex $ezfio 
+#qp_run read_mo_coef_complex $ezfio    #start from converged pyscf MOs
+##qp_run mo_from_ao_orth $ezfio        #use canonical orthonormalized AOs as initial MO guess
+
+
+######################################################
+##  using MO mono, full 4-idx MO bielec from pyscf  ##
+######################################################
+
+#qp_run read_mo_mono_complex $ezfio 
+#qp_run read_kconserv $ezfio
+#qp_run read_mo_eri_chunk_complex $ezfio 
+
diff --git a/src/utils_complex/Gen_Ezfio_from_pyscf_mol.sh b/src/utils_complex/Gen_Ezfio_from_pyscf_mol.sh
new file mode 100755
index 00000000..7caa20fa
--- /dev/null
+++ b/src/utils_complex/Gen_Ezfio_from_pyscf_mol.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+ezfio=$1
+h5file=$2
+# Create the integral
+echo 'Create Integral'
+
+echo 'Create EZFIO'
+#read nel nmo natom <<< $(cat param) 
+#read e_nucl <<< $(cat e_nuc)
+#read nao <<< $(cat num_ao)
+#read nkpts <<< $(cat kpt_num)
+#read ndf <<< $(cat num_df)
+##./create_ezfio_complex_4idx.py $ezfio $nel $natom $nmo $e_nucl $nao $nkpts
+./create_ezfio_pyscf_mol.py $ezfio $h5file #$nel $natom $nmo $e_nucl $nao $nkpts $ndf
+#Handle the orbital consitensy check
+#qp_edit -c $ezfio &> /dev/null
+#cp $ezfio/{ao,mo}_basis/ao_md5 
+
+#qp_run import_ao_2e_complex $ezfio 
+#qp_run dump_ao_2e_from_df $ezfio
+#Read the integral
+#echo 'Read Integral'
+
+
+################################################
+##  using AO mono, 4-idx from pyscf           ##
+################################################
+#qp_run import_integrals_ao_periodic $ezfio 
+
+
+################################################
+##  using AO mono, 3-idx, mo coef from pyscf  ##
+################################################
+
+#qp_run read_ao_mono_complex $ezfio 
+#qp_run read_kconserv $ezfio
+#qp_run read_ao_df_complex $ezfio
+#qp_run read_mo_coef_complex $ezfio    #start from converged pyscf MOs
+#
+#qp_run save_mo_df_to_disk $ezfio
+#qp_run save_mo_bielec_to_disk $ezfio
+
+#qp_run mo_from_ao_orth $ezfio        #use canonical orthonormalized AOs as initial MO guess
+#qp_run print_H_matrix_restart $ezfio > hmat.out
+
+
+###############################################################
+##  using AO mono, full 4-idx AO bielec, mo coef from pyscf  ##
+###############################################################
+
+#qp_run read_ao_mono_complex $ezfio 
+#qp_run read_kconserv $ezfio
+#qp_run read_ao_eri_chunk_complex $ezfio 
+#qp_run read_mo_coef_complex $ezfio    #start from converged pyscf MOs
+##qp_run mo_from_ao_orth $ezfio        #use canonical orthonormalized AOs as initial MO guess
+
+
+######################################################
+##  using MO mono, full 4-idx MO bielec from pyscf  ##
+######################################################
+
+#qp_run read_mo_mono_complex $ezfio 
+#qp_run read_kconserv $ezfio
+#qp_run read_mo_eri_chunk_complex $ezfio 
+
diff --git a/src/utils_complex/MolPyscfToQPkpts.py b/src/utils_complex/MolPyscfToQPkpts.py
new file mode 100644
index 00000000..b13a2dba
--- /dev/null
+++ b/src/utils_complex/MolPyscfToQPkpts.py
@@ -0,0 +1,1127 @@
+import numpy as np
+from functools import reduce
+
+
+def memoize(f):
+    memo = {}
+    def helper(x):
+        if x not in memo:
+            memo[x] = f(x)
+        return memo[x]
+    return helper
+
+@memoize
+def idx2_tri(iijj):
+    '''
+    iijj should be a 2-tuple
+    return triangular compound index for (0-indexed counting)
+    '''
+    ij1=min(iijj)
+    ij2=max(iijj)
+    return ij1+(ij2*(ij2+1))//2
+#    return ij1+(ij2*(ij2-1))//2
+
+def pad(arr_in,outshape):
+    arr_out = np.zeros(outshape,dtype=np.complex128)
+    dataslice = tuple(slice(0,arr_in.shape[dim]) for dim in range(len(outshape)))
+    arr_out[dataslice] = arr_in
+    return arr_out
+
+def idx40(i,j,k,l):
+    return idx2_tri((idx2_tri((i,k)),idx2_tri((j,l))))
+
+def idx4(i,j,k,l):
+    return idx2_tri((idx2_tri((i-1,k-1)),idx2_tri((j-1,l-1))))+1
+
+def stri4(i,j,k,l):
+    return (4*'{:5d}').format(i,j,k,l)
+
+def stri4z(i,j,k,l,zr,zi):
+    return (4*'{:5d}'+2*'{:25.16e}').format(i,j,k,l,zr,zi)
+
+def stri2z(i,j,zr,zi):
+    return (2*'{:5d}'+2*'{:25.16e}').format(i,j,zr,zi)
+
+def strijklikjli4z(i,j,k,l,zr,zi):
+    return ('{:10d}'+ 2*'{:8d}'+4*'{:5d}'+2*'{:25.16e}').format(idx4(i,j,k,l),idx2_tri((i-1,k-1))+1,idx2_tri((j-1,l-1))+1,i,j,k,l,zr,zi)
+
+
+def makesq(vlist,n1,n2):
+    '''
+    make hermitian matrices of size (n2 x n2) from from lower triangles
+    vlist is n1 lower triangles in flattened form
+    given: ([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t],2,4)
+           output a 2x4x4 array, where each 4x4 is the square constructed from the lower triangle
+    [
+     [
+      [a  b* d* g*]
+      [b  c  e* h*]
+      [d  e  f  i*]
+      [g  h  i  j ]
+     ],
+     [
+      [k  l* n* q*]
+      [l  m  o* r*]
+      [n  o  p  s*]
+      [q  r  s  t ]
+     ]
+    ]
+    '''
+    out=np.zeros([n1,n2,n2],dtype=np.complex128)
+    n0 = vlist.shape[0]
+    lmask=np.tri(n2,dtype=bool)
+    for i in range(n0):
+        out[i][lmask] = vlist[i].conj()
+    out2=out.transpose([0,2,1])
+    for i in range(n0):
+        out2[i][lmask] = vlist[i]
+    return out2
+
+
+def makesq3(vlist,n2):
+    '''
+    make hermitian matrices of size (n2 x n2) from from lower triangles
+    vlist is n1 lower triangles in flattened form
+    given: ([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t],2,4)
+           output a 2x4x4 array, where each 4x4 is the square constructed from the lower triangle
+    [
+     [
+      [a  b* d* g*]
+      [b  c  e* h*]
+      [d  e  f  i*]
+      [g  h  i  j ]
+     ],
+     [
+      [k  l* n* q*]
+      [l  m  o* r*]
+      [n  o  p  s*]
+      [q  r  s  t ]
+     ]
+    ]
+    '''
+    n0 = vlist.shape[0]
+    out=np.zeros([n0,n2,n2],dtype=np.complex128)
+    lmask=np.tri(n2,dtype=bool)
+    for i in range(n0):
+        out[i][lmask] = vlist[i].conj()
+    out2=out.transpose([0,2,1])
+    for i in range(n0):
+        out2[i][lmask] = vlist[i]
+    return out2
+
+def makesq2(vlist,n1,n2):
+    out=np.zeros([n1,n2,n2],dtype=np.complex128)
+    lmask=np.tri(n2,dtype=bool)
+    tmp=np.zeros([n2,n2],dtype=np.complex128)
+    tmp2=np.zeros([n2,n2],dtype=np.complex128)
+    for i in range(n1):
+        tmp[lmask] = vlist[i].conj()
+        tmp2=tmp.T
+        tmp2[lmask] = vlist[i]
+        out[i] = tmp2.copy()
+    return out
+
+
+def get_phase(cell, kpts, kmesh=None):
+    '''
+    The unitary transformation that transforms the supercell basis k-mesh
+    adapted basis.
+    '''
+    from pyscf.pbc import tools
+    from pyscf import lib
+
+    latt_vec = cell.lattice_vectors()
+    if kmesh is None:
+        # Guess kmesh
+        scaled_k = cell.get_scaled_kpts(kpts).round(8)
+        kmesh = (len(np.unique(scaled_k[:,0])),
+                 len(np.unique(scaled_k[:,1])),
+                 len(np.unique(scaled_k[:,2])))
+
+    R_rel_a = np.arange(kmesh[0])
+    R_rel_b = np.arange(kmesh[1])
+    R_rel_c = np.arange(kmesh[2])
+    R_vec_rel = lib.cartesian_prod((R_rel_a, R_rel_b, R_rel_c))
+    R_vec_abs = np.einsum('nu, uv -> nv', R_vec_rel, latt_vec)
+
+    NR = len(R_vec_abs)
+    phase = np.exp(1j*np.einsum('Ru, ku -> Rk', R_vec_abs, kpts))
+    phase /= np.sqrt(NR)  # normalization in supercell
+
+    # R_rel_mesh has to be construct exactly same to the Ts in super_cell function
+    scell = tools.super_cell(cell, kmesh)
+    return scell, phase
+
+def mo_k2gamma(cell, mo_energy, mo_coeff, kpts, kmesh=None):
+    '''
+    Transform MOs in Kpoints to the equivalents supercell
+    '''
+    from pyscf import lib
+    import scipy.linalg as la
+    scell, phase = get_phase(cell, kpts, kmesh)
+
+    E_g = np.hstack(mo_energy)
+    C_k = np.asarray(mo_coeff)
+    Nk, Nao, Nmo = C_k.shape
+    NR = phase.shape[0]
+
+    # Transform AO indices
+    C_gamma = np.einsum('Rk, kum -> Rukm', phase, C_k)
+    C_gamma = C_gamma.reshape(Nao*NR, Nk*Nmo)
+
+    E_sort_idx = np.argsort(E_g)
+    E_g = E_g[E_sort_idx]
+    C_gamma = C_gamma[:,E_sort_idx]
+    s = scell.pbc_intor('int1e_ovlp')
+    assert(abs(reduce(np.dot, (C_gamma.conj().T, s, C_gamma))
+               - np.eye(Nmo*Nk)).max() < 1e-7)
+
+    # Transform MO indices
+    E_k_degen = abs(E_g[1:] - E_g[:-1]).max() < 1e-5
+    if np.any(E_k_degen):
+        degen_mask = np.append(False, E_k_degen) | np.append(E_k_degen, False)
+        shift = min(E_g[degen_mask]) - .1
+        f = np.dot(C_gamma[:,degen_mask] * (E_g[degen_mask] - shift),
+                   C_gamma[:,degen_mask].conj().T)
+        assert(abs(f.imag).max() < 1e-5)
+
+        e, na_orb = la.eigh(f.real, s, type=2)
+        C_gamma[:,degen_mask] = na_orb[:, e>0]
+
+    if abs(C_gamma.imag).max() < 1e-7:
+        print('!Warning  Some complexe pollutions in MOs are present')
+
+    C_gamma = C_gamma.real
+    if  abs(reduce(np.dot, (C_gamma.conj().T, s, C_gamma)) - np.eye(Nmo*Nk)).max() < 1e-7:
+        print('!Warning  Some complexe pollutions in MOs are present')
+
+    s_k = cell.pbc_intor('int1e_ovlp', kpts=kpts)
+    # overlap between k-point unitcell and gamma-point supercell
+    s_k_g = np.einsum('kuv,Rk->kuRv', s_k, phase.conj()).reshape(Nk,Nao,NR*Nao)
+    # The unitary transformation from k-adapted orbitals to gamma-point orbitals
+    mo_phase = lib.einsum('kum,kuv,vi->kmi', C_k.conj(), s_k_g, C_gamma)
+
+    return mo_phase
+
+def qp2rename():
+    import shutil
+    qp2names={}
+    qp2names['mo_coef_complex'] = 'C.qp'
+    qp2names['bielec_ao_complex'] = 'W.qp'
+
+    qp2names['kinetic_ao_complex'] = 'T.qp'
+    qp2names['ne_ao_complex'] = 'V.qp'
+    qp2names['overlap_ao_complex'] = 'S.qp'
+
+
+    for old,new in qp2names.items():
+        shutil.move(old,new)
+    shutil.copy('e_nuc','E.qp')
+
+def print_mo_bi(mf,kconserv=None,outfilename='W.mo.qp',cas_idx=None,bielec_int_threshold = 1E-8):
+
+    cell = mf.cell
+    kpts = mf.kpts
+    #nao = mf.cell.nao
+    #Nk = kpts.shape[0]
+    
+    mo_coeff = mf.mo_coeff
+    # Mo_coeff actif
+    mo_k = np.array([c[:,cas_idx] for c in mo_coeff] if cas_idx is not None else mo_coeff)
+  
+    Nk, nao, nmo = mo_k.shape
+
+    if (kconserv is None):
+        from pyscf.pbc import tools
+        kconserv = tools.get_kconserv(cell, kpts)
+
+    with open(outfilename,'w') as outfile:
+        for d, kd in enumerate(kpts):
+            for c, kc in enumerate(kpts):
+                if c > d: break
+                #idx2_cd = idx2_tri((c,d))
+                for b, kb in enumerate(kpts):
+                    if b > d: break
+                    a = kconserv[b,c,d]
+                    if a > d: continue
+                    #if idx2_tri((a,b)) > idx2_cd: continue
+                    #if ((c==d) and (a>b)): continue
+                    ka = kpts[a]
+                    eri_4d_mo_kpt = mf.with_df.ao2mo([mo_k[a], mo_k[b], mo_k[c], mo_k[d]],
+                                                      [ka,kb,kc,kd],compact=False).reshape((nmo,)*4)
+                    eri_4d_mo_kpt *= 1./Nk
+                    for l in range(nmo):
+                        ll=l+d*nmo
+                        for j in range(nmo):
+                            jj=j+c*nmo
+                            if jj>ll: break
+                            idx2_jjll = idx2_tri((jj,ll))
+                            for k in range(nmo):
+                                kk=k+b*nmo
+                                if kk>ll: break
+                                for i in range(nmo):
+                                    ii=i+a*nmo
+                                    if idx2_tri((ii,kk)) > idx2_jjll: break
+                                    if ((jj==ll) and (ii>kk)): break
+                                    v=eri_4d_mo_kpt[i,k,j,l]
+                                    if (abs(v) > bielec_int_threshold):
+                                        outfile.write(stri4z(ii+1,jj+1,kk+1,ll+1,
+                                                             v.real,v.imag)+'\n')
+
+
+def print_ao_bi(mf,kconserv=None,outfilename='W.ao.qp',bielec_int_threshold = 1E-8):
+
+    cell = mf.cell
+    kpts = mf.kpts
+    nao = mf.cell.nao
+    Nk = kpts.shape[0]
+
+    if (kconserv is None):
+        from pyscf.pbc.tools import get_kconserv
+        kconserv = get_kconserv(cell, kpts)
+
+    with open(outfilename,'w') as outfile:
+        for d, kd in enumerate(kpts):
+            for c, kc in enumerate(kpts):
+                if c > d: break
+                #idx2_cd = idx2_tri((c,d))
+                for b, kb in enumerate(kpts):
+                    if b > d: break
+                    a = kconserv[b,c,d]
+                    if a > d: continue
+                    #if idx2_tri((a,b)) > idx2_cd: continue
+                    #if ((c==d) and (a>b)): continue
+                    ka = kpts[a]
+
+                    eri_4d_ao_kpt = mf.with_df.get_ao_eri(kpts=[ka,kb,kc,kd],
+                                                          compact=False).reshape((nao,)*4)
+                    eri_4d_ao_kpt *= 1./Nk
+                    for l in range(nao):
+                        ll=l+d*nao
+                        for j in range(nao):
+                            jj=j+c*nao
+                            if jj>ll: break
+                            idx2_jjll = idx2_tri((jj,ll))
+                            for k in range(nao):
+                                kk=k+b*nao
+                                if kk>ll: break
+                                for i in range(nao):
+                                    ii=i+a*nao
+                                    if idx2_tri((ii,kk)) > idx2_jjll: break
+                                    if ((jj==ll) and (ii>kk)): break
+                                    v=eri_4d_ao_kpt[i,k,j,l]
+                                    if (abs(v) > bielec_int_threshold):
+                                        outfile.write(stri4z(ii+1,jj+1,kk+1,ll+1,
+                                                             v.real,v.imag)+'\n')
+
+
+def print_kcon_chem_to_phys(kcon,fname):
+    '''
+    input: kconserv in chem notation kcon_c[a,b,c] = d
+           where (ab|cd) is allowed by symmetry
+    output: kconserv in phys notation kcon_p[i,j,k] = l
+            where <ij|kl> is allowed by symmetry
+            (printed to file)
+    '''
+    Nk,n2,n3 = kcon.shape
+    if (n2!=n3 or Nk!=n2):
+        raise Exception('print_kcon_chem_to_phys called with non-cubic array')
+
+    with open(fname,'w') as outfile:
+        for a in range(Nk):
+            for b in range(Nk):
+                for c in range(Nk):
+                    d = kcon[a,b,c]
+                    outfile.write(stri4(a+1,c+1,b+1,d+1)+'\n')
+    
+def print_kpts_unblocked(ints_k,outfilename,thresh):
+    '''
+    for ints_k of shape (Nk,n1,n2),
+    print the elements of the corresponding block-diagonal matrix of shape (Nk*n1,Nk*n2) in file
+    '''
+    Nk,n1,n2 = ints_k.shape
+    with open(outfilename,'w') as outfile:
+        for ik in range(Nk):
+            shift1 = ik*n1+1
+            shift2 = ik*n2+1
+            for i1 in range(n1):
+                for i2 in range(n2):
+                    int_ij = ints_k[ik,i1,i2]
+                    if abs(int_ij) > thresh:
+                        outfile.write(stri2z(i1+shift1, i2+shift2, int_ij.real, int_ij.imag)+'\n')
+    return
+
+def print_kpts_unblocked_upper(ints_k,outfilename,thresh):
+    '''
+    for hermitian ints_k of shape (Nk,n1,n1),
+    print the elements of the corresponding block-diagonal matrix of shape (Nk*n1,Nk*n1) in file
+    (only upper triangle is printed)
+    '''
+    Nk,n1,n2 = ints_k.shape
+    if (n1!=n2):
+        raise Exception('print_kpts_unblocked_upper called with non-square matrix')
+
+    with open(outfilename,'w') as outfile:
+        for ik in range(Nk):
+            shift = ik*n1+1
+            for i1 in range(n1):
+                for i2 in range(i1,n1):
+                    int_ij = ints_k[ik,i1,i2]
+                    if abs(int_ij) > thresh:
+                        outfile.write(stri2z(i1+shift, i2+shift, int_ij.real, int_ij.imag)+'\n')
+    return
+
+
+
+def get_kin_ao(mf):
+    nao = mf.cell.nao_nr()
+    Nk = len(mf.kpts)
+    return np.reshape(mf.cell.pbc_intor('int1e_kin',1,1,kpts=mf.kpts),(Nk,nao,nao))
+
+def get_ovlp_ao(mf):
+    nao = mf.cell.nao_nr()
+    Nk = len(mf.kpts)
+    return np.reshape(mf.get_ovlp(cell=mf.cell,kpts=mf.kpts),(Nk,nao,nao))
+
+def get_pot_ao(mf):
+    nao = mf.cell.nao_nr()
+    Nk = len(mf.kpts)
+
+    if mf.cell.pseudo:
+        v_kpts_ao = np.reshape(mf.with_df.get_pp(kpts=mf.kpts),(Nk,nao,nao))
+    else:
+        v_kpts_ao = np.reshape(mf.with_df.get_nuc(kpts=mf.kpts),(Nk,nao,nao))
+
+    if len(mf.cell._ecpbas) > 0:
+        from pyscf.pbc.gto import ecp
+        v_kpts_ao += np.reshape(ecp.ecp_int(mf.cell, mf.kpts),(Nk,nao,nao))
+
+    return v_kpts_ao
+
+def ao_to_mo_1e(ao_kpts,mo_coef):
+    return np.einsum('kim,kij,kjn->kmn',mo_coef.conj(),ao_kpts,mo_coef)
+
+def get_j3ao_old(fname,nao,Nk):
+    '''
+    returns list of Nk_pair arrays of shape (naux,nao,nao)
+    if naux is the same for each pair, returns numpy array
+    if naux is not the same for each pair, returns array of arrays
+    '''
+    import h5py
+    with h5py.File(fname,'r') as intfile:
+        j3c = intfile.get('j3c')
+        j3ckeys = list(j3c.keys())
+        j3ckeys.sort(key=lambda strkey:int(strkey))
+    
+        # in new(?) version of PySCF, there is an extra layer of groups before the datasets
+        # datasets used to be [/j3c/0,   /j3c/1,   /j3c/2,   ...]
+        # datasets now are    [/j3c/0/0, /j3c/1/0, /j3c/2/0, ...]
+        j3clist = [j3c.get(i+'/0') for i in j3ckeys]
+        #if j3clist==[None]*len(j3clist):
+        if not(any(j3clist)):
+        # if using older version, stop before last level
+            j3clist = [j3c.get(i) for i in j3ckeys]
+    
+        naosq = nao*nao
+        naotri = (nao*(nao+1))//2
+        nkinvsq = 1./np.sqrt(Nk)
+    
+        # dimensions are (kikj,iaux,jao,kao), where kikj is compound index of kpts i and j
+        # output dimensions should be reversed (nao, nao, naux, nkptpairs)
+        return np.array([(i.value.reshape([-1,nao,nao]) if (i.shape[1] == naosq) else makesq3(i.value,nao)) * nkinvsq for i in j3clist])
+
+def get_j3ao(fname,nao,Nk):
+    '''
+    returns padded df AO array
+    fills in zeros when functions are dropped due to linear dependency
+    last AO index corresponds to smallest kpt index?
+    (k, mu, i, j) where i.kpt >= j.kpt
+    '''
+    import h5py
+    with h5py.File(fname,'r') as intfile:
+        j3c = intfile.get('j3c')
+        j3ckeys = list(j3c.keys())
+        nkpairs = len(j3ckeys)
+
+        # get num order instead of lex order
+        j3ckeys.sort(key=lambda strkey:int(strkey))
+
+        # in new(?) version of PySCF, there is an extra layer of groups before the datasets
+        # datasets used to be [/j3c/0,   /j3c/1,   /j3c/2,   ...]
+        # datasets now are    [/j3c/0/0, /j3c/1/0, /j3c/2/0, ...]
+        keysub = '/0' if bool(j3c.get('0/0',getclass=True)) else ''
+
+        naux = max(map(lambda k: j3c[k+keysub].shape[0],j3c.keys()))
+
+        naosq = nao*nao
+        naotri = (nao*(nao+1))//2
+        nkinvsq = 1./np.sqrt(Nk)
+
+        j3arr = np.zeros((nkpairs,naux,nao,nao),dtype=np.complex128)
+
+        for i,kpair in enumerate(j3ckeys):
+            iaux,dim2 = j3c[kpair+keysub].shape
+            if (dim2==naosq):
+                j3arr[i,:iaux,:,:] = j3c[kpair+keysub][()].reshape([iaux,nao,nao]) * nkinvsq
+                #j3arr[i,:iaux,:,:] = j3c[kpair+keysub][()].reshape([iaux,nao,nao]).transpose((0,2,1)) * nkinvsq
+            else:
+                j3arr[i,:iaux,:,:] = makesq3(j3c[kpair+keysub][()],nao) * nkinvsq
+                #j3arr[i,:iaux,:,:] = makesq3(j3c[kpair+keysub][()].conj(),nao) * nkinvsq
+
+        return j3arr
+
+def get_j3ao_new(fname,nao,Nk):
+    '''
+    returns padded df AO array
+    fills in zeros when functions are dropped due to linear dependency
+    last AO index corresponds to largest kpt index?
+    (k, mu, j, i) where i.kpt >= j.kpt
+    '''
+    import h5py
+    with h5py.File(fname,'r') as intfile:
+        j3c = intfile.get('j3c')
+        j3ckeys = list(j3c.keys())
+        nkpairs = len(j3ckeys)
+
+        # get num order instead of lex order
+        j3ckeys.sort(key=lambda strkey:int(strkey))
+
+        # in new(?) version of PySCF, there is an extra layer of groups before the datasets
+        # datasets used to be [/j3c/0,   /j3c/1,   /j3c/2,   ...]
+        # datasets now are    [/j3c/0/0, /j3c/1/0, /j3c/2/0, ...]
+        keysub = '/0' if bool(j3c.get('0/0',getclass=True)) else ''
+
+        naux = max(map(lambda k: j3c[k+keysub].shape[0],j3c.keys()))
+
+        naosq = nao*nao
+        naotri = (nao*(nao+1))//2
+        nkinvsq = 1./np.sqrt(Nk)
+
+        j3arr = np.zeros((nkpairs,naux,nao,nao),dtype=np.complex128)
+
+        for i,kpair in enumerate(j3ckeys):
+            iaux,dim2 = j3c[kpair+keysub].shape
+            if (dim2==naosq):
+                j3arr[i,:iaux,:,:] = j3c[kpair+keysub][()].reshape([iaux,nao,nao]).transpose((0,2,1)) * nkinvsq
+            else:
+                j3arr[i,:iaux,:,:] = makesq3(j3c[kpair+keysub][()].conj(),nao) * nkinvsq
+
+        return j3arr
+
+def print_df(j3arr,fname,thresh):
+    with open(fname,'w') as outfile:
+        for k,kpt_pair in enumerate(j3arr):
+            for iaux,dfbasfunc in enumerate(kpt_pair):
+                for i,i0 in enumerate(dfbasfunc):
+                    for j,v in enumerate(i0):
+                        if (abs(v) > thresh):
+                            outfile.write(stri4z(i+1,j+1,iaux+1,k+1,v.real,v.imag)+'\n')
+    return
+
+def df_pad_ref_test(j3arr,nao,naux,nkpt_pairs):
+    df_ao_tmp = np.zeros((nao,nao,naux,nkpt_pairs),dtype=np.complex128)
+    for k,kpt_pair in enumerate(j3arr):
+        for iaux,dfbasfunc in enumerate(kpt_pair):
+            for i,i0 in enumerate(dfbasfunc):
+                for j,v in enumerate(i0):
+                    df_ao_tmp[i,j,iaux,k]=v
+    return df_ao_tmp
+
+
+def df_ao_to_mo(j3ao,mo_coef):
+    from itertools import product
+    Nk = mo_coef.shape[0]
+    kpair_list = ((i,j,idx2_tri((i,j))) for (i,j) in product(range(Nk),repeat=2) if (i>=j))
+    return np.array([
+        np.einsum('mij,ik,jl->mkl',j3ao[kij],mo_coef[ki].conj(),mo_coef[kj])
+        for ki,kj,kij in kpair_list])
+
+    
+def df_ao_to_mo_new(j3ao,mo_coef):
+    #TODO: fix this (C/F ordering, conj, transpose, view cmplx->float)
+
+    from itertools import product
+    Nk = mo_coef.shape[0]
+    return np.array([
+        np.einsum('mji,ik,jl->mlk',j3ao[idx2_tri((ki,kj))],mo_coef[ki].conj(),mo_coef[kj])
+        for ki,kj in product(range(Nk),repeat=2) if (ki>=kj)],dtype=np.complex128)
+
+def df_ao_to_mo_test(j3ao,mo_coef):
+    from itertools import product
+    Nk = mo_coef.shape[0]
+    return np.array([
+        np.einsum('mij,ik,jl->mkl',j3ao[idx2_tri((ki,kj))],mo_coef[ki].conj(),mo_coef[kj])
+        for ki,kj in product(range(Nk),repeat=2) if (ki>=kj)])
+
+def pyscf2QP2_mo(cell,mf,kpts,kmesh=None,cas_idx=None, int_threshold = 1E-8,qph5path='qpdat.h5'):
+    pyscf2QP2(cell,mf,kpts,kmesh,cas_idx,int_threshold,qph5path,
+            print_ao_ints_df=False,
+            print_mo_ints_df=True,
+            print_ao_ints_mono=False,
+            print_mo_ints_mono=True)
+    return
+
+
+
+def pyscf2QP2(cell,mf, kpts, kmesh=None, cas_idx=None, int_threshold = 1E-8, 
+        qph5path = 'qpdat.h5',
+        print_ao_ints_bi=False, 
+        print_mo_ints_bi=False, 
+        print_ao_ints_df=True, 
+        print_mo_ints_df=False, 
+        print_ao_ints_mono=True, 
+        print_mo_ints_mono=False,
+        print_debug=False):
+    '''
+    kpts = List of kpoints coordinates. Cannot be null, for gamma is other script
+    kmesh = Mesh of kpoints (optional)
+    cas_idx = List of active MOs. If not specified all MOs are actives
+    int_threshold = The integral will be not printed in they are bellow that
+    '''
+  
+#    from pyscf.pbc import ao2mo
+    from pyscf.pbc import tools
+    import h5py
+#    import scipy
+    from scipy.linalg import block_diag
+
+    mo_coef_threshold = int_threshold
+    ovlp_threshold = int_threshold
+    kin_threshold = int_threshold
+    ne_threshold = int_threshold
+    bielec_int_threshold = int_threshold
+    thresh_mono = int_threshold
+    
+    
+#    qph5path = 'qpdat.h5'
+    # create hdf5 file, delete old data if exists
+    with h5py.File(qph5path,'w') as qph5:
+        qph5.create_group('nuclei')
+        qph5.create_group('electrons')
+        qph5.create_group('ao_basis')
+        qph5.create_group('mo_basis')
+
+    mo_coeff = mf.mo_coeff
+    # Mo_coeff actif
+    mo_k = np.array([c[:,cas_idx] for c in mo_coeff] if cas_idx is not None else mo_coeff)
+    e_k =  np.array([e[cas_idx] for e in mf.mo_energy] if cas_idx is not None else mf.mo_energy)
+  
+    Nk, nao, nmo = mo_k.shape
+
+    print("n Kpts", Nk)
+    print("n active Mos per kpt", nmo)
+    print("n AOs per kpt", nao)
+
+    ##########################################
+    #                                        #
+    #                Nuclei                  #
+    #                                        #
+    ##########################################
+
+    natom = cell.natm
+    print('n_atom per kpt',   natom)
+
+    atom_xyz = mf.cell.atom_coords()
+    if not(mf.cell.unit.startswith(('B','b','au','AU'))):
+        from pyscf.data.nist import BOHR
+        atom_xyz /= BOHR # always convert to au
+
+    with h5py.File(qph5path,'a') as qph5:
+        qph5['nuclei'].attrs['kpt_num']=Nk
+        qph5['nuclei'].attrs['nucl_num']=natom
+        qph5.create_dataset('nuclei/nucl_coord',data=atom_xyz)
+        qph5.create_dataset('nuclei/nucl_charge',data=mf.cell.atom_charges())
+
+        strtype=h5py.special_dtype(vlen=str)
+        atom_dset=qph5.create_dataset('nuclei/nucl_label',(natom,),dtype=strtype)
+        for i in range(natom):
+            atom_dset[i] = mf.cell.atom_pure_symbol(i)
+
+    ##########################################
+    #                                        #
+    #                Basis                   #
+    #                                        #
+    ##########################################
+
+    # nucleus on which each AO is centered
+    ao_nucl=[i[0] for i in mf.cell.ao_labels(fmt=False,base=1)]
+
+    with h5py.File(qph5path,'a') as qph5:
+        qph5['mo_basis'].attrs['mo_num']=Nk*nmo
+        qph5['ao_basis'].attrs['ao_num']=Nk*nao
+
+        #qph5['ao_basis'].attrs['ao_basis']=mf.cell.basis
+        qph5['ao_basis'].attrs['ao_basis']="dummy basis"
+
+        qph5.create_dataset('ao_basis/ao_nucl',data=Nk*ao_nucl)
+
+    ##########################################
+    #                                        #
+    #              Electrons                 #
+    #                                        #
+    ##########################################
+
+    nelec = cell.nelectron
+    neleca,nelecb = cell.nelec
+
+    print('num_elec per kpt', nelec)
+
+    with h5py.File(qph5path,'a') as qph5:
+        #in old version: param << nelec*Nk, nmo*Nk, natom*Nk
+        qph5['electrons'].attrs['elec_alpha_num']=neleca*Nk 
+        qph5['electrons'].attrs['elec_beta_num']=nelecb*Nk
+
+    ##########################################
+    #                                        #
+    #           Nuclear Repulsion            #
+    #                                        #
+    ##########################################
+
+    #Total energy shift due to Ewald probe charge = -1/2 * Nelec*madelung/cell.vol =
+    shift = tools.pbc.madelung(cell, kpts)*cell.nelectron * -.5 
+    e_nuc = (cell.energy_nuc() + shift)*Nk
+  
+    print('nucl_repul', e_nuc)
+
+    with h5py.File(qph5path,'a') as qph5:
+        qph5['nuclei'].attrs['nuclear_repulsion']=e_nuc
+  
+    ##########################################
+    #                                        #
+    #               MO Coef                  #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'a') as qph5:
+        # k,mo,ao(,2)
+        mo_coef_f = np.array(mo_k.transpose((0,2,1)),order='c',dtype=np.complex128)
+        #mo_coef_blocked=block_diag(*mo_k)
+        mo_coef_blocked_f = block_diag(*mo_coef_f)
+        qph5.create_dataset('mo_basis/mo_coef_complex',data=mo_coef_blocked_f.view(dtype=np.float64).reshape((Nk*nmo,Nk*nao,2)))
+        qph5.create_dataset('mo_basis/mo_coef_kpts',data=mo_coef_f.view(dtype=np.float64).reshape((Nk,nmo,nao,2)))
+   
+    if print_debug:
+        print_kpts_unblocked(mo_k,'C.qp',mo_coef_threshold)   
+
+    ##########################################
+    #                                        #
+    #            Integrals Mono              #
+    #                                        #
+    ##########################################
+    
+    ne_ao = get_pot_ao(mf)
+    kin_ao = get_kin_ao(mf)
+    ovlp_ao = get_ovlp_ao(mf)
+
+    if print_ao_ints_mono:
+
+        with h5py.File(qph5path,'a') as qph5:
+            kin_ao_f =  np.array(kin_ao.transpose((0,2,1)),order='c',dtype=np.complex128)
+            ovlp_ao_f = np.array(ovlp_ao.transpose((0,2,1)),order='c',dtype=np.complex128)
+            ne_ao_f =   np.array(ne_ao.transpose((0,2,1)),order='c',dtype=np.complex128)
+
+            qph5.create_dataset('ao_one_e_ints/ao_integrals_kinetic_kpts',data=kin_ao_f.view(dtype=np.float64).reshape((Nk,nao,nao,2)))
+            qph5.create_dataset('ao_one_e_ints/ao_integrals_overlap_kpts',data=ovlp_ao_f.view(dtype=np.float64).reshape((Nk,nao,nao,2)))
+            qph5.create_dataset('ao_one_e_ints/ao_integrals_n_e_kpts',    data=ne_ao_f.view(dtype=np.float64).reshape((Nk,nao,nao,2)))
+
+        if print_debug:
+            for fname,ints in zip(('S.qp','V.qp','T.qp'),
+                                  (ovlp_ao, ne_ao, kin_ao)):
+                print_kpts_unblocked_upper(ints,fname,thresh_mono)
+
+    if print_mo_ints_mono:
+        kin_mo = ao_to_mo_1e(kin_ao,mo_k)
+        ovlp_mo = ao_to_mo_1e(ovlp_ao,mo_k)
+        ne_mo = ao_to_mo_1e(ne_ao,mo_k)
+
+        with h5py.File(qph5path,'a') as qph5:
+            kin_mo_f =  np.array(kin_mo.transpose((0,2,1)),order='c',dtype=np.complex128)
+            ovlp_mo_f = np.array(ovlp_mo.transpose((0,2,1)),order='c',dtype=np.complex128)
+            ne_mo_f =   np.array(ne_mo.transpose((0,2,1)),order='c',dtype=np.complex128)
+
+            qph5.create_dataset('mo_one_e_ints/mo_integrals_kinetic_kpts',data=kin_mo_f.view(dtype=np.float64).reshape((Nk,nmo,nmo,2)))
+            qph5.create_dataset('mo_one_e_ints/mo_integrals_overlap_kpts',data=ovlp_mo_f.view(dtype=np.float64).reshape((Nk,nmo,nmo,2)))
+            qph5.create_dataset('mo_one_e_ints/mo_integrals_n_e_kpts',    data=ne_mo_f.view(dtype=np.float64).reshape((Nk,nmo,nmo,2)))
+        if print_debug:
+            for fname,ints in zip(('S.mo.qp','V.mo.qp','T.mo.qp'),
+                                  (ovlp_mo, ne_mo, kin_mo)):
+                print_kpts_unblocked_upper(ints,fname,thresh_mono)
+
+  
+    ##########################################
+    #                                        #
+    #               k-points                 #
+    #                                        #
+    ##########################################
+
+    kconserv = tools.get_kconserv(cell, kpts)
+
+    with h5py.File(qph5path,'a') as qph5:
+        kcon_f_phys = np.array(kconserv.transpose((1,2,0)),order='c')
+        qph5.create_dataset('nuclei/kconserv',data=kcon_f_phys+1)
+    
+    if print_debug:
+        print_kcon_chem_to_phys(kconserv,'K.qp')
+  
+    ##########################################
+    #                                        #
+    #             Integrals Bi               #
+    #                                        #
+    ##########################################
+
+    j3ao_new = get_j3ao_new(mf.with_df._cderi,nao,Nk)
+
+    # test? nkpt_pairs should be (Nk*(Nk+1))//2
+    nkpt_pairs, naux, _, _ = j3ao_new.shape
+
+    print("n df fitting functions", naux)
+    with h5py.File(qph5path,'a') as qph5:
+        qph5.create_group('ao_two_e_ints')
+        qph5['ao_two_e_ints'].attrs['df_num']=naux
+
+    if print_ao_ints_df:
+        if print_debug:
+            print_df(j3ao_new,'D.qp',bielec_int_threshold)
+
+        with h5py.File(qph5path,'a') as qph5:
+            qph5.create_dataset('ao_two_e_ints/df_ao_integrals',data=j3ao_new.view(dtype=np.float64).reshape((nkpt_pairs,naux,nao,nao,2)))
+
+    if print_mo_ints_df:
+
+        j3mo_new = df_ao_to_mo_new(j3ao_new,mo_k)
+
+        if print_debug:
+            print_df(j3mo_new,'D.mo.qp',bielec_int_threshold)
+
+        with h5py.File(qph5path,'a') as qph5:
+            qph5.create_dataset('mo_two_e_ints/df_mo_integrals',data=j3mo_new.view(dtype=np.float64).reshape((nkpt_pairs,naux,nmo,nmo,2)))
+
+    if (print_ao_ints_bi):
+        print_ao_bi(mf,kconserv,'W.qp',bielec_int_threshold)
+    if (print_mo_ints_bi):
+        print_mo_bi(mf,kconserv,'W.mo.qp',cas_idx,bielec_int_threshold)
+    return
+
+def xyzcount(s):
+    return list(map(s.count,['x','y','z']))
+
+def pyscf2QP2_mol(mf, cas_idx=None, int_threshold = 1E-8, 
+        qph5path = 'qpdat.h5',
+        print_debug=False):
+    '''
+    cas_idx = List of active MOs. If not specified all MOs are actives
+    int_threshold = The integral will be not printed in they are bellow that
+    norm should be one of 'sp', 'all', or None
+    '''
+  
+    import h5py
+
+    norm='sp'
+    mol = mf.mol
+    nao_c = mol.nao_cart()
+
+    mo_coef_threshold = int_threshold
+    ovlp_threshold = int_threshold
+    kin_threshold = int_threshold
+    ne_threshold = int_threshold
+    bielec_int_threshold = int_threshold
+    thresh_mono = int_threshold
+    
+    
+#    qph5path = 'qpdat.h5'
+    # create hdf5 file, delete old data if exists
+    with h5py.File(qph5path,'w') as qph5:
+        qph5.create_group('nuclei')
+        qph5.create_group('electrons')
+        qph5.create_group('ao_basis')
+        qph5.create_group('mo_basis')
+        qph5.create_group('pseudo')
+        qph5['pseudo'].attrs['do_pseudo']=False
+
+    if mf.mol.cart:
+        mo_coeff = mf.mo_coeff.copy()
+    else:
+        #c2s = mol.cart2sph_coeff(normalized=norm)
+        c2s = mol.cart2sph_coeff(normalized='sp')
+        #c2s = mol.cart2sph_coeff(normalized='all')
+        #c2s = mol.cart2sph_coeff(normalized=None)
+        mo_coeff = np.dot(c2s,mf.mo_coeff)
+    #TODO: clean this up; use mol.cart_labels(fmt=False)
+    dnormlbl1=["dxx","dyy","dzz"]
+    dnormfac1 = 2.0*np.sqrt(np.pi/5)
+
+    dnormlbl2=["dxy","dxz","dyz"]
+    dnormfac2 = 2.0*np.sqrt(np.pi/15)
+
+    fnormlbl1=["fxxx","fyyy","fzzz"]
+    fnormfac1 = 2.0*np.sqrt(np.pi/7)
+
+    fnormlbl2=["fxxy","fxxz","fxyy","fxzz","fyyz","fyzz"]
+    fnormfac2 = 2.0*np.sqrt(np.pi/35)
+
+    fnormlbl3=["fxyz"]
+    fnormfac3 = 2.0*np.sqrt(np.pi/105)
+
+    gnormlbl1=["gxxxx","gyyyy","gzzzz"]
+    gnormfac1 = 2.0*np.sqrt(np.pi/9)
+
+    gnormlbl2=["gxxxy","gxxxz","gxyyy","gxzzz","gyyyz","gyzzz"]
+    gnormfac2 = 2.0*np.sqrt(np.pi/63)
+
+    gnormlbl3=["gxxyy","gxxzz","gyyzz"]
+    gnormfac3 = 2.0*np.sqrt(np.pi/105)
+
+    gnormlbl4=["gxxyz","gxyyz","gxyzz"]
+    gnormfac4 = 2.0*np.sqrt(np.pi/315)
+
+    hnormlbl1=["hxxxxx","hyyyyy","hzzzzz"]
+    hnormfac1 = 2.0*np.sqrt(np.pi/11)
+
+    hnormlbl2=["hxxxxy","hxxxxz","hxyyyy","hxzzzz","hyyyyz","hyzzzz"]
+    hnormfac2 = 2.0*np.sqrt(np.pi/99)
+
+    hnormlbl3=["hxxxyy","hxxxzz","hxxyyy","hxxzzz","hyyyzz","hyyzzz"]
+    hnormfac3 = 2.0*np.sqrt(np.pi/231)
+
+    hnormlbl4=["hxxxyz","hxyyyz","hxyzzz"]
+    hnormfac4 = 2.0*np.sqrt(np.pi/693)
+
+    hnormlbl5=["hxxyyz","hxxyzz","hxyyzz"]
+    hnormfac5 = 2.0*np.sqrt(np.pi/1155)
+
+    for i_lbl,mo_lbl in enumerate(mol.cart_labels()):
+        if any(i in mo_lbl for i in dnormlbl1):
+            mo_coeff[i_lbl,:] *= dnormfac1
+        elif any(i in mo_lbl for i in dnormlbl2):
+            mo_coeff[i_lbl,:] *= dnormfac2
+        elif any(i in mo_lbl for i in fnormlbl1):
+            mo_coeff[i_lbl,:] *= fnormfac1
+        elif any(i in mo_lbl for i in fnormlbl2):
+            mo_coeff[i_lbl,:] *= fnormfac2
+        elif any(i in mo_lbl for i in fnormlbl3):
+            mo_coeff[i_lbl,:] *= fnormfac3
+        elif any(i in mo_lbl for i in gnormlbl1):
+            mo_coeff[i_lbl,:] *= gnormfac1
+        elif any(i in mo_lbl for i in gnormlbl2):
+            mo_coeff[i_lbl,:] *= gnormfac2
+        elif any(i in mo_lbl for i in gnormlbl3):
+            mo_coeff[i_lbl,:] *= gnormfac3
+        elif any(i in mo_lbl for i in gnormlbl4):
+            mo_coeff[i_lbl,:] *= gnormfac4
+        elif any(i in mo_lbl for i in hnormlbl1):
+            mo_coeff[i_lbl,:] *= hnormfac1
+        elif any(i in mo_lbl for i in hnormlbl2):
+            mo_coeff[i_lbl,:] *= hnormfac2
+        elif any(i in mo_lbl for i in hnormlbl3):
+            mo_coeff[i_lbl,:] *= hnormfac3
+        elif any(i in mo_lbl for i in hnormlbl4):
+            mo_coeff[i_lbl,:] *= hnormfac4
+        elif any(i in mo_lbl for i in hnormlbl5):
+            mo_coeff[i_lbl,:] *= hnormfac5
+
+    # Mo_coeff actif
+    mo_c = np.array([c[:,cas_idx] for c in mo_coeff] if cas_idx is not None else mo_coeff)
+    e_c =  np.array([e[cas_idx] for e in mf.mo_energy] if cas_idx is not None else mf.mo_energy)
+  
+    nao, nmo = mo_c.shape
+
+    print("n active MOs", nmo)
+    print("n AOs", nao)
+    assert nao==nao_c, "wrong number of AOs"
+
+    ##########################################
+    #                                        #
+    #                Nuclei                  #
+    #                                        #
+    ##########################################
+
+    natom = mol.natm
+    print('n_atom',   natom)
+
+    atom_xyz = mol.atom_coords(unit='Bohr')
+    #if not(mol.unit.startswith(('B','b','au','AU'))):
+    #    from pyscf.data.nist import BOHR
+    #    atom_xyz /= BOHR # always convert to au
+
+    with h5py.File(qph5path,'a') as qph5:
+        qph5['nuclei'].attrs['nucl_num']=natom
+        qph5.create_dataset('nuclei/nucl_coord',data=atom_xyz)
+        qph5.create_dataset('nuclei/nucl_charge',data=mol.atom_charges())
+
+        strtype=h5py.special_dtype(vlen=str)
+        atom_dset=qph5.create_dataset('nuclei/nucl_label',(natom,),dtype=strtype)
+        for i in range(natom):
+            atom_dset[i] = mol.atom_pure_symbol(i)
+
+    ##########################################
+    #                                        #
+    #                 ECP                    #
+    #                                        #
+    ##########################################
+
+    if (mol.has_ecp()):
+        #atsymb = [mol.atom_pure_symbol(i) for i in range(natom)]
+        #pyecp = mol._ecp
+        ## nelec to remove for each atom
+        #nuc_z_remov = [pyecp[i][0] for i in atsymb]
+        #nl_per_atom = [len(pyecp[i][1]) for i in atsymb]
+        ## list of l-values for channels of each atom
+        #ecp_l = [[pyecp[i][1][j][0] for j in range(len(pyecp[i][1]))] for i in atsymb]
+        ## list of [exp,coef] for each channel (r**0,1,2,3,4,5,)
+        #ecp_ac = [[pyecp[i][1][j][1] for j in range(len(pyecp[i][1]))] for i in atsymb]
+        pyecp = [mol._ecp[mol.atom_pure_symbol(i)] for i in range(natom)]
+        nzrmv=[0]*natom
+        lmax=0
+        klocmax=0
+        knlmax=0
+        for i,(nz,dat) in enumerate(pyecp):
+            nzrmv[i]=nz
+            for lval,ac in dat:
+                if (lval==-1):
+                    klocmax=max(sum(len(j) for j in ac),klocmax)
+                else:
+                    lmax=max(lval,lmax)
+                    knlmax=max(sum(len(j) for j in ac),knlmax)
+        #psd_nk = np.zeros((natom,klocmax),dtype=int)
+        #psd_vk = np.zeros((natom,klocmax),dtype=float)
+        #psd_dzk = np.zeros((natom,klocmax),dtype=float)
+        #psd_nkl = np.zeros((natom,knlmax,lmax+1),dtype=int)
+        #psd_vkl = np.zeros((natom,knlmax,lmax+1),dtype=float)
+        #psd_dzkl = np.zeros((natom,knlmax,lmax+1),dtype=float)
+        klnlmax=max(klocmax,knlmax)
+        psd_n = np.zeros((lmax+2,klnlmax,natom),dtype=int)
+        psd_v = np.zeros((lmax+2,klnlmax,natom),dtype=float)
+        psd_dz = np.zeros((lmax+2,klnlmax,natom),dtype=float)
+        for i,(_,dat) in enumerate(pyecp):
+            for lval,ac in dat:
+                count=0
+                for ri,aici in enumerate(ac):
+                    for ai,ci in aici:
+                        psd_n[lval+1,count,i] = ri-2
+                        psd_v[lval+1,count,i] = ci
+                        psd_dz[lval+1,count,i] = ai
+                        count += 1
+        psd_nk = psd_n[0,:klocmax]
+        psd_vk = psd_v[0,:klocmax]
+        psd_dzk = psd_dz[0,:klocmax]
+        psd_nkl = psd_n[1:,:knlmax]
+        psd_vkl = psd_v[1:,:knlmax]
+        psd_dzkl = psd_dz[1:,:knlmax]
+        with h5py.File(qph5path,'a') as qph5:
+            qph5['pseudo'].attrs['do_pseudo']=True
+            qph5['pseudo'].attrs['pseudo_lmax']=lmax
+            qph5['pseudo'].attrs['pseudo_klocmax']=klocmax
+            qph5['pseudo'].attrs['pseudo_kmax']=knlmax
+            qph5.create_dataset('pseudo/nucl_charge_remove',data=nzrmv)
+            qph5.create_dataset('pseudo/pseudo_n_k',data=psd_nk)
+            qph5.create_dataset('pseudo/pseudo_n_kl',data=psd_nkl)
+            qph5.create_dataset('pseudo/pseudo_v_k',data=psd_vk)
+            qph5.create_dataset('pseudo/pseudo_v_kl',data=psd_vkl)
+            qph5.create_dataset('pseudo/pseudo_dz_k',data=psd_dzk)
+            qph5.create_dataset('pseudo/pseudo_dz_kl',data=psd_dzkl)
+
+        ## nelec to remove for each atom
+        #nuc_z_remov = [i[0] for i in pyecp]
+        #nl_per_atom = [len(i[1]) for i in pyecp]
+        ## list of l-values for channels of each atom
+        #ecp_l = [[ j[0] for j in i[1] ] for i in pyecp]
+        #lmax = max(map(max,ecp_l))
+        ## list of [exp,coef] for each channel (r**0,1,2,3,4,5,)
+        #ecp_ac = [[ j[1] for j in i[1] ] for i in pyecp]
+
+
+    ##########################################
+    #                                        #
+    #                Basis                   #
+    #                                        #
+    ##########################################
+
+    # nucleus on which each AO is centered
+    ao_nucl=[i[0] for i in mf.mol.ao_labels(fmt=False,base=1)]
+
+
+    nprim_max = 0
+    for iatom, (sh0,sh1,ao0,ao1) in enumerate(mol.aoslice_by_atom()):
+        for ib in range(sh0,sh1): # sets of contracted exponents
+            nprim = mol.bas_nprim(ib)
+            if (nprim > nprim_max):
+                nprim_max = nprim
+    
+    qp_prim_num = np.zeros((nao),dtype=int)
+    qp_coef = np.zeros((nao,nprim_max))
+    qp_expo = np.zeros((nao,nprim_max))
+    qp_nucl = np.zeros((nao),dtype=int)
+    qp_pwr = np.zeros((nao,3),dtype=int)
+    
+    clabels = mol.cart_labels(fmt=False)
+    
+    tmp_idx=0
+    for iatom, (sh0,sh1,ao0,ao1) in enumerate(mol.aoslice_by_atom()):
+        # shell start,end; AO start,end (sph or cart) for each atom
+        for ib in range(sh0,sh1): # sets of contracted exponents
+            l = mol.bas_angular(ib)    # angular momentum
+            nprim = mol.bas_nprim(ib)  # numer of primitives
+            es = mol.bas_exp(ib)       # exponents
+            cs = mol.bas_ctr_coeff(ib) # coeffs
+            nctr = mol.bas_nctr(ib)    # number of contractions
+            print(iatom,ib,l,nprim,nctr,tmp_idx)
+            for ic in range(nctr): # sets of contraction coeffs
+                for nfunc in range(((l+1)*(l+2))//2): # always use cart for qp ao basis?
+                    qp_expo[tmp_idx,:nprim] = es[:]
+                    qp_coef[tmp_idx,:nprim] = cs[:,ic]
+                    qp_nucl[tmp_idx] = iatom + 1
+                    qp_pwr[tmp_idx,:] = xyzcount(clabels[tmp_idx][3])
+                    qp_prim_num[tmp_idx] = nprim
+                    tmp_idx += 1
+
+    with h5py.File(qph5path,'a') as qph5:
+        qph5['mo_basis'].attrs['mo_num']=nmo
+        qph5['ao_basis'].attrs['ao_num']=nao
+
+        #qph5['ao_basis'].attrs['ao_basis']=mf.cell.basis
+        qph5['ao_basis'].attrs['ao_basis']="dummy basis"
+
+        qph5.create_dataset('ao_basis/ao_nucl',data=qp_nucl)
+        qph5.create_dataset('ao_basis/ao_prim_num',data=qp_prim_num)
+        qph5.create_dataset('ao_basis/ao_expo',data=qp_expo.T)
+        qph5.create_dataset('ao_basis/ao_coef',data=qp_coef.T)
+        qph5.create_dataset('ao_basis/ao_power',data=qp_pwr.T)
+
+    ##########################################
+    #                                        #
+    #              Electrons                 #
+    #                                        #
+    ##########################################
+
+    nelec = mol.nelectron
+    neleca,nelecb = mol.nelec
+
+    print('num_elec', nelec)
+
+    with h5py.File(qph5path,'a') as qph5:
+        qph5['electrons'].attrs['elec_alpha_num']=neleca 
+        qph5['electrons'].attrs['elec_beta_num']=nelecb
+
+    ##########################################
+    #                                        #
+    #           Nuclear Repulsion            #
+    #                                        #
+    ##########################################
+
+    e_nuc = mol.energy_nuc()
+  
+    print('nucl_repul', e_nuc)
+
+    with h5py.File(qph5path,'a') as qph5:
+        qph5['nuclei'].attrs['nuclear_repulsion']=e_nuc
+  
+    ##########################################
+    #                                        #
+    #               MO Coef                  #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'a') as qph5:
+        qph5.create_dataset('mo_basis/mo_coef',data=mo_c.T)
+   
+    return
diff --git a/src/utils_periodic/NEED b/src/utils_complex/NEED
similarity index 100%
rename from src/utils_periodic/NEED
rename to src/utils_complex/NEED
diff --git a/src/utils_complex/README.rst b/src/utils_complex/README.rst
new file mode 100644
index 00000000..6bdb2ca7
--- /dev/null
+++ b/src/utils_complex/README.rst
@@ -0,0 +1,6 @@
+=====
+dummy
+=====
+
+Module necessary to avoid the ``xxx  is a root module but does not contain a main file`` message.
+
diff --git a/src/utils_complex/create_ezfio_complex_3idx.py b/src/utils_complex/create_ezfio_complex_3idx.py
new file mode 100755
index 00000000..afbe33e2
--- /dev/null
+++ b/src/utils_complex/create_ezfio_complex_3idx.py
@@ -0,0 +1,398 @@
+#!/usr/bin/env python
+from ezfio import ezfio
+import h5py
+
+import sys
+import numpy as np
+fname = sys.argv[1]
+qph5name = sys.argv[2]
+
+#qph5=h5py.File(qph5path,'r')
+
+def convert_kpts(filename,qph5path):
+    ezfio.set_file(filename)
+    ezfio.set_nuclei_is_complex(True)
+    
+    with h5py.File(qph5path,'r') as qph5:
+        kpt_num = qph5['nuclei'].attrs['kpt_num']
+        nucl_num = qph5['nuclei'].attrs['nucl_num']
+        ao_num = qph5['ao_basis'].attrs['ao_num']
+        mo_num = qph5['mo_basis'].attrs['mo_num']
+        elec_alpha_num = qph5['electrons'].attrs['elec_alpha_num']
+        elec_beta_num = qph5['electrons'].attrs['elec_beta_num']
+    
+    ezfio.set_nuclei_kpt_num(kpt_num)
+    kpt_pair_num = (kpt_num*kpt_num + kpt_num)//2
+    ezfio.set_nuclei_kpt_pair_num(kpt_pair_num)
+    
+    # don't multiply nuclei by kpt_num
+    # work in k-space, not in equivalent supercell
+    nucl_num_per_kpt = nucl_num 
+    ezfio.set_nuclei_nucl_num(nucl_num_per_kpt)
+    
+    # these are totals (kpt_num * num_per_kpt)
+    # need to change if we want to truncate orbital space within pyscf
+    ezfio.set_ao_basis_ao_num(ao_num)
+    ezfio.set_mo_basis_mo_num(mo_num)
+    ezfio.set_ao_basis_ao_num_per_kpt(ao_num//kpt_num)
+    ezfio.set_mo_basis_mo_num_per_kpt(mo_num//kpt_num)
+    ezfio.electrons_elec_alpha_num = elec_alpha_num
+    ezfio.electrons_elec_beta_num  = elec_beta_num
+    
+    
+    
+    ##ao_num = mo_num
+    ##Important !
+    #import math
+    #nelec_per_kpt = num_elec // n_kpts
+    #nelec_alpha_per_kpt = int(math.ceil(nelec_per_kpt / 2.))
+    #nelec_beta_per_kpt = int(math.floor(nelec_per_kpt / 2.))
+    #
+    #ezfio.electrons_elec_alpha_num = int(nelec_alpha_per_kpt * n_kpts)
+    #ezfio.electrons_elec_beta_num = int(nelec_beta_per_kpt * n_kpts)
+    
+    #ezfio.electrons_elec_alpha_num = int(math.ceil(num_elec / 2.))
+    #ezfio.electrons_elec_beta_num = int(math.floor(num_elec / 2.))
+    
+    #ezfio.set_utils_num_kpts(n_kpts)
+    #ezfio.set_integrals_bielec_df_num(n_aux)
+    
+    #(old)Important
+    #ezfio.set_nuclei_nucl_num(nucl_num)
+    #ezfio.set_nuclei_nucl_charge([0.]*nucl_num)
+    #ezfio.set_nuclei_nucl_coord( [ [0.], [0.], [0.] ]*nucl_num )
+    #ezfio.set_nuclei_nucl_label( ['He'] * nucl_num )
+    
+    
+    with h5py.File(qph5path,'r') as qph5:
+        nucl_charge=qph5['nuclei/nucl_charge'][()].tolist()
+        nucl_coord=qph5['nuclei/nucl_coord'][()].T.tolist()
+        nucl_label=qph5['nuclei/nucl_label'][()].tolist()
+        nuclear_repulsion = qph5['nuclei'].attrs['nuclear_repulsion']
+    
+    ezfio.set_nuclei_nucl_charge(nucl_charge)
+    ezfio.set_nuclei_nucl_coord(nucl_coord)
+    if isinstance(nucl_label[0],bytes):
+        nucl_label = list(map(lambda x:x.decode(),nucl_label))
+    ezfio.set_nuclei_nucl_label(nucl_label)
+    
+    ezfio.set_nuclei_io_nuclear_repulsion('Read')
+    ezfio.set_nuclei_nuclear_repulsion(nuclear_repulsion)
+    
+    
+    ##########################################
+    #                                        #
+    #                Basis                   #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        ezfio.set_ao_basis_ao_basis(qph5['ao_basis'].attrs['ao_basis'])
+        ezfio.set_ao_basis_ao_nucl(qph5['ao_basis/ao_nucl'][()].tolist())
+    
+    
+    #Just need one (can clean this up later)
+    ao_prim_num_max  = 5
+    
+    d = [ [0] *ao_prim_num_max]*ao_num
+    ezfio.set_ao_basis_ao_prim_num([ao_prim_num_max]*ao_num)
+    ezfio.set_ao_basis_ao_power(d)
+    ezfio.set_ao_basis_ao_coef(d)
+    ezfio.set_ao_basis_ao_expo(d)
+    
+    
+    
+      
+    ##########################################
+    #                                        #
+    #               MO Coef                  #
+    #                                        #
+    ##########################################
+        
+    
+    with h5py.File(qph5path,'r') as qph5:
+        mo_coef_kpts = qph5['mo_basis/mo_coef_kpts'][()].tolist()
+        mo_coef_cplx = qph5['mo_basis/mo_coef_complex'][()].tolist()
+    ezfio.set_mo_basis_mo_coef_kpts(mo_coef_kpts)
+    ezfio.set_mo_basis_mo_coef_complex(mo_coef_cplx)
+    #maybe fix qp so we don't need this?
+    #ezfio.set_mo_basis_mo_coef([[i for i in range(mo_num)] * ao_num])
+    
+    
+    ##########################################
+    #                                        #
+    #            Integrals Mono              #
+    #                                        #
+    ##########################################
+        
+    with h5py.File(qph5path,'r') as qph5:
+        if 'ao_one_e_ints' in qph5.keys():
+            kin_ao_reim=qph5['ao_one_e_ints/ao_integrals_kinetic_kpts'][()].tolist()
+            ovlp_ao_reim=qph5['ao_one_e_ints/ao_integrals_overlap_kpts'][()].tolist()
+            ne_ao_reim=qph5['ao_one_e_ints/ao_integrals_n_e_kpts'][()].tolist()
+    
+            ezfio.set_ao_one_e_ints_ao_integrals_kinetic_kpts(kin_ao_reim)
+            ezfio.set_ao_one_e_ints_ao_integrals_overlap_kpts(ovlp_ao_reim)
+            ezfio.set_ao_one_e_ints_ao_integrals_n_e_kpts(ne_ao_reim)
+            
+            ezfio.set_ao_one_e_ints_io_ao_integrals_kinetic('Read')
+            ezfio.set_ao_one_e_ints_io_ao_integrals_overlap('Read')
+            ezfio.set_ao_one_e_ints_io_ao_integrals_n_e('Read')
+      
+        
+    with h5py.File(qph5path,'r') as qph5:
+        if 'mo_one_e_ints' in qph5.keys():
+            kin_mo_reim=qph5['mo_one_e_ints/mo_integrals_kinetic_kpts'][()].tolist()
+            ovlp_mo_reim=qph5['mo_one_e_ints/mo_integrals_overlap_kpts'][()].tolist()
+            ne_mo_reim=qph5['mo_one_e_ints/mo_integrals_n_e_kpts'][()].tolist()
+    
+            ezfio.set_mo_one_e_ints_mo_integrals_kinetic_kpts(kin_mo_reim)
+            ezfio.set_mo_one_e_ints_mo_integrals_overlap_kpts(ovlp_mo_reim)
+            #ezfio.set_mo_one_e_ints_mo_integrals_n_e_complex(ne_mo_reim)
+            ezfio.set_mo_one_e_ints_mo_integrals_n_e_kpts(ne_mo_reim)
+            
+            ezfio.set_mo_one_e_ints_io_mo_integrals_kinetic('Read')
+            ezfio.set_mo_one_e_ints_io_mo_integrals_overlap('Read')
+            #ezfio.set_mo_one_e_ints_io_mo_integrals_n_e('Read')
+            ezfio.set_mo_one_e_ints_io_mo_integrals_n_e('Read')
+      
+    ##########################################
+    #                                        #
+    #               k-points                 #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        kconserv = qph5['nuclei/kconserv'][()].tolist()
+    
+    ezfio.set_nuclei_kconserv(kconserv)
+    ezfio.set_nuclei_io_kconserv('Read')
+      
+    ##########################################
+    #                                        #
+    #             Integrals Bi               #
+    #                                        #
+    ##########################################
+    
+    # should this be in ao_basis? ao_two_e_ints?
+    with h5py.File(qph5path,'r') as qph5:
+        if 'ao_two_e_ints' in qph5.keys():
+            df_num = qph5['ao_two_e_ints'].attrs['df_num']
+            ezfio.set_ao_two_e_ints_df_num(df_num)
+            if 'df_ao_integrals' in qph5['ao_two_e_ints'].keys():
+    #            dfao_re0=qph5['ao_two_e_ints/df_ao_integrals_real'][()].transpose((3,2,1,0))
+    #            dfao_im0=qph5['ao_two_e_ints/df_ao_integrals_imag'][()].transpose((3,2,1,0))
+    #            dfao_cmplx0 = np.stack((dfao_re0,dfao_im0),axis=-1).tolist()
+    #            ezfio.set_ao_two_e_ints_df_ao_integrals_complex(dfao_cmplx0)
+                dfao_reim=qph5['ao_two_e_ints/df_ao_integrals'][()].tolist()
+                ezfio.set_ao_two_e_ints_df_ao_integrals_complex(dfao_reim)
+                ezfio.set_ao_two_e_ints_io_df_ao_integrals('Read')
+    
+        if 'mo_two_e_ints' in qph5.keys():
+            df_num = qph5['ao_two_e_ints'].attrs['df_num']
+            ezfio.set_ao_two_e_ints_df_num(df_num)
+    #        dfmo_re0=qph5['mo_two_e_ints/df_mo_integrals_real'][()].transpose((3,2,1,0))
+    #        dfmo_im0=qph5['mo_two_e_ints/df_mo_integrals_imag'][()].transpose((3,2,1,0))
+    #        dfmo_cmplx0 = np.stack((dfmo_re0,dfmo_im0),axis=-1).tolist()
+    #        ezfio.set_mo_two_e_ints_df_mo_integrals_complex(dfmo_cmplx0)
+            dfmo_reim=qph5['mo_two_e_ints/df_mo_integrals'][()].tolist()
+            ezfio.set_mo_two_e_ints_df_mo_integrals_complex(dfmo_reim)
+            ezfio.set_mo_two_e_ints_io_df_mo_integrals('Read')
+   
+    return
+
+def convert_cplx(filename,qph5path):
+    ezfio.set_file(filename)
+    ezfio.set_nuclei_is_complex(True)
+    
+    with h5py.File(qph5path,'r') as qph5:
+        kpt_num = qph5['nuclei'].attrs['kpt_num']
+        nucl_num = qph5['nuclei'].attrs['nucl_num']
+        ao_num = qph5['ao_basis'].attrs['ao_num']
+        mo_num = qph5['mo_basis'].attrs['mo_num']
+        elec_alpha_num = qph5['electrons'].attrs['elec_alpha_num']
+        elec_beta_num = qph5['electrons'].attrs['elec_beta_num']
+    
+    ezfio.set_nuclei_kpt_num(kpt_num)
+    kpt_pair_num = (kpt_num*kpt_num + kpt_num)//2
+    ezfio.set_nuclei_kpt_pair_num(kpt_pair_num)
+    
+    # don't multiply nuclei by kpt_num
+    # work in k-space, not in equivalent supercell
+    nucl_num_per_kpt = nucl_num 
+    ezfio.set_nuclei_nucl_num(nucl_num_per_kpt)
+    
+    # these are totals (kpt_num * num_per_kpt)
+    # need to change if we want to truncate orbital space within pyscf
+    ezfio.set_ao_basis_ao_num(ao_num)
+    ezfio.set_mo_basis_mo_num(mo_num)
+    ezfio.electrons_elec_alpha_num = elec_alpha_num
+    ezfio.electrons_elec_beta_num  = elec_beta_num
+    
+    
+    
+    ##ao_num = mo_num
+    ##Important !
+    #import math
+    #nelec_per_kpt = num_elec // n_kpts
+    #nelec_alpha_per_kpt = int(math.ceil(nelec_per_kpt / 2.))
+    #nelec_beta_per_kpt = int(math.floor(nelec_per_kpt / 2.))
+    #
+    #ezfio.electrons_elec_alpha_num = int(nelec_alpha_per_kpt * n_kpts)
+    #ezfio.electrons_elec_beta_num = int(nelec_beta_per_kpt * n_kpts)
+    
+    #ezfio.electrons_elec_alpha_num = int(math.ceil(num_elec / 2.))
+    #ezfio.electrons_elec_beta_num = int(math.floor(num_elec / 2.))
+    
+    #ezfio.set_utils_num_kpts(n_kpts)
+    #ezfio.set_integrals_bielec_df_num(n_aux)
+    
+    #(old)Important
+    #ezfio.set_nuclei_nucl_num(nucl_num)
+    #ezfio.set_nuclei_nucl_charge([0.]*nucl_num)
+    #ezfio.set_nuclei_nucl_coord( [ [0.], [0.], [0.] ]*nucl_num )
+    #ezfio.set_nuclei_nucl_label( ['He'] * nucl_num )
+    
+    
+    with h5py.File(qph5path,'r') as qph5:
+        nucl_charge=qph5['nuclei/nucl_charge'][()].tolist()
+        nucl_coord=qph5['nuclei/nucl_coord'][()].T.tolist()
+        nucl_label=qph5['nuclei/nucl_label'][()].tolist()
+        nuclear_repulsion = qph5['nuclei'].attrs['nuclear_repulsion']
+    
+    ezfio.set_nuclei_nucl_charge(nucl_charge)
+    ezfio.set_nuclei_nucl_coord(nucl_coord)
+    if isinstance(nucl_label[0],bytes):
+        nucl_label = list(map(lambda x:x.decode(),nucl_label))
+    ezfio.set_nuclei_nucl_label(nucl_label)
+    
+    ezfio.set_nuclei_io_nuclear_repulsion('Read')
+    ezfio.set_nuclei_nuclear_repulsion(nuclear_repulsion)
+    
+    
+    ##########################################
+    #                                        #
+    #                Basis                   #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        ezfio.set_ao_basis_ao_basis(qph5['ao_basis'].attrs['ao_basis'])
+        ezfio.set_ao_basis_ao_nucl(qph5['ao_basis/ao_nucl'][()].tolist())
+    
+    
+    #Just need one (can clean this up later)
+    ao_prim_num_max  = 5
+    
+    d = [ [0] *ao_prim_num_max]*ao_num
+    ezfio.set_ao_basis_ao_prim_num([ao_prim_num_max]*ao_num)
+    ezfio.set_ao_basis_ao_power(d)
+    ezfio.set_ao_basis_ao_coef(d)
+    ezfio.set_ao_basis_ao_expo(d)
+    
+    
+    
+      
+    ##########################################
+    #                                        #
+    #               MO Coef                  #
+    #                                        #
+    ##########################################
+        
+    
+    with h5py.File(qph5path,'r') as qph5:
+        mo_coef_reim = qph5['mo_basis/mo_coef_complex'][()].tolist()
+    ezfio.set_mo_basis_mo_coef_complex(mo_coef_reim)
+    #maybe fix qp so we don't need this?
+    #ezfio.set_mo_basis_mo_coef([[i for i in range(mo_num)] * ao_num])
+    
+    
+    ##########################################
+    #                                        #
+    #            Integrals Mono              #
+    #                                        #
+    ##########################################
+        
+    with h5py.File(qph5path,'r') as qph5:
+        if 'ao_one_e_ints' in qph5.keys():
+            kin_ao_reim=qph5['ao_one_e_ints/ao_integrals_kinetic'][()].tolist()
+            ovlp_ao_reim=qph5['ao_one_e_ints/ao_integrals_overlap'][()].tolist()
+            ne_ao_reim=qph5['ao_one_e_ints/ao_integrals_n_e'][()].tolist()
+    
+            ezfio.set_ao_one_e_ints_ao_integrals_kinetic_complex(kin_ao_reim)
+            ezfio.set_ao_one_e_ints_ao_integrals_overlap_complex(ovlp_ao_reim)
+            ezfio.set_ao_one_e_ints_ao_integrals_n_e_complex(ne_ao_reim)
+            
+            ezfio.set_ao_one_e_ints_io_ao_integrals_kinetic('Read')
+            ezfio.set_ao_one_e_ints_io_ao_integrals_overlap('Read')
+            ezfio.set_ao_one_e_ints_io_ao_integrals_n_e('Read')
+      
+        
+    with h5py.File(qph5path,'r') as qph5:
+        if 'mo_one_e_ints' in qph5.keys():
+            kin_mo_reim=qph5['mo_one_e_ints/mo_integrals_kinetic'][()].tolist()
+            #ovlp_mo_reim=qph5['mo_one_e_ints/mo_integrals_overlap'][()].tolist()
+            ne_mo_reim=qph5['mo_one_e_ints/mo_integrals_n_e'][()].tolist()
+    
+            ezfio.set_mo_one_e_ints_mo_integrals_kinetic_complex(kin_mo_reim)
+            #ezfio.set_mo_one_e_ints_mo_integrals_overlap_complex(ovlp_mo_reim)
+            #ezfio.set_mo_one_e_ints_mo_integrals_n_e_complex(ne_mo_reim)
+            ezfio.set_mo_one_e_ints_mo_integrals_n_e_complex(ne_mo_reim)
+            
+            ezfio.set_mo_one_e_ints_io_mo_integrals_kinetic('Read')
+            #ezfio.set_mo_one_e_ints_io_mo_integrals_overlap('Read')
+            #ezfio.set_mo_one_e_ints_io_mo_integrals_n_e('Read')
+            ezfio.set_mo_one_e_ints_io_mo_integrals_n_e('Read')
+      
+    ##########################################
+    #                                        #
+    #               k-points                 #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        kconserv = qph5['nuclei/kconserv'][()].tolist()
+    
+    ezfio.set_nuclei_kconserv(kconserv)
+    ezfio.set_nuclei_io_kconserv('Read')
+      
+    ##########################################
+    #                                        #
+    #             Integrals Bi               #
+    #                                        #
+    ##########################################
+    
+    # should this be in ao_basis? ao_two_e_ints?
+    with h5py.File(qph5path,'r') as qph5:
+        if 'ao_two_e_ints' in qph5.keys():
+            df_num = qph5['ao_two_e_ints'].attrs['df_num']
+            ezfio.set_ao_two_e_ints_df_num(df_num)
+            if 'df_ao_integrals' in qph5['ao_two_e_ints'].keys():
+    #            dfao_re0=qph5['ao_two_e_ints/df_ao_integrals_real'][()].transpose((3,2,1,0))
+    #            dfao_im0=qph5['ao_two_e_ints/df_ao_integrals_imag'][()].transpose((3,2,1,0))
+    #            dfao_cmplx0 = np.stack((dfao_re0,dfao_im0),axis=-1).tolist()
+    #            ezfio.set_ao_two_e_ints_df_ao_integrals_complex(dfao_cmplx0)
+                dfao_reim=qph5['ao_two_e_ints/df_ao_integrals'][()].tolist()
+                ezfio.set_ao_two_e_ints_df_ao_integrals_complex(dfao_reim)
+                ezfio.set_ao_two_e_ints_io_df_ao_integrals('Read')
+    
+        if 'mo_two_e_ints' in qph5.keys():
+            df_num = qph5['ao_two_e_ints'].attrs['df_num']
+            ezfio.set_ao_two_e_ints_df_num(df_num)
+    #        dfmo_re0=qph5['mo_two_e_ints/df_mo_integrals_real'][()].transpose((3,2,1,0))
+    #        dfmo_im0=qph5['mo_two_e_ints/df_mo_integrals_imag'][()].transpose((3,2,1,0))
+    #        dfmo_cmplx0 = np.stack((dfmo_re0,dfmo_im0),axis=-1).tolist()
+    #        ezfio.set_mo_two_e_ints_df_mo_integrals_complex(dfmo_cmplx0)
+            dfmo_reim=qph5['mo_two_e_ints/df_mo_integrals'][()].tolist()
+            ezfio.set_mo_two_e_ints_df_mo_integrals_complex(dfmo_reim)
+            ezfio.set_mo_two_e_ints_io_df_mo_integrals('Read')
+   
+    return
+    
+#TODO: add check and only do this if ints exist
+#dfmo_re=qph5['mo_two_e_ints/df_mo_integrals_real'][()].transpose((3,2,1,0)).tolist()
+#dfmo_im=qph5['mo_two_e_ints/df_mo_integrals_imag'][()].transpose((3,2,1,0)).tolist()
+#ezfio.set_mo_two_e_ints_df_mo_integrals_real(dfmo_re)
+#ezfio.set_mo_two_e_ints_df_mo_integrals_imag(dfmo_im)
+
+convert_kpts(fname,qph5name)
diff --git a/src/utils_complex/create_ezfio_pyscf_mol.py b/src/utils_complex/create_ezfio_pyscf_mol.py
new file mode 100755
index 00000000..cf9c4655
--- /dev/null
+++ b/src/utils_complex/create_ezfio_pyscf_mol.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+from ezfio import ezfio
+import h5py
+
+import sys
+import numpy as np
+fname = sys.argv[1]
+qph5name = sys.argv[2]
+
+#qph5=h5py.File(qph5path,'r')
+
+def convert_mol(filename,qph5path):
+    ezfio.set_file(filename)
+    ezfio.set_nuclei_is_complex(False)
+    
+    with h5py.File(qph5path,'r') as qph5:
+        nucl_num = qph5['nuclei'].attrs['nucl_num']
+        ao_num = qph5['ao_basis'].attrs['ao_num']
+        mo_num = qph5['mo_basis'].attrs['mo_num']
+        elec_alpha_num = qph5['electrons'].attrs['elec_alpha_num']
+        elec_beta_num = qph5['electrons'].attrs['elec_beta_num']
+    
+    ezfio.set_nuclei_nucl_num(nucl_num)
+    
+    ezfio.set_ao_basis_ao_num(ao_num)
+    ezfio.set_mo_basis_mo_num(mo_num)
+    ezfio.electrons_elec_alpha_num = elec_alpha_num
+    ezfio.electrons_elec_beta_num  = elec_beta_num
+    
+    
+    
+    ##ao_num = mo_num
+    ##Important !
+    #import math
+    #nelec_per_kpt = num_elec // n_kpts
+    #nelec_alpha_per_kpt = int(math.ceil(nelec_per_kpt / 2.))
+    #nelec_beta_per_kpt = int(math.floor(nelec_per_kpt / 2.))
+    #
+    #ezfio.electrons_elec_alpha_num = int(nelec_alpha_per_kpt * n_kpts)
+    #ezfio.electrons_elec_beta_num = int(nelec_beta_per_kpt * n_kpts)
+    
+    #ezfio.electrons_elec_alpha_num = int(math.ceil(num_elec / 2.))
+    #ezfio.electrons_elec_beta_num = int(math.floor(num_elec / 2.))
+    
+    #ezfio.set_utils_num_kpts(n_kpts)
+    #ezfio.set_integrals_bielec_df_num(n_aux)
+    
+    #(old)Important
+    #ezfio.set_nuclei_nucl_num(nucl_num)
+    #ezfio.set_nuclei_nucl_charge([0.]*nucl_num)
+    #ezfio.set_nuclei_nucl_coord( [ [0.], [0.], [0.] ]*nucl_num )
+    #ezfio.set_nuclei_nucl_label( ['He'] * nucl_num )
+    
+    
+    with h5py.File(qph5path,'r') as qph5:
+        nucl_charge=qph5['nuclei/nucl_charge'][()].tolist()
+        nucl_coord=qph5['nuclei/nucl_coord'][()].T.tolist()
+        nucl_label=qph5['nuclei/nucl_label'][()].tolist()
+        nuclear_repulsion = qph5['nuclei'].attrs['nuclear_repulsion']
+    
+    ezfio.set_nuclei_nucl_charge(nucl_charge)
+    ezfio.set_nuclei_nucl_coord(nucl_coord)
+    ezfio.set_nuclei_nucl_label(nucl_label)
+    
+    ezfio.set_nuclei_io_nuclear_repulsion('Read')
+    ezfio.set_nuclei_nuclear_repulsion(nuclear_repulsion)
+    
+    
+    ##########################################
+    #                                        #
+    #                Basis                   #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        do_pseudo = qph5['pseudo'].attrs['do_pseudo']
+        ezfio.set_pseudo_do_pseudo(do_pseudo)
+        if (do_pseudo):
+            ezfio.set_pseudo_pseudo_lmax(qph5['pseudo'].attrs['pseudo_lmax'])
+            ezfio.set_pseudo_pseudo_klocmax(qph5['pseudo'].attrs['pseudo_klocmax'])
+            ezfio.set_pseudo_pseudo_kmax(qph5['pseudo'].attrs['pseudo_kmax'])
+            ezfio.set_pseudo_nucl_charge_remove(qph5['pseudo/nucl_charge_remove'][()].tolist())
+            ezfio.set_pseudo_pseudo_n_k(qph5['pseudo/pseudo_n_k'][()].tolist())
+            ezfio.set_pseudo_pseudo_n_kl(qph5['pseudo/pseudo_n_kl'][()].tolist())
+            ezfio.set_pseudo_pseudo_v_k(qph5['pseudo/pseudo_v_k'][()].tolist())
+            ezfio.set_pseudo_pseudo_v_kl(qph5['pseudo/pseudo_v_kl'][()].tolist())
+            ezfio.set_pseudo_pseudo_dz_k(qph5['pseudo/pseudo_dz_k'][()].tolist())
+            ezfio.set_pseudo_pseudo_dz_kl(qph5['pseudo/pseudo_dz_kl'][()].tolist())
+    
+    ##########################################
+    #                                        #
+    #                Basis                   #
+    #                                        #
+    ##########################################
+    
+    with h5py.File(qph5path,'r') as qph5:
+        coeftmp = qph5['ao_basis/ao_coef'][()]
+        expotmp = qph5['ao_basis/ao_expo'][()]
+        ezfio.set_ao_basis_ao_basis(qph5['ao_basis'].attrs['ao_basis'])
+        ezfio.set_ao_basis_ao_nucl(qph5['ao_basis/ao_nucl'][()].tolist())
+        ezfio.set_ao_basis_ao_prim_num(qph5['ao_basis/ao_prim_num'][()].tolist())
+        ezfio.set_ao_basis_ao_power(qph5['ao_basis/ao_power'][()].tolist())
+        ezfio.set_ao_basis_ao_coef(qph5['ao_basis/ao_coef'][()].tolist())
+        ezfio.set_ao_basis_ao_expo(qph5['ao_basis/ao_expo'][()].tolist())
+    
+    print(coeftmp)
+    print(expotmp)
+      
+    ##########################################
+    #                                        #
+    #               MO Coef                  #
+    #                                        #
+    ##########################################
+        
+    
+    with h5py.File(qph5path,'r') as qph5:
+        mo_coef = qph5['mo_basis/mo_coef'][()].tolist()
+    ezfio.set_mo_basis_mo_coef(mo_coef)
+    #maybe fix qp so we don't need this?
+    #ezfio.set_mo_basis_mo_coef([[i for i in range(mo_num)] * ao_num])
+    
+    return
+
+convert_mol(fname,qph5name)
diff --git a/src/utils_complex/debug_mo_map.irp.f b/src/utils_complex/debug_mo_map.irp.f
new file mode 100644
index 00000000..be593214
--- /dev/null
+++ b/src/utils_complex/debug_mo_map.irp.f
@@ -0,0 +1,136 @@
+program debug_mo_map
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Alpha and Beta Fock matrices in AO basis set
+  END_DOC
+  !TODO: finish implementing this: see complex qp1 (different mapping)
+ 
+  integer                        :: i,j,k,l,k1,r,s
+  integer                        :: i0,j0,k0,l0
+  integer*8                      :: p,q
+  complex*16               :: integral, c0
+ 
+  PROVIDE mo_two_e_integrals_in_map
+ 
+  integer(omp_lock_kind) :: lck(ao_num)
+  integer(map_size_kind)     :: i8
+  integer                        :: ii(4), jj(4), kk(4), ll(4), k2
+  integer(cache_map_size_kind)   :: n_elements_max, n_elements
+  integer(key_kind), allocatable :: keys(:)
+  double precision, allocatable  :: values(:)
+  complex*16, parameter    :: i_sign(4) = (/(0.d0,1.d0),(0.d0,1.d0),(0.d0,-1.d0),(0.d0,-1.d0)/)
+  integer(key_kind) :: key1
+ 
+!  call get_cache_map_n_elements_max(mo_integrals_map,n_elements_max)
+!  allocate(keys(n_elements_max), values(n_elements_max))
+ 
+  print*,'  map_size1 = ',mo_integrals_map%map_size
+  print*,'n_elements1 = ',mo_integrals_map%n_elements
+
+  do i8=0_8,mo_integrals_map%map_size
+    print*,'  cache1 idx = ',i8
+    print*,'    map_size = ',mo_integrals_map%map(i8)%map_size
+    print*,'  n_elements = ',mo_integrals_map%map(i8)%n_elements
+  enddo
+
+
+  print*,'  map_size2 = ',mo_integrals_map_2%map_size
+  print*,'n_elements2 = ',mo_integrals_map_2%n_elements
+
+  do i8=0_8,mo_integrals_map_2%map_size
+    print*,'  cache2 idx = ',i8
+    print*,'    map_size = ',mo_integrals_map_2%map(i8)%map_size
+    print*,'  n_elements = ',mo_integrals_map_2%map(i8)%n_elements
+  enddo
+!  do i8=0_8,mo_integrals_map%map_size
+!    n_elements = n_elements_max
+!    call get_cache_map(ao_integrals_map,i8,keys,values,n_elements)
+!    do k1=1,n_elements
+!      ! get original key
+!      ! reverse of 2*key (imag part) and 2*key-1 (real part)
+!      key1 = shiftr(keys(k1)+1,1)
+!
+!      call two_e_integrals_index_reverse_complex_1(ii,jj,kk,ll,key1)
+!      ! i<=k, j<=l, ik<=jl
+!      ! ijkl, jilk, klij*, lkji*
+!
+!      if (shiftl(key1,1)==keys(k1)) then !imaginary part (even)
+!        do k2=1,4
+!          if (ii(k2)==0) then
+!            cycle
+!          endif
+!          i = ii(k2)
+!          j = jj(k2)
+!          k = kk(k2)
+!          l = ll(k2)
+!          print'((A),4(I4),1(E15.7),2(I4),2(E9.1))','imag1  ',i,j,k,l,values(k1),k1,k2,i_sign(k2)
+!
+!          !G_a(i,k) += D_{ab}(l,j)*(<ij|kl>)
+!          !G_b(i,k) += D_{ab}(l,j)*(<ij|kl>)
+!          !G_a(i,l) -= D_a   (k,j)*(<ij|kl>)
+!          !G_b(i,l) -= D_b   (k,j)*(<ij|kl>)
+!
+!        enddo
+!      else ! real part
+!        do k2=1,4
+!          if (ii(k2)==0) then
+!            cycle
+!          endif
+!          i = ii(k2)
+!          j = jj(k2)
+!          k = kk(k2)
+!          l = ll(k2)
+!          print'((A),4(I4),1(E15.7),2(I4))','real1  ',i,j,k,l,values(k1),k1,k2
+!        enddo
+!      endif
+!    enddo
+!  enddo
+!  deallocate(keys,values)
+!
+! 
+!  call get_cache_map_n_elements_max(ao_integrals_map_2,n_elements_max)
+!  allocate(keys(n_elements_max), values(n_elements_max))
+! 
+!  do i8=0_8,ao_integrals_map_2%map_size
+!    n_elements = n_elements_max
+!    call get_cache_map(ao_integrals_map_2,i8,keys,values,n_elements)
+!    do k1=1,n_elements
+!      ! get original key
+!      ! reverse of 2*key (imag part) and 2*key-1 (real part)
+!      key1 = shiftr(keys(k1)+1,1)
+!
+!      call two_e_integrals_index_reverse_complex_2(ii,jj,kk,ll,key1)
+!      ! i>=k, j<=l, ik<=jl
+!      ! ijkl, jilk, klij*, lkji*
+!      if (shiftl(key1,1)==keys(k1)) then !imaginary part
+!        do k2=1,4
+!          if (ii(k2)==0) then
+!            cycle
+!          endif
+!          i = ii(k2)
+!          j = jj(k2)
+!          k = kk(k2)
+!          l = ll(k2)
+!          print'((A),4(I4),1(E15.7),2(I4),2(E9.1))','imag2  ',i,j,k,l,values(k1),k1,k2,i_sign(k2)
+!        enddo
+!      else ! real part
+!        do k2=1,4
+!          if (ii(k2)==0) then
+!            cycle
+!          endif
+!          i = ii(k2)
+!          j = jj(k2)
+!          k = kk(k2)
+!          l = ll(k2)
+!          print'((A),4(I4),1(E15.7),2(I4))','real2  ',i,j,k,l,values(k1),k1,k2
+!        enddo
+!      endif
+!    enddo
+!  enddo
+!  deallocate(keys,values)
+end
diff --git a/src/utils_complex/dump_2e_from_map.irp.f b/src/utils_complex/dump_2e_from_map.irp.f
new file mode 100644
index 00000000..d3a49886
--- /dev/null
+++ b/src/utils_complex/dump_2e_from_map.irp.f
@@ -0,0 +1,118 @@
+program print_2e_integrals_from_map
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Alpha and Beta Fock matrices in AO basis set
+  END_DOC
+  !TODO: finish implementing this: see complex qp1 (different mapping)
+ 
+  integer                        :: i,j,k,l,k1,r,s
+  integer                        :: i0,j0,k0,l0
+  integer*8                      :: p,q
+  complex*16               :: integral, c0
+ 
+  PROVIDE ao_two_e_integrals_in_map
+ 
+  integer(omp_lock_kind) :: lck(ao_num)
+  integer(map_size_kind)     :: i8
+  integer                        :: ii(4), jj(4), kk(4), ll(4), k2
+  integer(cache_map_size_kind)   :: n_elements_max, n_elements
+  integer(key_kind), allocatable :: keys(:)
+  double precision, allocatable  :: values(:)
+  complex*16, parameter    :: i_sign(4) = (/(0.d0,1.d0),(0.d0,1.d0),(0.d0,-1.d0),(0.d0,-1.d0)/)
+  integer(key_kind) :: key1
+ 
+  call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max)
+  allocate(keys(n_elements_max), values(n_elements_max))
+ 
+  do i8=0_8,ao_integrals_map%map_size
+    n_elements = n_elements_max
+    call get_cache_map(ao_integrals_map,i8,keys,values,n_elements)
+    do k1=1,n_elements
+      ! get original key
+      ! reverse of 2*key (imag part) and 2*key-1 (real part)
+      key1 = shiftr(keys(k1)+1,1)
+
+      call two_e_integrals_index_reverse_complex_1(ii,jj,kk,ll,key1)
+      ! i<=k, j<=l, ik<=jl
+      ! ijkl, jilk, klij*, lkji*
+
+      if (shiftl(key1,1)==keys(k1)) then !imaginary part (even)
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          print'((A),4(I4),1(E15.7),2(I4),2(E9.1))','imag1  ',i,j,k,l,values(k1),k1,k2,i_sign(k2)
+
+          !G_a(i,k) += D_{ab}(l,j)*(<ij|kl>)
+          !G_b(i,k) += D_{ab}(l,j)*(<ij|kl>)
+          !G_a(i,l) -= D_a   (k,j)*(<ij|kl>)
+          !G_b(i,l) -= D_b   (k,j)*(<ij|kl>)
+
+        enddo
+      else ! real part
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          print'((A),4(I4),1(E15.7),2(I4))','real1  ',i,j,k,l,values(k1),k1,k2
+        enddo
+      endif
+    enddo
+  enddo
+  deallocate(keys,values)
+
+ 
+  call get_cache_map_n_elements_max(ao_integrals_map_2,n_elements_max)
+  allocate(keys(n_elements_max), values(n_elements_max))
+ 
+  do i8=0_8,ao_integrals_map_2%map_size
+    n_elements = n_elements_max
+    call get_cache_map(ao_integrals_map_2,i8,keys,values,n_elements)
+    do k1=1,n_elements
+      ! get original key
+      ! reverse of 2*key (imag part) and 2*key-1 (real part)
+      key1 = shiftr(keys(k1)+1,1)
+
+      call two_e_integrals_index_reverse_complex_2(ii,jj,kk,ll,key1)
+      ! i>=k, j<=l, ik<=jl
+      ! ijkl, jilk, klij*, lkji*
+      if (shiftl(key1,1)==keys(k1)) then !imaginary part
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          print'((A),4(I4),1(E15.7),2(I4),2(E9.1))','imag2  ',i,j,k,l,values(k1),k1,k2,i_sign(k2)
+        enddo
+      else ! real part
+        do k2=1,4
+          if (ii(k2)==0) then
+            cycle
+          endif
+          i = ii(k2)
+          j = jj(k2)
+          k = kk(k2)
+          l = ll(k2)
+          print'((A),4(I4),1(E15.7),2(I4))','real2  ',i,j,k,l,values(k1),k1,k2
+        enddo
+      endif
+    enddo
+  enddo
+  deallocate(keys,values)
+end
diff --git a/src/utils_complex/dump_ao_1e_cplx.irp.f b/src/utils_complex/dump_ao_1e_cplx.irp.f
new file mode 100644
index 00000000..de5a48ee
--- /dev/null
+++ b/src/utils_complex/dump_ao_1e_cplx.irp.f
@@ -0,0 +1,51 @@
+program print_ao_1e_integrals
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+  
+  integer                        :: i,j
+  
+  write(*,'(A)') 'ao_one_e_integrals_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') ao_one_e_integrals_complex(i,:)
+  enddo
+  write(*,'(A)') 'ao_kinetic_integrals_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') ao_kinetic_integrals_complex(i,:)
+  enddo
+  write(*,'(A)') 'ao_ne_integrals_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') ao_integrals_n_e_complex(i,:)
+  enddo
+  write(*,'(A)') 'ao_overlap_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') ao_overlap_complex(i,:)
+  enddo
+  write(*,'(A)') 's_inv_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') s_inv_complex(i,:)
+  enddo
+  write(*,'(A)') 's_half_inv_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') s_half_inv_complex(i,:)
+  enddo
+  write(*,'(A)') 's_half_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') s_half_complex(i,:)
+  enddo
+  write(*,'(A)') 'ao_ortho_canonical_coef_complex'
+  write(*,'(A)') '---------------'
+  do i=1,ao_num
+    write(*,'(200(E24.15))') ao_ortho_canonical_coef_complex(i,:)
+  enddo
+end
diff --git a/src/utils_complex/dump_ao_2e_cplx.irp.f b/src/utils_complex/dump_ao_2e_cplx.irp.f
new file mode 100644
index 00000000..2db5f614
--- /dev/null
+++ b/src/utils_complex/dump_ao_2e_cplx.irp.f
@@ -0,0 +1,33 @@
+program print_ao_2e_integrals
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+  
+  integer ::i,j,k,l
+
+  provide ao_two_e_integrals_in_map
+  complex*16 :: get_ao_two_e_integral_complex, tmp_cmplx
+  do i=1,ao_num 
+    do j=1,ao_num 
+      do k=1,ao_num 
+        do l=1,ao_num 
+          tmp_cmplx = get_ao_two_e_integral_complex(i,j,k,l,ao_integrals_map,ao_integrals_map_2)
+          print'(4(I4),2(E23.15))',i,j,k,l,tmp_cmplx
+        enddo
+      enddo
+    enddo
+  enddo
+  print*,'map1'
+  do i=0,ao_integrals_map%map_size
+    print*,i,ao_integrals_map%map(i)%value(:)
+    print*,i,ao_integrals_map%map(i)%key(:)
+  enddo
+  print*,'map2'
+  do i=0,ao_integrals_map_2%map_size
+    print*,i,ao_integrals_map_2%map(i)%value(:)
+    print*,i,ao_integrals_map_2%map(i)%key(:)
+  enddo
+end
diff --git a/src/utils_complex/dump_ao_2e_from_df.irp.f b/src/utils_complex/dump_ao_2e_from_df.irp.f
new file mode 100644
index 00000000..1ca00e09
--- /dev/null
+++ b/src/utils_complex/dump_ao_2e_from_df.irp.f
@@ -0,0 +1,117 @@
+program dump_ao_2e_from_df
+  call run_ao_dump
+end
+
+subroutine run_ao_dump
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! fill ao bielec integral map using 3-index df integrals
+  END_DOC
+
+  integer :: i,k,j,l
+  integer :: ki,kk,kj,kl
+  integer :: ii,ik,ij,il
+  integer :: kikk2,kjkl2,jl2,ik2
+  integer :: i_ao,j_ao,i_df
+
+  complex*16,allocatable :: ints_ik(:,:,:), ints_jl(:,:,:), ints_ikjl(:,:,:,:)
+
+  complex*16 :: integral,intmap, get_ao_two_e_integral_complex
+  double precision               :: tmp_re,tmp_im
+  integer                        :: ao_num_kpt_2
+
+  logical :: use_map1
+  integer(keY_kind) :: idx_tmp
+  double precision :: sign
+
+  ao_num_kpt_2 = ao_num_per_kpt * ao_num_per_kpt
+
+
+  allocate( ints_jl(ao_num_per_kpt,ao_num_per_kpt,df_num))
+
+  do kl=1, kpt_num
+    do kj=1, kl
+      call idx2_tri_int(kj,kl,kjkl2)
+      if (kj < kl) then
+        do i_ao=1,ao_num_per_kpt
+          do j_ao=1,ao_num_per_kpt
+            do i_df=1,df_num
+              ints_jl(i_ao,j_ao,i_df) = dconjg(df_ao_integrals_complex(j_ao,i_ao,i_df,kjkl2))
+            enddo
+          enddo
+        enddo
+      else
+        ints_jl = df_ao_integrals_complex(:,:,:,kjkl2)
+      endif
+  
+  allocate( &
+    ints_ik(ao_num_per_kpt,ao_num_per_kpt,df_num), &
+    ints_ikjl(ao_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt) &
+  )
+
+      do kk=1,kl
+        ki=kconserv(kl,kk,kj)
+        if (ki > kl) cycle
+        !if ((kl == kj) .and. (ki > kk)) cycle
+        call idx2_tri_int(ki,kk,kikk2)
+        !if (kikk2 > kjkl2) cycle
+        if (ki < kk) then
+          do i_ao=1,ao_num_per_kpt
+            do j_ao=1,ao_num_per_kpt
+              do i_df=1,df_num
+                ints_ik(i_ao,j_ao,i_df) = dconjg(df_ao_integrals_complex(j_ao,i_ao,i_df,kikk2))
+              enddo
+            enddo
+          enddo
+!          ints_ik = conjg(reshape(df_mo_integral_array(:,:,:,kikk2),(/mo_num_per_kpt,mo_num_per_kpt,df_num/),order=(/2,1,3/)))
+        else
+          ints_ik = df_ao_integrals_complex(:,:,:,kikk2)
+        endif
+
+        call zgemm('N','T', ao_num_kpt_2, ao_num_kpt_2, df_num, &
+               (1.d0,0.d0), ints_ik, ao_num_kpt_2, &
+               ints_jl, ao_num_kpt_2, &
+               (0.d0,0.d0), ints_ikjl, ao_num_kpt_2)
+        print'((A),4(I4))','IJKL',ki,kj,kk,kl
+        do il=1,ao_num_per_kpt
+          l=il+(kl-1)*ao_num_per_kpt
+          do ij=1,ao_num_per_kpt
+            j=ij+(kj-1)*ao_num_per_kpt
+            if (j>l) exit
+            call idx2_tri_int(j,l,jl2)
+            do ik=1,ao_num_per_kpt
+              k=ik+(kk-1)*ao_num_per_kpt
+              if (k>l) exit
+              do ii=1,ao_num_per_kpt
+                i=ii+(ki-1)*ao_num_per_kpt
+                if ((j==l) .and. (i>k)) exit
+                call idx2_tri_int(i,k,ik2)
+                if (ik2 > jl2) exit
+                integral = ints_ikjl(ii,ik,ij,il)
+                intmap = get_ao_two_e_integral_complex(i,j,k,l,ao_integrals_map,ao_integrals_map_2)
+!                print*,i,k,j,l,real(integral),imag(integral)
+                if ((cdabs(integral) + cdabs(intmap)) < ao_integrals_threshold) then
+                  cycle
+                endif
+                if (cdabs(integral-intmap) < 1.d-8) then
+                  print'(4(I4),4(E15.7))',i,j,k,l,integral,intmap
+                else
+                  print'(4(I4),4(E15.7),(A))',i,j,k,l,integral,intmap,'***'
+                endif
+              enddo !ii
+            enddo !ik
+          enddo !ij
+        enddo !il
+      enddo !kk
+  deallocate( &
+    ints_ik, &
+    ints_ikjl &
+    )
+    enddo !kj
+  enddo !kl
+  deallocate( ints_jl ) 
+
+  
+end
+
diff --git a/src/utils_complex/dump_ao_2e_from_df_all.irp.f b/src/utils_complex/dump_ao_2e_from_df_all.irp.f
new file mode 100644
index 00000000..5ca67f11
--- /dev/null
+++ b/src/utils_complex/dump_ao_2e_from_df_all.irp.f
@@ -0,0 +1,117 @@
+program dump_ao_2e_from_df
+  call run_ao_dump
+end
+
+subroutine run_ao_dump
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! fill ao bielec integral map using 3-index df integrals
+  END_DOC
+
+  integer :: i,k,j,l
+  integer :: ki,kk,kj,kl
+  integer :: ii,ik,ij,il
+  integer :: kikk2,kjkl2,jl2,ik2
+  integer :: i_ao,j_ao,i_df
+
+  complex*16,allocatable :: ints_ik(:,:,:), ints_jl(:,:,:), ints_ikjl(:,:,:,:)
+
+  complex*16 :: integral,intmap, get_ao_two_e_integral_complex
+  double precision               :: tmp_re,tmp_im
+  integer                        :: ao_num_kpt_2
+
+  logical :: use_map1
+  integer(keY_kind) :: idx_tmp
+  double precision :: sign
+
+  ao_num_kpt_2 = ao_num_per_kpt * ao_num_per_kpt
+
+
+  allocate( ints_jl(ao_num_per_kpt,ao_num_per_kpt,df_num))
+
+  do kl=1, kpt_num
+    do kj=1, kpt_num
+      call idx2_tri_int(kj,kl,kjkl2)
+      if (kj < kl) then
+        do i_ao=1,ao_num_per_kpt
+          do j_ao=1,ao_num_per_kpt
+            do i_df=1,df_num
+              ints_jl(i_ao,j_ao,i_df) = dconjg(df_ao_integrals_complex(j_ao,i_ao,i_df,kjkl2))
+            enddo
+          enddo
+        enddo
+      else
+        ints_jl = df_ao_integrals_complex(:,:,:,kjkl2)
+      endif
+  
+  allocate( &
+    ints_ik(ao_num_per_kpt,ao_num_per_kpt,df_num), &
+    ints_ikjl(ao_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt,ao_num_per_kpt) &
+  )
+
+      do kk=1,kpt_num
+        ki=kconserv(kl,kk,kj)
+!        if ((kl == kj) .and. (ki > kk)) cycle
+        call idx2_tri_int(ki,kk,kikk2)
+!        if (kikk2 > kjkl2) cycle
+        if (ki < kk) then
+          do i_ao=1,ao_num_per_kpt
+            do j_ao=1,ao_num_per_kpt
+              do i_df=1,df_num
+                ints_ik(i_ao,j_ao,i_df) = dconjg(df_ao_integrals_complex(j_ao,i_ao,i_df,kikk2))
+              enddo
+            enddo
+          enddo
+!          ints_ik = conjg(reshape(df_mo_integral_array(:,:,:,kikk2),(/mo_num_per_kpt,mo_num_per_kpt,df_num/),order=(/2,1,3/)))
+        else
+          ints_ik = df_ao_integrals_complex(:,:,:,kikk2)
+        endif
+
+        call zgemm('N','T', ao_num_kpt_2, ao_num_kpt_2, df_num, &
+               (1.d0,0.d0), ints_ik, ao_num_kpt_2, &
+               ints_jl, ao_num_kpt_2, &
+               (0.d0,0.d0), ints_ikjl, ao_num_kpt_2)
+        print'((A),4(I4))','IJKL',ki,kj,kk,kl
+        do il=1,ao_num_per_kpt
+          l=il+(kl-1)*ao_num_per_kpt
+          do ij=1,ao_num_per_kpt
+            j=ij+(kj-1)*ao_num_per_kpt
+!            if (j>l) exit
+            call idx2_tri_int(j,l,jl2)
+            do ik=1,ao_num_per_kpt
+              k=ik+(kk-1)*ao_num_per_kpt
+!              if (k>l) exit
+              do ii=1,ao_num_per_kpt
+                i=ii+(ki-1)*ao_num_per_kpt
+!                if ((j==l) .and. (i>k)) exit
+!                call idx2_tri_int(i,k,ik2)
+!                if (ik2 > jl2) exit
+                integral = ints_ikjl(ii,ik,ij,il)
+                intmap = get_ao_two_e_integral_complex(i,j,k,l,ao_integrals_map,ao_integrals_map_2)
+!                print*,i,k,j,l,real(integral),imag(integral)
+                if ((cdabs(integral) + cdabs(intmap)) < ao_integrals_threshold) then
+                  cycle
+                endif
+                if (cdabs(integral-intmap) < 1.d-14) then
+                  cycle
+                  !print'(4(I4),4(E15.7))',i,j,k,l,integral,intmap
+                else
+                  print'(4(I4),4(E15.7),(A))',i,j,k,l,integral,intmap,'***'
+                endif
+              enddo !ii
+            enddo !ik
+          enddo !ij
+        enddo !il
+      enddo !kk
+  deallocate( &
+    ints_ik, &
+    ints_ikjl &
+    )
+    enddo !kj
+  enddo !kl
+  deallocate( ints_jl ) 
+
+  
+end
+
diff --git a/src/utils_complex/dump_df_ao.irp.f b/src/utils_complex/dump_df_ao.irp.f
new file mode 100644
index 00000000..5659bd58
--- /dev/null
+++ b/src/utils_complex/dump_df_ao.irp.f
@@ -0,0 +1,26 @@
+program dump_df_ao
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+
+  integer ::i,j,k,mu
+  complex*16 :: integral
+
+  provide df_ao_integrals_complex
+  do k=1,kpt_pair_num
+    do mu=1,df_num
+      do i=1,ao_num_per_kpt
+        do j=1,ao_num_per_kpt
+          integral = df_ao_integrals_complex(i,j,mu,k)
+          if (cdabs(integral).gt.1.d-12) then
+            print'(4(I4),4(E15.7))',i,j,mu,k,integral,dble(integral),dimag(integral)
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+
+end
diff --git a/src/utils_complex/dump_df_mo.irp.f b/src/utils_complex/dump_df_mo.irp.f
new file mode 100644
index 00000000..cd97d6bb
--- /dev/null
+++ b/src/utils_complex/dump_df_mo.irp.f
@@ -0,0 +1,26 @@
+program dump_df_mo
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+
+  integer ::i,j,k,mu
+  complex*16 :: integral
+
+  provide df_mo_integrals_complex
+  do k=1,kpt_pair_num
+    do mu=1,df_num
+      do i=1,mo_num_per_kpt
+        do j=1,mo_num_per_kpt
+          integral = df_mo_integrals_complex(i,j,mu,k)
+          if (cdabs(integral).gt.1.d-12) then
+            print'(4(I4),4(E15.7))',i,j,mu,k,integral,dble(integral),dimag(integral)
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+
+end
diff --git a/src/utils_complex/dump_kcon.irp.f b/src/utils_complex/dump_kcon.irp.f
new file mode 100644
index 00000000..9f74a0c0
--- /dev/null
+++ b/src/utils_complex/dump_kcon.irp.f
@@ -0,0 +1,21 @@
+program dump_kcon
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+
+  integer ::i,j,k,l
+
+  provide kconserv
+  do i=1,kpt_num
+    do j=1,kpt_num
+      do k=1,kpt_num
+        l = kconserv(i,j,k)
+        print'(4(I4))',i,j,k,l
+      enddo
+    enddo
+  enddo
+
+end
diff --git a/src/utils_complex/dump_mo_2e_cplx.irp.f b/src/utils_complex/dump_mo_2e_cplx.irp.f
new file mode 100644
index 00000000..44bb706d
--- /dev/null
+++ b/src/utils_complex/dump_mo_2e_cplx.irp.f
@@ -0,0 +1,35 @@
+program print_mo_2e_integrals
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+  
+  integer ::i,j,k,l
+
+  provide mo_two_e_integrals_in_map
+  complex*16 :: get_two_e_integral_complex, tmp_cmplx
+  do i=1,mo_num 
+    do j=1,mo_num 
+      do k=1,mo_num 
+        do l=1,mo_num 
+          tmp_cmplx = get_two_e_integral_complex(i,j,k,l,mo_integrals_map,mo_integrals_map_2)
+          if (cdabs(tmp_cmplx).gt. 1d-12) then
+            print'(4(I4),2(E23.15))',i,j,k,l,tmp_cmplx
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+!  print*,'map1'
+!  do i=0,mo_integrals_map%map_size
+!    print*,i,mo_integrals_map%map(i)%value(:)
+!    print*,i,mo_integrals_map%map(i)%key(:)
+!  enddo
+!  print*,'map2'
+!  do i=0,mo_integrals_map_2%map_size
+!    print*,i,mo_integrals_map_2%map(i)%value(:)
+!    print*,i,mo_integrals_map_2%map(i)%key(:)
+!  enddo
+end
diff --git a/src/utils_complex/dump_mo_coef.irp.f b/src/utils_complex/dump_mo_coef.irp.f
new file mode 100644
index 00000000..c024e07c
--- /dev/null
+++ b/src/utils_complex/dump_mo_coef.irp.f
@@ -0,0 +1,22 @@
+program print_mo_coef
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+  
+  integer ::i,j,k,l
+
+  provide mo_coef_complex
+  complex*16 :: tmp_cmplx
+!  complex*16 :: get_two_e_integral_complex, tmp_cmplx
+  do i=1,ao_num
+    do j=1,mo_num
+      tmp_cmplx = mo_coef_complex(i,j)
+      if (cdabs(tmp_cmplx).gt.1.d-10) then
+        print'(2(I4),2(E23.15))',i,j,tmp_cmplx
+      endif
+    enddo
+  enddo
+end
diff --git a/src/utils_complex/export_integrals_ao_cplx.irp.f b/src/utils_complex/export_integrals_ao_cplx.irp.f
new file mode 100644
index 00000000..7d40ba76
--- /dev/null
+++ b/src/utils_complex/export_integrals_ao_cplx.irp.f
@@ -0,0 +1,226 @@
+program print_integrals
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+  
+  integer :: iunit
+  integer :: getunitandopen
+
+  integer ::i,j,k,l
+  double precision :: integral
+  double precision, allocatable :: A(:,:), B(:,:)
+  double precision :: tmp_re, tmp_im
+
+  integer             :: n_integrals_1, n_integrals_2 
+  integer(key_kind), allocatable   :: buffer_i_1(:), buffer_i_2(:) 
+  real(integral_kind), allocatable :: buffer_values_1(:), buffer_values_2(:)
+  logical :: use_map1
+  integer(key_kind) :: idx_tmp
+  double precision :: sign
+
+
+
+provide ao_two_e_integrals_in_map
+  allocate (A(ao_num,ao_num), B(ao_num,ao_num) )
+  
+  A(1,1) = huge(1.d0)
+  iunit = getunitandopen('E.qp','r')
+  read (iunit,*,end=9) A(1,1)
+  9 continue
+  close(iunit)
+  if (A(1,1) /= huge(1.d0)) then
+!    call ezfio_set_nuclei_nuclear_repulsion(A(1,1))
+!    call ezfio_set_nuclei_io_nuclear_repulsion("Read")
+     print*, nuclear_repulsion,A(1,1)
+  endif
+
+  A = 0.d0
+  B = 0.d0
+ ! iunit = getunitandopen('T.qp','r')
+ ! do 
+ !   read (iunit,*,end=10) i,j, tmp_re, tmp_im
+ !   A(i,j) = tmp_re
+ !   B(i,j) = tmp_im
+ !   print*,ao_kinetic_integrals(i,j),A(i,j)
+ !   print*,ao_kinetic_integrals_imag(i,j),B(i,j)
+ !   if (i.ne.j) then
+ !     A(j,i) =  tmp_re
+ !     B(j,i) = -tmp_im
+ !   print*,ao_kinetic_integrals(j,i),A(j,i)
+ !   print*,ao_kinetic_integrals_imag(j,i),B(j,i)
+ !   endif
+ ! enddo
+ ! 10 continue
+ ! close(iunit)
+!  call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A(1:ao_num, 1:ao_num))
+!  call ezfio_set_ao_one_e_ints_ao_integrals_kinetic_imag(B(1:ao_num, 1:ao_num))
+!  call ezfio_set_ao_one_e_ints_io_ao_integrals_kinetic("Read")
+
+  A = 0.d0
+  B = 0.d0
+ ! iunit = getunitandopen('S.qp','r')
+ ! do 
+ !   read (iunit,*,end=11) i,j, tmp_re, tmp_im
+ !   A(i,j) = tmp_re
+ !   B(i,j) = tmp_im
+ !   print*,real(ao_overlap_complex(i,j)),A(i,j)
+ !   print*,imag(ao_overlap_complex(i,j)),B(i,j)
+ !   print*,ao_overlap_imag(i,j),B(i,j)
+ !   if (i.ne.j) then
+ !     A(j,i) =  tmp_re
+ !     B(j,i) = -tmp_im
+ !   print*,real(ao_overlap_complex(j,i)),A(j,i)
+ !   print*,imag(ao_overlap_complex(j,i)),B(j,i)
+ !   print*,ao_overlap_imag(j,i),B(j,i)
+ !   endif
+ ! enddo
+ ! 11 continue
+ ! close(iunit)
+!  call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A(1:ao_num, 1:ao_num))
+!  call ezfio_set_ao_one_e_ints_ao_integrals_overlap_imag(B(1:ao_num, 1:ao_num))
+!  call ezfio_set_ao_one_e_ints_io_ao_integrals_overlap("Read")
+
+  A = 0.d0
+  B = 0.d0
+!  iunit = getunitandopen('P.qp','r')
+!  do 
+!    read (iunit,*,end=14) i,j, tmp_re, tmp_im
+!    A(i,j) = tmp_re
+!    B(i,j) = tmp_im
+!    print*,ao_pseudo_integrals(i,j),A(i,j)
+!    print*,ao_pseudo_integrals_imag(i,j),B(i,j)
+!   ! print*,real(ao_integrals_pseudo(i,j)),A(i,j)
+!   ! print*,imag(ao_integrals_pseudo(i,j)),B(i,j)
+!    if (i.ne.j) then
+!      A(j,i) =  tmp_re
+!      B(j,i) = -tmp_im
+!    print*,ao_pseudo_integrals(j,i),A(j,i)
+!    print*,ao_pseudo_integrals_imag(j,i),B(j,i)
+!   ! print*,real(ao_integrals_pseudo(j,i)),A(j,i)
+!   ! print*,imag(ao_integrals_pseudo(j,i)),B(j,i)
+!    endif
+!  enddo
+!  14 continue
+!  close(iunit)
+!  call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A(1:ao_num,1:ao_num))
+!  call ezfio_set_ao_one_e_ints_ao_integrals_pseudo_imag(B(1:ao_num,1:ao_num))
+!  call ezfio_set_ao_one_e_ints_io_ao_integrals_pseudo("Read")
+
+  A = 0.d0
+  B = 0.d0
+!  iunit = getunitandopen('V.qp','r')
+!  do 
+!    read (iunit,*,end=12) i,j, tmp_re, tmp_im
+!    A(i,j) = tmp_re
+!    B(i,j) = tmp_im
+!    print*,ao_integrals_n_e(i,j),A(i,j)
+!    print*,ao_integrals_n_e_imag(i,j),B(i,j)
+!    if (i.ne.j) then
+!      A(j,i) =  tmp_re
+!      B(j,i) = -tmp_im
+!      print*,ao_integrals_n_e(j,i),A(j,i)
+!      print*,ao_integrals_n_e_imag(j,i),B(j,i)
+!    endif
+!  enddo
+!  12 continue
+!  close(iunit)
+!  call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A(1:ao_num, 1:ao_num))
+!  call ezfio_set_ao_one_e_ints_ao_integrals_n_e_imag(B(1:ao_num, 1:ao_num))
+!  call ezfio_set_ao_one_e_ints_io_ao_integrals_n_e("Read")
+  complex*16 :: int2e_tmp1,int2e_tmp2,get_ao_two_e_integral_complex_simple,get_ao_two_e_integral_complex, tmp_cmplx
+  double precision :: tmp3,tmp4,tmp5,tmp6
+  double precision :: thr0
+  thr0 = 1.d-10
+  allocate(buffer_i_1(ao_num**3), buffer_values_1(ao_num**3))
+  allocate(buffer_i_2(ao_num**3), buffer_values_2(ao_num**3))
+  iunit = getunitandopen('W.qp','r')
+  n_integrals_1=0
+  n_integrals_2=0
+  buffer_values_1 = 0.d0
+  buffer_values_2 = 0.d0
+  do 
+    read (iunit,*,end=13) i,j,k,l, tmp_re, tmp_im
+    tmp_cmplx = dcmplx(tmp_re,tmp_im)
+    int2e_tmp1 = get_ao_two_e_integral_complex_simple(i,j,k,l,ao_integrals_map,ao_integrals_map_2)
+    int2e_tmp2 = get_ao_two_e_integral_complex(i,j,k,l,ao_integrals_map,ao_integrals_map_2)
+  !  print'(4(I4),3(E15.7))',i,j,k,l,tmp_re,real(int2e_tmp1),real(int2e_tmp2)
+ !   print'(4(I4),3(E15.7))',i,j,k,l,tmp_im,imag(int2e_tmp1),imag(int2e_tmp2)
+    call ao_two_e_integral_complex_map_idx_sign(i,j,k,l,use_map1,idx_tmp,sign)
+!    print*,use_map1,idx_tmp,sign
+    call map_get(ao_integrals_map,idx_tmp,tmp3)
+    call map_get(ao_integrals_map_2,idx_tmp,tmp4)
+    call map_get(ao_integrals_map,idx_tmp+1,tmp5)
+    call map_get(ao_integrals_map_2,idx_tmp+1,tmp6)
+   ! print*,tmp3,tmp4
+   ! print*,tmp5,tmp6
+   if (cdabs(tmp_cmplx-int2e_tmp1).gt.thr0) then
+     print'(4(I4),4(E15.7))',i,j,k,l,tmp_cmplx,int2e_tmp1
+   endif
+    integer :: ii
+          ii = l-ao_integrals_cache_min
+          ii = ior( shiftl(ii,6), k-ao_integrals_cache_min)
+          ii = ior( shiftl(ii,6), j-ao_integrals_cache_min)
+          ii = ior( shiftl(ii,6), i-ao_integrals_cache_min)
+!    print*,'cache(pbc)=', ao_integrals_cache_complex(ii)
+!    print*,'cache(old)=', ao_integrals_cache(ii)
+!    print*
+!    if (use_map1) then
+!      n_integrals_1 += 1
+!      buffer_i_1(n_integrals_1-1)=idx_tmp
+!      buffer_values_1(n_integrals_1-1)=tmp_re
+!      if (sign.ne.0.d0) then 
+!        n_integrals_1 += 1
+!        buffer_i_1(n_integrals_2)=idx_tmp+1
+!        buffer_values_1(n_integrals_1)=tmp_im*sign
+!      endif 
+!      if (n_integrals_1 >= size(buffer_i_1)-1) then
+!!        call insert_into_ao_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1)
+!        n_integrals_1 = 0
+!      endif
+!    else
+!      n_integrals_2 += 1
+!      buffer_i_2(n_integrals_2-1)=idx_tmp
+!      buffer_values_2(n_integrals_2-1)=tmp_re
+!      if (sign.ne.0.d0) then
+!        n_integrals_2 += 1
+!        buffer_i_2(n_integrals_2)=idx_tmp+1
+!        buffer_values_2(n_integrals_2)=tmp_im*sign
+!      endif 
+!      if (n_integrals_2 >= size(buffer_i_2)-1) then
+!!        call insert_into_ao_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2)
+!        n_integrals_2 = 0
+!      endif
+!    endif
+  enddo
+  13 continue
+  close(iunit)
+  
+!  if (n_integrals_1 > 0) then
+!!    call insert_into_ao_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1)
+!  endif
+!  if (n_integrals_2 > 0) then
+!!    call insert_into_ao_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2)
+!  endif
+!
+!  call map_sort(ao_integrals_map)
+!  call map_unique(ao_integrals_map)
+!  call map_sort(ao_integrals_map_2)
+!  call map_unique(ao_integrals_map_2)
+!
+!  call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_complex_1',ao_integrals_map)
+!  call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_complex_2',ao_integrals_map_2)
+!  call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read'
+print*,'map1'
+  do i=0,ao_integrals_map%map_size
+    print*,i,ao_integrals_map%map(i)%value(:)
+    print*,i,ao_integrals_map%map(i)%key(:)
+  enddo
+print*,'map2'
+  do i=0,ao_integrals_map_2%map_size
+    print*,i,ao_integrals_map_2%map(i)%value(:)
+    print*,i,ao_integrals_map_2%map(i)%key(:)
+  enddo
+end
diff --git a/src/utils_complex/generate_pyscf_twists.py b/src/utils_complex/generate_pyscf_twists.py
new file mode 100644
index 00000000..9b659398
--- /dev/null
+++ b/src/utils_complex/generate_pyscf_twists.py
@@ -0,0 +1,253 @@
+#! /usr/bin/env python
+
+from nexus import settings,job,run_project,obj
+from nexus import generate_physical_system
+from nexus import generate_pyscf
+import spglib
+import numpy as np
+import os
+
+settings(
+    results = '',
+    sleep   = 3,
+    machine = 'ws8',
+    generate_only=1,
+    )
+
+solid_tmp_file = './.tmp_solid_template.py'
+jobparams={}
+jobparams['dfname'] = 'df_ints.h5'
+jobparams['chkname'] = 'diamond.chk'
+jobparams['dftype'] = 'GDF'
+jobparams['auxbasis'] = 'weigend'
+jobparams['xc'] = 'b3lyp'
+
+show_kmap = True
+
+pyscf_job = job(cores=1,serial=True)
+
+cell_types = [
+    'diamond_8_real',
+#    'diamond_8_comp',
+    ]
+
+cell_info = obj(
+    diamond_8_real = obj(
+#        tiling = [[ 1, -1,  1],
+#                  [ 1,  1, -1],
+#                  [-1,  1,  1]],
+        tiling = (1,1,1),
+        kgrid  = (12,12,12),
+        ),
+    )
+
+a = 3.37316115
+axes = np.array([[a,a,0],[0,a,a],[a,0,a]])
+elem = ['C','C']
+pos = [[0,0,0],[a/2,a/2,a/2]]
+
+scf_info = obj(
+        basis = 'bfd-vdz',
+        ecp = 'bfd',
+        drop_exponent = 0.1,
+        verbose = 5,
+        )
+
+tempstr = """
+#!/usr/bin/env python
+
+'''
+Gamma point post-HF calculation needs only real integrals.
+Methods implemented in finite-size system can be directly used here without
+any modification.
+'''
+
+import numpy as np
+from pyscf import lib
+from pyscf.pbc import gto, scf, dft
+from pyscf import gto as Mgto
+from pyscf.pbc import df 
+from pyscf.pbc import ao2mo
+from pyscf.pbc import tools
+from pyscf.pbc.tools.pbc import super_cell
+from functools import reduce
+import scipy.linalg as la
+import os
+
+restart = False
+
+$system
+
+$twistinfo
+
+pwd_top = os.path.dirname(os.path.realpath(__file__))
+for i in range(len(allkpts)):
+    jobdir=pwd_top + '/twist-{:02d}/'.format(i)
+    if not restart:
+        os.mkdir(jobdir)
+    os.chdir(jobdir) 
+    sp_twist=supTwist[i]
+    kpts_i=allkpts[i]  
+    print("i ",i, kpts_i)
+    supcell=cell
+    mydf = df.$dftype(supcell,kpts_i)
+    mydf.auxbasis = '$auxbasis'
+    dfpath = jobdir+'$dfname'
+    if not restart:
+        mydf._cderi_to_save = dfpath   # new
+        mydf.build()                         # new
+    #end if
+    mf = scf.KROKS(supcell,kpts_i).density_fit()
+    mf.xc='$xc'
+   
+    mf.tol           = 1e-10 
+   
+    mf.exxdiv = 'ewald'
+    mf.with_df = mydf
+    chkpath = jobdir + '$chkname'
+    mf.chkfile = chkpath
+    mf.with_df._cderi = dfpath
+    if restart:
+        dm = mf.from_chk(chkpath)    # restart
+        e_scf=mf.kernel(dm)                    # restart
+    else:
+        e_scf=mf.kernel()                      # new
+    #end if
+   
+    with open('e_scf','w') as ener:
+        ener.write('%s\\n' % (e_scf))
+    print('e_scf',e_scf)
+   
+    #title="S8-twist%s"%i
+    #### generated conversion text ###
+    #from PyscfToQmcpack import savetoqmcpack
+    #savetoqmcpack(cell,mf,title=title,kmesh=kmesh,kpts=kpts_i,sp_twist=sp_twist)
+   
+    mycas = list(range(0,30))
+    #title="S8-Cas30-twist%s"%i
+    #### generated conversion text ###
+    #from PyscfToQmcpack import savetoqmcpack
+    #savetoqmcpack(cell,mf,title=title,kmesh=kmesh,kpts=kpts_i,sp_twist=sp_twist, cas_idx=mycas)
+    #### end generated conversion text ###
+   
+    from MolPyscfToQPkpts import pyscf2QP2
+    pyscf2QP2(supcell,mf,kpts=kpts_i,int_threshold = 1E-15,cas_idx=mycas)
+    print('Done for Tw%s'%i)
+    os.chdir(pwd_top) 
+
+"""
+
+#replace $dfname with df_ints.h5
+#replace $chkname with checkpoint file name
+#$dftype is GDF, MDF, etc.
+#$auxbasis weigend?
+#$xc b3lyp
+#$twistinfo
+
+
+for cell_type in cell_types:
+    cell_tiling = cell_info[cell_type].tiling
+    cell_kgrid  = cell_info[cell_type].kgrid
+
+    diamond = generate_physical_system(
+#        axes     = '''
+#                   3.37316115  3.37316115  0.00000000
+#                   0.00000000  3.37316115  3.37316115
+#                   3.37316115  0.00000000  3.37316115''',
+#        elem_pos = '''  
+#                   C  0.00000000   0.00000000   0.00000000
+#                   C  1.686580575  1.686580575  1.686580575''',
+        axes = axes,
+        elem = elem,
+        pos = pos,
+        units    = 'B',
+        tiling   = cell_tiling,
+        kgrid    = cell_kgrid,
+        kshift   = (0,0,0),
+        C        = 4,
+        symm_kgrid=True,
+        )
+
+
+    jobparams['twistinfo'] = ''
+    if show_kmap:
+        print (cell_type)
+        print ('===============================')
+        s = diamond.structure.copy()
+        s.change_units('B') #required for supertwists to be in 1/au instead of 1/Å
+        kmap = s.kmap()
+
+        print ('supercell kpoints/twists')
+        jobparams['twistinfo']+='# supercell kpoints/twists\n'
+        jobparams['twistinfo']+='supTwist=array([\n'
+        for i,k in enumerate(s.kpoints):
+            print ('  ',i,k)
+            jobparams['twistinfo']+=(str(list(k))+',\n')
+        #end for
+        jobparams['twistinfo']+='])\n'
+
+        #print ('primitive cell kpoints')
+        # this should already be written by nexus as part of $system
+        #jobparams['twistinfo']+='# primitive cell kpoints\n')
+        #jobparams['twistinfo']+='orig=array([\n'
+        #for i,k in enumerate(s.folded_structure.kpoints):
+            #print ('  ',i,k)
+            #jobparams['twistinfo']+=(str(list(k))+',\n')
+        #end for
+        #jobparams['twistinfo']+='])\n'
+
+        jobparams['twistinfo']+='# mapping from supercell to primitive cell k-points\n'
+
+        if kmap is None:
+            nkpts = str(len(s.kpoints))
+            jobparams['twistinfo']+='mymap=np.arange('+nkpts+').reshape(('+nkpts+',1))\n'
+            #for i in range(len(s.kpoints)):
+            #    jobparams['twistinfo']+=('['+str(i)+'],\n')
+        else:
+            jobparams['twistinfo']+='mymap=array([\n'
+            for kmapkey in kmap.sorted_keys():
+                jobparams['twistinfo']+=(str(list(kmap[kmapkey]))+',\n')
+            jobparams['twistinfo']+='])\n'
+        print ('mapping from supercell to primitive cell k-points')
+        if kmap is not None:
+            print (kmap)
+
+        #jobparams['twistinfo']+=('allkpts=array(list(map(lambda xs: list(map(lambda x: orig[x], xs)), mymap)))\n')
+        jobparams['twistinfo']+=('allkpts=array(list(map(lambda xs: list(map(lambda x: kpts[x], xs)), mymap)))\n')
+        jobparams['twistinfo']+=('kweights=array('+str(list(s.kweights))+')\n')
+    #end if
+
+    tmp_template = tempstr
+    for key in jobparams.keys():
+        tmp_template = tmp_template.replace('$'+key,jobparams[key])
+    
+    if os.path.isfile(solid_tmp_file):
+        raise Exception(solid_tmp_file,'solid_tmp_file already exists: delete file and try again')
+    with open(solid_tmp_file,'w') as stmp:
+        stmp.write(tmp_template)
+
+    scf = generate_pyscf(
+        identifier = 'scf',
+        path       = cell_type,
+        job        = pyscf_job,
+        template   = solid_tmp_file,
+        system     = diamond,
+        cell = scf_info,
+#        cell       = obj(
+#            basis         = 'bfd-vtz',
+#            ecp           = 'bfd',
+#            drop_exponent = 0.1,
+#            verbose       = 5,
+#            ),
+        save_qmc   = True ,
+        )
+    if os.path.isfile(solid_tmp_file):
+        os.remove(solid_tmp_file)
+
+#end for
+
+#if show_kmap:
+#    exit()
+#end if
+
+run_project()
diff --git a/src/utils_complex/import_ao_2e_cplx.irp.f b/src/utils_complex/import_ao_2e_cplx.irp.f
new file mode 100644
index 00000000..f22e6c50
--- /dev/null
+++ b/src/utils_complex/import_ao_2e_cplx.irp.f
@@ -0,0 +1,89 @@
+program import_ao_2e_complex
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+  
+  integer :: iunit
+  integer :: getunitandopen
+
+  integer ::i,j,k,l
+  double precision :: integral
+  complex*16, allocatable       :: C(:,:)
+  double precision :: tmp_re, tmp_im
+
+  integer             :: n_integrals_1, n_integrals_2 
+  integer(key_kind), allocatable   :: buffer_i_1(:), buffer_i_2(:) 
+  real(integral_kind), allocatable :: buffer_values_1(:), buffer_values_2(:)
+  logical :: use_map1
+  integer(key_kind) :: idx_tmp
+  double precision :: sign
+
+
+!  call ezfio_set_ao_basis_ao_num(ao_num)
+
+  allocate(buffer_i_1(ao_num**3), buffer_values_1(ao_num**3))
+  allocate(buffer_i_2(ao_num**3), buffer_values_2(ao_num**3))
+  iunit = getunitandopen('W.qp','r')
+  n_integrals_1=0
+  n_integrals_2=0
+  buffer_values_1 = 0.d0
+  buffer_values_2 = 0.d0
+  do 
+    read (iunit,*,end=13) i,j,k,l, tmp_re, tmp_im
+    call ao_two_e_integral_complex_map_idx_sign(i,j,k,l,use_map1,idx_tmp,sign)
+!    print'(4(I4),(L3),(I6),(F7.1))',i,j,k,l,use_map1,idx_tmp,sign
+    if (use_map1) then
+      n_integrals_1 += 1
+      buffer_i_1(n_integrals_1)=idx_tmp
+      buffer_values_1(n_integrals_1)=tmp_re
+!      print'(A,4(I4),(I6),(E15.7))','map1',i,j,k,l,idx_tmp,tmp_re
+      if (sign.ne.0.d0) then 
+        n_integrals_1 += 1
+        buffer_i_1(n_integrals_1)=idx_tmp+1
+        buffer_values_1(n_integrals_1)=tmp_im*sign
+!        print'(A,4(I4),(I6),(E15.7))','map1',i,j,k,l,idx_tmp+1,tmp_im*sign
+      endif 
+      if (n_integrals_1 >= size(buffer_i_1)-1) then
+        call insert_into_ao_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1)
+        n_integrals_1 = 0
+      endif
+    else
+      n_integrals_2 += 1
+      buffer_i_2(n_integrals_2)=idx_tmp
+      buffer_values_2(n_integrals_2)=tmp_re
+!      print'(A,4(I4),(I6),(E15.7))','map2',i,j,k,l,idx_tmp,tmp_re
+      if (sign.ne.0.d0) then
+        n_integrals_2 += 1
+        buffer_i_2(n_integrals_2)=idx_tmp+1
+        buffer_values_2(n_integrals_2)=tmp_im*sign
+!        print'(A,4(I4),(I6),(E15.7))','map2',i,j,k,l,idx_tmp+1,tmp_im*sign
+      endif 
+      if (n_integrals_2 >= size(buffer_i_2)-1) then
+        call insert_into_ao_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2)
+        n_integrals_2 = 0
+      endif
+    endif
+  enddo
+  13 continue
+  close(iunit)
+  
+  if (n_integrals_1 > 0) then
+    call insert_into_ao_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1)
+  endif
+  if (n_integrals_2 > 0) then
+    call insert_into_ao_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2)
+  endif
+
+  call map_sort(ao_integrals_map)
+  call map_unique(ao_integrals_map)
+  call map_sort(ao_integrals_map_2)
+  call map_unique(ao_integrals_map_2)
+
+  call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_complex_1',ao_integrals_map)
+  call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_complex_2',ao_integrals_map_2)
+  call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read')
+  
+end
diff --git a/src/utils_complex/import_integrals_ao_cplx.irp.f b/src/utils_complex/import_integrals_ao_cplx.irp.f
new file mode 100644
index 00000000..bf4f3693
--- /dev/null
+++ b/src/utils_complex/import_integrals_ao_cplx.irp.f
@@ -0,0 +1,159 @@
+program import_ao_integrals_complex
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+  
+  integer :: iunit
+  integer :: getunitandopen
+
+  integer ::i,j,k,l
+  double precision :: integral
+  complex*16, allocatable       :: C(:,:)
+  double precision :: tmp_re, tmp_im
+
+  integer             :: n_integrals_1, n_integrals_2 
+  integer(key_kind), allocatable   :: buffer_i_1(:), buffer_i_2(:) 
+  real(integral_kind), allocatable :: buffer_values_1(:), buffer_values_2(:)
+  logical :: use_map1
+  integer(key_kind) :: idx_tmp
+  double precision :: sign
+
+
+!  call ezfio_set_ao_basis_ao_num(ao_num)
+
+  allocate (C(ao_num,ao_num))
+  
+  integral = huge(1.d0)
+  iunit = getunitandopen('E.qp','r')
+  read (iunit,*,end=9) integral
+  9 continue
+  close(iunit)
+  if (integral /= huge(1.d0)) then
+    call ezfio_set_nuclei_nuclear_repulsion(integral)
+    call ezfio_set_nuclei_io_nuclear_repulsion("Read")
+  endif
+
+  C = (0.d0,0.d0)
+  iunit = getunitandopen('T.qp','r')
+  do 
+    read (iunit,*,end=10) i,j, tmp_re, tmp_im
+    C(i,j) = dcmplx(tmp_re,tmp_im)
+    if (i.ne.j) then
+      C(j,i) = dcmplx(tmp_re,-tmp_im)
+    endif
+  enddo
+  10 continue
+  close(iunit)
+  call ezfio_set_ao_one_e_ints_ao_integrals_kinetic_complex(C)
+  call ezfio_set_ao_one_e_ints_io_ao_integrals_kinetic("Read")
+
+  C = (0.d0,0.d0)
+  iunit = getunitandopen('S.qp','r')
+  do 
+    read (iunit,*,end=11) i,j, tmp_re, tmp_im
+    C(i,j) = dcmplx(tmp_re,tmp_im)
+    if (i.ne.j) then
+      C(j,i) = dcmplx(tmp_re,-tmp_im)
+    endif
+  enddo
+  11 continue
+  close(iunit)
+  call ezfio_set_ao_one_e_ints_ao_integrals_overlap_complex(C)
+  call ezfio_set_ao_one_e_ints_io_ao_integrals_overlap("Read")
+
+  C = (0.d0,0.d0)
+  iunit = getunitandopen('P.qp','r')
+  do 
+    read (iunit,*,end=14) i,j, tmp_re, tmp_im
+    C(i,j) = dcmplx(tmp_re,tmp_im)
+    if (i.ne.j) then
+      C(j,i) = dcmplx(tmp_re,-tmp_im)
+    endif
+  enddo
+  14 continue
+  close(iunit)
+  call ezfio_set_ao_one_e_ints_ao_integrals_pseudo_complex(C)
+  call ezfio_set_ao_one_e_ints_io_ao_integrals_pseudo("Read")
+
+  C = (0.d0,0.d0)
+  iunit = getunitandopen('V.qp','r')
+  do 
+    read (iunit,*,end=12) i,j, tmp_re, tmp_im
+    C(i,j) = dcmplx(tmp_re,tmp_im)
+    if (i.ne.j) then
+      C(j,i) = dcmplx(tmp_re,-tmp_im)
+    endif
+  enddo
+  12 continue
+  close(iunit)
+  call ezfio_set_ao_one_e_ints_ao_integrals_n_e_complex(C)
+  call ezfio_set_ao_one_e_ints_io_ao_integrals_n_e("Read")
+
+  deallocate(C)
+
+  allocate(buffer_i_1(ao_num**3), buffer_values_1(ao_num**3))
+  allocate(buffer_i_2(ao_num**3), buffer_values_2(ao_num**3))
+  iunit = getunitandopen('W.qp','r')
+  n_integrals_1=0
+  n_integrals_2=0
+  buffer_values_1 = 0.d0
+  buffer_values_2 = 0.d0
+  do 
+    read (iunit,*,end=13) i,j,k,l, tmp_re, tmp_im
+    call ao_two_e_integral_complex_map_idx_sign(i,j,k,l,use_map1,idx_tmp,sign)
+    print'(4(I4),(L3),(I6),(F7.1))',i,j,k,l,use_map1,idx_tmp,sign
+    if (use_map1) then
+      n_integrals_1 += 1
+      buffer_i_1(n_integrals_1)=idx_tmp
+      buffer_values_1(n_integrals_1)=tmp_re
+      print'(A,4(I4),(I6),(E15.7))','map1',i,j,k,l,idx_tmp,tmp_re
+      if (sign.ne.0.d0) then 
+        n_integrals_1 += 1
+        buffer_i_1(n_integrals_1)=idx_tmp+1
+        buffer_values_1(n_integrals_1)=tmp_im*sign
+        print'(A,4(I4),(I6),(E15.7))','map1',i,j,k,l,idx_tmp+1,tmp_im*sign
+      endif 
+      if (n_integrals_1 >= size(buffer_i_1)-1) then
+        call insert_into_ao_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1)
+        n_integrals_1 = 0
+      endif
+    else
+      n_integrals_2 += 1
+      buffer_i_2(n_integrals_2)=idx_tmp
+      buffer_values_2(n_integrals_2)=tmp_re
+      print'(A,4(I4),(I6),(E15.7))','map2',i,j,k,l,idx_tmp,tmp_re
+      if (sign.ne.0.d0) then
+        n_integrals_2 += 1
+        buffer_i_2(n_integrals_2)=idx_tmp+1
+        buffer_values_2(n_integrals_2)=tmp_im*sign
+        print'(A,4(I4),(I6),(E15.7))','map2',i,j,k,l,idx_tmp+1,tmp_im*sign
+      endif 
+      if (n_integrals_2 >= size(buffer_i_2)-1) then
+        call insert_into_ao_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2)
+        n_integrals_2 = 0
+      endif
+    endif
+  enddo
+  13 continue
+  close(iunit)
+  
+  if (n_integrals_1 > 0) then
+    call insert_into_ao_integrals_map(n_integrals_1,buffer_i_1,buffer_values_1)
+  endif
+  if (n_integrals_2 > 0) then
+    call insert_into_ao_integrals_map_2(n_integrals_2,buffer_i_2,buffer_values_2)
+  endif
+
+  call map_sort(ao_integrals_map)
+  call map_unique(ao_integrals_map)
+  call map_sort(ao_integrals_map_2)
+  call map_unique(ao_integrals_map_2)
+
+  call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_complex_1',ao_integrals_map)
+  call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_complex_2',ao_integrals_map_2)
+  call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read')
+  
+end
diff --git a/src/utils_complex/import_kconserv.irp.f b/src/utils_complex/import_kconserv.irp.f
new file mode 100644
index 00000000..d15b1eed
--- /dev/null
+++ b/src/utils_complex/import_kconserv.irp.f
@@ -0,0 +1,36 @@
+program import_kconserv
+
+  PROVIDE ezfio_filename
+  call run
+end
+
+subroutine run
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! read kconserv in physicists' notation order <ij|kl>
+  ! if kconserv(i,j,k)=l, then <ij|kl> is allowed by symmetry
+  ! NOTE: pyscf stores this internally in the order of chemists' notation (ik|jl)
+  END_DOC
+  
+  integer :: iunit
+  integer :: getunitandopen
+
+  integer ::i,j,k,l
+  integer, allocatable :: A(:,:,:)
+
+  allocate(A(kpt_num,kpt_num,kpt_num))
+  
+  A = 0
+  iunit = getunitandopen('K.qp','r')
+  do 
+    read (iunit,*,end=10) i,j,k,l
+    A(i,j,k) = l
+  enddo
+  10 continue
+  close(iunit)
+  call ezfio_set_nuclei_kconserv(A)
+  call ezfio_set_nuclei_io_kconserv("Read")
+  deallocate(A)
+
+end
diff --git a/src/utils_periodic/import_mo_coef_periodic.irp.f b/src/utils_complex/import_mo_coef_cplx.irp.f
similarity index 71%
rename from src/utils_periodic/import_mo_coef_periodic.irp.f
rename to src/utils_complex/import_mo_coef_cplx.irp.f
index bd41f776..bc87b744 100644
--- a/src/utils_periodic/import_mo_coef_periodic.irp.f
+++ b/src/utils_complex/import_mo_coef_cplx.irp.f
@@ -1,4 +1,4 @@
-program import_mo_coef_periodic
+program import_mo_coef_complex
 
   PROVIDE ezfio_filename
   call run
@@ -17,7 +17,8 @@ subroutine run
   
   iunit = getunitandopen('C.qp','r')
   do 
-    read (iunit,*,end=10) i,j, mo_coef(i,j), mo_coef_imag(i,j)
+    read (iunit,*,end=10) i,j, int_re, int_im
+    mo_coef_complex(i,j) = dcmplx(int_re,int_im)
   enddo
   10 continue
   close(iunit)
diff --git a/src/utils_complex/qp2-pbc-diff.txt b/src/utils_complex/qp2-pbc-diff.txt
new file mode 100644
index 00000000..8148248c
--- /dev/null
+++ b/src/utils_complex/qp2-pbc-diff.txt
@@ -0,0 +1,580 @@
+
+kpts:
+  changed matrices to block diagonal (1-e ints, fock, 1rdm)
+  double_allowed_mo_kpts(h1,h2,p1,p2,is_allowed)
+    {h,p}{1,2} indices in total mo_num (not per kpt)
+  double_allowed_kpts(kh1,kh2,kp1,kp2,is_allowed)
+    k{h,p}{1,2} k-point indices
+
+  only allow momentum-conserving excitations
+
+
+todo:
+  change everything to be blocked by kpt
+  elec_alpha_num_per_kpt (maybe add to mo_basis?)
+  bitmasks per kpt? (or at least occ/act/virt num and list)
+
+-------------------------------------------------------------------------------------
+old:
+  select_connected
+    select_singles_and_doubles (this should be separated real/complex)
+      spot_isinwf (same for real/complex)
+      splash_pq (separate real/complex)
+        get_d{0,1,2} (separate real/complex)
+      fill_buffer_double (separate real/complex)
+    started splash_pq, get_d{0,1,2}, fill_buffer_double for complex
+      need to check hole particle index ordering (also in select_singles_and_doubles)
+      need to check for coef dconjg
+      
+  fci
+    run_{,stochastic_}cipsi
+      everything okay except:
+        zmq_pt2{,_complex} (todo: combine real/complex)
+          selection buffer? (val, mini)?
+          selection_slave_inproc
+            run_selection_slave
+              select_connected
+              
+          pt2_slave_inproc
+            run_pt2_slave{,_large,_small}
+              select_connected
+
+        zmq_selection_complex
+          selection_collector
+            pull_selection_results
+            add_to_selection_buffer
+          selection_slave_inproc
+            run_selection_slave (has split for complex?)
+              select_connected
+    run_slave_cipsi
+      run_slave_main
+        change memory allocation for complex (first see how many arrays will need to change type)
+        run_pt2_slave (large/small?)
+          select_connected
+     selection_buffer:
+       if anything complex, need to change zmq calls
+         {push,pull}_pt2_results
+
+-------------------------------------------------------------------------------------
+
+old:
+  irp_align for complex?
+  zmq_put_psi_complex instead of branch inside zmq_put_psi?
+    are there cases where we call this without already being on a complex branch of code?
+
+  h_apply.irp.f
+    push/pull_pt2
+      pt2,norm_pert,h_pert_diag
+        types?
+        if complex, do we need to keep imag part? (should imag sum to zero?)
+  h_apply_{,{,no}zmq}.template.f
+    see generate_h_apply.py script
+    may need to modify
+  selectors
+    (looks like nothing in e_corr_selectors.irp.f is used elsewhere?)
+    (only e_corr_per_sel outside of src (provided in h apply gen script))
+    coef_hf_selector (inv, invsquared)
+      for real, is sign important, or just magnitude?
+    e_corr_per_selectors (is this used anywhere?)
+      provided in generate_h_apply.py?
+      <Di|H|HF> * c(Di) / c(HF)
+      complex, but does this matter?
+      is magnitude important or just real part?
+    i_H_HF_per_selectors
+      <Di|H|HF>
+      not used anywhere else, so no additional concerns other than for e_corr_per_selectors 
+    delta_E_per_selector
+
+general:
+  check for dependence on psi_det_sorted, clean up providers
+
+determinants:
+  (done) connected_to_ref.irp.f
+  (done) create_excitations.irp.f
+  (done?) density_matrix{,_complex}.irp.f
+           no one_e_dm_dagger_mo_spin_index_complex
+           need to test for complex
+  (done) determinants_bitmasks.irp.f
+  (****) determinants{_complex}.irp.f
+           mostly done
+           need to modify ocaml for psi_coef_complex_qp_edit?
+           save_wavefunction_specified? qp_edit save? (wrong for real?)
+  (done) energy.irp.f
+  (????) example.irp.f
+  (****) EZFIO.cfg
+  (done) filter_connected.irp.f
+  (done) fock_diag.irp.f
+  (****) h_apply.irp.f
+           added coef_complex to h_apply_buffer_type
+           either coef or coef_complex will remain unallocated
+           (if this causes problems (it shouldn't), maybe just allocate unused one with size 1?)
+           check {push,pull}_pt2
+             pt2, norm_pert, h_pert_diag types? (should be real? documentation?)
+  (****) h_apply_nozmq.template.f
+  (****) h_apply.template.f
+  (****) h_apply_zmq.template.f
+  (done) occ_pattern.irp.f
+           (might need to change if we change h_apply)
+  (done) prune_wf.irp.f
+  (done) psi_cas{,_complex}.irp.f
+           might be able to combine some providers??
+  (done) psi_energy_mono_elec.irp.f
+  (done) ref_bitmask.irp.f
+  (done?) s2{,_complex}.irp.f
+           remaining functions not needed?
+  (done) single_excitations.irp.f
+  (done?) single_excitation_two_e.irp.f
+  (done?) slater_rules.irp.f
+  (done?) slater_rules_wee_mono.irp.f
+  (done) sort_dets_ab.irp.f
+  spindeterminants.ezfio_config
+           need svd complex?
+  (done?) spindeterminants.irp.f
+           separated psi_bilinear_matrix_values from psi_bilinear_matrix_{rows,columns,order}
+           new provider for psi_bilinear_matrix_values_complex
+           same for bilinear matrix transp (no conjugate)
+           done except for specific functions that are commented with todo
+           remaining functions aren't called anywhere, so don't worry about them for now
+  (****) two_e_density_matrix.irp.pouet
+  (done) utils.irp.f
+  (done?) zmq.irp.f
+           make sure template is correct for put/get psi_coef_complex
+           (why is limit 2^23? is this specific for doubles? should we divide by 2 for complex*16?)
+           also depends on zmq_{put,get}_cdmatrix in zmq/put_get.irp.f
+           and broadcast_chunks_complex_double in mpi/mpi.irp.f
+
+
+davidson	
+  (****)  davidson_parallel.irp.f
+            davidson_slave_work
+              branch inside or outside?
+                (currently inside)
+              same broadcast size issue as in h_apply (2^23 elements)
+              needs h_s2_u_0_nstates_openmp_work_complex (be careful with transp/conjg)
+              needs davidson_push_results_async_send_complex
+            davidson_pu{sh,ll}_results{,_async_send}_complex
+              double sz?
+              does f77_zmq_send8 know about types, or just send raw data?
+            davidson_collector_complex
+              is {v,s}_t conjugate transpose or just transpose?
+  (****)  diagonalization_hs2_dressed.irp.f
+  (****)  diagonalize_ci.irp.f
+  (****)  EZFIO.cfg
+  (****)  ezfio_interface.irp.f
+  (****)  input.irp.f
+  (****)  print_e_components.irp.f
+  (****)  u0_h_u0.irp.f
+  (****)  u0_wee_u0.irp.f
+-------------------------------------------------------------------------------------
+
+for complex data, add extra dim (size 2) and treat as real in EZFIO.cfg
+
+no reuse of old provider for real part of complex arrays
+
+mo_coef_complex_kpts has nonzero blocks of mo_coef_complex
+
+AO 2e ints:
+  see doc for map index details
+  see src/hartree_fock/fock_matrix_hf_complex.irp.f for example of iterating over values in map
+
+MO 2e ints:
+  similar to AO 2e ints
+  maybe good idea to make map_get for two neighboring vals? (re/im parts)
+  only built from 3idx (not from 4idx transform)
+
+
+mapping:
+  changed so that all real ints (Jij, Kij, Jii) are in map2
+    <ij|ij>, <ij|ji>, <ii|ii>
+  some places in code assume that map1 ints can be real
+    (can remove once we are sure we like this mapping)
+
+translational symmetry:
+  kconserv array gives quartets <ij|kl> which are symmetry-allowed
+  k_i + k_j = k_k + k_l
+  I + J = K + L
+  kconserv(I,J,K)=L
+
+------------------------------------------------------------------------------
+
+TODO:
+symmetry
+  restructure arrays?
+    mo coef and mo 1e ints already separate from real part of code (easy to add extra dimension)
+    ao 1e ints could also be handled in same way as mo 1e ints
+  change to allow different numbers of frozen/virtual mos for different kpts
+    for now, all kpts must have same number of aos/mos
+  bitmasks for kpts?
+
+ao_one_e_ints
+	ao_overlap_abs for complex? <abs(i)|abs(j)> vs abs(<i|j>)
+	ao_integrals_n_e_per_atom_complex (should be simple, but currently we only use dummy nuclei)
+
+ao_two_e_ints (todo)
+ get_ao_two_e_integrals_non_zero_complex 
+ get_ao_two_e_integrals_non_zero_jl_complex 
+ get_ao_two_e_integrals_non_zero_jl_from_list_complex 
+
+mo_two_e_ints (todo)
+  get_mo_two_e_integrals_ij_complex
+  add_integrals_to_map_complex
+	add_integrals_to_map_three_indices_complex
+	add_integrals_to_map_no_exit_34_complex
+
+
+later:
+  calculation of pbc integrals in QP
+		ao_two_e_integral
+		ao_two_e_integral_schwartz_accel
+		compute_ao_two_e_integrals
+		[ double precision, ao_two_e_integral_schwartz,(ao_num,ao_num)  ]
+		compute_ao_integrals_jl
+    ...
+
+
+
+NOTES:
+  2e integrals printed from pyscf are in physicists' notation
+  
+  mo energies from pyscf include ewald correction; in qp we just fold that into the nuclear repulsion
+    this may need to change for addition/removal of electrons
+      (shift in enuc depends on number of electrons)
+
+  3-index integrals
+    <ij|kl> = \sum_\mu (ik|\mu)(jl|\mu)
+    store (ik|\mu) for I<=K
+    if i>k, take conjugate transpose in first two dimensions
+
+    df_mo(:,:,mu,kjkl) = C(:,:,kj)^\dagger.df_ao(:,:,mu,kjkl).C(:,:,kl)
+    (note: might need to switch j/l depending on how we decide to store this)
+
+  2e int compound indexing
+    number of unique 4-tuples with 8-fold symmetry is a8(n)=n*(n+1)*(n^2+n+2)/8
+    number of unique 4-tuples with 4-fold symmetry is a4(n)=n^2*(n^2+3)/4
+    a8 is number of unique real 2e ints with n mos
+    a4 is number of unique* complex 2e ints with n mos (where p+i*q and p-i*q are counted as one, not two)
+    a4(n) = a8(n) + a8(n-1)
+    
+    we can already generate the list of <ij|kl> with unique values for the 8-fold case
+    the set of these for 4-fold symmetry is the union of the 8-fold set for n and the 8-fold set for n-1 with a simple transformation
+    <ij|kl>_{4,n} = <ij|kl>_{8,n} + <(k+1)j|i(l+1)>_{8,n-1}
+  
+
+  indices out of order; needed to switch for complex:
+    i_h_j_s2 for singles
+    i_h_j for singles
+    i_h_j_two_e for singles
+
+############################
+#      utils, ezfio, ...   #
+############################
+
+ocaml/Input_mo_basis.ml
+	added mo_coef_imag array (real)
+	still needs mo_coef_to_string and to_string?
+
+src/nuclei/EZFIO.cfg
+	[is_complex]
+		if true use periodic parts of code
+
+src/utils/linear_algebra.irp.f
+	complex versions of utils
+		(maybe put in separate file?)
+
+src/utils/map_module.f90
+	subroutine map_get_2
+		get two neighboring values from map
+		not tested or used
+
+
+src/utils_periodic/export_integrals_ao_periodic.irp.f
+	dump ints for testing
+
+src/utils_periodic/import_integrals_ao_periodic.irp.f
+	read ints from pyscf
+	TODO: don't read ao_num from stdin
+
+src/utils_periodic/import_mo_coef_complex.irp.f
+	read mo_coef from pyscf
+	
+
+
+#######################
+#    ao_one_e_ints    #
+#######################
+
+src/ao_one_e_ints/EZFIO.cfg
+	[ao_integrals_n_e_imag]
+	[ao_integrals_kinetic_imag]
+	[ao_integrals_pseudo_imag]
+	[ao_integrals_overlap_imag]
+	[ao_one_e_integrals_imag]
+
+
+src/ao_one_e_ints/ao_one_e_ints.irp.f
+	ao_one_e_integrals_imag
+		can only be read (not calculated)
+	ao_one_e_integrals_complex
+		formed from dcmplx(ao_one_e_integrals,ao_one_e_integrals_imag)
+		
+src/ao_one_e_ints/ao_overlap.irp.f
+src/ao_one_e_ints/kin_ao_ints.irp.f
+src/ao_one_e_ints/pot_ao_ints.irp.f
+src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f
+	added _imag and _complex versions of all AO 1-e ints
+	each complex array is formed by combining real and imag arrays
+	imag arrays can only be read from disk
+	no complex/imag versions of ao_integrals_n_e_per_atom, but this should be straightforward if we need it later?
+	changed ao_overlap_abs so that it is set to cdabs(ao_overlap_complex) if (is_complex)
+		TODO: (maybe not the behavior we want)
+	added S_inv_complex
+		TODO: (no S_half_inv_complex yet)
+
+src/ao_one_e_ints/ao_ortho_canonical_complex.irp.f
+	ao_cart_to_sphe_coef_complex
+		just a copy of ao_cart_to_sphe_coef_complex with complex type for easier zgemm
+		(with different size if ao_cart_to_sphe_num is less than ao_num)
+		depends on ao_cart_to_sphe_coef_complex
+	ao_cart_to_sphe_overlap_complex
+		similar to real version, but uses ao_overlap_complex instead of ao_overlap
+	ao_ortho_cano_coef_inv_cplx
+		self-explanatory
+	ao_ortho_canonical_coef_complex
+	ao_ortho_canonical_num_complex
+		similar to real version
+		providers are linked, so easier to just make num_complex instead of using original num (even though they will both have the same value)
+		need to make sure this doesn't require any other downstream changes (i.e. replace ao_ortho_canonical_num with complex version if (is_complex))
+	ao_ortho_canonical_overlap_complex
+		similar to real version
+
+
+#######################
+#    ao_two_e_ints    #
+#######################
+
+
+src/ao_two_e_ints/map_integrals.irp.f
+	added ao_integrals_map_2 (provider linked to ao_integrals_map)
+	double size of both maps if (is_complex)
+	subroutine two_e_integrals_index_complex
+		same as real version, but return compound (2) indices to avoid recomputing
+	ao_integrals_cache_complex
+		similar to real version
+	subroutine ao_two_e_integral_complex_map_idx_sign
+		from i,j,k,l, return which map to use (T->1, F->2), location of real part of integral, sign of imaginary part of integral
+	complex*16 function get_ao_two_e_integral_complex_simple
+		args i,j,k,l,map1,map2
+		return complex integral composed of correct elements from one of the maps
+	complex*16 function get_ao_two_e_integral_complex
+		same behavior as _simple version, but checks cache first
+		returns integral from cache if possible, otherwise retrieves from map
+	subroutine get_ao_two_e_integrals_complex
+		same functionality as real version
+	subroutine insert_into_ao_integrals_map_2
+		needed for second map
+	get_ao_map_size, clear_ao_map
+		no new functions, but now these also handle map2
+	not implemented for periodic:
+		subroutine get_ao_two_e_integrals_non_zero
+		subroutine get_ao_two_e_integrals_non_zero_jl
+		subroutine get_ao_two_e_integrals_non_zero_jl_from_list
+	
+	
+src/ao_two_e_ints/two_e_integrals.irp.f
+	not implemented for periodic:
+		double precision function ao_two_e_integral
+		double precision function ao_two_e_integral_schwartz_accel
+		subroutine compute_ao_two_e_integrals
+		[ double precision, ao_two_e_integral_schwartz,(ao_num,ao_num)  ]
+		subroutine compute_ao_integrals_jl
+		(and other integral calculation functions)
+	modified for periodic:
+		[ logical, ao_two_e_integrals_in_map ]
+			complex AO ints can only be read from disk (not calculated)
+	
+
+
+#######################
+#      mo_basis       #
+#######################
+
+
+src/mo_basis/track_orb.irp.f → src/bitmask/track_orb.irp.f
+	not implemented for periodic:
+		subroutine reorder_core_orb (should be modified for periodic)
+	modified for periodic:
+		subroutine initialize_mo_coef_begin_iteration
+	added for periodic:
+		[ complex*16, mo_coef_begin_iteration_complex, (ao_num,mo_num) ]
+			similar to real version
+	
+
+src/mo_basis/EZFIO.cfg
+	[mo_coef_imag]
+
+
+src/mo_basis/mos.irp.f
+	modified for periodic:
+		subroutine mix_mo_jk
+		
+src/mo_basis/mos_complex.irp.f
+	added for periodic:
+		[ double precision, mo_coef_imag, (ao_num,mo_num) ]
+		[ complex*16, mo_coef_complex, (ao_num,mo_num) ]
+		[ complex*16, mo_coef_in_ao_ortho_basis_complex, (ao_num, mo_num) ]
+		[ complex*16, mo_coef_transp_complex, (mo_num,ao_num) ]
+		[ complex*16, mo_coef_transp_complex_conjg, (mo_num,ao_num) ]
+			maybe not necessary?
+			might cause confusion having both of these?
+			maybe should add _noconjg to name of _transp so it's clear that it's just the transpose, and not the adjoint
+		subroutine ao_to_mo_complex
+		subroutine ao_ortho_cano_to_ao_cplx
+	
+src/mo_basis/utils.irp.f
+	not modified:
+		subroutine save_mos_no_occ (should be changed for periodic)
+		
+	subroutine save_mos_truncated(n)
+	subroutine save_mos
+		modified to write mo_coef_imag to disk
+		need to make sure this is handled correctly
+		either update mo_coef{,_imag} whenever mo_coef_complex changes, or just make sure they're updated before writing to disk
+		(or just put real/imag parts of mo_coef_complex into buffer to save and avoid directly working with mo_coef{,_imag})
+	
+	
+src/mo_basis/utils_periodic.irp.f
+	complex versions of functions from utils
+		mo_as_eigvectors_of_mo_matrix_complex
+		mo_as_svd_vectors_of_mo_matrix_complex
+		mo_as_svd_vectors_of_mo_matrix_eig_complex
+			these three subroutines modify mo_coef_complex, decide whether to update mo_coef{,_imag} here or elsewhere
+		mo_coef_new_as_svd_vectors_of_mo_matrix_eig_complex
+
+
+src/mo_guess/h_core_guess_routine.irp.f
+	subroutine hcore_guess
+		modified for periodic, but need to decide how to handle separate parts of mo_coef_complex when updated
+		(also has soft_touch mo_coef_complex)
+
+src/mo_guess/mo_ortho_lowdin_complex.irp.f
+	[complex*16, ao_ortho_lowdin_coef_complex, (ao_num,ao_num)]
+	[complex*16, ao_ortho_lowdin_overlap_complex, (ao_num,ao_num)]
+
+src/mo_guess/pot_mo_ortho_canonical_ints.irp.f
+	[complex*16, ao_ortho_cano_n_e_ints_cplx, (mo_num,mo_num)]
+src/mo_guess/pot_mo_ortho_lowdin_ints.irp.f
+	[complex*16, ao_ortho_lowdin_n_e_ints_cplx, (mo_num,mo_num)]
+
+
+#######################
+#    mo_one_e_ints    #
+#######################
+
+src/mo_one_e_ints/EZFIO.cfg
+	[mo_integrals_e_n_imag]
+	[mo_integrals_kinetic_imag] 
+	[mo_integrals_pseudo_imag]
+	[mo_integrals_pseudo_imag]
+
+src/mo_one_e_ints/ao_to_mo_complex.irp.f
+	mo_to_ao_complex
+	mo_to_ao_no_overlap_complex
+	[ complex*16, S_mo_coef_complex, (ao_num, mo_num) ]
+
+src/mo_one_e_ints/orthonormalize.irp.f
+	subroutine orthonormalize_mos
+		same issue as above with modification of mo_coef_complex
+
+src/mo_one_e_ints/mo_one_e_ints.irp.f
+src/mo_one_e_ints/kin_mo_ints.irp.f
+src/mo_one_e_ints/mo_overlap.irp.f
+src/mo_one_e_ints/pot_mo_ints.irp.f
+src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f
+	TODO: decide how to handle these providers
+  for periodic AOs, we always read (can't compute)
+  for MOs, we can either read from disk or transform from AOs
+  simplest way might be to link all three providers (integrals{,_imag,_complex})
+  if (.not.is_complex), just ignore imag and complex arrays?
+  if (is_complex)
+    either read real/imag from disk and combine to form complex
+    or transform complex MO ints from complex AO ints and also assign real/imag parts to separate arrays?
+
+
+src/mo_one_e_ints/mo_overlap.irp.f
+	[ complex*16, mo_overlap_complex,(mo_num,mo_num) ]
+		TODO: add option to read from disk?
+		typical workflow from pyscf might include reading MO 1,2-e ints, ovlp, mo_coef
+		maybe just add check to converter to ensure they're orthonormal, and don't save them after that?
+
+
+
+#######################
+#         SCF         #
+#######################
+	
+src/hartree_fock/fock_matrix_hf_complex.irp.f
+	TODO for periodic:
+		[ complex*16, ao_two_e_integral_{alpha,beta}_complex, (ao_num, ao_num) ]
+			finish implementation (might need new version of two_e_integrals_index_reverse)
+	added for periodic:
+		[ complex*16, Fock_matrix_ao_{alpha,beta}_complex, (ao_num, ao_num) ]
+	
+	
+src/hartree_fock/scf.irp.f
+	modified for periodic:
+		subroutine create_guess
+			should work for periodic
+			TODO: decide what to do about mo_coef_complex and imag/real parts for touch/save!!!
+		TODO: call roothaan_hall_scf_complex if (is_complex)
+
+
+src/scf_utils/diagonalize_fock_complex.irp.f
+	[ complex*16, eigenvectors_Fock_matrix_mo_complex, (ao_num,mo_num) ]
+		similar to real version
+		make separate function in utils for lapack calls
+
+src/scf_utils/diis_complex.irp.f
+	[complex*16, FPS_SPF_Matrix_AO_complex, (AO_num, AO_num)]
+	[complex*16, FPS_SPF_Matrix_MO, (mo_num, mo_num)]
+	linked providers:
+		[ double precision, eigenvalues_Fock_matrix_AO_complex, (AO_num) ]
+		[ complex*16, eigenvectors_Fock_matrix_AO_complex, (AO_num,AO_num) ]
+			TODO: finish implementing (need s_half_inv_complex)
+			note: eigvals is same type/size as real version
+			
+
+src/scf_utils/fock_matrix.irp.f
+	added checks to make sure we don't end up in real providers if (is_complex)
+		probably not necessary?
+	[ double precision, SCF_energy ]
+		modified for periodic
+		could also add check to ensure imaginary part is zero?
+
+src/scf_utils/fock_matrix_complex.irp.f
+	[ complex*16, Fock_matrix_mo_complex, (mo_num,mo_num) ]
+	[ double precision, Fock_matrix_diag_mo_complex, (mo_num)]
+		similar to real versions
+		added check to make sure diagonal elements of fock matrix are real
+	[ complex*16, Fock_matrix_mo_alpha_complex, (mo_num,mo_num) ]
+	[ complex*16, Fock_matrix_mo_beta_complex, (mo_num,mo_num) ]
+	[ complex*16, Fock_matrix_ao_complex, (ao_num, ao_num) ]
+
+
+src/scf_utils/huckel_complex.irp.f
+	similar to real version
+	could just put if (is_complex) branch in real version? (instead of making separate subroutine)
+	has soft_touch mo_coef_complex and call to save_mos (see other notes on real/imag parts)
+
+
+src/scf_utils/roothaan_hall_scf_complex.irp.f
+	subroutine Roothaan_Hall_SCF_complex
+		similar to real
+		has soft_touch mo_coef_complex and call to save_mos (see other notes on real/imag parts)
+
+src/scf_utils/scf_density_matrix_ao_complex.irp.f
+	complex versions of providers
+		[complex*16, SCF_density_matrix_ao_alpha_complex, (ao_num,ao_num) ]
+		[ complex*16, SCF_density_matrix_ao_beta_complex,  (ao_num,ao_num) ]
+		[ complex*16, SCF_density_matrix_ao_complex, (ao_num,ao_num) ]
+	
+
diff --git a/src/utils_periodic/import_integrals_ao_periodic.irp.f b/src/utils_periodic/import_integrals_ao_periodic.irp.f
deleted file mode 100644
index 79eb8fe0..00000000
--- a/src/utils_periodic/import_integrals_ao_periodic.irp.f
+++ /dev/null
@@ -1,137 +0,0 @@
-program print_integrals
-  print *, 'Number of AOs?'
-  read(*,*) ao_num
-  TOUCH ao_num
-  call run
-end
-
-subroutine run
-  use map_module
-  implicit none
-  
-  integer :: iunit
-  integer :: getunitandopen
-
-  integer ::i,j,k,l
-  double precision :: integral
-  double precision, allocatable :: A(:,:), B(:,:)
-  double precision :: tmp_re, tmp_im
-
-  integer             :: n_integrals 
-  integer(key_kind), allocatable   :: buffer_i(:) 
-  real(integral_kind), allocatable :: buffer_values(:)
-
-  call ezfio_set_ao_basis_ao_num(ao_num)
-
-  allocate (A(ao_num,ao_num), B(ao_num,ao_num) )
-  
-  A(1,1) = huge(1.d0)
-  iunit = getunitandopen('E.qp','r')
-  read (iunit,*,end=9) A(1,1)
-  9 continue
-  close(iunit)
-  if (A(1,1) /= huge(1.d0)) then
-    call ezfio_set_nuclei_nuclear_repulsion(A(1,1))
-    call ezfio_set_nuclei_io_nuclear_repulsion("Read")
-  endif
-
-  A = 0.d0
-  B = 0.d0
-  iunit = getunitandopen('T.qp','r')
-  do 
-    read (iunit,*,end=10) i,j, tmp_re, tmp_im
-    A(i,j) = tmp_re
-    B(i,j) = tmp_im
-    if (i.ne.j) then
-      A(j,i) =  tmp_re
-      B(j,i) = -tmp_im
-    endif
-  enddo
-  10 continue
-  close(iunit)
-  call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A(1:ao_num, 1:ao_num))
-  call ezfio_set_ao_one_e_ints_ao_integrals_kinetic_imag(B(1:ao_num, 1:ao_num))
-  call ezfio_set_ao_one_e_ints_io_ao_integrals_kinetic("Read")
-
-  A = 0.d0
-  B = 0.d0
-  iunit = getunitandopen('S.qp','r')
-  do 
-    read (iunit,*,end=11) i,j, tmp_re, tmp_im
-    A(i,j) = tmp_re
-    B(i,j) = tmp_im
-    if (i.ne.j) then
-      A(j,i) =  tmp_re
-      B(j,i) = -tmp_im
-    endif
-  enddo
-  11 continue
-  close(iunit)
-  call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A(1:ao_num, 1:ao_num))
-  call ezfio_set_ao_one_e_ints_ao_integrals_overlap_imag(B(1:ao_num, 1:ao_num))
-  call ezfio_set_ao_one_e_ints_io_ao_integrals_overlap("Read")
-
-  A = 0.d0
-  B = 0.d0
-  iunit = getunitandopen('P.qp','r')
-  do 
-    read (iunit,*,end=14) i,j, tmp_re, tmp_im
-    A(i,j) = tmp_re
-    B(i,j) = tmp_im
-    if (i.ne.j) then
-      A(j,i) =  tmp_re
-      B(j,i) = -tmp_im
-    endif
-  enddo
-  14 continue
-  close(iunit)
-  call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A(1:ao_num,1:ao_num))
-  call ezfio_set_ao_one_e_ints_ao_integrals_pseudo_imag(B(1:ao_num,1:ao_num))
-  call ezfio_set_ao_one_e_ints_io_ao_integrals_pseudo("Read")
-
-  A = 0.d0
-  B = 0.d0
-  iunit = getunitandopen('V.qp','r')
-  do 
-    read (iunit,*,end=12) i,j, tmp_re, tmp_im
-    A(i,j) = tmp_re
-    B(i,j) = tmp_im
-    if (i.ne.j) then
-      A(j,i) =  tmp_re
-      B(j,i) = -tmp_im
-    endif
-  enddo
-  12 continue
-  close(iunit)
-  call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A(1:ao_num, 1:ao_num))
-  call ezfio_set_ao_one_e_ints_ao_integrals_n_e_imag(B(1:ao_num, 1:ao_num))
-  call ezfio_set_ao_one_e_ints_io_ao_integrals_n_e("Read")
-
-!  allocate(buffer_i(ao_num**3), buffer_values(ao_num**3))
-!  iunit = getunitandopen('W.qp','r')
-!  n_integrals=0
-!  buffer_values = 0.d0
-!  do 
-!    read (iunit,*,end=13) i,j,k,l, integral
-!    n_integrals += 1
-!    call two_e_integrals_index(i, j, k, l, buffer_i(n_integrals) )
-!    buffer_values(n_integrals) = integral
-!    if (n_integrals == size(buffer_i)) then
-!      call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_values)
-!      n_integrals = 0
-!    endif
-!  enddo
-!  13 continue
-!  close(iunit)
-!  
-!  if (n_integrals > 0) then
-!    call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_values)
-!  endif
-!
-!  call map_sort(ao_integrals_map)
-!  call map_unique(ao_integrals_map)
-!
-!  call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map)
-!  call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read')
-  
-end
diff --git a/src/zmq/put_get.irp.f b/src/zmq/put_get.irp.f
index fce8722d..4669d0f6 100644
--- a/src/zmq/put_get.irp.f
+++ b/src/zmq/put_get.irp.f
@@ -443,6 +443,137 @@ integer function zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_
 end
 
 
+integer function zmq_put_cdmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_x1, size_x2, sze)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Put a complex vector on the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  character*(*)                  :: name
+  integer, intent(in)            :: size_x1, size_x2
+  integer*8, intent(in)          :: sze
+  complex*16, intent(in)         :: x(size_x1, size_x2)
+  integer*8                      :: rc, ni
+  integer                        :: j
+  character*(256)                :: msg
+
+  zmq_put_cdmatrix = 0
+
+  ni = size_x1
+  do j=1,size_x2
+    if (j == size_x2) then
+      ni = int(sze - int(j-1,8)*int(size_x1,8),8)
+    endif
+    write(msg,'(A,1X,I8,1X,A,I8.8)') 'put_data '//trim(zmq_state), worker_id, trim(name), j
+    rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
+    if (rc /= len(trim(msg))) then
+      print *,  trim(msg)
+      zmq_put_cdmatrix = -1
+      print *,  'Failed in put_data', rc, j
+      return
+    endif
+
+    rc = f77_zmq_send8(zmq_to_qp_run_socket,x(1,j),ni*8_8*2,0)
+    if (rc /= ni*8_8*2) then
+      print *,  'Failed in send ', rc, j
+      zmq_put_cdmatrix = -1
+      return
+    endif
+
+    rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+    if (msg(1:rc) /= 'put_data_reply ok') then
+      print *,  trim(msg)
+      print *,  'Failed in recv ',  rc, j
+      zmq_put_cdmatrix = -1
+      return
+    endif
+  enddo
+
+end
+
+
+integer function zmq_get_cdmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_x1, size_x2, sze)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Get a float vector from the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer, intent(in)            :: size_x1, size_x2
+  integer*8, intent(in)          :: sze
+  character*(*), intent(in)      :: name
+  complex*16, intent(out)        :: x(size_x1,size_x2)
+  integer*8                      :: rc, ni
+  integer*8                      :: j
+  character*(256)                :: msg
+
+  PROVIDE zmq_state
+  ! Success
+  zmq_get_cdmatrix = 0
+
+  if (mpi_master) then
+    ni = size_x1
+    do j=1, size_x2
+      if (j == size_x2) then
+        ni = sze - (j-1)*size_x1
+      endif
+      write(msg,'(A,1X,I8,1X,A,I8.8)') 'get_data '//trim(zmq_state), worker_id, trim(name),j
+      rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
+      if (rc /= len(trim(msg))) then
+        print *,  trim(msg)
+        zmq_get_cdmatrix = -1
+        print *,  irp_here, 'rc /= len(trim(msg))'
+        print *,  irp_here, '  received : ', rc
+        print *,  irp_here, '  expected : ', len(trim(msg))
+        go to 10
+      endif
+
+      rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+      if (msg(1:14) /= 'get_data_reply') then
+        print *,  irp_here, 'msg(1:14) /= get_data_reply'
+        print *,  trim(msg)
+        zmq_get_cdmatrix = -1
+        go to 10
+      endif
+
+      rc = f77_zmq_recv8(zmq_to_qp_run_socket,x(1,j),ni*8_8*2,0)
+      !print *,irp_here, 'rc = ',rc
+      !print *,irp_here, 'ni = ',ni
+      !print *,irp_here, ' j = ',j
+      if (rc /= ni*8_8*2) then
+        print *,  irp_here, 'rc /= size_x1*8*2 : ', trim(name)
+        print *,  irp_here, '  received: ', rc
+        print *,  irp_here, '  expected: ', ni*8_8*2
+        zmq_get_cdmatrix = -1
+        go to 10
+      endif
+    enddo
+  endif
+
+  10 continue
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    integer :: ierr
+    include 'mpif.h'
+    call MPI_BCAST (zmq_get_cdmatrix, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      print *,  irp_here//': Unable to broadcast zmq_get_cdmatrix'
+      stop -1
+    endif
+    call MPI_BARRIER(MPI_COMM_WORLD,ierr)
+    call broadcast_chunks_complex_double(x, sze)
+  IRP_ENDIF
+
+end
+
+
 
 integer function zmq_put8_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_x)
   use f77_zmq