From 3194178dd24a12d92a2313c5f28affeeaa216b9a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Dec 2018 16:48:33 +0100 Subject: [PATCH] Removed uppercases in filenames --- scripts/generate_h_apply.py | 8 +- scripts/perturbation.py | 2 +- src/ao_basis/dimensions_integrals.irp.f | 4 +- src/ao_one_e_integrals/pot_ao_ints.irp.f | 14 +- .../pot_ao_pseudo_ints.irp.f | 4 +- src/bitmask/bitmasks.irp.f | 2 +- src/cis/{CIS.irp.f => cis.irp.f} | 0 src/cis/{H_apply.irp.f => h_apply.irp.f} | 0 src/cisd/{CISD.irp.f => cisd.irp.f} | 0 src/cisd/{H_apply.irp.f => h_apply.irp.f} | 0 ...gonalize_CI.irp.f => diagonalize_ci.irp.f} | 0 src/davidson/{u0Hu0.irp.f => u0_h_u0.irp.f} | 0 ...gonalize_CI.irp.f => diagonalize_ci.irp.f} | 0 .../{Fock_diag.irp.f => fock_diag.irp.f} | 0 .../{H_apply.irp.f => h_apply.irp.f} | 0 ...{H_apply.template.f => h_apply.template.f} | 0 ...mq.template.f => h_apply_nozmq.template.f} | 0 ..._zmq.template.f => h_apply_zmq.template.f} | 0 src/determinants/s2.irp.f | 2 +- src/determinants/slater_rules.irp.f | 4 +- src/determinants/spindeterminants.irp.f | 10 +- src/fci/EZFIO.cfg | 13 + src/fci/NEED | 7 + src/fci/README.rst | 112 ++ src/fci/energy.irp.f | 33 + src/fci/fci.irp.f | 147 ++ src/fci/pt2.irp.f | 40 + src/fci/pt2_stoch_routines.irp.f | 669 ++++++++ src/fci/run_pt2_slave.irp.f | 247 +++ src/fci/run_selection_slave.irp.f | 254 +++ src/fci/selection.irp.f | 1371 +++++++++++++++++ src/fci/selection_buffer.irp.f | 303 ++++ src/fci/selection_types.f90 | 9 + src/fci/zmq_selection.irp.f | 219 +++ src/four_idx/NEED | 1 + src/four_idx/README.rst | 6 + src/four_idx/four_idx_transform.irp.f | 12 + src/hartree_fock/EZFIO.cfg | 53 + src/hartree_fock/NEED | 4 + src/hartree_fock/README.rst | 33 + src/hartree_fock/diagonalize_fock.irp.f | 100 ++ src/hartree_fock/diis.irp.f | 139 ++ src/hartree_fock/fock_matrix.irp.f | 320 ++++ src/hartree_fock/hf_density_matrix_ao.irp.f | 41 + src/hartree_fock/huckel.irp.f | 34 + src/hartree_fock/roothaan_hall_scf.irp.f | 289 ++++ src/hartree_fock/scf.irp.f | 60 + .../{H_apply.irp.f => h_apply.irp.f} | 0 src/perturbation/perturbation.irp.f | 2 +- ...nearAlgebra.irp.f => linear_algebra.irp.f} | 0 50 files changed, 4542 insertions(+), 26 deletions(-) rename src/cis/{CIS.irp.f => cis.irp.f} (100%) rename src/cis/{H_apply.irp.f => h_apply.irp.f} (100%) rename src/cisd/{CISD.irp.f => cisd.irp.f} (100%) rename src/cisd/{H_apply.irp.f => h_apply.irp.f} (100%) rename src/davidson/{diagonalize_CI.irp.f => diagonalize_ci.irp.f} (100%) rename src/davidson/{u0Hu0.irp.f => u0_h_u0.irp.f} (100%) rename src/davidson_dressed/{diagonalize_CI.irp.f => diagonalize_ci.irp.f} (100%) rename src/determinants/{Fock_diag.irp.f => fock_diag.irp.f} (100%) rename src/determinants/{H_apply.irp.f => h_apply.irp.f} (100%) rename src/determinants/{H_apply.template.f => h_apply.template.f} (100%) rename src/determinants/{H_apply_nozmq.template.f => h_apply_nozmq.template.f} (100%) rename src/determinants/{H_apply_zmq.template.f => h_apply_zmq.template.f} (100%) create mode 100644 src/fci/EZFIO.cfg create mode 100644 src/fci/NEED create mode 100644 src/fci/README.rst create mode 100644 src/fci/energy.irp.f create mode 100644 src/fci/fci.irp.f create mode 100644 src/fci/pt2.irp.f create mode 100644 src/fci/pt2_stoch_routines.irp.f create mode 100644 src/fci/run_pt2_slave.irp.f create mode 100644 src/fci/run_selection_slave.irp.f create mode 100644 src/fci/selection.irp.f create mode 100644 src/fci/selection_buffer.irp.f create mode 100644 src/fci/selection_types.f90 create mode 100644 src/fci/zmq_selection.irp.f create mode 100644 src/four_idx/NEED create mode 100644 src/four_idx/README.rst create mode 100644 src/four_idx/four_idx_transform.irp.f create mode 100644 src/hartree_fock/EZFIO.cfg create mode 100644 src/hartree_fock/NEED create mode 100644 src/hartree_fock/README.rst create mode 100644 src/hartree_fock/diagonalize_fock.irp.f create mode 100644 src/hartree_fock/diis.irp.f create mode 100644 src/hartree_fock/fock_matrix.irp.f create mode 100644 src/hartree_fock/hf_density_matrix_ao.irp.f create mode 100644 src/hartree_fock/huckel.irp.f create mode 100644 src/hartree_fock/roothaan_hall_scf.irp.f create mode 100644 src/hartree_fock/scf.irp.f rename src/mrpt_utils/{H_apply.irp.f => h_apply.irp.f} (100%) rename src/utils/{LinearAlgebra.irp.f => linear_algebra.irp.f} (100%) diff --git a/scripts/generate_h_apply.py b/scripts/generate_h_apply.py index a100dd88..a5f3049d 100755 --- a/scripts/generate_h_apply.py +++ b/scripts/generate_h_apply.py @@ -61,10 +61,10 @@ subroutine class H_apply(object): def read_template(self): - file = open(os.environ["QP_ROOT"]+'/src/Determinants/H_apply.template.f','r') + file = open(os.environ["QP_ROOT"]+'/src/determinants/h_apply.template.f','r') self.template = file.read() file.close() - file = open(os.environ["QP_ROOT"]+'/src/Determinants/H_apply_nozmq.template.f','r') + file = open(os.environ["QP_ROOT"]+'/src/determinants/h_apply_nozmq.template.f','r') self.template += file.read() file.close() @@ -439,10 +439,10 @@ class H_apply(object): class H_apply_zmq(H_apply): def read_template(self): - file = open(os.environ["QP_ROOT"]+'/src/Determinants/H_apply.template.f','r') + file = open(os.environ["QP_ROOT"]+'/src/determinants/h_apply.template.f','r') self.template = file.read() file.close() - file = open(os.environ["QP_ROOT"]+'/src/Determinants/H_apply_zmq.template.f','r') + file = open(os.environ["QP_ROOT"]+'/src/determinants/h_apply_zmq.template.f','r') self.template += file.read() file.close() diff --git a/scripts/perturbation.py b/scripts/perturbation.py index 1639bd00..225fcc1d 100755 --- a/scripts/perturbation.py +++ b/scripts/perturbation.py @@ -2,7 +2,7 @@ import os -Pert_dir = os.environ["QP_ROOT"]+"/src/Perturbation/" +Pert_dir = os.environ["QP_ROOT"]+"/src/perturbation/" perturbations = [] diff --git a/src/ao_basis/dimensions_integrals.irp.f b/src/ao_basis/dimensions_integrals.irp.f index 9bfd41b7..97fd83e1 100644 --- a/src/ao_basis/dimensions_integrals.irp.f +++ b/src/ao_basis/dimensions_integrals.irp.f @@ -6,13 +6,13 @@ END_DOC integer :: n_pt_sup integer :: prim_power_l_max - include 'Utils/constants.include.F' + include 'utils/constants.include.F' prim_power_l_max = maxval(ao_power) n_pt_max_integrals = 24 * prim_power_l_max + 4 n_pt_max_i_x = 8 * prim_power_l_max ASSERT (n_pt_max_i_x-1 <= max_dim) if (n_pt_max_i_x-1 > max_dim) then - print *, 'Increase max_dim in Utils/constants.include.F to ', n_pt_max_i_x-1 + print *, 'Increase max_dim in utils/constants.include.F to ', n_pt_max_i_x-1 stop 1 endif END_PROVIDER diff --git a/src/ao_one_e_integrals/pot_ao_ints.irp.f b/src/ao_one_e_integrals/pot_ao_ints.irp.f index 3c99aec0..2229d2ec 100644 --- a/src/ao_one_e_integrals/pot_ao_ints.irp.f +++ b/src/ao_one_e_integrals/pot_ao_ints.irp.f @@ -169,7 +169,7 @@ double precision function NAI_pol_mult(A_center,B_center,power_A,power_B,alpha,b double precision :: V_e_n,const_factor,dist_integral,tmp double precision :: accu,epsilo,rint integer :: n_pt_out,lmax - include 'Utils/constants.include.F' + include 'utils/constants.include.F' if ( (A_center(1)/=B_center(1)).or. & (A_center(2)/=B_center(2)).or. & (A_center(3)/=B_center(3)).or. & @@ -371,7 +371,7 @@ recursive subroutine I_x1_pol_mult_mono_elec(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) integer,intent(inout) :: nd integer, intent(in) :: a,c double precision, intent(in) :: R1x(0:2),R1xp(0:2),R2x(0:2) - include 'Utils/constants.include.F' + include 'utils/constants.include.F' double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y @@ -436,7 +436,7 @@ recursive subroutine I_x2_pol_mult_mono_elec(c,R1x,R1xp,R2x,d,nd,dim) ! Recursive routine involved in the electron-nucleus potential END_DOC integer , intent(in) :: dim - include 'Utils/constants.include.F' + include 'utils/constants.include.F' double precision :: d(0:max_dim) integer,intent(inout) :: nd integer, intent(in) :: c @@ -509,7 +509,7 @@ double precision function int_gaus_pol(alpha,n) double precision :: alpha integer :: n double precision :: dble_fact - include 'Utils/constants.include.F' + include 'utils/constants.include.F' int_gaus_pol = 0.d0 if(iand(n,1).eq.0)then @@ -535,7 +535,7 @@ double precision function V_r(n,alpha) END_DOC double precision :: alpha, fact integer :: n - include 'Utils/constants.include.F' + include 'utils/constants.include.F' if(iand(n,1).eq.1)then V_r = 0.5d0 * fact(shiftr(n,1)) / (alpha ** (shiftr(n,1) + 1)) else @@ -570,7 +570,7 @@ double precision function V_theta(n,m) END_DOC integer :: n,m,i double precision :: Wallis, prod - include 'Utils/constants.include.F' + include 'utils/constants.include.F' V_theta = 0.d0 prod = 1.d0 do i = 0,shiftr(n,1)-1 @@ -589,7 +589,7 @@ double precision function Wallis(n) END_DOC double precision :: fact integer :: n,p - include 'Utils/constants.include.F' + include 'utils/constants.include.F' if(iand(n,1).eq.0)then Wallis = fact(shiftr(n,1)) Wallis = pi * fact(n) / (dble(ibset(0_8,n)) * (Wallis+Wallis)*Wallis) diff --git a/src/ao_one_e_integrals/pot_ao_pseudo_ints.irp.f b/src/ao_one_e_integrals/pot_ao_pseudo_ints.irp.f index 807716d8..95703380 100644 --- a/src/ao_one_e_integrals/pot_ao_pseudo_ints.irp.f +++ b/src/ao_one_e_integrals/pot_ao_pseudo_ints.irp.f @@ -32,7 +32,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num,ao_num)] BEGIN_DOC ! Local pseudo-potential END_DOC - include 'Utils/constants.include.F' + include 'utils/constants.include.F' double precision :: alpha, beta, gama, delta integer :: num_A,num_B double precision :: A_center(3),B_center(3),C_center(3) @@ -139,7 +139,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num,ao_num)] BEGIN_DOC ! Non-local pseudo-potential END_DOC - include 'Utils/constants.include.F' + include 'utils/constants.include.F' double precision :: alpha, beta, gama, delta integer :: num_A,num_B double precision :: A_center(3),B_center(3),C_center(3) diff --git a/src/bitmask/bitmasks.irp.f b/src/bitmask/bitmasks.irp.f index d121ad25..918b38ed 100644 --- a/src/bitmask/bitmasks.irp.f +++ b/src/bitmask/bitmasks.irp.f @@ -2,7 +2,7 @@ use bitmasks BEGIN_PROVIDER [ integer, N_int ] implicit none - include 'Utils/constants.include.F' + include 'utils/constants.include.F' BEGIN_DOC ! Number of 64-bit integers needed to represent determinants as binary strings END_DOC diff --git a/src/cis/CIS.irp.f b/src/cis/cis.irp.f similarity index 100% rename from src/cis/CIS.irp.f rename to src/cis/cis.irp.f diff --git a/src/cis/H_apply.irp.f b/src/cis/h_apply.irp.f similarity index 100% rename from src/cis/H_apply.irp.f rename to src/cis/h_apply.irp.f diff --git a/src/cisd/CISD.irp.f b/src/cisd/cisd.irp.f similarity index 100% rename from src/cisd/CISD.irp.f rename to src/cisd/cisd.irp.f diff --git a/src/cisd/H_apply.irp.f b/src/cisd/h_apply.irp.f similarity index 100% rename from src/cisd/H_apply.irp.f rename to src/cisd/h_apply.irp.f diff --git a/src/davidson/diagonalize_CI.irp.f b/src/davidson/diagonalize_ci.irp.f similarity index 100% rename from src/davidson/diagonalize_CI.irp.f rename to src/davidson/diagonalize_ci.irp.f diff --git a/src/davidson/u0Hu0.irp.f b/src/davidson/u0_h_u0.irp.f similarity index 100% rename from src/davidson/u0Hu0.irp.f rename to src/davidson/u0_h_u0.irp.f diff --git a/src/davidson_dressed/diagonalize_CI.irp.f b/src/davidson_dressed/diagonalize_ci.irp.f similarity index 100% rename from src/davidson_dressed/diagonalize_CI.irp.f rename to src/davidson_dressed/diagonalize_ci.irp.f diff --git a/src/determinants/Fock_diag.irp.f b/src/determinants/fock_diag.irp.f similarity index 100% rename from src/determinants/Fock_diag.irp.f rename to src/determinants/fock_diag.irp.f diff --git a/src/determinants/H_apply.irp.f b/src/determinants/h_apply.irp.f similarity index 100% rename from src/determinants/H_apply.irp.f rename to src/determinants/h_apply.irp.f diff --git a/src/determinants/H_apply.template.f b/src/determinants/h_apply.template.f similarity index 100% rename from src/determinants/H_apply.template.f rename to src/determinants/h_apply.template.f diff --git a/src/determinants/H_apply_nozmq.template.f b/src/determinants/h_apply_nozmq.template.f similarity index 100% rename from src/determinants/H_apply_nozmq.template.f rename to src/determinants/h_apply_nozmq.template.f diff --git a/src/determinants/H_apply_zmq.template.f b/src/determinants/h_apply_zmq.template.f similarity index 100% rename from src/determinants/H_apply_zmq.template.f rename to src/determinants/h_apply_zmq.template.f diff --git a/src/determinants/s2.irp.f b/src/determinants/s2.irp.f index a473bd40..cda1404d 100644 --- a/src/determinants/s2.irp.f +++ b/src/determinants/s2.irp.f @@ -1,7 +1,7 @@ double precision function diag_S_mat_elem(key_i,Nint) implicit none use bitmasks - include 'Utils/constants.include.F' + include 'utils/constants.include.F' integer :: Nint integer(bit_kind), intent(in) :: key_i(Nint,2) diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f index 901ff734..aefbdbdb 100644 --- a/src/determinants/slater_rules.irp.f +++ b/src/determinants/slater_rules.irp.f @@ -1,6 +1,6 @@ subroutine get_excitation_degree(key1,key2,degree,Nint) use bitmasks - include 'Utils/constants.include.F' + include 'utils/constants.include.F' implicit none BEGIN_DOC ! Returns the excitation degree between two determinants @@ -1834,7 +1834,7 @@ end subroutine get_excitation_degree_spin(key1,key2,degree,Nint) use bitmasks - include 'Utils/constants.include.F' + include 'utils/constants.include.F' implicit none BEGIN_DOC ! Returns the excitation degree between two determinants diff --git a/src/determinants/spindeterminants.irp.f b/src/determinants/spindeterminants.irp.f index 06da6a31..675b6e60 100644 --- a/src/determinants/spindeterminants.irp.f +++ b/src/determinants/spindeterminants.irp.f @@ -961,7 +961,7 @@ subroutine get_all_spin_singles_and_doubles_1(buffer, idx, spindet, size_buffer, integer, intent(out) :: n_doubles integer :: i - include 'Utils/constants.include.F' + include 'utils/constants.include.F' integer :: degree @@ -1000,7 +1000,7 @@ subroutine get_all_spin_singles_1(buffer, idx, spindet, size_buffer, singles, n_ integer, intent(out) :: n_singles integer :: i integer :: degree - include 'Utils/constants.include.F' + include 'utils/constants.include.F' n_singles = 1 do i=1,size_buffer @@ -1030,7 +1030,7 @@ subroutine get_all_spin_doubles_1(buffer, idx, spindet, size_buffer, doubles, n_ integer, intent(out) :: doubles(size_buffer) integer, intent(out) :: n_doubles integer :: i - include 'Utils/constants.include.F' + include 'utils/constants.include.F' integer :: degree n_doubles = 1 @@ -1123,7 +1123,7 @@ subroutine get_all_spin_singles_$N_int(buffer, idx, spindet, size_buffer, single integer, intent(out) :: n_singles integer :: i,k - include 'Utils/constants.include.F' + include 'utils/constants.include.F' integer(bit_kind) :: xorvec($N_int) integer :: degree @@ -1173,7 +1173,7 @@ subroutine get_all_spin_doubles_$N_int(buffer, idx, spindet, size_buffer, double integer, intent(out) :: n_doubles integer :: i,k, degree - include 'Utils/constants.include.F' + include 'utils/constants.include.F' integer(bit_kind) :: xorvec($N_int) n_doubles = 1 diff --git a/src/fci/EZFIO.cfg b/src/fci/EZFIO.cfg new file mode 100644 index 00000000..d5526673 --- /dev/null +++ b/src/fci/EZFIO.cfg @@ -0,0 +1,13 @@ +[energy] +type: double precision +doc: Calculated Selected |FCI| energy +interface: ezfio +size: (determinants.n_states) + +[energy_pt2] +type: double precision +doc: Calculated |FCI| energy + |PT2| +interface: ezfio +size: (determinants.n_states) + + diff --git a/src/fci/NEED b/src/fci/NEED new file mode 100644 index 00000000..f7a3a46f --- /dev/null +++ b/src/fci/NEED @@ -0,0 +1,7 @@ +perturbation +selectors_full +generators_full +zmq +mpi +davidson_undressed +iterations diff --git a/src/fci/README.rst b/src/fci/README.rst new file mode 100644 index 00000000..3bb6fb24 --- /dev/null +++ b/src/fci/README.rst @@ -0,0 +1,112 @@ +=== +FCI +=== + +Selected Full Configuration Interaction. + +The :command:`FCI` program starts with a single determinant, or with the wave +function in the |EZFIO| database if :option:`determinants read_wf` is |true|. +Then, it will iteratively: + +* Select the most important determinants from the external space and add them to the + internal space +* If :option:`determinants s2_eig` is |true|, add all the necessary + determinants to allow the eigenstates of |H| to be eigenstates of |S^2| +* Diagonalize |H| in the enlarged internal space +* Compute (stochastically) the second-order perturbative contribution to the energy +* Extrapolate the variational energy by fitting + :math:`E=E_\text{FCI} - \alpha\, E_\text{PT2}` + + +The number of selected determinants at each iteration will be such that the +size of the wave function will double at every iteration. If :option:`determinants +s2_eig` is |true|, then the number of selected determinants will be 1.5x the +current number, and then all the additional determinants will be added. + +By default, the program will stop when more than one million determinants have +been selected, or when the |PT2| energy is below :math:`10^{-4}`. + +The variational and |PT2| energies of the iterations are stored in the +|EZFIO| database, in the :ref:`IterativeSave` module. + + + +Computation of the |PT2| energy +------------------------------- + +At each iteration, the |PT2| energy is computed considering the Epstein-Nesbet +zeroth-order Hamiltonian: + +.. math:: + + E_{\text{PT2}} = \sum_{ \alpha } + \frac{|\langle \Psi_S | \hat{H} | \alpha \rangle|^2} + {E - \langle \alpha | \hat{H} | \alpha \rangle} + +where the |kalpha| determinants are generated by applying all the single and +double excitation operators to all the determinants of the wave function +:math:`\Psi_G`. + +When the hybrid-deterministic/stochastic algorithm is chosen +(default), :math:`Psi_G = \Psi_S = \Psi`, the full wavefunction expanded in the +internal space. +When the deterministic algorithm is chosen (:option:`perturbation do_pt2` +is set to |false|), :math:`Psi_G` is a truncation of |Psi| using +:option:`determinants threshold_generators`, and :math:`Psi_S` is a truncation +of |Psi| using :option:`determinants threshold_selectors`, and re-weighted +by :math:`1/\langle \Psi_s | \Psi_s \rangle`. + +At every iteration, while computing the |PT2|, the variance of the wave +function is also computed: + +.. math:: + + \sigma^2 & = \langle \Psi | \hat{H}^2 | \Psi \rangle - + \langle \Psi | \hat{H} | \Psi \rangle^2 \\ + & = \sum_{i \in \text{FCI}} + \langle \Psi | \hat{H} | i \rangle + \langle i | \hat{H} | \Psi \rangle - + \langle \Psi | \hat{H} | \Psi \rangle^2 \\ + & = \sum_{ \alpha } + \langle |\Psi | \hat{H} | \alpha \rangle|^2. + +The expression of the variance is the same as the expression of the |PT2|, with +a denominator of 1. It measures how far the wave function is from the |FCI| +solution. Note that the absence of denominator in the Heat-Bath selected |CI| +method is selection method by minimization of the variance, whereas |CIPSI| is +a selection method by minimization of the energy. + + +If :option:`perturbation do_pt2` is set to |false|, then the stochastic +|PT2| is not computed, and an approximate value is obtained from the |CIPSI| +selection. The calculation is faster, but the extrapolated |FCI| value is +less accurate. This way of running the code should be used when the only +goal is to generate a wave function, as for using |CIPSI| wave functions as +trial wave functions of |QMC| calculations for example. + + +The :command:`PT2` program reads the wave function of the |EZFIO| database +and computes the energy and the |PT2| contribution. + + +State-averaging +--------------- + +Extrapolated |FCI| energy +------------------------- + +An estimate of the |FCI| energy is computed by extrapolating + +.. math:: + + E=E_\text{FCI} - \alpha\, E_\text{PT2} + +This extrapolation is done for all the requested states, and excitation +energies are printed as energy differences between the extrapolated +energies of the excited states and the extrapolated energy of the ground +state. + +The extrapolations are given considering the 2 last points, the 3 last points, ..., +the 7 last points. The extrapolated value should be chosen such that the extrpolated +value is stable with the number of points. + diff --git a/src/fci/energy.irp.f b/src/fci/energy.irp.f new file mode 100644 index 00000000..b7ba42bb --- /dev/null +++ b/src/fci/energy.irp.f @@ -0,0 +1,33 @@ +BEGIN_PROVIDER [ logical, initialize_pt2_E0_denominator ] + implicit none + BEGIN_DOC + ! If true, initialize pt2_E0_denominator + END_DOC + initialize_pt2_E0_denominator = .True. +END_PROVIDER + +BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ] + implicit none + BEGIN_DOC + ! E0 in the denominator of the PT2 + END_DOC + if (initialize_pt2_E0_denominator) then + if (h0_type == "EN") then + pt2_E0_denominator(1:N_states) = psi_energy(1:N_states) + else if (h0_type == "Barycentric") then + pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states) + else if (h0_type == "Variance") then + pt2_E0_denominator(1:N_states) = psi_energy(1:N_states) !1.d0-nuclear_repulsion + else if (h0_type == "SOP") then + pt2_E0_denominator(1:N_states) = psi_energy(1:N_states) + else + print *, h0_type, ' not implemented' + stop + endif + call write_double(6,pt2_E0_denominator(1)+nuclear_repulsion, 'PT2 Energy denominator') + else + pt2_E0_denominator = -huge(1.d0) + endif +END_PROVIDER + + diff --git a/src/fci/fci.irp.f b/src/fci/fci.irp.f new file mode 100644 index 00000000..01cca708 --- /dev/null +++ b/src/fci/fci.irp.f @@ -0,0 +1,147 @@ +program fci_zmq + implicit none + integer :: i,j,k + double precision, allocatable :: pt2(:), variance(:), norm(:), rpt2(:) + integer :: n_det_before, to_select + + double precision :: rss + double precision, external :: memory_of_double + rss = memory_of_double(N_states)*4.d0 + call check_mem(rss,irp_here) + + allocate (pt2(N_states), rpt2(N_states), norm(N_states), variance(N_states)) + + double precision :: hf_energy_ref + logical :: has + double precision :: relative_error + + PROVIDE H_apply_buffer_allocated + + relative_error=PT2_relative_error + + pt2 = -huge(1.e0) + rpt2 = -huge(1.e0) + norm = 0.d0 + variance = 0.d0 + + if (s2_eig) then + call make_s2_eigenfunction + endif + call diagonalize_CI + call save_wavefunction + + call ezfio_has_hartree_fock_energy(has) + if (has) then + call ezfio_get_hartree_fock_energy(hf_energy_ref) + else + hf_energy_ref = ref_bitmask_energy + endif + + if (N_det > N_det_max) then + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + N_det = N_det_max + soft_touch N_det psi_det psi_coef + call diagonalize_CI + call save_wavefunction + endif + + n_det_before = 0 + + double precision :: correlation_energy_ratio + double precision :: threshold_selectors_save, threshold_generators_save + threshold_selectors_save = threshold_selectors + threshold_generators_save = threshold_generators + double precision :: error(N_states) + + correlation_energy_ratio = 0.d0 + + do while ( & + (N_det < N_det_max) .and. & + (maxval(abs(pt2(1:N_states))) > pt2_max) .and. & + (correlation_energy_ratio <= correlation_energy_ratio_max) & + ) + write(*,'(A)') '--------------------------------------------------------------------------------' + + + if (do_pt2) then + pt2 = 0.d0 + variance = 0.d0 + norm = 0.d0 + threshold_selectors = 1.d0 + threshold_generators = 1.d0 + SOFT_TOUCH threshold_selectors threshold_generators + call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, norm) ! Stochastic PT2 + threshold_selectors = threshold_selectors_save + threshold_generators = threshold_generators_save + SOFT_TOUCH threshold_selectors threshold_generators + endif + + + correlation_energy_ratio = (psi_energy_with_nucl_rep(1) - hf_energy_ref) / & + (psi_energy_with_nucl_rep(1) + pt2(1) - hf_energy_ref) + correlation_energy_ratio = min(1.d0,correlation_energy_ratio) + + call ezfio_set_fci_energy_pt2(psi_energy_with_nucl_rep+pt2) + call write_double(6,correlation_energy_ratio, 'Correlation ratio') + call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm) + + do k=1,N_states + rpt2(:) = pt2(:)/(1.d0 + norm(k)) + enddo + + call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) + call print_extrapolated_energy(psi_energy_with_nucl_rep(1:N_states),rpt2) + N_iter += 1 + + n_det_before = N_det + to_select = N_det + to_select = max(N_states_diag, to_select) + to_select = min(to_select, N_det_max-n_det_before) + call ZMQ_selection(to_select, pt2, variance, norm) + + PROVIDE psi_coef + PROVIDE psi_det + PROVIDE psi_det_sorted + + call diagonalize_CI + call save_wavefunction + call ezfio_set_fci_energy(psi_energy_with_nucl_rep(1:N_states)) + enddo + + if (N_det < N_det_max) then + call diagonalize_CI + call save_wavefunction + call ezfio_set_fci_energy(psi_energy_with_nucl_rep(1:N_states)) + call ezfio_set_fci_energy_pt2(psi_energy_with_nucl_rep(1:N_states)+pt2) + endif + + if (do_pt2) then + pt2 = 0.d0 + variance = 0.d0 + norm = 0.d0 + threshold_selectors = 1.d0 + threshold_generators = 1d0 + SOFT_TOUCH threshold_selectors threshold_generators + call ZMQ_pt2(psi_energy_with_nucl_rep, pt2,relative_error,error,variance,norm) ! Stochastic PT2 + threshold_selectors = threshold_selectors_save + threshold_generators = threshold_generators_save + SOFT_TOUCH threshold_selectors threshold_generators + call ezfio_set_fci_energy(psi_energy_with_nucl_rep(1:N_states)) + call ezfio_set_fci_energy_pt2(psi_energy_with_nucl_rep(1:N_states)+pt2) + endif + print *, 'N_det = ', N_det + print *, 'N_sop = ', N_occ_pattern + print *, 'N_states = ', N_states + print*, 'correlation_ratio = ', correlation_energy_ratio + + + do k=1,N_states + rpt2(:) = pt2(:)/(1.d0 + norm(k)) + enddo + + call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm) + call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) + call print_extrapolated_energy(psi_energy_with_nucl_rep(1:N_states),rpt2) + +end diff --git a/src/fci/pt2.irp.f b/src/fci/pt2.irp.f new file mode 100644 index 00000000..c0c2e01d --- /dev/null +++ b/src/fci/pt2.irp.f @@ -0,0 +1,40 @@ +program pt2_stoch + implicit none + read_wf = .True. + SOFT_TOUCH read_wf + PROVIDE mo_bielec_integrals_in_map + PROVIDE psi_energy + call run +end + +subroutine run + implicit none + integer :: i,j,k + logical, external :: detEq + + double precision :: pt2(N_states) + integer :: degree + integer :: n_det_before, to_select + double precision :: threshold_davidson_in + + double precision :: E_CI_before(N_states), relative_error, error(N_states), variance(N_states), norm(N_states), rpt2(N_states) + + pt2(:) = 0.d0 + + E_CI_before(:) = psi_energy(:) + nuclear_repulsion + threshold_selectors = 1.d0 + threshold_generators = 1.d0 + relative_error=PT2_relative_error + + call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, norm) ! Stochastic PT2 + do k=1,N_states + rpt2(:) = pt2(:)/(1.d0 + norm(k)) + enddo + + call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm) + + call ezfio_set_fci_energy(E_CI_before) + call ezfio_set_fci_energy_pt2(E_CI_before+pt2) +end + + diff --git a/src/fci/pt2_stoch_routines.irp.f b/src/fci/pt2_stoch_routines.irp.f new file mode 100644 index 00000000..a777f353 --- /dev/null +++ b/src/fci/pt2_stoch_routines.irp.f @@ -0,0 +1,669 @@ +BEGIN_PROVIDER [ integer, pt2_stoch_istate ] + implicit none + BEGIN_DOC + ! State for stochatsic PT2 + END_DOC + pt2_stoch_istate = 1 +END_PROVIDER + + BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] + implicit none + logical, external :: testTeethBuilding + integer :: i + integer :: e + e = elec_num - n_core_orb * 2 + pt2_n_tasks_max = 1+min((e*(e-1))/2, int(dsqrt(dble(N_det_selectors)))/10) + do i=1,N_det_generators + pt2_F(i) = 1 + int(dble(pt2_n_tasks_max)*maxval(dsqrt(dabs(psi_coef_sorted_gen(i,1:N_states))))) + enddo +END_PROVIDER + + BEGIN_PROVIDER [ integer, pt2_N_teeth ] +&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] + implicit none + logical, external :: testTeethBuilding + + if(N_det_generators < 1024) then + pt2_minDetInFirstTeeth = 1 + pt2_N_teeth = 1 + else + pt2_minDetInFirstTeeth = min(5, N_det_generators) + do pt2_N_teeth=100,2,-1 + if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit + end do + end if + call write_int(6,pt2_N_teeth,'Number of comb teeth') +END_PROVIDER + + +logical function testTeethBuilding(minF, N) + implicit none + integer, intent(in) :: minF, N + integer :: n0, i + double precision :: u0, Wt, r + + double precision, allocatable :: tilde_w(:), tilde_cW(:) + integer, external :: dress_find_sample + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + + rss = memory_of_double(2*N_det_generators+1) + call check_mem(rss,irp_here) + + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + do i=1,N_det_generators + tilde_w(i) = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 !+ 1.d-20 + enddo + + double precision :: norm + norm = 0.d0 + do i=N_det_generators,1,-1 + norm += tilde_w(i) + enddo + + tilde_w(:) = tilde_w(:) / norm + + tilde_cW(0) = -1.d0 + do i=1,N_det_generators + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(:) = tilde_cW(:) + 1.d0 + + n0 = 0 + testTeethBuilding = .false. + do + u0 = tilde_cW(n0) + r = tilde_cW(n0 + minF) + Wt = (1d0 - u0) / dble(N) + if (dabs(Wt) <= 1.d-3) then + return + endif + if(Wt >= r - u0) then + testTeethBuilding = .true. + return + end if + n0 += 1 + if(N_det_generators - n0 < minF * N) then + return + end if + end do + stop "exited testTeethBuilding" +end function + + + +subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm) + use f77_zmq + use selection_types + + implicit none + + integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull + integer, external :: omp_get_thread_num + double precision, intent(in) :: relative_error, E(N_states) + double precision, intent(out) :: pt2(N_states),error(N_states) + double precision, intent(out) :: variance(N_states),norm(N_states) + + + integer :: i + + double precision, external :: omp_get_wtime + double precision :: state_average_weight_save(N_states), w(N_states,4) + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + + if (N_det < max(10,N_states)) then + pt2=0.d0 + variance=0.d0 + norm=0.d0 + call ZMQ_selection(0, pt2, variance, norm) + error(:) = 0.d0 + else + + state_average_weight_save(:) = state_average_weight(:) + do pt2_stoch_istate=1,N_states + state_average_weight(:) = 0.d0 + state_average_weight(pt2_stoch_istate) = 1.d0 + TOUCH state_average_weight pt2_stoch_istate + + provide nproc pt2_F mo_bielec_integrals_in_map mo_mono_elec_integral pt2_w psi_selectors + call new_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') + + integer, external :: zmq_put_psi + integer, external :: zmq_put_N_det_generators + integer, external :: zmq_put_N_det_selectors + integer, external :: zmq_put_dvector + integer, external :: zmq_put_ivector + if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then + stop 'Unable to put psi on ZMQ server' + endif + if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_det_generators on ZMQ server' + endif + if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_det_selectors on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then + stop 'Unable to put energy on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then + stop 'Unable to put state_average_weight on ZMQ server' + endif + if (zmq_put_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) then + stop 'Unable to put pt2_stoch_istate on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) then + stop 'Unable to put threshold_selectors on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) then + stop 'Unable to put threshold_generators on ZMQ server' + endif + + + + integer, external :: add_task_to_taskserver + character(400000) :: task + + integer :: j,k,ipos,ifirst + ifirst=0 + + ipos=0 + do i=1,N_det_generators + if (pt2_F(i) > 1) then + ipos += 1 + endif + enddo + call write_int(6,sum(pt2_F),'Number of tasks') + call write_int(6,ipos,'Number of fragmented tasks') + + ipos=1 + do i= 1, N_det_generators + do j=1,pt2_F(pt2_J(i)) + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, pt2_J(i) + ipos += 20 + if (ipos > 400000-20) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + ipos=1 + if (ifirst == 0) then + ifirst=1 + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif + endif + endif + end do + enddo + if (ipos > 1) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + endif + + integer, external :: zmq_set_running + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif + + + integer :: nproc_target + nproc_target = nproc + double precision :: mem + mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 + 3.d0 + 5.d0) / (1024.d0**3) + call write_double(6,mem,'Estimated memory/thread (Gb)') + if (qp_max_mem > 0) then + nproc_target = max(1,int(dble(qp_max_mem)/mem)) + nproc_target = min(nproc_target,nproc) + endif + + call omp_set_nested(.false.) + + + print '(A)', '========== ================= =========== =============== =============== =================' + print '(A)', ' Samples Energy Stat. Err Variance Norm Seconds ' + print '(A)', '========== ================= =========== =============== =============== =================' + + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1) & + !$OMP PRIVATE(i) + i = omp_get_thread_num() + if (i==0) then + + call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, w(1,1), w(1,2), w(1,3), w(1,4)) + pt2(pt2_stoch_istate) = w(pt2_stoch_istate,1) + error(pt2_stoch_istate) = w(pt2_stoch_istate,2) + variance(pt2_stoch_istate) = w(pt2_stoch_istate,3) + norm(pt2_stoch_istate) = w(pt2_stoch_istate,4) + + else + call pt2_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') + + print '(A)', '========== ================= =========== =============== =============== =================' + + enddo +! call omp_set_nested(.false.) + + FREE pt2_stoch_istate + state_average_weight(:) = state_average_weight_save(:) + TOUCH state_average_weight + endif + do k=N_det+1,N_states + pt2(k) = 0.d0 + enddo + +end subroutine + + +subroutine pt2_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_pt2_slave(1,i,pt2_e0_denominator) +end + + +subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, variance, norm) + use f77_zmq + use selection_types + use bitmasks + implicit none + + + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + double precision, intent(in) :: relative_error, E + double precision, intent(out) :: pt2(N_states), error(N_states) + double precision, intent(out) :: variance(N_states), norm(N_states) + + + double precision, allocatable :: eI(:,:), eI_task(:,:), S(:), S2(:) + double precision, allocatable :: vI(:,:), vI_task(:,:), T2(:) + double precision, allocatable :: nI(:,:), nI_task(:,:), T3(:) + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer, external :: zmq_delete_tasks + integer, external :: zmq_abort + integer, external :: pt2_find_sample_lr + + integer :: more, n, i, p, c, t, n_tasks, U + integer, allocatable :: task_id(:) + integer, allocatable :: index(:) + + double precision, external :: omp_get_wtime + double precision :: v, x, x2, x3, avg, avg2, avg3, eqt, E0, v0, n0 + double precision :: time, time0 + + integer, allocatable :: f(:) + logical, allocatable :: d(:) + logical :: do_exit + + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + + rss = memory_of_int(pt2_n_tasks_max*2+N_det_generators*2) + rss += memory_of_double(N_states*N_det_generators)*3.d0 + rss += memory_of_double(N_states*pt2_n_tasks_max)*3.d0 + rss += memory_of_double(pt2_N_teeth+1)*4.d0 + call check_mem(rss,irp_here) + + allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators)) + allocate(d(N_det_generators+1)) + allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max)) + allocate(vI(N_states, N_det_generators), vI_task(N_states, pt2_n_tasks_max)) + allocate(nI(N_states, N_det_generators), nI_task(N_states, pt2_n_tasks_max)) + allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) + allocate(T2(pt2_N_teeth+1), T3(pt2_N_teeth+1)) + + + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + pt2(:) = -huge(1.) + error(:) = huge(1.) + variance(:) = huge(1.) + norm(:) = 0.d0 + S(:) = 0d0 + S2(:) = 0d0 + T2(:) = 0d0 + T3(:) = 0d0 + n = 1 + t = 0 + U = 0 + eI(:,:) = 0d0 + vI(:,:) = 0d0 + nI(:,:) = 0d0 + f(:) = pt2_F(:) + d(:) = .false. + n_tasks = 0 + E0 = E + v0 = 0.d0 + n0 = 0.d0 + more = 1 + time0 = omp_get_wtime() + + do_exit = .false. + do while (n <= N_det_generators) + if(f(pt2_J(n)) == 0) then + d(pt2_J(n)) = .true. + do while(d(U+1)) + U += 1 + end do + + ! Deterministic part + do while(t <= pt2_N_teeth) + if(U >= pt2_n_0(t+1)) then + t=t+1 + E0 = 0.d0 + v0 = 0.d0 + n0 = 0.d0 + do i=pt2_n_0(t),1,-1 + E0 += eI(pt2_stoch_istate, i) + v0 += vI(pt2_stoch_istate, i) + n0 += nI(pt2_stoch_istate, i) + end do + else + exit + end if + end do + + ! Add Stochastic part + c = pt2_R(n) + if(c > 0) then + x = 0d0 + x2 = 0d0 + x3 = 0d0 + do p=pt2_N_teeth, 1, -1 + v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) + i = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(p),pt2_n_0(p+1)) + x += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) + x2 += vI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) + x3 += nI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) + S(p) += x + S2(p) += x*x + T2(p) += x2 + T3(p) += x3 + end do + avg = E0 + S(t) / dble(c) + avg2 = v0 + T2(t) / dble(c) + avg3 = n0 + T3(t) / dble(c) + if ((avg /= 0.d0) .or. (n == N_det_generators) ) then + do_exit = .true. + endif + pt2(pt2_stoch_istate) = avg + variance(pt2_stoch_istate) = avg2 + norm(pt2_stoch_istate) = avg3 + ! 1/(N-1.5) : see Brugger, The American Statistician (23) 4 p. 32 (1969) + if(c > 2) then + eqt = dabs((S2(t) / c) - (S(t)/c)**2) ! dabs for numerical stability + eqt = sqrt(eqt / (dble(c) - 1.5d0)) + error(pt2_stoch_istate) = eqt + if(mod(c,10)==0 .or. n==N_det_generators) then + print '(G10.3, 2X, F16.10, 2X, G10.3, 2X, F14.10, 2X, F14.10, 2X, F10.4, A10)', c, avg+E, eqt, avg2, avg3, time-time0, '' + if(do_exit .and. (dabs(error(pt2_stoch_istate)) / (1.d-20 + dabs(pt2(pt2_stoch_istate)) ) <= relative_error)) then + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + call sleep(10) + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Error in sending abort signal (2)' + endif + endif + endif + endif + endif + time = omp_get_wtime() + end if + n += 1 + else if(more == 0) then + exit + else + call pull_pt2_results(zmq_socket_pull, index, eI_task, vI_task, nI_task, task_id, n_tasks) + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then + stop 'Unable to delete tasks' + endif + do i=1,n_tasks + eI(:, index(i)) += eI_task(:,i) + vI(:, index(i)) += vI_task(:,i) + nI(:, index(i)) += nI_task(:,i) + f(index(i)) -= 1 + end do + end if + end do + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) +end subroutine + + +integer function pt2_find_sample(v, w) + implicit none + double precision, intent(in) :: v, w(0:N_det_generators) + integer, external :: pt2_find_sample_lr + + pt2_find_sample = pt2_find_sample_lr(v, w, 0, N_det_generators) +end function + + +integer function pt2_find_sample_lr(v, w, l_in, r_in) + implicit none + double precision, intent(in) :: v, w(0:N_det_generators) + integer, intent(in) :: l_in,r_in + integer :: i,l,r + + l=l_in + r=r_in + + do while(r-l > 1) + i = shiftr(r+l,1) + if(w(i) < v) then + l = i + else + r = i + end if + end do + i = r + do r=i+1,N_det_generators + if (w(r) /= w(i)) then + exit + endif + enddo + pt2_find_sample_lr = r-1 +end function + + +BEGIN_PROVIDER [ integer, pt2_n_tasks ] + implicit none + BEGIN_DOC + ! Number of parallel tasks for the Monte Carlo + END_DOC + pt2_n_tasks = N_det_generators +END_PROVIDER + +BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] + implicit none + integer, allocatable :: seed(:) + integer :: m,i + call random_seed(size=m) + allocate(seed(m)) + do i=1,m + seed(i) = i + enddo + call random_seed(put=seed) + deallocate(seed) + + call RANDOM_NUMBER(pt2_u) + END_PROVIDER + + BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)] + implicit none + integer :: N_c, N_j + integer :: U, t, i + double precision :: v + integer, external :: pt2_find_sample_lr + + logical, allocatable :: pt2_d(:) + integer :: m,l,r,k + integer :: ncache + integer, allocatable :: ii(:,:) + double precision :: dt + + ncache = min(N_det_generators,10000) + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + rss = memory_of_int(ncache)*dble(pt2_N_teeth) + memory_of_int(N_det_generators) + call check_mem(rss,irp_here) + + allocate(ii(pt2_N_teeth,ncache),pt2_d(N_det_generators)) + + pt2_R(:) = 0 + pt2_d(:) = .false. + N_c = 0 + N_j = pt2_n_0(1) + do i=1,N_j + pt2_d(i) = .true. + pt2_J(i) = i + end do + + U = 0 + do while(N_j < pt2_n_tasks) + + if (N_c+ncache > N_det_generators) then + ncache = N_det_generators - N_c + endif + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(dt,v,t,k) + do k=1, ncache + dt = pt2_u_0 + do t=1, pt2_N_teeth + v = dt + pt2_W_T *pt2_u(N_c+k) + dt = dt + pt2_W_T + ii(t,k) = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(t),pt2_n_0(t+1)) + end do + enddo + !$OMP END PARALLEL DO + + do k=1,ncache + !ADD_COMB + N_c = N_c+1 + do t=1, pt2_N_teeth + i = ii(t,k) + if(.not. pt2_d(i)) then + N_j += 1 + pt2_J(N_j) = i + pt2_d(i) = .true. + end if + end do + + pt2_R(N_j) = N_c + + !FILL_TOOTH + do while(U < N_det_generators) + U += 1 + if(.not. pt2_d(U)) then + N_j += 1 + pt2_J(N_j) = U + pt2_d(U) = .true. + exit + end if + end do + if (N_j >= pt2_n_tasks) exit + end do + enddo + + if(N_det_generators > 1) then + pt2_R(N_det_generators-1) = 0 + pt2_R(N_det_generators) = N_c + end if + + deallocate(ii,pt2_d) + +END_PROVIDER + + + + BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_W_T ] +&BEGIN_PROVIDER [ double precision, pt2_u_0 ] +&BEGIN_PROVIDER [ integer, pt2_n_0, (pt2_N_teeth+1) ] + implicit none + integer :: i, t + double precision, allocatable :: tilde_w(:), tilde_cW(:) + double precision :: r, tooth_width + integer, external :: pt2_find_sample + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + rss = memory_of_double(2*N_det_generators+1) + call check_mem(rss,irp_here) + + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + tilde_cW(0) = 0d0 + + do i=1,N_det_generators + tilde_w(i) = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 !+ 1.d-20 + enddo + + double precision :: norm + norm = 0.d0 + do i=N_det_generators,1,-1 + norm += tilde_w(i) + enddo + + tilde_w(:) = tilde_w(:) / norm + + tilde_cW(0) = -1.d0 + do i=1,N_det_generators + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(:) = tilde_cW(:) + 1.d0 + + pt2_n_0(1) = 0 + do + pt2_u_0 = tilde_cW(pt2_n_0(1)) + r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth) + pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth) + if(pt2_W_T >= r - pt2_u_0) then + exit + end if + pt2_n_0(1) += 1 + if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then + stop "teeth building failed" + end if + end do + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + do t=2, pt2_N_teeth + r = pt2_u_0 + pt2_W_T * dble(t-1) + pt2_n_0(t) = pt2_find_sample(r, tilde_cW) + end do + pt2_n_0(pt2_N_teeth+1) = N_det_generators + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1)) + do t=1, pt2_N_teeth + tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t)) + if (tooth_width == 0.d0) then + tooth_width = sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1))) + endif + ASSERT(tooth_width > 0.d0) + do i=pt2_n_0(t)+1, pt2_n_0(t+1) + pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width + end do + end do + + pt2_cW(0) = 0d0 + do i=1,N_det_generators + pt2_cW(i) = pt2_cW(i-1) + pt2_w(i) + end do + pt2_n_0(pt2_N_teeth+1) = N_det_generators +END_PROVIDER + + + + + diff --git a/src/fci/run_pt2_slave.irp.f b/src/fci/run_pt2_slave.irp.f new file mode 100644 index 00000000..33ee418e --- /dev/null +++ b/src/fci/run_pt2_slave.irp.f @@ -0,0 +1,247 @@ + +subroutine run_pt2_slave(thread,iproc,energy) + use f77_zmq + use selection_types + implicit none + + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: thread, iproc + integer :: rc, i + + integer :: worker_id, ctask, ltask + character*(512), allocatable :: task(:) + integer, allocatable :: task_id(:) + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + type(selection_buffer) :: buf + logical :: done + + double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:) + integer :: n_tasks, k + integer, allocatable :: i_generator(:), subset(:) + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + rss = memory_of_int(pt2_n_tasks_max)*67.d0 + rss += memory_of_double(pt2_n_tasks_max)*(N_states*3) + call check_mem(rss,irp_here) + + allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) + allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) + allocate(variance(N_states,pt2_n_tasks_max)) + allocate(norm(N_states,pt2_n_tasks_max)) + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + integer, external :: connect_to_taskserver + if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + return + endif + + zmq_socket_push = new_zmq_push_socket(thread) + + buf%N = 0 + n_tasks = 1 + call create_selection_buffer(0, 0, buf) + + done = .False. + n_tasks = 1 + do while (.not.done) + + n_tasks = max(1,n_tasks) + n_tasks = min(pt2_n_tasks_max,n_tasks) + + integer, external :: get_tasks_from_taskserver + if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then + exit + endif + done = task_id(n_tasks) == 0 + if (done) n_tasks = n_tasks-1 + if (n_tasks == 0) exit + + do k=1,n_tasks + read (task(k),*) subset(k), i_generator(k) + enddo + + double precision :: time0, time1 + call wall_time(time0) + do k=1,n_tasks + pt2(:,k) = 0.d0 + variance(:,k) = 0.d0 + norm(:,k) = 0.d0 + buf%cur = 0 +!double precision :: time2 +!call wall_time(time2) + call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),buf,subset(k),pt2_F(i_generator(k))) +!call wall_time(time1) +!print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) + enddo + call wall_time(time1) +!print *, i_generator(1), time1-time0, n_tasks + + integer, external :: tasks_done_to_taskserver + if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then + done = .true. + endif + call push_pt2_results(zmq_socket_push, i_generator, pt2, variance, norm, task_id, n_tasks) + + ! Try to adjust n_tasks around nproc/8 seconds per job + n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/8) / (time1 - time0 + 1.d0))) + end do + + integer, external :: disconnect_from_taskserver + do i=1,300 + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit + call sleep(1) + print *, 'Retry disconnect...' + end do + + call end_zmq_push_socket(zmq_socket_push,thread) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call delete_selection_buffer(buf) +end subroutine + + +subroutine push_pt2_results(zmq_socket_push, index, pt2, variance, norm, task_id, n_tasks) + use f77_zmq + use selection_types + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + double precision, intent(in) :: pt2(N_states,n_tasks) + double precision, intent(in) :: variance(N_states,n_tasks) + double precision, intent(in) :: norm(N_states,n_tasks) + integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks) + integer :: rc + + rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) + if (rc == -1) then + return + endif + if(rc /= 4) stop 'push' + + + rc = f77_zmq_send( zmq_socket_push, index, 4*n_tasks, ZMQ_SNDMORE) + if (rc == -1) then + return + endif + if(rc /= 4*n_tasks) stop 'push' + + + rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states*n_tasks, ZMQ_SNDMORE) + if (rc == -1) then + return + endif + if(rc /= 8*N_states*n_tasks) stop 'push' + + rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states*n_tasks, ZMQ_SNDMORE) + if (rc == -1) then + return + endif + if(rc /= 8*N_states*n_tasks) stop 'push' + + rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states*n_tasks, ZMQ_SNDMORE) + if (rc == -1) then + return + endif + if(rc /= 8*N_states*n_tasks) stop 'push' + + rc = f77_zmq_send( zmq_socket_push, task_id, n_tasks*4, 0) + if (rc == -1) then + return + endif + if(rc /= 4*n_tasks) stop 'push' + +! Activate is zmq_socket_push is a REQ +IRP_IF ZMQ_PUSH +IRP_ELSE + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) + if (rc == -1) then + return + endif + if ((rc /= 2).and.(ok(1:2) /= 'ok')) then + print *, irp_here//': error in receiving ok' + stop -1 + endif +IRP_ENDIF + +end subroutine + + +subroutine pull_pt2_results(zmq_socket_pull, index, pt2, variance, norm, task_id, n_tasks) + use f77_zmq + use selection_types + implicit none + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + double precision, intent(inout) :: pt2(N_states,*) + double precision, intent(inout) :: variance(N_states,*) + double precision, intent(inout) :: norm(N_states,*) + integer, intent(out) :: index(*) + integer, intent(out) :: n_tasks, task_id(*) + integer :: rc, rn, i + + rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + endif + if(rc /= 4) stop 'pull' + + rc = f77_zmq_recv( zmq_socket_pull, index, 4*n_tasks, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + endif + if(rc /= 4*n_tasks) stop 'pull' + + rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8*n_tasks, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + endif + if(rc /= 8*N_states*n_tasks) stop 'pull' + + rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8*n_tasks, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + endif + if(rc /= 8*N_states*n_tasks) stop 'pull' + + rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8*n_tasks, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + endif + if(rc /= 8*N_states*n_tasks) stop 'pull' + + rc = f77_zmq_recv( zmq_socket_pull, task_id, n_tasks*4, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + endif + if(rc /= 4*n_tasks) stop 'pull' + +! Activate is zmq_socket_pull is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) + if (rc == -1) then + n_tasks = 1 + task_id(1) = 0 + endif + if (rc /= 2) then + print *, irp_here//': error in sending ok' + stop -1 + endif +IRP_ENDIF + +end subroutine + diff --git a/src/fci/run_selection_slave.irp.f b/src/fci/run_selection_slave.irp.f new file mode 100644 index 00000000..bd171fbd --- /dev/null +++ b/src/fci/run_selection_slave.irp.f @@ -0,0 +1,254 @@ +subroutine run_selection_slave(thread,iproc,energy) + use f77_zmq + use selection_types + implicit none + + double precision, intent(in) :: energy(N_states) + integer, intent(in) :: thread, iproc + integer :: rc, i + + integer :: worker_id, task_id(1), ctask, ltask + character*(512) :: task + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + type(selection_buffer) :: buf, buf2 + logical :: done, buffer_ready + double precision :: pt2(N_states) + double precision :: variance(N_states) + double precision :: norm(N_states) + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order N_int pt2_F + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + integer, external :: connect_to_taskserver + if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + return + endif + + zmq_socket_push = new_zmq_push_socket(thread) + + buf%N = 0 + buffer_ready = .False. + ctask = 1 + pt2(:) = 0d0 + variance(:) = 0d0 + norm(:) = 0.d0 + + do + integer, external :: get_task_from_taskserver + if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) == -1) then + exit + endif + done = task_id(ctask) == 0 + if (done) then + ctask = ctask - 1 + else + integer :: i_generator, N, subset + read(task,*) subset, i_generator, N + if(buf%N == 0) then + ! Only first time + call create_selection_buffer(N, N*2, buf) + call create_selection_buffer(N, N*2, buf2) + buffer_ready = .True. + else + ASSERT (N == buf%N) + end if + call select_connected(i_generator,energy,pt2,variance,norm,buf,subset,pt2_F(i_generator)) + endif + + integer, external :: task_done_to_taskserver + + if(done .or. ctask == size(task_id)) then + do i=1, ctask + if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then + call sleep(1) + if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then + ctask = 0 + done = .true. + exit + endif + endif + end do + if(ctask > 0) then + call sort_selection_buffer(buf) + call merge_selection_buffers(buf,buf2) + call push_selection_results(zmq_socket_push, pt2, variance, norm, buf, task_id(1), ctask) + buf%mini = buf2%mini + pt2(:) = 0d0 + variance(:) = 0d0 + norm(:) = 0d0 + buf%cur = 0 + end if + ctask = 0 + end if + + if(done) exit + ctask = ctask + 1 + end do + + + integer, external :: disconnect_from_taskserver + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then + continue + endif + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + if (buffer_ready) then + call delete_selection_buffer(buf) + call delete_selection_buffer(buf2) + endif +end subroutine + + +subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_id, ntask) + use f77_zmq + use selection_types + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + double precision, intent(in) :: pt2(N_states) + double precision, intent(in) :: variance(N_states) + double precision, intent(in) :: norm(N_states) + type(selection_buffer), intent(inout) :: b + integer, intent(in) :: ntask, task_id(*) + integer :: rc + + rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE) + if(rc /= 4) then + print *, 'f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)' + endif + + if (b%cur > 0) then + + rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) then + print *, 'f77_zmq_send( zmq_socket_push, pt2, 8*N_states, ZMQ_SNDMORE)' + endif + + rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) then + print *, 'f77_zmq_send( zmq_socket_push, variance, 8*N_states, ZMQ_SNDMORE)' + endif + + rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) then + print *, 'f77_zmq_send( zmq_socket_push, norm, 8*N_states, ZMQ_SNDMORE)' + endif + + rc = f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE) + if(rc /= 8*b%cur) then + print *, 'f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE)' + endif + + rc = f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE) + if(rc /= bit_kind*N_int*2*b%cur) then + print *, 'f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE)' + endif + + endif + + rc = f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE) + if(rc /= 4) then + print *, 'f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE)' + endif + + rc = f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0) + if(rc /= 4*ntask) then + print *, 'f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0)' + endif + +! Activate is zmq_socket_push is a REQ +IRP_IF ZMQ_PUSH +IRP_ELSE + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) + if ((rc /= 2).and.(ok(1:2) /= 'ok')) then + print *, irp_here//': error in receiving ok' + stop -1 + endif +IRP_ENDIF + +end subroutine + + +subroutine pull_selection_results(zmq_socket_pull, pt2, variance, norm, val, det, N, task_id, ntask) + use f77_zmq + use selection_types + implicit none + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + double precision, intent(inout) :: pt2(N_states) + double precision, intent(inout) :: variance(N_states) + double precision, intent(inout) :: norm(N_states) + double precision, intent(out) :: val(*) + integer(bit_kind), intent(out) :: det(N_int, 2, *) + integer, intent(out) :: N, ntask, task_id(*) + integer :: rc, rn, i + + rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) + if(rc /= 4) then + print *, 'f77_zmq_recv( zmq_socket_pull, N, 4, 0)' + endif + + if (N>0) then + rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0) + if(rc /= 8*N_states) then + print *, 'f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0)' + endif + + rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8, 0) + if(rc /= 8*N_states) then + print *, 'f77_zmq_recv( zmq_socket_pull, variance, N_states*8, 0)' + endif + + rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8, 0) + if(rc /= 8*N_states) then + print *, 'f77_zmq_recv( zmq_socket_pull, norm, N_states*8, 0)' + endif + + rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0) + if(rc /= 8*N) then + print *, 'f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)' + endif + + rc = f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0) + if(rc /= bit_kind*N_int*2*N) then + print *, 'f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0)' + endif + else + pt2(:) = 0.d0 + endif + + rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0) + if(rc /= 4) then + print *, 'f77_zmq_recv( zmq_socket_pull, ntask, 4, 0)' + endif + + rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0) + if(rc /= 4*ntask) then + print *, 'f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0)' + endif + +! Activate is zmq_socket_pull is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) + if (rc /= 2) then + print *, irp_here//': error in sending ok' + stop -1 + endif +IRP_ENDIF +end subroutine + + + diff --git a/src/fci/selection.irp.f b/src/fci/selection.irp.f new file mode 100644 index 00000000..4b3a989c --- /dev/null +++ b/src/fci/selection.irp.f @@ -0,0 +1,1371 @@ +use bitmasks + + +subroutine get_mask_phase(det1, pm, Nint) + use bitmasks + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2) + integer(bit_kind), intent(out) :: pm(Nint,2) + integer(bit_kind) :: tmp + integer :: ispin, i + do ispin=1,2 + tmp = 0_8 + do i=1,Nint + pm(i,ispin) = ieor(det1(i,ispin), shiftl(det1(i,ispin), 1)) + pm(i,ispin) = ieor(pm(i,ispin), shiftl(pm(i,ispin), 2)) + pm(i,ispin) = ieor(pm(i,ispin), shiftl(pm(i,ispin), 4)) + pm(i,ispin) = ieor(pm(i,ispin), shiftl(pm(i,ispin), 8)) + pm(i,ispin) = ieor(pm(i,ispin), shiftl(pm(i,ispin), 16)) + pm(i,ispin) = ieor(pm(i,ispin), shiftl(pm(i,ispin), 32)) + pm(i,ispin) = ieor(pm(i,ispin), tmp) + if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp) + end do + end do + +end subroutine + + +subroutine select_connected(i_generator,E0,pt2,variance,norm,b,subset,csubset) + use bitmasks + use selection_types + implicit none + integer, intent(in) :: i_generator, subset, csubset + type(selection_buffer), intent(inout) :: b + double precision, intent(inout) :: pt2(N_states) + double precision, intent(inout) :: variance(N_states) + double precision, intent(inout) :: norm(N_states) + integer :: k,l + double precision, intent(in) :: E0(N_states) + + integer(bit_kind) :: hole_mask(N_int,2), particle_mask(N_int,2) + + double precision, allocatable :: fock_diag_tmp(:,:) + + allocate(fock_diag_tmp(2,mo_tot_num+1)) + + call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int) + + do l=1,N_generators_bitmask + do k=1,N_int + hole_mask(k,1) = iand(generators_bitmask(k,1,s_hole,l), psi_det_generators(k,1,i_generator)) + hole_mask(k,2) = iand(generators_bitmask(k,2,s_hole,l), psi_det_generators(k,2,i_generator)) + particle_mask(k,1) = iand(generators_bitmask(k,1,s_part,l), not(psi_det_generators(k,1,i_generator)) ) + particle_mask(k,2) = iand(generators_bitmask(k,2,s_part,l), not(psi_det_generators(k,2,i_generator)) ) + enddo + call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm,b,subset,csubset) + enddo + deallocate(fock_diag_tmp) +end subroutine + + +double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint) + use bitmasks + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: phasemask(Nint,2) + integer, intent(in) :: s1, s2, h1, h2, p1, p2 + logical :: change + integer :: np + double precision, save :: res(0:1) = (/1d0, -1d0/) + + integer :: h1_int, h2_int + integer :: p1_int, p2_int + integer :: h1_bit, h2_bit + integer :: p1_bit, p2_bit + h1_int = shiftr(h1-1,bit_kind_shift)+1 + h1_bit = h1 - shiftl(h1_int-1,bit_kind_shift)-1 + + h2_int = shiftr(h2-1,bit_kind_shift)+1 + h2_bit = h2 - shiftl(h2_int-1,bit_kind_shift)-1 + + p1_int = shiftr(p1-1,bit_kind_shift)+1 + p1_bit = p1 - shiftl(p1_int-1,bit_kind_shift)-1 + + p2_int = shiftr(p2-1,bit_kind_shift)+1 + p2_bit = p2 - shiftl(p2_int-1,bit_kind_shift)-1 + + + ! Put the phasemask bits at position 0, and add them all + h1_bit = int(shiftr(phasemask(h1_int,s1),h1_bit)) + p1_bit = int(shiftr(phasemask(p1_int,s1),p1_bit)) + h2_bit = int(shiftr(phasemask(h2_int,s2),h2_bit)) + p2_bit = int(shiftr(phasemask(p2_int,s2),p2_bit)) + + np = h1_bit + p1_bit + h2_bit + p2_bit + + if(p1 < h1) np = np + 1 + if(p2 < h2) np = np + 1 + + if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1 + get_phase_bi = res(iand(np,1)) +end + + + +subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_tot_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_tot_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, j, k, h1, h2, p1, p2, sfix, hfix, pfix, hmob, pmob, puti + double precision :: hij + double precision, external :: get_phase_bi, mo_bielec_integral + + integer, parameter :: turn3_2(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + integer, parameter :: turn2(2) = (/2,1/) + + if(h(0,sp) == 2) then + h1 = h(1, sp) + h2 = h(2, sp) + do i=1,3 + puti = p(i, sp) + if(bannedOrb(puti)) cycle + p1 = p(turn3_2(1,i), sp) + p2 = p(turn3_2(2,i), sp) + hij = mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2, p1, h1, h2) + hij = hij * get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2, N_int) + do k=1,N_states + vect(k,puti) = vect(k,puti) + hij * coefs(k) + enddo + end do + else if(h(0,sp) == 1) then + sfix = turn2(sp) + hfix = h(1,sfix) + pfix = p(1,sfix) + hmob = h(1,sp) + do j=1,2 + puti = p(j, sp) + if(bannedOrb(puti)) cycle + pmob = p(turn2(j), sp) + hij = mo_bielec_integral(pmob, pfix, hmob, hfix) + hij = hij * get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix, N_int) + do k=1,N_states + vect(k,puti) = vect(k,puti) + hij * coefs(k) + enddo + end do + else + puti = p(1,sp) + if(.not. bannedOrb(puti)) then + sfix = turn2(sp) + p1 = p(1,sfix) + p2 = p(2,sfix) + h1 = h(1,sfix) + h2 = h(2,sfix) + hij = (mo_bielec_integral(p1,p2,h1,h2) - mo_bielec_integral(p2,p1,h1,h2)) + hij = hij * get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2, N_int) + do k=1,N_states + vect(k,puti) = vect(k,puti) + hij * coefs(k) + enddo + end if + end if +end + + + +subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_tot_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_tot_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, hole, p1, p2, sh, k + logical :: ok + + logical, allocatable :: lbanned(:) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + double precision, external :: get_phase_bi, mo_bielec_integral + + allocate (lbanned(mo_tot_num)) + lbanned = bannedOrb + sh = 1 + if(h(0,2) == 1) sh = 2 + hole = h(1, sh) + lbanned(p(1,sp)) = .true. + if(p(0,sp) == 2) lbanned(p(2,sp)) = .true. + !print *, "SPm1", sp, sh + + p1 = p(1, sp) + + if(sp == sh) then + p2 = p(2, sp) + lbanned(p2) = .true. + + + double precision :: hij_cache(mo_tot_num,2) + call get_mo_bielec_integrals(hole,p1,p2,mo_tot_num,hij_cache(1,1),mo_integrals_map) + call get_mo_bielec_integrals(hole,p2,p1,mo_tot_num,hij_cache(1,2),mo_integrals_map) + + do i=1,hole-1 + if(lbanned(i)) cycle + hij = hij_cache(i,1)-hij_cache(i,2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, sp, sp, i, p1, hole, p2, N_int) + do k=1,N_states + vect(k,i) = vect(k,i) + hij * coefs(k) + enddo + endif + end do + do i=hole+1,mo_tot_num + if(lbanned(i)) cycle + hij = hij_cache(i,2)-hij_cache(i,1) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, sp, sp, hole, p1, i, p2, N_int) + do k=1,N_states + vect(k,i) = vect(k,i) + hij * coefs(k) + enddo + endif + end do + + call apply_particle(mask, sp, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + do k=1,N_states + vect(k,p2) = vect(k,p2) + hij * coefs(k) + enddo + else + p2 = p(1, sh) + call get_mo_bielec_integrals(hole,p1,p2,mo_tot_num,hij_cache(1,1),mo_integrals_map) + do i=1,mo_tot_num + if(lbanned(i)) cycle + hij = hij_cache(i,1) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, sp, sh, i, p1, hole, p2, N_int) + do k=1,N_states + vect(k,i) = vect(k,i) + hij * coefs(k) + enddo + endif + end do + end if + deallocate(lbanned) + + call apply_particle(mask, sp, p1, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + do k=1,N_states + vect(k,p1) = vect(k,p1) + hij * coefs(k) + enddo +end + + +subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_tot_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_tot_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i,k + logical :: ok + + logical, allocatable :: lbanned(:) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + + allocate(lbanned(mo_tot_num)) + lbanned = bannedOrb + lbanned(p(1,sp)) = .true. + do i=1,mo_tot_num + if(lbanned(i)) cycle + call apply_particle(mask, sp, i, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + do k=1,N_states + vect(k,i) = vect(k,i) + hij * coefs(k) + enddo + end do + deallocate(lbanned) +end + + +subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm,buf,subset,csubset) + use bitmasks + use selection_types + implicit none + BEGIN_DOC +! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted + END_DOC + + integer, intent(in) :: i_generator, subset, csubset + integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) + double precision, intent(in) :: fock_diag_tmp(mo_tot_num) + double precision, intent(in) :: E0(N_states) + double precision, intent(inout) :: pt2(N_states) + double precision, intent(inout) :: variance(N_states) + double precision, intent(inout) :: norm(N_states) + type(selection_buffer), intent(inout) :: buf + + integer :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii + integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2) + logical :: fullMatch, ok + + integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2) + integer,allocatable :: preinteresting(:), prefullinteresting(:), interesting(:), fullinteresting(:) + integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) + logical, allocatable :: banned(:,:,:), bannedOrb(:,:) + + double precision, allocatable :: mat(:,:,:) + + logical :: monoAdo, monoBdo + integer :: maskInd + + integer(bit_kind), allocatable:: preinteresting_det(:,:,:) + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + rss = memory_of_int( (8*N_int+5)*N_det + N_det_alpha_unique + 4*N_int*N_det_selectors) + rss += memory_of_double(mo_tot_num*mo_tot_num*(N_states+1)) + call check_mem(rss,irp_here) + allocate (preinteresting_det(N_int,2,N_det)) + + monoAdo = .true. + monoBdo = .true. + + + do k=1,N_int + hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) + hole (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2)) + enddo + + + integer :: N_holes(2), N_particles(2) + integer :: hole_list(N_int*bit_kind_size,2) + integer :: particle_list(N_int*bit_kind_size,2) + + call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) + call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + + integer :: l_a, nmax, idx + integer, allocatable :: indices(:), exc_degree(:), iorder(:) + allocate (indices(N_det), & + exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order + + k=1 + do i=1,N_det_alpha_unique + call get_excitation_degree_spin(psi_det_alpha_unique(1,i), & + psi_det_generators(1,1,i_generator), exc_degree(i), N_int) + enddo + + do j=1,N_det_beta_unique + call get_excitation_degree_spin(psi_det_beta_unique(1,j), & + psi_det_generators(1,2,i_generator), nt, N_int) + if (nt > 2) cycle + do l_a=psi_bilinear_matrix_columns_loc(j), psi_bilinear_matrix_columns_loc(j+1)-1 + i = psi_bilinear_matrix_rows(l_a) + if (nt + exc_degree(i) <= 4) then + idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a)) + if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle + indices(k) = idx + k=k+1 + endif + enddo + enddo + + do i=1,N_det_beta_unique + call get_excitation_degree_spin(psi_det_beta_unique(1,i), & + psi_det_generators(1,2,i_generator), exc_degree(i), N_int) + enddo + + do j=1,N_det_alpha_unique + call get_excitation_degree_spin(psi_det_alpha_unique(1,j), & + psi_det_generators(1,1,i_generator), nt, N_int) + if (nt > 1) cycle + do l_a=psi_bilinear_matrix_transp_rows_loc(j), psi_bilinear_matrix_transp_rows_loc(j+1)-1 + i = psi_bilinear_matrix_transp_columns(l_a) + if (exc_degree(i) < 3) cycle + if (nt + exc_degree(i) <= 4) then + idx = psi_det_sorted_order( & + psi_bilinear_matrix_order( & + psi_bilinear_matrix_transp_order(l_a))) + if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle + indices(k) = idx + k=k+1 + endif + enddo + enddo + + deallocate(exc_degree) + nmax=k-1 + + allocate(iorder(nmax)) + do i=1,nmax + iorder(i) = i + enddo + call isort(indices,iorder,nmax) + deallocate(iorder) + + allocate(preinteresting(0:N_det), prefullinteresting(0:N_det), & + interesting(0:N_det), fullinteresting(0:N_det)) + preinteresting(0) = 0 + prefullinteresting(0) = 0 + + do i=1,N_int + negMask(i,1) = not(psi_det_generators(i,1,i_generator)) + negMask(i,2) = not(psi_det_generators(i,2,i_generator)) + end do + + do k=1,nmax + i = indices(k) + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + do j=2,N_int + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt <= 4) then + if(i <= N_det_selectors) then + preinteresting(0) = preinteresting(0) + 1 + preinteresting(preinteresting(0)) = i + do j=1,N_int + preinteresting_det(j,1,preinteresting(0)) = psi_det_sorted(j,1,i) + preinteresting_det(j,2,preinteresting(0)) = psi_det_sorted(j,2,i) + enddo + else if(nt <= 2) then + prefullinteresting(0) = prefullinteresting(0) + 1 + prefullinteresting(prefullinteresting(0)) = i + end if + end if + end do + deallocate(indices) + + allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det)) + allocate(banned(mo_tot_num, mo_tot_num,2), bannedOrb(mo_tot_num, 2)) + allocate (mat(N_states, mo_tot_num, mo_tot_num)) + maskInd = -1 + + integer :: nb_count, maskInd_save + logical :: monoBdo_save + logical :: found + do s1=1,2 + do i1=N_holes(s1),1,-1 ! Generate low excitations first + + found = .False. + monoBdo_save = monoBdo + maskInd_save = maskInd + do s2=s1,2 + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=N_holes(s2),ib,-1 + maskInd = maskInd + 1 + if(mod(maskInd, csubset) == (subset-1)) then + found = .True. + end if + enddo + if(s1 /= s2) monoBdo = .false. + enddo + + if (.not.found) cycle + monoBdo = monoBdo_save + maskInd = maskInd_save + + h1 = hole_list(i1,s1) + call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) + + negMask = not(pmask) + + interesting(0) = 0 + fullinteresting(0) = 0 + + do ii=1,preinteresting(0) + select case (N_int) + case (1) + mobMask(1,1) = iand(negMask(1,1), preinteresting_det(1,1,ii)) + mobMask(1,2) = iand(negMask(1,2), preinteresting_det(1,2,ii)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + case (2) + mobMask(1:2,1) = iand(negMask(1:2,1), preinteresting_det(1:2,1,ii)) + mobMask(1:2,2) = iand(negMask(1:2,2), preinteresting_det(1:2,2,ii)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + & + popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2)) + case (3) + mobMask(1:3,1) = iand(negMask(1:3,1), preinteresting_det(1:3,1,ii)) + mobMask(1:3,2) = iand(negMask(1:3,2), preinteresting_det(1:3,2,ii)) + nt = 0 + do j=3,1,-1 + if (mobMask(j,1) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + end do + case (4) + mobMask(1:4,1) = iand(negMask(1:4,1), preinteresting_det(1:4,1,ii)) + mobMask(1:4,2) = iand(negMask(1:4,2), preinteresting_det(1:4,2,ii)) + nt = 0 + do j=4,1,-1 + if (mobMask(j,1) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + end do + case default + mobMask(1:N_int,1) = iand(negMask(1:N_int,1), preinteresting_det(1:N_int,1,ii)) + mobMask(1:N_int,2) = iand(negMask(1:N_int,2), preinteresting_det(1:N_int,2,ii)) + nt = 0 + do j=N_int,1,-1 + if (mobMask(j,1) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 1)) + if (nt > 4) exit + endif + if (mobMask(j,2) /= 0_bit_kind) then + nt = nt+ popcnt(mobMask(j, 2)) + if (nt > 4) exit + endif + end do + end select + + if(nt <= 4) then + i = preinteresting(ii) + interesting(0) = interesting(0) + 1 + interesting(interesting(0)) = i + minilist(1,1,interesting(0)) = preinteresting_det(1,1,ii) + minilist(1,2,interesting(0)) = preinteresting_det(1,2,ii) + do j=2,N_int + minilist(j,1,interesting(0)) = preinteresting_det(j,1,ii) + minilist(j,2,interesting(0)) = preinteresting_det(j,2,ii) + enddo + if(nt <= 2) then + fullinteresting(0) = fullinteresting(0) + 1 + fullinteresting(fullinteresting(0)) = i + fullminilist(1,1,fullinteresting(0)) = preinteresting_det(1,1,ii) + fullminilist(1,2,fullinteresting(0)) = preinteresting_det(1,2,ii) + do j=2,N_int + fullminilist(j,1,fullinteresting(0)) = preinteresting_det(j,1,ii) + fullminilist(j,2,fullinteresting(0)) = preinteresting_det(j,2,ii) + enddo + end if + end if + + end do + + do ii=1,prefullinteresting(0) + i = prefullinteresting(ii) + nt = 0 + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + if (nt > 2) cycle + do j=N_int,2,-1 + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + if (nt > 2) exit + end do + + if(nt <= 2) then + fullinteresting(0) = fullinteresting(0) + 1 + fullinteresting(fullinteresting(0)) = i + fullminilist(1,1,fullinteresting(0)) = psi_det_sorted(1,1,i) + fullminilist(1,2,fullinteresting(0)) = psi_det_sorted(1,2,i) + do j=2,N_int + fullminilist(j,1,fullinteresting(0)) = psi_det_sorted(j,1,i) + fullminilist(j,2,fullinteresting(0)) = psi_det_sorted(j,2,i) + enddo + end if + end do + + do s2=s1,2 + sp = s1 + + if(s1 /= s2) sp = 3 + + ib = 1 + if(s1 == s2) ib = i1+1 + monoAdo = .true. + do i2=N_holes(s2),ib,-1 ! Generate low excitations first + + h2 = hole_list(i2,s2) + call apply_hole(pmask, s2,h2, mask, ok, N_int) + banned = .false. + do j=1,mo_tot_num + bannedOrb(j, 1) = .true. + bannedOrb(j, 2) = .true. + enddo + do s3=1,2 + do i=1,N_particles(s3) + bannedOrb(particle_list(i,s3), s3) = .false. + enddo + enddo + if(s1 /= s2) then + if(monoBdo) then + bannedOrb(h1,s1) = .false. + end if + if(monoAdo) then + bannedOrb(h2,s2) = .false. + monoAdo = .false. + end if + end if + + maskInd = maskInd + 1 + if(mod(maskInd, csubset) == (subset-1)) then + + call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) + if(fullMatch) cycle + + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) + + call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf) + end if + enddo + if(s1 /= s2) monoBdo = .false. + enddo + enddo + enddo + deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) + deallocate(minilist, fullminilist, banned, bannedOrb,mat) +end subroutine + + + +subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf) + use bitmasks + use selection_types + implicit none + + integer, intent(in) :: i_generator, sp, h1, h2 + double precision, intent(in) :: mat(N_states, mo_tot_num, mo_tot_num) + logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num) + double precision, intent(in) :: fock_diag_tmp(mo_tot_num) + double precision, intent(in) :: E0(N_states) + double precision, intent(inout) :: pt2(N_states) + double precision, intent(inout) :: variance(N_states) + double precision, intent(inout) :: norm(N_states) + type(selection_buffer), intent(inout) :: buf + logical :: ok + integer :: s1, s2, p1, p2, ib, j, istate + integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) + double precision :: e_pert, delta_E, val, Hii, sum_e_pert, tmp, alpha_h_psi, coef + double precision, external :: diag_H_mat_elem_fock + double precision :: E_shift + + logical, external :: detEq + + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + E_shift = 0.d0 + + if (h0_type == "SOP") then + j = det_to_occ_pattern(i_generator) + E_shift = psi_det_Hii(i_generator) - psi_occ_pattern_Hii(j) + endif + + do p1=1,mo_tot_num + if(bannedOrb(p1, s1)) cycle + ib = 1 + if(sp /= 3) ib = p1+1 + do p2=ib,mo_tot_num + if(bannedOrb(p2, s2)) cycle + if(banned(p1,p2)) cycle + + if( sum(abs(mat(1:N_states, p1, p2))) == 0d0) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + + Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) + + sum_e_pert = 0d0 + + do istate=1,N_states + delta_E = E0(istate) - Hii + E_shift + alpha_h_psi = mat(istate, p1, p2) + val = alpha_h_psi + alpha_h_psi + tmp = dsqrt(delta_E * delta_E + val * val) + if (delta_E < 0.d0) then + tmp = -tmp + endif + e_pert = 0.5d0 * (tmp - delta_E) + coef = alpha_h_psi / delta_E + pt2(istate) = pt2(istate) + e_pert + variance(istate) = variance(istate) + alpha_h_psi * alpha_h_psi + norm(istate) = norm(istate) + coef * coef + + if (h0_type == "Variance") then + sum_e_pert = sum_e_pert - alpha_h_psi * alpha_h_psi * state_average_weight(istate) + else + sum_e_pert = sum_e_pert + e_pert * state_average_weight(istate) + endif + end do + + if(sum_e_pert <= buf%mini) then + call add_to_selection_buffer(buf, det, sum_e_pert) + end if + end do + end do +end + + +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) + use bitmasks + implicit none + + integer, intent(in) :: sp, i_gen, N_sel + integer, intent(in) :: interesting(0:N_sel) + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) + logical, intent(inout) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num, 2) + double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) + + integer :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt + integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) + integer(bit_kind) :: phasemask(N_int,2) + + PROVIDE psi_selectors_coef_transp + mat = 0d0 + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + do i=1, N_sel ! interesting(0) + !i = interesting(ii) + if (interesting(i) < 0) then + stop 'prefetch interesting(i)' + endif + + + mobMask(1,1) = iand(negMask(1,1), det(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), det(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + + if(nt > 4) cycle + + do j=2,N_int + mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) + nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt > 4) cycle + + if (interesting(i) == i_gen) then + if(sp == 3) then + do k=1,mo_tot_num + do j=1,mo_tot_num + banned(j,k,2) = banned(k,j,1) + enddo + enddo + else + do k=1,mo_tot_num + do l=k+1,mo_tot_num + banned(l,k,1) = banned(k,l,1) + end do + end do + end if + end if + + call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) + + if (interesting(i) >= i_gen) then + perMask(1,1) = iand(mask(1,1), not(det(1,1,i))) + perMask(1,2) = iand(mask(1,2), not(det(1,2,i))) + do j=2,N_int + perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) + perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) + end do + + call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int) + call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int) + + call get_mask_phase(psi_det_sorted(1,1,interesting(i)), phasemask,N_int) + if(nt == 4) then + call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + else if(nt == 3) then + call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + else + call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + end if + else + if(nt == 4) call past_d2(banned, p, sp) + if(nt == 3) call past_d1(bannedOrb, p) + end if + end do + +end + + +subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi, mo_bielec_integral + + integer :: i, j, k, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: hij, phase + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) + + ma = sp + if(p(0,1) > p(0,2)) ma = 1 + if(p(0,1) < p(0,2)) ma = 2 + mi = mod(ma, 2) + 1 + + if(sp == 3) then + if(ma == 2) bant = 2 + + if(tip == 3) then + puti = p(1, mi) + if(bannedOrb(puti, mi)) return + do i = 1, 3 + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + h1 = h(1, ma) + h2 = h(2, ma) + + hij = (mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + if(ma == 1) then + do k=1,N_states + mat(k, putj, puti) = mat(k, putj, puti) +coefs(k) * hij + enddo + else + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij + enddo + end if + end do + else + h1 = h(1,1) + h2 = h(1,2) + do j = 1,2 + putj = p(j, 2) + if(bannedOrb(putj, 2)) cycle + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + + if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle + p1 = p(turn2(i), 1) + + hij = mo_bielec_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij + enddo + end do + end do + end if + + else + if(tip == 0) then + h1 = h(1, ma) + h2 = h(2, ma) + do i=1,3 + puti = p(i, ma) + if(bannedOrb(puti,ma)) cycle + do j=i+1,4 + putj = p(j, ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) + hij = (mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij + enddo + end do + end do + else if(tip == 3) then + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + do i=1,3 + puti = p(turn3(1,i), ma) + if(bannedOrb(puti,ma)) cycle + putj = p(turn3(2,i), ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + + hij = mo_bielec_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) + do k=1,N_states + mat(k, min(puti, putj), max(puti, putj)) = mat(k, min(puti, putj), max(puti, putj)) + coefs(k) * hij + enddo + end do + else ! tip == 4 + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + hij = (mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij + enddo + end if + end if + end if +end + + +subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision :: hij, tmp_row(N_states, mo_tot_num), tmp_row2(N_states, mo_tot_num) + double precision, external :: get_phase_bi, mo_bielec_integral + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: hfix, pfix, h1, h2, p1, p2, ib, k + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + double precision, allocatable :: hij_cache(:,:) + PROVIDE mo_integrals_map N_int + + allocate (lbanned(mo_tot_num, 2)) + allocate (hij_cache(mo_tot_num,2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then + call get_mo_bielec_integrals(hfix,p1,p2,mo_tot_num,hij_cache(1,1),mo_integrals_map) + call get_mo_bielec_integrals(hfix,p2,p1,mo_tot_num,hij_cache(1,2),mo_integrals_map) + tmp_row = 0d0 + do putj=1, hfix-1 + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + do k=1,N_states + tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k) + enddo + endif + end do + do putj=hfix+1, mo_tot_num + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + endif + end do + + if(ma == 1) then + mat(1:N_states,1:mo_tot_num,puti) = mat(1:N_states,1:mo_tot_num,puti) + tmp_row(1:N_states,1:mo_tot_num) + else + mat(1:N_states,puti,1:mo_tot_num) = mat(1:N_states,puti,1:mo_tot_num) + tmp_row(1:N_states,1:mo_tot_num) + end if + end if + + !MOVE MI + pfix = p(1,mi) + tmp_row = 0d0 + tmp_row2 = 0d0 + call get_mo_bielec_integrals(hfix,pfix,p1,mo_tot_num,hij_cache(1,1),mo_integrals_map) + call get_mo_bielec_integrals(hfix,pfix,p2,mo_tot_num,hij_cache(1,2),mo_integrals_map) + do puti=1,mo_tot_num + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + do k=1,N_states + tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k) ! HOTSPOT + enddo + endif + end if + + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,1) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + do k=1,N_states + tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k) + enddo + endif + end if + end do + + if(mi == 1) then + mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) + mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) + else + mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:) + mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:) + end if + + else ! sp /= 3 + + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) + call get_mo_bielec_integrals(hfix,p1,p2,mo_tot_num,hij_cache(1,1),mo_integrals_map) + call get_mo_bielec_integrals(hfix,p2,p1,mo_tot_num,hij_cache(1,2),mo_integrals_map) + tmp_row = 0d0 + do putj=1,hfix-1 + if(lbanned(putj,ma)) cycle + if(banned(putj,puti,1)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + endif + end do + do putj=hfix+1,mo_tot_num + if(lbanned(putj,ma)) cycle + if(banned(putj,puti,1)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + endif + end do + + mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) + mat(:, puti, puti:) = mat(:, puti,puti:) + tmp_row(:,puti:) + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) + tmp_row = 0d0 + tmp_row2 = 0d0 + call get_mo_bielec_integrals(hfix,p1,pfix,mo_tot_num,hij_cache(1,1),mo_integrals_map) + call get_mo_bielec_integrals(hfix,p2,pfix,mo_tot_num,hij_cache(1,2),mo_integrals_map) + do puti=1,mo_tot_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,1) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + do k=1,N_states + tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k) + enddo + endif + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + do k=1,N_states + tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k) + enddo + endif + end if + end do + mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) + mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:) + mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) + mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:) + end if + end if + deallocate(lbanned,hij_cache) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + p1 = p(i1,s1) + if(bannedOrb(p1, s1)) cycle + do i2=ib,p(0,s2) + p2 = p(i2,s2) + if(bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do +end + + + + +subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, k, s, h1, h2, p1, p2, puti, putj + double precision :: hij, phase + double precision, external :: get_phase_bi, mo_bielec_integral + logical :: ok + + integer :: bant + double precision, allocatable :: hij_cache(:,:) + allocate (hij_cache(mo_tot_num,2)) + bant = 1 + + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + + do p2=1, mo_tot_num + if(bannedOrb(p2,2)) cycle + call get_mo_bielec_integrals(p2,h1,h2,mo_tot_num,hij_cache(1,1),mo_integrals_map) + do p1=1, mo_tot_num + if(bannedOrb(p1, 1)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = hij_cache(p1,1) * phase + end if + do k=1,N_states + mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij ! HOTSPOT + enddo + end do + end do + + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_tot_num + if(bannedOrb(puti, sp)) cycle + call get_mo_bielec_integrals(puti,p2,p1,mo_tot_num,hij_cache(1,1),mo_integrals_map) + call get_mo_bielec_integrals(puti,p1,p2,mo_tot_num,hij_cache(1,2),mo_integrals_map) + do putj=puti+1, mo_tot_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + if (hij /= 0.d0) then + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij + enddo + endif + else + hij = hij_cache(putj,1) - hij_cache(putj,2) + if (hij /= 0.d0) then + hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + do k=1,N_states + mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij + enddo + endif + end if + end do + end do + end if + deallocate(hij_cache) +end + + +subroutine past_d1(bannedOrb, p) + use bitmasks + implicit none + + logical, intent(inout) :: bannedOrb(mo_tot_num, 2) + integer, intent(in) :: p(0:4, 2) + integer :: i,s + + do s = 1, 2 + do i = 1, p(0, s) + bannedOrb(p(i, s), s) = .true. + end do + end do +end + + +subroutine past_d2(banned, p, sp) + use bitmasks + implicit none + + logical, intent(inout) :: banned(mo_tot_num, mo_tot_num) + integer, intent(in) :: p(0:4, 2), sp + integer :: i,j + + if(sp == 3) then + do i=1,p(0,1) + do j=1,p(0,2) + banned(p(i,1), p(j,2)) = .true. + end do + end do + else + do i=1,p(0, sp) + do j=1,i-1 + banned(p(j,sp), p(i,sp)) = .true. + banned(p(i,sp), p(j,sp)) = .true. + end do + end do + end if +end + + + +subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) + use bitmasks + implicit none + + integer, intent(in) :: i_gen, N + integer, intent(in) :: interesting(0:N) + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) + logical, intent(inout) :: banned(mo_tot_num, mo_tot_num) + logical, intent(out) :: fullMatch + + + integer :: i, j, na, nb, list(3) + integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) + + fullMatch = .false. + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + genl : do i=1, N + do j=1, N_int + if(iand(det(j,1,i), mask(j,1)) /= mask(j, 1)) cycle genl + if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl + end do + + if(interesting(i) < i_gen) then + fullMatch = .true. + return + end if + + do j=1, N_int + myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) + myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) + end do + + call bitstring_to_list_in_selection(myMask(1,1), list(1), na, N_int) + call bitstring_to_list_in_selection(myMask(1,2), list(na+1), nb, N_int) + banned(list(1), list(2)) = .true. + end do genl +end + + +subroutine bitstring_to_list_in_selection( string, list, n_elements, Nint) + use bitmasks + implicit none + BEGIN_DOC + ! Gives the inidices(+1) of the bits set to 1 in the bit string + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: string(Nint) + integer, intent(out) :: list(Nint*bit_kind_size) + integer, intent(out) :: n_elements + + integer :: i, ishift + integer(bit_kind) :: l + + n_elements = 0 + ishift = 2 + do i=1,Nint + l = string(i) + do while (l /= 0_bit_kind) + n_elements = n_elements+1 + list(n_elements) = ishift+popcnt(l-1_bit_kind) - popcnt(l) + l = iand(l,l-1_bit_kind) + enddo + ishift = ishift + bit_kind_size + enddo + +end diff --git a/src/fci/selection_buffer.irp.f b/src/fci/selection_buffer.irp.f new file mode 100644 index 00000000..3053213f --- /dev/null +++ b/src/fci/selection_buffer.irp.f @@ -0,0 +1,303 @@ + +subroutine create_selection_buffer(N, siz_, res) + use selection_types + implicit none + + integer, intent(in) :: N, siz_ + type(selection_buffer), intent(out) :: res + + integer :: siz + siz = max(siz_,1) + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + rss = memory_of_double(siz)*(N_int*2+1) + call check_mem(rss,irp_here) + + allocate(res%det(N_int, 2, siz), res%val(siz)) + + res%val(:) = 0d0 + res%det(:,:,:) = 0_8 + res%N = N + res%mini = 0d0 + res%cur = 0 +end subroutine + +subroutine delete_selection_buffer(b) + use selection_types + implicit none + type(selection_buffer), intent(inout) :: b + if (associated(b%det)) then + deallocate(b%det) + endif + if (associated(b%val)) then + deallocate(b%val) + endif +end + + +subroutine add_to_selection_buffer(b, det, val) + use selection_types + implicit none + + type(selection_buffer), intent(inout) :: b + integer(bit_kind), intent(in) :: det(N_int, 2) + double precision, intent(in) :: val + integer :: i + + if(b%N > 0 .and. val <= b%mini) then + b%cur += 1 + b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2) + b%val(b%cur) = val + if(b%cur == size(b%val)) then + call sort_selection_buffer(b) + end if + end if +end subroutine + +subroutine merge_selection_buffers(b1, b2) + use selection_types + implicit none + BEGIN_DOC +! Merges the selection buffers b1 and b2 into b2 + END_DOC + type(selection_buffer), intent(inout) :: b1 + type(selection_buffer), intent(inout) :: b2 + integer(bit_kind), pointer :: detmp(:,:,:) + double precision, pointer :: val(:) + integer :: i, i1, i2, k, nmwen + if (b1%cur == 0) return + do while (b1%val(b1%cur) > b2%mini) + b1%cur = b1%cur-1 + if (b1%cur == 0) then + return + endif + enddo + nmwen = min(b1%N, b1%cur+b2%cur) + double precision :: rss + double precision, external :: memory_of_double + rss = memory_of_double(size(b1%val)) + 2*N_int*memory_of_double(size(b1%det,3)) + call check_mem(rss,irp_here) + allocate( val(size(b1%val)), detmp(N_int, 2, size(b1%det,3)) ) + i1=1 + i2=1 + do i=1,nmwen + if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then + exit + else if (i1 > b1%cur) then + val(i) = b2%val(i2) + detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) + detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) + i2=i2+1 + else if (i2 > b2%cur) then + val(i) = b1%val(i1) + detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) + detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) + i1=i1+1 + else + if (b1%val(i1) <= b2%val(i2)) then + val(i) = b1%val(i1) + detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) + detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) + i1=i1+1 + else + val(i) = b2%val(i2) + detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) + detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) + i2=i2+1 + endif + endif + enddo + deallocate(b2%det, b2%val) + do i=nmwen+1,b2%N + val(i) = 0.d0 + detmp(1:N_int,1:2,i) = 0_bit_kind + enddo + b2%det => detmp + b2%val => val + b2%mini = min(b2%mini,b2%val(b2%N)) + b2%cur = nmwen +end + + +subroutine sort_selection_buffer(b) + use selection_types + implicit none + + type(selection_buffer), intent(inout) :: b + integer, allocatable :: iorder(:) + integer(bit_kind), pointer :: detmp(:,:,:) + integer :: i, nmwen + logical, external :: detEq + if (b%N == 0 .or. b%cur == 0) return + nmwen = min(b%N, b%cur) + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + rss = memory_of_int(b%cur) + 2*N_int*memory_of_double(size(b%det,3)) + call check_mem(rss,irp_here) + allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3))) + do i=1,b%cur + iorder(i) = i + end do + call dsort(b%val, iorder, b%cur) + do i=1, nmwen + detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i)) + detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i)) + end do + deallocate(b%det,iorder) + b%det => detmp + b%mini = min(b%mini,b%val(b%N)) + b%cur = nmwen +end subroutine + +subroutine make_selection_buffer_s2(b) + use selection_types + type(selection_buffer), intent(inout) :: b + + integer(bit_kind), allocatable :: o(:,:,:) + double precision, allocatable :: val(:) + + integer :: n_d + integer :: i,k,sze,n_alpha,j,n + logical :: dup + + ! Sort + integer, allocatable :: iorder(:) + integer*8, allocatable :: bit_tmp(:) + integer*8, external :: occ_pattern_search_key + integer(bit_kind), allocatable :: tmp_array(:,:,:) + logical, allocatable :: duplicate(:) + + n_d = b%cur + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + rss = (4*N_int+4)*memory_of_double(n_d) + call check_mem(rss,irp_here) + allocate(o(N_int,2,n_d), iorder(n_d), duplicate(n_d), bit_tmp(n_d), & + tmp_array(N_int,2,n_d), val(n_d) ) + + do i=1,n_d + do k=1,N_int + o(k,1,i) = ieor(b%det(k,1,i), b%det(k,2,i)) + o(k,2,i) = iand(b%det(k,1,i), b%det(k,2,i)) + enddo + iorder(i) = i + bit_tmp(i) = occ_pattern_search_key(o(1,1,i),N_int) + enddo + + deallocate(b%det) + + call i8sort(bit_tmp,iorder,n_d) + + do i=1,n_d + do k=1,N_int + tmp_array(k,1,i) = o(k,1,iorder(i)) + tmp_array(k,2,i) = o(k,2,iorder(i)) + enddo + val(i) = b%val(iorder(i)) + duplicate(i) = .False. + enddo + + ! Find duplicates + do i=1,n_d-1 + if (duplicate(i)) then + cycle + endif + j = i+1 + do while (bit_tmp(j)==bit_tmp(i)) + if (duplicate(j)) then + j+=1 + if (j>n_d) then + exit + endif + cycle + endif + dup = .True. + do k=1,N_int + if ( (tmp_array(k,1,i) /= tmp_array(k,1,j)) & + .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then + dup = .False. + exit + endif + enddo + if (dup) then +! val(i) = val(i) + val(j) + val(i) = max(val(i), val(j)) + duplicate(j) = .True. + endif + j+=1 + if (j>n_d) then + exit + endif + enddo + enddo + + deallocate (b%val) + ! Copy filtered result + integer :: n_p + n_p=0 + do i=1,n_d + if (duplicate(i)) then + cycle + endif + n_p = n_p + 1 + do k=1,N_int + o(k,1,n_p) = tmp_array(k,1,i) + o(k,2,n_p) = tmp_array(k,2,i) + enddo + val(n_p) = val(i) + enddo + + ! Sort by importance + do i=1,n_p + iorder(i) = i + end do + call dsort(val,iorder,n_p) + do i=1,n_p + do k=1,N_int + tmp_array(k,1,i) = o(k,1,iorder(i)) + tmp_array(k,2,i) = o(k,2,iorder(i)) + enddo + enddo + do i=1,n_p + do k=1,N_int + o(k,1,i) = tmp_array(k,1,i) + o(k,2,i) = tmp_array(k,2,i) + enddo + enddo + + ! Create determinants + n_d = 0 + do i=1,n_p + call occ_pattern_to_dets_size(o(1,1,i),sze,elec_alpha_num,N_int) + n_d = n_d + sze + if (n_d > b%cur) then +! if (n_d - b%cur > b%cur - n_d + sze) then +! n_d = n_d - sze +! endif + exit + endif + enddo + + rss = (4*N_int+2)*memory_of_double(n_d) + call check_mem(rss,irp_here) + allocate(b%det(N_int,2,2*n_d), b%val(2*n_d)) + k=1 + do i=1,n_p + n=n_d + call occ_pattern_to_dets_size(o(1,1,i),n,elec_alpha_num,N_int) + call occ_pattern_to_dets(o(1,1,i),b%det(1,1,k),n,elec_alpha_num,N_int) + do j=k,k+n-1 + b%val(j) = val(i) + enddo + k = k+n + if (k > n_d) exit + enddo + deallocate(o) + b%N = 2*n_d + b%cur = n_d +end + + + diff --git a/src/fci/selection_types.f90 b/src/fci/selection_types.f90 new file mode 100644 index 00000000..29e48524 --- /dev/null +++ b/src/fci/selection_types.f90 @@ -0,0 +1,9 @@ +module selection_types + type selection_buffer + integer :: N, cur + integer(8) , pointer :: det(:,:,:) + double precision, pointer :: val(:) + double precision :: mini + endtype +end module + diff --git a/src/fci/zmq_selection.irp.f b/src/fci/zmq_selection.irp.f new file mode 100644 index 00000000..ca1e87e9 --- /dev/null +++ b/src/fci/zmq_selection.irp.f @@ -0,0 +1,219 @@ +subroutine ZMQ_selection(N_in, pt2, variance, norm) + use f77_zmq + use selection_types + + implicit none + + integer(ZMQ_PTR) :: zmq_to_qp_run_socket , zmq_socket_pull + integer, intent(in) :: N_in + type(selection_buffer) :: b + integer :: i, N + integer, external :: omp_get_thread_num + double precision, intent(out) :: pt2(N_states) + double precision, intent(out) :: variance(N_states) + double precision, intent(out) :: norm(N_states) + +! PROVIDE psi_det psi_coef N_det qp_max_mem N_states pt2_F s2_eig N_det_generators + + N = max(N_in,1) + if (.True.) then + PROVIDE pt2_e0_denominator nproc + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order + + call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'selection') + + integer, external :: zmq_put_psi + integer, external :: zmq_put_N_det_generators + integer, external :: zmq_put_N_det_selectors + integer, external :: zmq_put_dvector + + if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then + stop 'Unable to put psi on ZMQ server' + endif + if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_det_generators on ZMQ server' + endif + if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_det_selectors on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then + stop 'Unable to put energy on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) then + stop 'Unable to put threshold_selectors on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then + stop 'Unable to put state_average_weight on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) then + stop 'Unable to put threshold_generators on ZMQ server' + endif + call create_selection_buffer(N, N*2, b) + endif + + integer, external :: add_task_to_taskserver + character(len=100000) :: task + integer :: j,k,ipos + ipos=1 + task = ' ' + + do i= 1, N_det_generators + do j=1,pt2_F(i) + write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, i, N + ipos += 30 + if (ipos > 100000-30) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + ipos=1 + endif + end do + enddo + if (ipos > 1) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + endif + + + ASSERT (associated(b%det)) + ASSERT (associated(b%val)) + + integer, external :: zmq_set_running + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif + + integer :: nproc_target + nproc_target = nproc + double precision :: mem + mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 + 3.d0 + 5.d0) / (1024.d0**3) + call write_double(6,mem,'Estimated memory/thread (Gb)') + if (qp_max_mem > 0) then + nproc_target = max(1,int(dble(qp_max_mem)/mem)) + nproc_target = min(nproc_target,nproc) + endif + + f(:) = 1.d0 + if (.not.do_pt2) then + double precision :: f(N_states), u_dot_u + do k=1,min(N_det,N_states) + f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors) + enddo + endif + + !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2, variance, norm) PRIVATE(i) NUM_THREADS(nproc_target+1) + i = omp_get_thread_num() + if (i==0) then + call selection_collector(zmq_socket_pull, b, N, pt2, variance, norm) + else + call selection_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'selection') + do i=N_det+1,N_states + pt2(i) = 0.d0 + variance(i) = 0.d0 + norm(i) = 0.d0 + enddo + if (N_in > 0) then + if (s2_eig) then + call make_selection_buffer_s2(b) + endif + call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) + call copy_H_apply_buffer_to_wf() + call save_wavefunction + endif + call delete_selection_buffer(b) + do k=1,N_states + pt2(k) = pt2(k) * f(k) + variance(k) = variance(k) * f(k) + norm(k) = norm(k) * f(k) + enddo + +end subroutine + + +subroutine selection_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_selection_slave(1,i,pt2_e0_denominator) +end + +subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm) + use f77_zmq + use selection_types + use bitmasks + implicit none + + + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + type(selection_buffer), intent(inout) :: b + integer, intent(in) :: N + double precision, intent(inout) :: pt2(N_states) + double precision, intent(inout) :: variance(N_states) + double precision, intent(inout) :: norm(N_states) + double precision :: pt2_mwen(N_states) + double precision :: variance_mwen(N_states) + double precision :: norm_mwen(N_states) + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_pull_socket + + integer :: msg_size, rc, more + integer :: acc, i, j, robin, ntask + double precision, pointer :: val(:) + integer(bit_kind), pointer :: det(:,:,:) + integer, allocatable :: task_id(:) + type(selection_buffer) :: b2 + + + + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + call create_selection_buffer(N, N*2, b2) + double precision :: rss + double precision, external :: memory_of_int + rss = memory_of_int(N_det_generators) + call check_mem(rss,irp_here) + allocate(task_id(N_det_generators)) + more = 1 + pt2(:) = 0d0 + variance(:) = 0.d0 + norm(:) = 0.d0 + pt2_mwen(:) = 0.d0 + variance_mwen(:) = 0.d0 + norm_mwen(:) = 0.d0 + do while (more == 1) + call pull_selection_results(zmq_socket_pull, pt2_mwen, variance_mwen, norm_mwen, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask) + + pt2(:) += pt2_mwen(:) + variance(:) += variance_mwen(:) + norm(:) += norm_mwen(:) + do i=1, b2%cur + call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i)) + if (b2%val(i) > b%mini) exit + end do + + do i=1, ntask + if(task_id(i) == 0) then + print *, "Error in collector" + endif + integer, external :: zmq_delete_task + if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) == -1) then + stop 'Unable to delete task' + endif + end do + end do + + + call delete_selection_buffer(b2) + call sort_selection_buffer(b) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) +end subroutine + diff --git a/src/four_idx/NEED b/src/four_idx/NEED new file mode 100644 index 00000000..7ba1092b --- /dev/null +++ b/src/four_idx/NEED @@ -0,0 +1 @@ +integrals_bielec diff --git a/src/four_idx/README.rst b/src/four_idx/README.rst new file mode 100644 index 00000000..6ea432c6 --- /dev/null +++ b/src/four_idx/README.rst @@ -0,0 +1,6 @@ +======= +FourIdx +======= + +Four-index transformation. + diff --git a/src/four_idx/four_idx_transform.irp.f b/src/four_idx/four_idx_transform.irp.f new file mode 100644 index 00000000..a98ce2aa --- /dev/null +++ b/src/four_idx/four_idx_transform.irp.f @@ -0,0 +1,12 @@ +program four_idx + implicit none + BEGIN_DOC +! 4-index transformation from AO to MO integrals + END_DOC + + disk_access_mo_integrals = 'Write' + SOFT_TOUCH disk_access_mo_integrals + if (.true.) then + PROVIDE mo_bielec_integrals_in_map + endif +end diff --git a/src/hartree_fock/EZFIO.cfg b/src/hartree_fock/EZFIO.cfg new file mode 100644 index 00000000..22d2447e --- /dev/null +++ b/src/hartree_fock/EZFIO.cfg @@ -0,0 +1,53 @@ +[max_dim_diis] +type: integer +doc: Maximum size of the |DIIS| extrapolation procedure +interface: ezfio,provider,ocaml +default: 15 + +[threshold_diis] +type: Threshold +doc: Threshold on the convergence of the |DIIS| error vector during a Hartree-Fock calculation. If 0. is chosen, the square root of thresh_scf will be used. +interface: ezfio,provider,ocaml +default: 0. + +[thresh_scf] +type: Threshold +doc: Threshold on the convergence of the Hartree Fock energy. +interface: ezfio,provider,ocaml +default: 1.e-10 + +[n_it_scf_max] +type: Strictly_positive_int +doc: Maximum number of |SCF| iterations +interface: ezfio,provider,ocaml +default: 500 + +[level_shift] +type: Positive_float +doc: Initial value of the energy shift on the virtual |MOs| +interface: ezfio,provider,ocaml +default: 0.0 + +[scf_algorithm] +type: character*(32) +doc: Type of |SCF| algorithm used. Possible choices are [ Simple | DIIS] +interface: ezfio,provider,ocaml +default: DIIS + +[mo_guess_type] +type: MO_guess +doc: Initial MO guess. Can be [ Huckel | HCore ] +interface: ezfio,provider,ocaml +default: Huckel + +[energy] +type: double precision +doc: Calculated HF energy +interface: ezfio + +[no_oa_or_av_opt] +type: logical +doc: If |true|, skip the (inactive+core) --> (active) and the (active) --> (virtual) orbital rotations within the |SCF| procedure +interface: ezfio,provider,ocaml +default: False + diff --git a/src/hartree_fock/NEED b/src/hartree_fock/NEED new file mode 100644 index 00000000..24ae5fc0 --- /dev/null +++ b/src/hartree_fock/NEED @@ -0,0 +1,4 @@ +integrals_bielec +ao_one_e_integrals +mo_guess +bitmask diff --git a/src/hartree_fock/README.rst b/src/hartree_fock/README.rst new file mode 100644 index 00000000..373a611d --- /dev/null +++ b/src/hartree_fock/README.rst @@ -0,0 +1,33 @@ +============ +Hartree-Fock +============ + + +The Hartree-Fock module performs *Restricted* Hartree-Fock calculations (the +spatial part of the |MOs| is common for alpha and beta spinorbitals). + +The Hartree-Fock program does the following: + +#. Compute/Read all the one- and two-electron integrals, and store them in memory +#. Check in the |EZFIO| database if there is a set of |MOs|. If there is, it + will read them as initial guess. Otherwise, it will create a guess. +#. Perform the |SCF| iterations + +At each iteration, the |MOs| are saved in the |EZFIO| database. Hence, if the calculation +crashes for any unexpected reason, the calculation can be restarted by running again +the |SCF| with the same |EZFIO| database. + +The `DIIS`_ algorithm is implemented, as well as the `level-shifting`_ method. +If the |SCF| does not converge, try again with a higher value of :option:`level_shift`. + +To start a calculation from scratch, the simplest way is to remove the +``mo_basis`` directory from the |EZFIO| database, and run the |SCF| again. + + + + +.. _DIIS: https://en.wikipedia.org/w/index.php?title=DIIS +.. _level-shifting: https://doi.org/10.1002/qua.560070407 + + + diff --git a/src/hartree_fock/diagonalize_fock.irp.f b/src/hartree_fock/diagonalize_fock.irp.f new file mode 100644 index 00000000..2d198f62 --- /dev/null +++ b/src/hartree_fock/diagonalize_fock.irp.f @@ -0,0 +1,100 @@ +BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_tot_num) ] + implicit none + BEGIN_DOC + ! Eigenvector of the Fock matrix in the MO basis obtained with level shift. + END_DOC + + integer :: i,j + integer :: liwork, lwork, n, info + integer, allocatable :: iwork(:) + double precision, allocatable :: work(:), F(:,:), S(:,:) + double precision, allocatable :: diag(:) + + + allocate( F(mo_tot_num,mo_tot_num) ) + allocate (diag(mo_tot_num) ) + + do j=1,mo_tot_num + do i=1,mo_tot_num + F(i,j) = Fock_matrix_mo(i,j) + enddo + enddo + if(no_oa_or_av_opt)then + integer :: iorb,jorb + do i = 1, n_act_orb + iorb = list_act(i) + do j = 1, n_inact_orb + jorb = list_inact(j) + F(iorb,jorb) = 0.d0 + F(jorb,iorb) = 0.d0 + enddo + do j = 1, n_virt_orb + jorb = list_virt(j) + F(iorb,jorb) = 0.d0 + F(jorb,iorb) = 0.d0 + enddo + do j = 1, n_core_orb + jorb = list_core(j) + F(iorb,jorb) = 0.d0 + F(jorb,iorb) = 0.d0 + enddo + enddo + endif + + + ! Insert level shift here + do i = elec_beta_num+1, elec_alpha_num + F(i,i) += 0.5d0*level_shift + enddo + + do i = elec_alpha_num+1, mo_tot_num + F(i,i) += level_shift + enddo + + n = mo_tot_num + lwork = 1+6*n + 2*n*n + liwork = 3 + 5*n + + allocate(work(lwork)) + allocate(iwork(liwork) ) + + lwork = -1 + liwork = -1 + + call dsyevd( 'V', 'U', mo_tot_num, F, & + size(F,1), diag, work, lwork, iwork, liwork, info) + + if (info /= 0) then + print *, irp_here//' DSYEVD failed : ', info + stop 1 + endif + lwork = int(work(1)) + liwork = iwork(1) + deallocate(iwork) + deallocate(work) + + allocate(work(lwork)) + allocate(iwork(liwork) ) + call dsyevd( 'V', 'U', mo_tot_num, F, & + size(F,1), diag, work, lwork, iwork, liwork, info) + deallocate(iwork) + + + if (info /= 0) then + call dsyev( 'V', 'L', mo_tot_num, F, & + size(F,1), diag, work, lwork, info) + + if (info /= 0) then + print *, irp_here//' DSYEV failed : ', info + stop 1 + endif + endif + + call dgemm('N','N',ao_num,mo_tot_num,mo_tot_num, 1.d0, & + mo_coef, size(mo_coef,1), F, size(F,1), & + 0.d0, eigenvectors_Fock_matrix_mo, size(eigenvectors_Fock_matrix_mo,1)) + deallocate(work, F, diag) + + +END_PROVIDER + diff --git a/src/hartree_fock/diis.irp.f b/src/hartree_fock/diis.irp.f new file mode 100644 index 00000000..6ebb5879 --- /dev/null +++ b/src/hartree_fock/diis.irp.f @@ -0,0 +1,139 @@ +BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero ] + implicit none + BEGIN_DOC + ! If threshold_DIIS is zero, choose sqrt(thresh_scf) + END_DOC + if (threshold_DIIS == 0.d0) then + threshold_DIIS_nonzero = dsqrt(thresh_scf) + else + threshold_DIIS_nonzero = threshold_DIIS + endif + ASSERT (threshold_DIIS_nonzero >= 0.d0) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO, (AO_num, AO_num)] + implicit none + BEGIN_DOC + ! Commutator FPS - SPF + END_DOC + double precision, allocatable :: scratch(:,:) + allocate( & + scratch(AO_num, AO_num) & + ) + + ! Compute FP + + call dgemm('N','N',AO_num,AO_num,AO_num, & + 1.d0, & + Fock_Matrix_AO,Size(Fock_Matrix_AO,1), & + HF_Density_Matrix_AO,Size(HF_Density_Matrix_AO,1), & + 0.d0, & + scratch,Size(scratch,1)) + + ! Compute FPS + + call dgemm('N','N',AO_num,AO_num,AO_num, & + 1.d0, & + scratch,Size(scratch,1), & + AO_Overlap,Size(AO_Overlap,1), & + 0.d0, & + FPS_SPF_Matrix_AO,Size(FPS_SPF_Matrix_AO,1)) + + ! Compute SP + + call dgemm('N','N',AO_num,AO_num,AO_num, & + 1.d0, & + AO_Overlap,Size(AO_Overlap,1), & + HF_Density_Matrix_AO,Size(HF_Density_Matrix_AO,1), & + 0.d0, & + scratch,Size(scratch,1)) + + ! Compute FPS - SPF + + call dgemm('N','N',AO_num,AO_num,AO_num, & + -1.d0, & + scratch,Size(scratch,1), & + Fock_Matrix_AO,Size(Fock_Matrix_AO,1), & + 1.d0, & + FPS_SPF_Matrix_AO,Size(FPS_SPF_Matrix_AO,1)) + +END_PROVIDER + +bEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO, (mo_tot_num, mo_tot_num)] + implicit none + begin_doc +! Commutator FPS - SPF in MO basis + end_doc + call ao_to_mo(FPS_SPF_Matrix_AO, size(FPS_SPF_Matrix_AO,1), & + FPS_SPF_Matrix_MO, size(FPS_SPF_Matrix_MO,1)) +END_PROVIDER + + + BEGIN_PROVIDER [ double precision, eigenvalues_Fock_matrix_AO, (AO_num) ] +&BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_AO, (AO_num,AO_num) ] + + BEGIN_DOC + ! Eigenvalues and eigenvectors of the Fock matrix over the AO basis + END_DOC + + implicit none + + double precision, allocatable :: scratch(:,:),work(:),Xt(:,:) + integer :: lwork,info + integer :: i,j + + lwork = 3*AO_num - 1 + allocate( & + scratch(AO_num,AO_num), & + work(lwork), & + Xt(AO_num,AO_num) & + ) + +! Calculate Xt + + do i=1,AO_num + do j=1,AO_num + Xt(i,j) = S_half_inv(j,i) + enddo + enddo + +! Calculate Fock matrix in orthogonal basis: F' = Xt.F.X + + call dgemm('N','N',AO_num,AO_num,AO_num, & + 1.d0, & + Fock_matrix_AO,size(Fock_matrix_AO,1), & + S_half_inv,size(S_half_inv,1), & + 0.d0, & + eigenvectors_Fock_matrix_AO,size(eigenvectors_Fock_matrix_AO,1)) + + call dgemm('N','N',AO_num,AO_num,AO_num, & + 1.d0, & + Xt,size(Xt,1), & + eigenvectors_Fock_matrix_AO,size(eigenvectors_Fock_matrix_AO,1), & + 0.d0, & + scratch,size(scratch,1)) + +! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues + + call dsyev('V','U',AO_num, & + scratch,size(scratch,1), & + eigenvalues_Fock_matrix_AO, & + work,lwork,info) + + if(info /= 0) then + print *, irp_here//' failed : ', info + stop 1 + endif + +! Back-transform eigenvectors: C =X.C' + + call dgemm('N','N',AO_num,AO_num,AO_num, & + 1.d0, & + S_half_inv,size(S_half_inv,1), & + scratch,size(scratch,1), & + 0.d0, & + eigenvectors_Fock_matrix_AO,size(eigenvectors_Fock_matrix_AO,1)) + +END_PROVIDER + diff --git a/src/hartree_fock/fock_matrix.irp.f b/src/hartree_fock/fock_matrix.irp.f new file mode 100644 index 00000000..7f473f7a --- /dev/null +++ b/src/hartree_fock/fock_matrix.irp.f @@ -0,0 +1,320 @@ + BEGIN_PROVIDER [ double precision, Fock_matrix_mo, (mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, Fock_matrix_diag_mo, (mo_tot_num)] + implicit none + BEGIN_DOC + ! Fock matrix on the MO basis. + ! For open shells, the ROHF Fock Matrix is + ! + ! | F-K | F + K/2 | F | + ! |---------------------------------| + ! | F + K/2 | F | F - K/2 | + ! |---------------------------------| + ! | F | F - K/2 | F + K | + ! + ! F = 1/2 (Fa + Fb) + ! + ! K = Fb - Fa + ! + END_DOC + integer :: i,j,n + if (elec_alpha_num == elec_beta_num) then + Fock_matrix_mo = Fock_matrix_mo_alpha + else + + do j=1,elec_beta_num + ! F-K + do i=1,elec_beta_num + Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + - (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + enddo + ! F+K/2 + do i=elec_beta_num+1,elec_alpha_num + Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + enddo + ! F + do i=elec_alpha_num+1, mo_tot_num + Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + enddo + enddo + + do j=elec_beta_num+1,elec_alpha_num + ! F+K/2 + do i=1,elec_beta_num + Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + enddo + ! F + do i=elec_beta_num+1,elec_alpha_num + Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + enddo + ! F-K/2 + do i=elec_alpha_num+1, mo_tot_num + Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + enddo + enddo + + do j=elec_alpha_num+1, mo_tot_num + ! F + do i=1,elec_beta_num + Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + enddo + ! F-K/2 + do i=elec_beta_num+1,elec_alpha_num + Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + enddo + ! F+K + do i=elec_alpha_num+1,mo_tot_num + Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) & + + (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + enddo + enddo + + endif + + do i = 1, mo_tot_num + Fock_matrix_diag_mo(i) = Fock_matrix_mo(i,i) + enddo +END_PROVIDER + + + + BEGIN_PROVIDER [ double precision, Fock_matrix_ao_alpha, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, Fock_matrix_ao_beta, (ao_num, ao_num) ] + implicit none + BEGIN_DOC + ! Alpha Fock matrix in AO basis set + END_DOC + + integer :: i,j + do j=1,ao_num + do i=1,ao_num + Fock_matrix_ao_alpha(i,j) = ao_mono_elec_integral(i,j) + ao_bi_elec_integral_alpha(i,j) + Fock_matrix_ao_beta (i,j) = ao_mono_elec_integral(i,j) + ao_bi_elec_integral_beta (i,j) + enddo + enddo + +END_PROVIDER + + + BEGIN_PROVIDER [ double precision, ao_bi_elec_integral_alpha, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_bi_elec_integral_beta , (ao_num, ao_num) ] + use map_module + implicit none + BEGIN_DOC + ! Alpha Fock matrix in AO basis set + END_DOC + + integer :: i,j,k,l,k1,r,s + integer :: i0,j0,k0,l0 + integer*8 :: p,q + double precision :: integral, c0, c1, c2 + double precision :: ao_bielec_integral, local_threshold + double precision, allocatable :: ao_bi_elec_integral_alpha_tmp(:,:) + double precision, allocatable :: ao_bi_elec_integral_beta_tmp(:,:) + + ao_bi_elec_integral_alpha = 0.d0 + ao_bi_elec_integral_beta = 0.d0 + if (do_direct_integrals) then + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,p,q,r,s,i0,j0,k0,l0, & + !$OMP ao_bi_elec_integral_alpha_tmp,ao_bi_elec_integral_beta_tmp, c0, c1, c2, & + !$OMP local_threshold)& + !$OMP SHARED(ao_num,HF_density_matrix_ao_alpha,HF_density_matrix_ao_beta,& + !$OMP ao_integrals_map,ao_integrals_threshold, ao_bielec_integral_schwartz, & + !$OMP ao_overlap_abs, ao_bi_elec_integral_alpha, ao_bi_elec_integral_beta) + + allocate(keys(1), values(1)) + allocate(ao_bi_elec_integral_alpha_tmp(ao_num,ao_num), & + ao_bi_elec_integral_beta_tmp(ao_num,ao_num)) + ao_bi_elec_integral_alpha_tmp = 0.d0 + ao_bi_elec_integral_beta_tmp = 0.d0 + + q = ao_num*ao_num*ao_num*ao_num + !$OMP DO SCHEDULE(dynamic) + do p=1_8,q + call bielec_integrals_index_reverse(kk,ii,ll,jj,p) + if ( (kk(1)>ao_num).or. & + (ii(1)>ao_num).or. & + (jj(1)>ao_num).or. & + (ll(1)>ao_num) ) then + cycle + endif + k = kk(1) + i = ii(1) + l = ll(1) + j = jj(1) + + if (ao_overlap_abs(k,l)*ao_overlap_abs(i,j) & + < ao_integrals_threshold) then + cycle + endif + local_threshold = ao_bielec_integral_schwartz(k,l)*ao_bielec_integral_schwartz(i,j) + if (local_threshold < ao_integrals_threshold) then + cycle + endif + i0 = i + j0 = j + k0 = k + l0 = l + values(1) = 0.d0 + local_threshold = ao_integrals_threshold/local_threshold + do k2=1,8 + if (kk(k2)==0) then + cycle + endif + i = ii(k2) + j = jj(k2) + k = kk(k2) + l = ll(k2) + c0 = HF_density_matrix_ao_alpha(k,l)+HF_density_matrix_ao_beta(k,l) + c1 = HF_density_matrix_ao_alpha(k,i) + c2 = HF_density_matrix_ao_beta(k,i) + if ( dabs(c0)+dabs(c1)+dabs(c2) < local_threshold) then + cycle + endif + if (values(1) == 0.d0) then + values(1) = ao_bielec_integral(k0,l0,i0,j0) + endif + integral = c0 * values(1) + ao_bi_elec_integral_alpha_tmp(i,j) += integral + ao_bi_elec_integral_beta_tmp (i,j) += integral + integral = values(1) + ao_bi_elec_integral_alpha_tmp(l,j) -= c1 * integral + ao_bi_elec_integral_beta_tmp (l,j) -= c2 * integral + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + ao_bi_elec_integral_alpha += ao_bi_elec_integral_alpha_tmp + !$OMP END CRITICAL + !$OMP CRITICAL + ao_bi_elec_integral_beta += ao_bi_elec_integral_beta_tmp + !$OMP END CRITICAL + deallocate(keys,values,ao_bi_elec_integral_alpha_tmp,ao_bi_elec_integral_beta_tmp) + !$OMP END PARALLEL + else + PROVIDE ao_bielec_integrals_in_map + + integer(omp_lock_kind) :: lck(ao_num) + integer*8 :: i8 + integer :: ii(8), jj(8), kk(8), ll(8), k2 + integer(cache_map_size_kind) :: n_elements_max, n_elements + integer(key_kind), allocatable :: keys(:) + double precision, allocatable :: values(:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max, & + !$OMP n_elements,ao_bi_elec_integral_alpha_tmp,ao_bi_elec_integral_beta_tmp)& + !$OMP SHARED(ao_num,HF_density_matrix_ao_alpha,HF_density_matrix_ao_beta,& + !$OMP ao_integrals_map, ao_bi_elec_integral_alpha, ao_bi_elec_integral_beta) + + call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max) + allocate(keys(n_elements_max), values(n_elements_max)) + allocate(ao_bi_elec_integral_alpha_tmp(ao_num,ao_num), & + ao_bi_elec_integral_beta_tmp(ao_num,ao_num)) + ao_bi_elec_integral_alpha_tmp = 0.d0 + ao_bi_elec_integral_beta_tmp = 0.d0 + + !$OMP DO SCHEDULE(dynamic,64) + !DIR$ NOVECTOR + do i8=0_8,ao_integrals_map%map_size + n_elements = n_elements_max + call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) + do k1=1,n_elements + call bielec_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) + + do k2=1,8 + if (kk(k2)==0) then + cycle + endif + i = ii(k2) + j = jj(k2) + k = kk(k2) + l = ll(k2) + integral = (HF_density_matrix_ao_alpha(k,l)+HF_density_matrix_ao_beta(k,l)) * values(k1) + ao_bi_elec_integral_alpha_tmp(i,j) += integral + ao_bi_elec_integral_beta_tmp (i,j) += integral + integral = values(k1) + ao_bi_elec_integral_alpha_tmp(l,j) -= HF_density_matrix_ao_alpha(k,i) * integral + ao_bi_elec_integral_beta_tmp (l,j) -= HF_density_matrix_ao_beta (k,i) * integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + ao_bi_elec_integral_alpha += ao_bi_elec_integral_alpha_tmp + !$OMP END CRITICAL + !$OMP CRITICAL + ao_bi_elec_integral_beta += ao_bi_elec_integral_beta_tmp + !$OMP END CRITICAL + deallocate(keys,values,ao_bi_elec_integral_alpha_tmp,ao_bi_elec_integral_beta_tmp) + !$OMP END PARALLEL + + endif + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, Fock_matrix_mo_alpha, (mo_tot_num,mo_tot_num) ] + implicit none + BEGIN_DOC + ! Fock matrix on the MO basis + END_DOC + call ao_to_mo(Fock_matrix_ao_alpha,size(Fock_matrix_ao_alpha,1), & + Fock_matrix_mo_alpha,size(Fock_matrix_mo_alpha,1)) +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, Fock_matrix_mo_beta, (mo_tot_num,mo_tot_num) ] + implicit none + BEGIN_DOC + ! Fock matrix on the MO basis + END_DOC + call ao_to_mo(Fock_matrix_ao_beta,size(Fock_matrix_ao_beta,1), & + Fock_matrix_mo_beta,size(Fock_matrix_mo_beta,1)) +END_PROVIDER + +BEGIN_PROVIDER [ double precision, HF_energy ] + implicit none + BEGIN_DOC + ! Hartree-Fock energy + END_DOC + HF_energy = nuclear_repulsion + + integer :: i,j + do j=1,ao_num + do i=1,ao_num + HF_energy += 0.5d0 * ( & + (ao_mono_elec_integral(i,j) + Fock_matrix_ao_alpha(i,j) ) * HF_density_matrix_ao_alpha(i,j) +& + (ao_mono_elec_integral(i,j) + Fock_matrix_ao_beta (i,j) ) * HF_density_matrix_ao_beta (i,j) ) + enddo + enddo + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, Fock_matrix_ao, (ao_num, ao_num) ] + implicit none + BEGIN_DOC + ! Fock matrix in AO basis set + END_DOC + + if ( (elec_alpha_num == elec_beta_num).and. & + (level_shift == 0.) ) & + then + integer :: i,j + do j=1,ao_num + do i=1,ao_num + Fock_matrix_ao(i,j) = Fock_matrix_ao_alpha(i,j) + enddo + enddo + else + call mo_to_ao(Fock_matrix_mo,size(Fock_matrix_mo,1), & + Fock_matrix_ao,size(Fock_matrix_ao,1)) + endif +END_PROVIDER + + diff --git a/src/hartree_fock/hf_density_matrix_ao.irp.f b/src/hartree_fock/hf_density_matrix_ao.irp.f new file mode 100644 index 00000000..736d1a97 --- /dev/null +++ b/src/hartree_fock/hf_density_matrix_ao.irp.f @@ -0,0 +1,41 @@ +BEGIN_PROVIDER [double precision, HF_density_matrix_ao_alpha, (ao_num,ao_num) ] + implicit none + BEGIN_DOC + ! S^{-1}.P_alpha.S^{-1} + END_DOC + + call dgemm('N','T',ao_num,ao_num,elec_alpha_num,1.d0, & + mo_coef, size(mo_coef,1), & + mo_coef, size(mo_coef,1), 0.d0, & + HF_density_matrix_ao_alpha, size(HF_density_matrix_ao_alpha,1)) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, HF_density_matrix_ao_beta, (ao_num,ao_num) ] + implicit none + BEGIN_DOC + ! S^{-1}.P_beta.S^{-1} + END_DOC + + call dgemm('N','T',ao_num,ao_num,elec_beta_num,1.d0, & + mo_coef, size(mo_coef,1), & + mo_coef, size(mo_coef,1), 0.d0, & + HF_density_matrix_ao_beta, size(HF_density_matrix_ao_beta,1)) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, HF_density_matrix_ao, (ao_num,ao_num) ] + implicit none + BEGIN_DOC + ! S^{-1}.P.S^{-1} where P = C.C^t + END_DOC + ASSERT (size(HF_density_matrix_ao,1) == size(HF_density_matrix_ao_alpha,1)) + if (elec_alpha_num== elec_beta_num) then + HF_density_matrix_ao = HF_density_matrix_ao_alpha + HF_density_matrix_ao_alpha + else + ASSERT (size(HF_density_matrix_ao,1) == size(HF_density_matrix_ao_beta ,1)) + HF_density_matrix_ao = HF_density_matrix_ao_alpha + HF_density_matrix_ao_beta + endif + +END_PROVIDER + diff --git a/src/hartree_fock/huckel.irp.f b/src/hartree_fock/huckel.irp.f new file mode 100644 index 00000000..c9e32ad5 --- /dev/null +++ b/src/hartree_fock/huckel.irp.f @@ -0,0 +1,34 @@ +subroutine huckel_guess + implicit none + BEGIN_DOC +! Build the MOs using the extended Huckel model + END_DOC + integer :: i,j + double precision :: accu + double precision :: c + character*(64) :: label + double precision, allocatable :: A(:,:) + label = "Guess" + c = 0.5d0 * 1.75d0 + + allocate (A(ao_num, ao_num)) + A = 0.d0 + do j=1,ao_num + do i=1,ao_num + A(i,j) = c * ao_overlap(i,j) * (ao_mono_elec_integral_diag(i) + ao_mono_elec_integral_diag(j)) + enddo + A(j,j) = ao_mono_elec_integral_diag(j) + ao_bi_elec_integral_alpha(j,j) + enddo + + Fock_matrix_ao_alpha(1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num) + Fock_matrix_ao_beta (1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num) + +! TOUCH mo_coef + + TOUCH Fock_matrix_ao_alpha Fock_matrix_ao_beta + mo_coef = eigenvectors_fock_matrix_mo + SOFT_TOUCH mo_coef + call save_mos + deallocate(A) + +end diff --git a/src/hartree_fock/roothaan_hall_scf.irp.f b/src/hartree_fock/roothaan_hall_scf.irp.f new file mode 100644 index 00000000..e860496f --- /dev/null +++ b/src/hartree_fock/roothaan_hall_scf.irp.f @@ -0,0 +1,289 @@ +subroutine Roothaan_Hall_SCF + +BEGIN_DOC +! Roothaan-Hall algorithm for SCF Hartree-Fock calculation +END_DOC + + implicit none + + double precision :: energy_SCF,energy_SCF_previous,Delta_energy_SCF + double precision :: max_error_DIIS,max_error_DIIS_alpha,max_error_DIIS_beta + double precision, allocatable :: Fock_matrix_DIIS(:,:,:),error_matrix_DIIS(:,:,:) + + integer :: iteration_SCF,dim_DIIS,index_dim_DIIS + + integer :: i,j + double precision, allocatable :: mo_coef_save(:,:) + + PROVIDE ao_md5 mo_occ level_shift + + allocate(mo_coef_save(ao_num,mo_tot_num), & + Fock_matrix_DIIS (ao_num,ao_num,max_dim_DIIS), & + error_matrix_DIIS(ao_num,ao_num,max_dim_DIIS) & + ) + + call write_time(6) + + write(6,'(A4, 1X, A16, 1X, A16, 1X, A16)') & + '====','================','================','================' + write(6,'(A4, 1X, A16, 1X, A16, 1X, A16)') & + ' N ', 'Energy ', 'Energy diff ', 'DIIS error ' + write(6,'(A4, 1X, A16, 1X, A16, 1X, A16)') & + '====','================','================','================' + +! Initialize energies and density matrices + + energy_SCF_previous = HF_energy + Delta_energy_SCF = 1.d0 + iteration_SCF = 0 + dim_DIIS = 0 + max_error_DIIS = 1.d0 + + +! +! Start of main SCF loop +! + do while(( (max_error_DIIS > threshold_DIIS_nonzero).or.(dabs(Delta_energy_SCF) > thresh_SCF) ) .and. (iteration_SCF < n_it_SCF_max)) + +! Increment cycle number + + iteration_SCF += 1 + +! Current size of the DIIS space + + dim_DIIS = min(dim_DIIS+1,max_dim_DIIS) + + if (scf_algorithm == 'DIIS') then + + ! Store Fock and error matrices at each iteration + do j=1,ao_num + do i=1,ao_num + index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1 + Fock_matrix_DIIS (i,j,index_dim_DIIS) = Fock_matrix_AO(i,j) + error_matrix_DIIS(i,j,index_dim_DIIS) = FPS_SPF_matrix_AO(i,j) + enddo + enddo + + ! Compute the extrapolated Fock matrix + + call extrapolate_Fock_matrix( & + error_matrix_DIIS,Fock_matrix_DIIS, & + Fock_matrix_AO,size(Fock_matrix_AO,1), & + iteration_SCF,dim_DIIS & + ) + + Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0 + Fock_matrix_AO_beta = Fock_matrix_AO*0.5d0 + TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta + + endif + + MO_coef = eigenvectors_Fock_matrix_MO + + TOUCH MO_coef + +! Calculate error vectors + + max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO)) + +! SCF energy + + energy_SCF = HF_energy + Delta_Energy_SCF = energy_SCF - energy_SCF_previous + if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then + Fock_matrix_AO(1:ao_num,1:ao_num) = Fock_matrix_DIIS (1:ao_num,1:ao_num,index_dim_DIIS) + Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0 + Fock_matrix_AO_beta = Fock_matrix_AO*0.5d0 + TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta + endif + + double precision :: level_shift_save + level_shift_save = level_shift + mo_coef_save(1:ao_num,1:mo_tot_num) = mo_coef(1:ao_num,1:mo_tot_num) + do while (Delta_Energy_SCF > 0.d0) + mo_coef(1:ao_num,1:mo_tot_num) = mo_coef_save + TOUCH mo_coef + level_shift = level_shift + 1.0d0 + mo_coef(1:ao_num,1:mo_tot_num) = eigenvectors_Fock_matrix_MO(1:ao_num,1:mo_tot_num) + TOUCH mo_coef level_shift + Delta_Energy_SCF = HF_energy - energy_SCF_previous + energy_SCF = HF_energy + if (level_shift-level_shift_save > 50.d0) then + level_shift = level_shift_save + SOFT_TOUCH level_shift + exit + endif + dim_DIIS=0 + enddo + level_shift = level_shift * 0.75d0 + SOFT_TOUCH level_shift + energy_SCF_previous = energy_SCF + +! Print results at the end of each iteration + + write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)') & + iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, dim_DIIS + + if (Delta_energy_SCF < 0.d0) then + call save_mos + endif + + enddo + +! +! End of Main SCF loop +! + + write(6,'(A4, 1X, A16, 1X, A16, 1X, A16)') & + '====','================','================','================' + write(6,*) + + if(.not.no_oa_or_av_opt)then + call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo,size(Fock_matrix_mo,1),size(Fock_matrix_mo,2),mo_label,1,.true.) + endif + + call write_double(6, Energy_SCF, 'Hartree-Fock energy') + call ezfio_set_hartree_fock_energy(Energy_SCF) + + call write_time(6) + +end + +subroutine extrapolate_Fock_matrix( & + error_matrix_DIIS,Fock_matrix_DIIS, & + Fock_matrix_AO_,size_Fock_matrix_AO, & + iteration_SCF,dim_DIIS & +) + +BEGIN_DOC +! Compute the extrapolated Fock matrix using the DIIS procedure +END_DOC + + implicit none + + double precision,intent(in) :: Fock_matrix_DIIS(ao_num,ao_num,*),error_matrix_DIIS(ao_num,ao_num,*) + integer,intent(in) :: iteration_SCF, size_Fock_matrix_AO + double precision,intent(inout):: Fock_matrix_AO_(size_Fock_matrix_AO,ao_num) + integer,intent(inout) :: dim_DIIS + + double precision,allocatable :: B_matrix_DIIS(:,:),X_vector_DIIS(:) + double precision,allocatable :: C_vector_DIIS(:) + + double precision,allocatable :: scratch(:,:) + integer :: i,j,k,i_DIIS,j_DIIS + + allocate( & + B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), & + X_vector_DIIS(dim_DIIS+1), & + C_vector_DIIS(dim_DIIS+1), & + scratch(ao_num,ao_num) & + ) + +! Compute the matrices B and X + do j=1,dim_DIIS + do i=1,dim_DIIS + + j_DIIS = mod(iteration_SCF-j,max_dim_DIIS)+1 + i_DIIS = mod(iteration_SCF-i,max_dim_DIIS)+1 + +! Compute product of two errors vectors + + call dgemm('N','N',ao_num,ao_num,ao_num, & + 1.d0, & + error_matrix_DIIS(1,1,i_DIIS),size(error_matrix_DIIS,1), & + error_matrix_DIIS(1,1,j_DIIS),size(error_matrix_DIIS,1), & + 0.d0, & + scratch,size(scratch,1)) + +! Compute Trace + + B_matrix_DIIS(i,j) = 0.d0 + do k=1,ao_num + B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + scratch(k,k) + enddo + enddo + enddo + +! Pad B matrix and build the X matrix + + do i=1,dim_DIIS + B_matrix_DIIS(i,dim_DIIS+1) = -1.d0 + B_matrix_DIIS(dim_DIIS+1,i) = -1.d0 + C_vector_DIIS(i) = 0.d0 + enddo + B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1) = 0.d0 + C_vector_DIIS(dim_DIIS+1) = -1.d0 + +! Solve the linear system C = B.X + + integer :: info + integer,allocatable :: ipiv(:) + + allocate( & + ipiv(dim_DIIS+1) & + ) + + double precision, allocatable :: AF(:,:) + allocate (AF(dim_DIIS+1,dim_DIIS+1)) + double precision :: rcond, ferr, berr + integer :: iwork(dim_DIIS+1), lwork + + call dsysvx('N','U',dim_DIIS+1,1, & + B_matrix_DIIS,size(B_matrix_DIIS,1), & + AF, size(AF,1), & + ipiv, & + C_vector_DIIS,size(C_vector_DIIS,1), & + X_vector_DIIS,size(X_vector_DIIS,1), & + rcond, & + ferr, & + berr, & + scratch,-1, & + iwork, & + info & + ) + lwork = int(scratch(1,1)) + deallocate(scratch) + allocate(scratch(lwork,1)) + + call dsysvx('N','U',dim_DIIS+1,1, & + B_matrix_DIIS,size(B_matrix_DIIS,1), & + AF, size(AF,1), & + ipiv, & + C_vector_DIIS,size(C_vector_DIIS,1), & + X_vector_DIIS,size(X_vector_DIIS,1), & + rcond, & + ferr, & + berr, & + scratch,size(scratch), & + iwork, & + info & + ) + + if(info < 0) then + stop 'bug in DIIS' + endif + + if (rcond > 1.d-12) then + + ! Compute extrapolated Fock matrix + + + !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) + do j=1,ao_num + do i=1,ao_num + Fock_matrix_AO_(i,j) = 0.d0 + enddo + do k=1,dim_DIIS + do i=1,ao_num + Fock_matrix_AO_(i,j) = Fock_matrix_AO_(i,j) + & + X_vector_DIIS(k)*Fock_matrix_DIIS(i,j,dim_DIIS-k+1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + dim_DIIS = 0 + endif + +end diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f new file mode 100644 index 00000000..a04fcc31 --- /dev/null +++ b/src/hartree_fock/scf.irp.f @@ -0,0 +1,60 @@ +program scf + BEGIN_DOC +! Produce `Hartree_Fock` MO orbital +! output: mo_basis.mo_tot_num mo_basis.mo_label mo_basis.ao_md5 mo_basis.mo_coef mo_basis.mo_occ +! output: hartree_fock.energy +! optional: mo_basis.mo_coef + END_DOC + call create_guess + call orthonormalize_mos + call run +end + +subroutine create_guess + implicit none + BEGIN_DOC +! Create a MO guess if no MOs are present in the EZFIO directory + END_DOC + logical :: exists + PROVIDE ezfio_filename + call ezfio_has_mo_basis_mo_coef(exists) + if (.not.exists) then + if (mo_guess_type == "HCore") then + mo_coef = ao_ortho_lowdin_coef + TOUCH mo_coef + mo_label = 'Guess' + call mo_as_eigvectors_of_mo_matrix(mo_mono_elec_integral,size(mo_mono_elec_integral,1),size(mo_mono_elec_integral,2),mo_label,1,.false.) + SOFT_TOUCH mo_coef mo_label + else if (mo_guess_type == "Huckel") then + call huckel_guess + else + print *, 'Unrecognized MO guess type : '//mo_guess_type + stop 1 + endif + endif +end + +subroutine run + + BEGIN_DOC +! Run SCF calculation + END_DOC + + use bitmasks + implicit none + + double precision :: SCF_energy_before,SCF_energy_after,diag_H_mat_elem + double precision :: EHF + integer :: i_it, i, j, k + + EHF = HF_energy + + mo_label = "Canonical" + +! Choose SCF algorithm + + call Roothaan_Hall_SCF + +end + + diff --git a/src/mrpt_utils/H_apply.irp.f b/src/mrpt_utils/h_apply.irp.f similarity index 100% rename from src/mrpt_utils/H_apply.irp.f rename to src/mrpt_utils/h_apply.irp.f diff --git a/src/perturbation/perturbation.irp.f b/src/perturbation/perturbation.irp.f index f7d1f478..040f3026 100644 --- a/src/perturbation/perturbation.irp.f +++ b/src/perturbation/perturbation.irp.f @@ -2,7 +2,7 @@ BEGIN_SHELL [ /usr/bin/env python2 ] from perturbation import perturbations import os -filename = os.environ["QP_ROOT"]+"/src/Perturbation/perturbation.template.f" +filename = os.environ["QP_ROOT"]+"/src/perturbation/perturbation.template.f" file = open(filename,'r') template = file.read() file.close() diff --git a/src/utils/LinearAlgebra.irp.f b/src/utils/linear_algebra.irp.f similarity index 100% rename from src/utils/LinearAlgebra.irp.f rename to src/utils/linear_algebra.irp.f