diff --git a/.travis.yml b/.travis.yml index 57991ba3..22cd358e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,12 +9,12 @@ sudo: false addons: apt: packages: - - zlib1g-dev - - libgmp3-dev - gfortran - gcc - liblapack-dev - graphviz +# - zlib1g-dev +# - libgmp3-dev cache: directories: @@ -29,4 +29,4 @@ script: - source ./quantum_package.rc ; qp_module.py install Full_CI Full_CI_ZMQ Hartree_Fock CAS_SD_ZMQ mrcepa0 All_singles - source ./quantum_package.rc ; ninja - source ./quantum_package.rc ; cd ocaml ; make ; cd - - - source ./quantum_package.rc ; cd tests ; ./run_tests.sh #-v + - source ./quantum_package.rc ; cd tests ; ./run_tests.sh -v diff --git a/configure b/configure index 8cb02608..128d7126 100755 --- a/configure +++ b/configure @@ -71,8 +71,8 @@ d_dependency = { "emsl": ["python"], "gcc": [], "g++": [], - "zeromq" : [ "g++" ], - "f77zmq" : [ "zeromq", "python" ], + "zeromq" : [ "g++", "make" ], + "f77zmq" : [ "zeromq", "python", "make" ], "python": [], "ninja": ["g++", "python"], "make": [], @@ -150,7 +150,6 @@ f77zmq = Info( url='{head}/zeromq/f77_zmq/{tail}'.format(**path_github), description=' F77-ZeroMQ', default_path=join(QP_ROOT_LIB, "libf77zmq.a") ) -# join(QP_ROOT, "src", "ZMQ", "f77zmq.h") ) p_graphviz = Info( url='https://github.com/xflr6/graphviz/archive/master.tar.gz', @@ -166,7 +165,7 @@ d_info = dict() for m in ["ocaml", "m4", "curl", "zlib", "patch", "irpf90", "docopt", "resultsFile", "ninja", "emsl", "ezfio", "p_graphviz", - "zeromq", "f77zmq","bats" ]: + "zeromq", "f77zmq","bats"]: exec ("d_info['{0}']={0}".format(m)) @@ -494,7 +493,9 @@ def create_ninja_and_rc(l_installed): 'export PYTHONPATH="${QP_EZFIO}/Python":"${QP_PYTHON}":"${PYTHONPATH}"', 'export PATH="${QP_PYTHON}":"${QP_ROOT}"/bin:"${QP_ROOT}"/ocaml:"${PATH}"', 'export LD_LIBRARY_PATH="${QP_ROOT}"/lib:"${LD_LIBRARY_PATH}"', - 'export LIBRARY_PATH="${QP_ROOT}"/lib:"${LIBRARY_PATH}"', "", + 'export LIBRARY_PATH="${QP_ROOT}"/lib:"${LIBRARY_PATH}"', + 'export C_INCLUDE_PATH="${C_INCLUDE_PATH}":"${QP_ROOT}"/include', + '', 'source ${QP_ROOT}/install/EZFIO/Bash/ezfio.sh', "", 'source ${HOME}/.opam/opam-init/init.sh > /dev/null 2> /dev/null || true', '', diff --git a/include/.empty b/include/.empty new file mode 100644 index 00000000..e69de29b diff --git a/install/scripts/build.sh b/install/scripts/build.sh index 79a71065..5071b5aa 100755 --- a/install/scripts/build.sh +++ b/install/scripts/build.sh @@ -4,7 +4,11 @@ BUILD=_build/${TARGET} rm -rf -- ${BUILD} mkdir ${BUILD} || exit 1 -tar -zxf Downloads/${TARGET}.tar.gz --strip-components=1 --directory=${BUILD} || exit 1 +if [[ -f Downloads/${TARGET}.tar.gz ]] ; then + tar -zxf Downloads/${TARGET}.tar.gz --strip-components=1 --directory=${BUILD} || exit 1 +elif [[ -f Downloads/${TARGET}.tar.bz2 ]] ; then + tar -jxf Downloads/${TARGET}.tar.bz2 --strip-components=1 --directory=${BUILD} || exit 1 +fi _install || exit 1 rm -rf -- ${BUILD} _build/${TARGET}.log exit 0 diff --git a/install/scripts/install_curl.sh b/install/scripts/install_curl.sh index c3a48024..6194a0e0 100755 --- a/install/scripts/install_curl.sh +++ b/install/scripts/install_curl.sh @@ -10,10 +10,4 @@ function _install() mv curl.ermine ${QP_ROOT}/bin/curl || return 1 } -BUILD=_build/${TARGET} -rm -rf -- ${BUILD} -mkdir ${BUILD} || exit 1 -tar -xvjf Downloads/${TARGET}.tar.bz2 --strip-components=1 --directory=${BUILD} || exit 1 -_install || exit 1 -rm -rf -- ${BUILD} _build/${TARGET}.log -exit 0 \ No newline at end of file +source scripts/build.sh diff --git a/install/scripts/install_f77zmq.sh b/install/scripts/install_f77zmq.sh index 8357857c..92388337 100755 --- a/install/scripts/install_f77zmq.sh +++ b/install/scripts/install_f77zmq.sh @@ -7,10 +7,9 @@ function _install() cd .. QP_ROOT=$PWD cd - - export C_INCLUDE_PATH="${C_INCLUDE_PATH}":"${QP_ROOT}"/lib set -e set -u - export ZMQ_H="${QP_ROOT}"/lib/zmq.h + export ZMQ_H="${QP_ROOT}"/include/zmq.h cd "${BUILD}" make -j 8 || exit 1 mv libf77zmq.a "${QP_ROOT}"/lib || exit 1 diff --git a/install/scripts/install_gmp.sh b/install/scripts/install_gmp.sh new file mode 100755 index 00000000..9aea2973 --- /dev/null +++ b/install/scripts/install_gmp.sh @@ -0,0 +1,17 @@ +#!/bin/bash -x + +TARGET=gmp + +function _install() +{ + rm -rf -- ${TARGET} + mkdir ${TARGET} || exit 1 + cd .. + QP_ROOT=$PWD + cd - + cd ${BUILD} + ./configure --prefix=$QP_ROOT && make -j 8 || exit 1 + make install || exit 1 +} + +source scripts/build.sh diff --git a/install/scripts/install_m4.sh b/install/scripts/install_m4.sh index ca62a025..5a52d757 100755 --- a/install/scripts/install_m4.sh +++ b/install/scripts/install_m4.sh @@ -8,8 +8,7 @@ function _install() QP_ROOT=$PWD cd - cd ${BUILD} - ./configure && make || exit 1 - ln -sf ${PWD}/src/m4 ${QP_ROOT}/bin || exit 1 + ./configure --prefix=$QP_ROOT && make || exit 1 } source scripts/build.sh diff --git a/install/scripts/install_ocaml.sh b/install/scripts/install_ocaml.sh index 913ae75d..b82216d3 100755 --- a/install/scripts/install_ocaml.sh +++ b/install/scripts/install_ocaml.sh @@ -5,11 +5,11 @@ QP_ROOT=$PWD cd - # Normal installation -PACKAGES="core cryptokit zarith ocamlfind sexplib ZMQ" +PACKAGES="core cryptokit.1.10 ocamlfind sexplib ZMQ" #ppx_sexp_conv # Needed for ZeroMQ -export C_INCLUDE_PATH="${QP_ROOT}"/lib:"${C_INCLUDE_PATH}" +export C_INCLUDE_PATH="${QP_ROOT}"/include:"${C_INCLUDE_PATH}" export LIBRARY_PATH="${QP_ROOT}"/lib:"${LIBRARY_PATH}" export LD_LIBRARY_PATH="${QP_ROOT}"/lib:"${LD_LIBRARY_PATH}" diff --git a/install/scripts/install_patch.sh b/install/scripts/install_patch.sh index 10522401..224ac8f8 100755 --- a/install/scripts/install_patch.sh +++ b/install/scripts/install_patch.sh @@ -9,11 +9,11 @@ function _install() QP_ROOT=$PWD cd - cd ${BUILD} - ./configure --prefix=${QP_ROOT}/install/${TARGET} && make || exit 1 + ./configure --prefix=${QP_ROOT} && make || exit 1 make install || exit 1 cd - cp ${TARGET}/bin/${TARGET} ${QP_ROOT}/bin || exit 1 rm -R -- ${TARGET} || exit 1 } -source scripts/build.sh \ No newline at end of file +source scripts/build.sh diff --git a/install/scripts/install_zeromq.sh b/install/scripts/install_zeromq.sh index 3bf2a715..f6596f9c 100755 --- a/install/scripts/install_zeromq.sh +++ b/install/scripts/install_zeromq.sh @@ -7,22 +7,13 @@ function _install() cd .. QP_ROOT=$PWD cd - - export C_INCLUDE_PATH="${C_INCLUDE_PATH}":./ set -e set -u ORIG=$(pwd) cd "${BUILD}" - ./configure --without-libsodium || exit 1 + ./configure --prefix=$QP_ROOT --without-libsodium || exit 1 make -j 8 || exit 1 - rm -f -- "${QP_ROOT}"/lib/libzmq.a "${QP_ROOT}"/lib/libzmq.so "${QP_ROOT}"/lib/libzmq.so.? - cp .libs/libzmq.a "${QP_ROOT}"/lib - cp .libs/libzmq.so "${QP_ROOT}"/lib/libzmq.so.5 -# cp src/.libs/libzmq.a "${QP_ROOT}"/lib -# cp src/.libs/libzmq.so "${QP_ROOT}"/lib/libzmq.so.4 - cp include/{zmq.h,zmq_utils.h} "${QP_ROOT}"/lib - cd "${QP_ROOT}"/lib - ln -s libzmq.so.5 libzmq.so -# ln -s libzmq.so.4 libzmq.so + make install || exit 1 cd ${ORIG} return 0 } diff --git a/install/scripts/install_zlib.sh b/install/scripts/install_zlib.sh index 06ce67f3..ea268f2e 100755 --- a/install/scripts/install_zlib.sh +++ b/install/scripts/install_zlib.sh @@ -11,11 +11,8 @@ function _install() cd - cd ${BUILD} ./configure && make || exit 1 - make install prefix=$QP_ROOT/install/${TARGET} || exit 1 - ln -s -f $QP_ROOT/install/${TARGET}/lib/libz.so $QP_ROOT/lib || exit 1 - ln -s -f $QP_ROOT/install/${TARGET}/lib/libz.a $QP_ROOT/lib || exit 1 - ln -s -f $QP_ROOT/install/${TARGET}/include/zlib.h $QP_ROOT/lib || exit 1 - ln -s -f $QP_ROOT/install/${TARGET}/include/zconf.h $QP_ROOT/lib || exit 1 + ./configure --prefix=$QP_ROOT && make || exit 1 + make install || exit 1 } source scripts/build.sh diff --git a/ocaml/Pseudo.ml b/ocaml/Pseudo.ml index 3fb4736e..7f813937 100644 --- a/ocaml/Pseudo.ml +++ b/ocaml/Pseudo.ml @@ -124,23 +124,27 @@ let to_string t = let find in_channel element = In_channel.seek in_channel 0L; - let element_read, old_pos = - ref Element.X, + let loop, element_read, old_pos = + ref true, + ref None, ref (In_channel.pos in_channel) in - while !element_read <> element + + while !loop do - let buffer = - old_pos := In_channel.pos in_channel; - match In_channel.input_line in_channel with - | Some line -> String.split ~on:' ' line - |> List.hd_exn - | None -> "" - in try - element_read := Element.of_string buffer + let buffer = + old_pos := In_channel.pos in_channel; + match In_channel.input_line in_channel with + | Some line -> String.split ~on:' ' line + |> List.hd_exn + | None -> raise End_of_file + in + element_read := Some (Element.of_string buffer); + loop := !element_read <> (Some element) with | Element.ElementError _ -> () + | End_of_file -> loop := false done ; In_channel.seek in_channel !old_pos; !element_read @@ -148,124 +152,126 @@ let find in_channel element = (** Read the Pseudopotential in GAMESS format *) let read_element in_channel element = - ignore (find in_channel element); - - let rec read result = - match In_channel.input_line in_channel with - | None -> result - | Some line -> - if (String.strip line = "") then - result - else - read (line::result) - in - - let data = - read [] - |> List.rev - in - - let debug_data = - String.concat ~sep:"\n" data - in - - let decode_first_line = function - | first_line :: rest -> + match find in_channel element with + | Some e when e = element -> begin - let first_line_split = - String.split first_line ~on:' ' - |> List.filter ~f:(fun x -> (String.strip x) <> "") + let rec read result = + match In_channel.input_line in_channel with + | None -> result + | Some line -> + if (String.strip line = "") then + result + else + read (line::result) in - match first_line_split with - | e :: "GEN" :: n :: p -> - { element = Element.of_string e ; - n_elec = Int.of_string n |> Positive_int.of_int ; - local = [] ; - non_local = [] - }, rest - | _ -> failwith ( - Printf.sprintf "Unable to read Pseudopotential : \n%s\n" - debug_data ) - end - | _ -> failwith ("Error reading pseudopotential\n"^debug_data) - in - let rec loop create_primitive accu = function - | (0,rest) -> List.rev accu, rest - | (n,line::rest) -> - begin - match - String.split line ~on:' ' - |> List.filter ~f:(fun x -> String.strip x <> "") - with - | c :: i :: e :: [] -> - let i = - Int.of_string i - in - let elem = - ( create_primitive - (Float.of_string e |> AO_expo.of_float) - (i-2 |> R_power.of_int), - Float.of_string c |> AO_coef.of_float - ) - in - loop create_primitive (elem::accu) (n-1, rest) + let data = + read [] + |> List.rev + in + + let debug_data = + String.concat ~sep:"\n" data + in + + let decode_first_line = function + | first_line :: rest -> + begin + let first_line_split = + String.split first_line ~on:' ' + |> List.filter ~f:(fun x -> (String.strip x) <> "") + in + match first_line_split with + | e :: "GEN" :: n :: p -> + { element = Element.of_string e ; + n_elec = Int.of_string n |> Positive_int.of_int ; + local = [] ; + non_local = [] + }, rest + | _ -> failwith ( + Printf.sprintf "Unable to read Pseudopotential : \n%s\n" + debug_data ) + end | _ -> failwith ("Error reading pseudopotential\n"^debug_data) - end - | _ -> failwith ("Error reading pseudopotential\n"^debug_data) - in + in - let decode_local (pseudo,data) = - let decode_local_n n rest = - let result, rest = - loop Primitive_local.of_expo_r_power [] (Positive_int.to_int n,rest) + let rec loop create_primitive accu = function + | (0,rest) -> List.rev accu, rest + | (n,line::rest) -> + begin + match + String.split line ~on:' ' + |> List.filter ~f:(fun x -> String.strip x <> "") + with + | c :: i :: e :: [] -> + let i = + Int.of_string i + in + let elem = + ( create_primitive + (Float.of_string e |> AO_expo.of_float) + (i-2 |> R_power.of_int), + Float.of_string c |> AO_coef.of_float + ) + in + loop create_primitive (elem::accu) (n-1, rest) + | _ -> failwith ("Error reading pseudopotential\n"^debug_data) + end + | _ -> failwith ("Error reading pseudopotential\n"^debug_data) in - { pseudo with local = result }, rest - in - match data with - | n :: rest -> - let n = - String.strip n - |> Int.of_string - |> Positive_int.of_int + + let decode_local (pseudo,data) = + let decode_local_n n rest = + let result, rest = + loop Primitive_local.of_expo_r_power [] (Positive_int.to_int n,rest) + in + { pseudo with local = result }, rest in - decode_local_n n rest - | _ -> failwith ("Unable to read (non-)local pseudopotential\n"^debug_data) - in - - let decode_non_local (pseudo,data) = - let decode_non_local_n proj n (pseudo,data) = - let result, rest = - loop (Primitive_non_local.of_proj_expo_r_power proj) - [] (Positive_int.to_int n, data) + match data with + | n :: rest -> + let n = + String.strip n + |> Int.of_string + |> Positive_int.of_int + in + decode_local_n n rest + | _ -> failwith ("Unable to read (non-)local pseudopotential\n"^debug_data) in - { pseudo with non_local = pseudo.non_local @ result }, rest - in - let rec new_proj (pseudo,data) proj = - match data with - | n :: rest -> - let n = - String.strip n - |> Int.of_string - |> Positive_int.of_int - in - let result = - decode_non_local_n proj n (pseudo,rest) - and proj_next = - (Positive_int.to_int proj)+1 - |> Positive_int.of_int - in - new_proj result proj_next - | _ -> pseudo - in - new_proj (pseudo,data) (Positive_int.of_int 0) - in - decode_first_line data - |> decode_local - |> decode_non_local + let decode_non_local (pseudo,data) = + let decode_non_local_n proj n (pseudo,data) = + let result, rest = + loop (Primitive_non_local.of_proj_expo_r_power proj) + [] (Positive_int.to_int n, data) + in + { pseudo with non_local = pseudo.non_local @ result }, rest + in + let rec new_proj (pseudo,data) proj = + match data with + | n :: rest -> + let n = + String.strip n + |> Int.of_string + |> Positive_int.of_int + in + let result = + decode_non_local_n proj n (pseudo,rest) + and proj_next = + (Positive_int.to_int proj)+1 + |> Positive_int.of_int + in + new_proj result proj_next + | _ -> pseudo + in + new_proj (pseudo,data) (Positive_int.of_int 0) + in + + decode_first_line data + |> decode_local + |> decode_non_local + end + | _ -> empty element - include To_md5 diff --git a/ocaml/qp_create_guess.ml b/ocaml/qp_create_guess.ml index 62af57de..bebfdad3 100644 --- a/ocaml/qp_create_guess.ml +++ b/ocaml/qp_create_guess.ml @@ -88,8 +88,9 @@ let run ~multiplicity ezfio_file = ~alpha:(Elec_alpha_number.of_int alpha_new) ~beta:(Elec_beta_number.of_int beta_new) pair ) in + let c = - Array.create ~len:(List.length determinants) (Det_coef.of_float 1.) + Array.init (List.length determinants) (fun _ -> Det_coef.of_float ((Random.float 2.)-.1.)) in determinants diff --git a/plugins/All_singles/.gitignore b/plugins/All_singles/.gitignore deleted file mode 100644 index cae0c971..00000000 --- a/plugins/All_singles/.gitignore +++ /dev/null @@ -1,32 +0,0 @@ -# Automatically created by $QP_ROOT/scripts/module/module_handler.py -.ninja_deps -.ninja_log -AO_Basis -Bitmask -Davidson -Determinants -Electrons -Ezfio_files -Generators_restart -Hartree_Fock -IRPF90_man -IRPF90_temp -Integrals_Bielec -Integrals_Monoelec -MOGuess -MO_Basis -Makefile -Makefile.depend -Nuclei -Perturbation -Properties -Pseudo -Selectors_no_sorted -Utils -ZMQ -all_1h_1p -all_singles -ezfio_interface.irp.f -irpf90.make -irpf90_entities -tags \ No newline at end of file diff --git a/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f b/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f index 6844ed90..881f74c3 100644 --- a/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f +++ b/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f @@ -5,11 +5,15 @@ program fci_zmq double precision, allocatable :: pt2(:) integer :: degree + double precision :: threshold_davidson_in allocate (pt2(N_states)) pt2 = 1.d0 - diag_algorithm = "Lapack" + threshold_davidson_in = threshold_davidson + threshold_davidson = threshold_davidson_in * 100.d0 + SOFT_TOUCH threshold_davidson + if (N_det > N_det_max) then call diagonalize_CI @@ -33,20 +37,11 @@ program fci_zmq double precision :: E_CI_before(N_states) - integer :: n_det_before + integer :: n_det_before, to_select print*,'Beginning the selection ...' E_CI_before(1:N_states) = CI_energy(1:N_states) do while ( (N_det < N_det_max) .and. (maxval(abs(pt2(1:N_states))) > pt2_max) ) - n_det_before = N_det - call ZMQ_selection(max(256-N_det, N_det), pt2) - - PROVIDE psi_coef - PROVIDE psi_det - PROVIDE psi_det_sorted - - call diagonalize_CI - call save_wavefunction print *, 'N_det = ', N_det print *, 'N_states = ', N_states @@ -71,12 +66,38 @@ program fci_zmq endif E_CI_before(1:N_states) = CI_energy(1:N_states) call ezfio_set_cas_sd_zmq_energy(CI_energy(1)) + + n_det_before = N_det + to_select = 2*N_det + to_select = max(64-to_select, to_select) + to_select = min(to_select,N_det_max-n_det_before) + call ZMQ_selection(to_select, pt2) + + PROVIDE psi_coef + PROVIDE psi_det + PROVIDE psi_det_sorted + + if (N_det == N_det_max) then + threshold_davidson = threshold_davidson_in + SOFT_TOUCH threshold_davidson + endif + call diagonalize_CI + call save_wavefunction + call ezfio_set_cas_sd_zmq_energy(CI_energy(1)) enddo + + if (N_det < N_det_max) then + threshold_davidson = threshold_davidson_in + SOFT_TOUCH threshold_davidson + call diagonalize_CI + call save_wavefunction + call ezfio_set_cas_sd_zmq_energy(CI_energy(1)) + endif integer :: exc_max, degree_min exc_max = 0 print *, 'CAS determinants : ', N_det_cas - do i=1,min(N_det_cas,10) + do i=1,min(N_det_cas,20) do k=i,N_det_cas call get_excitation_degree(psi_cas(1,1,k),psi_cas(1,1,i),degree,N_int) exc_max = max(exc_max,degree) @@ -108,7 +129,7 @@ program fci_zmq endif call save_wavefunction call ezfio_set_cas_sd_zmq_energy(CI_energy(1)) - call ezfio_set_cas_sd_zmq_energy_pt2(E_CI_before+pt2) + call ezfio_set_cas_sd_zmq_energy_pt2(E_CI_before(1)+pt2(1)) end diff --git a/plugins/Full_CI_ZMQ/.gitignore b/plugins/Full_CI_ZMQ/.gitignore deleted file mode 100644 index a996a508..00000000 --- a/plugins/Full_CI_ZMQ/.gitignore +++ /dev/null @@ -1,34 +0,0 @@ -# Automatically created by $QP_ROOT/scripts/module/module_handler.py -.ninja_deps -.ninja_log -AO_Basis -Bitmask -Davidson -Determinants -Electrons -Ezfio_files -Full_CI -Generators_full -Hartree_Fock -IRPF90_man -IRPF90_temp -Integrals_Bielec -Integrals_Monoelec -MOGuess -MO_Basis -Makefile -Makefile.depend -Nuclei -Perturbation -Properties -Pseudo -Selectors_full -Utils -ZMQ -ezfio_interface.irp.f -fci_zmq -irpf90.make -irpf90_entities -selection_davidson_slave -selection_slave -tags \ No newline at end of file diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index c80b7410..ae0d7989 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -5,11 +5,15 @@ program fci_zmq double precision, allocatable :: pt2(:) integer :: degree + integer :: n_det_before, to_select + double precision :: threshold_davidson_in allocate (pt2(N_states)) pt2 = 1.d0 - diag_algorithm = "Lapack" + threshold_davidson_in = threshold_davidson + threshold_davidson = threshold_davidson_in * 100.d0 + SOFT_TOUCH threshold_davidson if (N_det > N_det_max) then call diagonalize_CI @@ -33,29 +37,11 @@ program fci_zmq double precision :: E_CI_before(N_states) - integer :: n_det_before print*,'Beginning the selection ...' E_CI_before(1:N_states) = CI_energy(1:N_states) + n_det_before = 0 do while ( (N_det < N_det_max) .and. (maxval(abs(pt2(1:N_states))) > pt2_max) ) - n_det_before = N_det - call ZMQ_selection(max(1024-N_det, N_det), pt2) - - PROVIDE psi_coef - PROVIDE psi_det - PROVIDE psi_det_sorted - - call diagonalize_CI - call save_wavefunction - -! if (N_det > N_det_max) then -! psi_det = psi_det_sorted -! psi_coef = psi_coef_sorted -! N_det = N_det_max -! soft_touch N_det psi_det psi_coef -! call diagonalize_CI -! call save_wavefunction -! endif print *, 'N_det = ', N_det print *, 'N_states = ', N_states @@ -79,9 +65,35 @@ program fci_zmq enddo endif E_CI_before(1:N_states) = CI_energy(1:N_states) - call ezfio_set_full_ci_zmq_energy(CI_energy) + call ezfio_set_full_ci_zmq_energy(CI_energy(1)) + + n_det_before = N_det + to_select = 2*N_det + to_select = max(64-to_select, to_select) + to_select = min(to_select, N_det_max-n_det_before) + call ZMQ_selection(to_select, pt2) + + PROVIDE psi_coef + PROVIDE psi_det + PROVIDE psi_det_sorted + + if (N_det == N_det_max) then + threshold_davidson = threshold_davidson_in + SOFT_TOUCH threshold_davidson + endif + call diagonalize_CI + call save_wavefunction + call ezfio_set_full_ci_zmq_energy(CI_energy(1)) enddo + if (N_det < N_det_max) then + threshold_davidson = threshold_davidson_in + SOFT_TOUCH threshold_davidson + call diagonalize_CI + call save_wavefunction + call ezfio_set_full_ci_zmq_energy(CI_energy(1)) + endif + if(do_pt2_end)then print*,'Last iteration only to compute the PT2' threshold_selectors = max(threshold_selectors,threshold_selectors_pt2) @@ -99,9 +111,11 @@ program fci_zmq print *, 'E+PT2 = ', E_CI_before+pt2 print *, '-----' enddo - call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before+pt2) + call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before(1)+pt2(1)) endif call save_wavefunction + call ezfio_set_full_ci_zmq_energy(CI_energy(1)) + call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before(1)+pt2(1)) end diff --git a/plugins/MP2/mp2.irp.f b/plugins/MP2/mp2.irp.f index 3a049f7b..d4721c71 100644 --- a/plugins/MP2/mp2.irp.f +++ b/plugins/MP2/mp2.irp.f @@ -1,4 +1,10 @@ program mp2 + no_vvvv_integrals = .True. + SOFT_TOUCH no_vvvv_integrals + call run +end + +subroutine run implicit none double precision, allocatable :: pt2(:), norm_pert(:) double precision :: H_pert_diag, E_old diff --git a/plugins/MP2/mp2_wf.irp.f b/plugins/MP2/mp2_wf.irp.f index 5efbb9cd..e7419319 100644 --- a/plugins/MP2/mp2_wf.irp.f +++ b/plugins/MP2/mp2_wf.irp.f @@ -1,4 +1,10 @@ program mp2_wf + no_vvvv_integrals = .True. + SOFT_TOUCH no_vvvv_integrals + call run +end + +subroutine run implicit none BEGIN_DOC ! Save the MP2 wave function diff --git a/plugins/MRCC_Utils/amplitudes.irp.f b/plugins/MRCC_Utils/amplitudes.irp.f index 82736b8f..0e6a4cf4 100644 --- a/plugins/MRCC_Utils/amplitudes.irp.f +++ b/plugins/MRCC_Utils/amplitudes.irp.f @@ -66,9 +66,18 @@ END_PROVIDER +BEGIN_PROVIDER [ integer, n_exc_active_sze ] + implicit none + BEGIN_DOC + ! Dimension of arrays to avoid zero-sized arrays + END_DOC + n_exc_active_sze = max(n_exc_active,1) +END_PROVIDER - BEGIN_PROVIDER [ integer, active_excitation_to_determinants_idx, (0:N_det_ref+1, n_exc_active) ] -&BEGIN_PROVIDER [ double precision, active_excitation_to_determinants_val, (N_states,N_det_ref+1, n_exc_active) ] + + + BEGIN_PROVIDER [ integer, active_excitation_to_determinants_idx, (0:N_det_ref+1, n_exc_active_sze) ] +&BEGIN_PROVIDER [ double precision, active_excitation_to_determinants_val, (N_states,N_det_ref+1, n_exc_active_sze) ] implicit none BEGIN_DOC ! Sparse matrix A containing the matrix to transform the active excitations to @@ -89,7 +98,7 @@ END_PROVIDER !$OMP shared(is_active_exc, active_hh_idx, active_pp_idx, n_exc_active)& !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk, ppp, hh, s) allocate(lref(N_det_non_ref)) - !$OMP DO schedule(static,10) + !$OMP DO schedule(dynamic) do ppp=1,n_exc_active active_excitation_to_determinants_val(:,:,ppp) = 0d0 active_excitation_to_determinants_idx(:,ppp) = 0 @@ -136,10 +145,10 @@ END_PROVIDER END_PROVIDER - BEGIN_PROVIDER [ integer, mrcc_AtA_ind, (N_det_ref * n_exc_active) ] -&BEGIN_PROVIDER [ double precision, mrcc_AtA_val, (N_states, N_det_ref * n_exc_active) ] -&BEGIN_PROVIDER [ integer, mrcc_col_shortcut, (n_exc_active) ] -&BEGIN_PROVIDER [ integer, mrcc_N_col, (n_exc_active) ] + BEGIN_PROVIDER [ integer, mrcc_AtA_ind, (N_det_ref * n_exc_active_sze) ] +&BEGIN_PROVIDER [ double precision, mrcc_AtA_val, (N_states, N_det_ref * n_exc_active_sze) ] +&BEGIN_PROVIDER [ integer, mrcc_col_shortcut, (n_exc_active_sze) ] +&BEGIN_PROVIDER [ integer, mrcc_N_col, (n_exc_active_sze) ] implicit none BEGIN_DOC ! A is active_excitation_to_determinants in At.A @@ -170,7 +179,6 @@ END_PROVIDER do at_roww = 1, n_exc_active ! hh_nex at_row = active_pp_idx(at_roww) wk = 0 - if(mod(at_roww, 100) == 0) print *, "AtA", at_row, "/", hh_nex do a_coll = 1, n_exc_active a_col = active_pp_idx(a_coll) @@ -224,7 +232,7 @@ END_PROVIDER deallocate (A_ind_mwen, A_val_mwen, As2_val_mwen, t) !$OMP END PARALLEL - print *, "ATA SIZE", ata_size + print *, "At.A SIZE", ata_size END_PROVIDER diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index 0470960a..6bdadb24 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -640,8 +640,10 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz include 'constants.include.F' !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda - if (N_st_diag > sze) then - stop 'error in Davidson : N_st_diag > sze' + if (N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 + stop -1 endif PROVIDE nuclear_repulsion @@ -666,7 +668,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz write(iunit,'(A)') trim(write_buffer) write_buffer = ' Iter' do i=1,N_st - write_buffer = trim(write_buffer)//' Energy S^2 Residual' + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' enddo write(iunit,'(A)') trim(write_buffer) write_buffer = '===== ' @@ -715,6 +717,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz double precision :: r1, r2 do k=N_st+1,N_st_diag + u_in(k,k) = 10.d0 do i=1,sze call random_number(r1) call random_number(r2) @@ -762,10 +765,49 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz 1.d0, U, size(U,1), S, size(S,1), & 0.d0, s_, size(s_,1)) +! ! Diagonalize S^2 +! ! --------------- +! +! call lapack_diag(s2,y,s_,size(s_,1),shift2) +! +! ! Rotate H in the basis of eigenfunctions of s2 +! ! --------------------------------------------- +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('T','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) +! +! ! Damp interaction between different spin states +! ! ------------------------------------------------ +! +! do k=1,shift2 +! do l=1,shift2 +! if (dabs(s2(k) - s2(l)) > 1.d0) then +! h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l)))) +! endif +! enddo +! enddo +! +! ! Rotate back H +! ! ------------- +! +! call dgemm('N','T',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) + + ! Diagonalize h ! ------------- call lapack_diag(lambda,y,h,size(h,1),shift2) - + ! Compute S2 for each eigenvector ! ------------------------------- @@ -781,61 +823,77 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz s2(k) = s_(k,k) + S_z2_Sz enddo - if (s2_eig) then - logical :: state_ok(N_st_diag*davidson_sze_max) - do k=1,shift2 - state_ok(k) = (dabs(s2(k)-expected_s2) < 1.d0) - enddo - do k=1,shift2 - if (.not. state_ok(k)) then - do l=k+1,shift2 - if (state_ok(l)) then - call dswap(shift2, y(1,k), 1, y(1,l), 1) - call dswap(1, s2(k), 1, s2(l), 1) - call dswap(1, lambda(k), 1, lambda(l), 1) - state_ok(k) = .True. - state_ok(l) = .False. - exit - endif - enddo - endif + if (s2_eig) then + logical :: state_ok(N_st_diag*davidson_sze_max) + do k=1,shift2 + state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + enddo + else + do k=1,size(state_ok) + state_ok(k) = .True. enddo endif + do k=1,shift2 + if (.not. state_ok(k)) then + do l=k+1,shift2 + if (state_ok(l)) then + call dswap(shift2, y(1,k), 1, y(1,l), 1) + call dswap(1, s2(k), 1, s2(l), 1) + call dswap(1, lambda(k), 1, lambda(l), 1) + state_ok(k) = .True. + state_ok(l) = .False. + exit + endif + enddo + endif + enddo -! ! Compute overlap with U_in -! ! ------------------------- -! -! integer :: coord(2), order(N_st_diag) -! overlap = -1.d0 -! do k=1,shift2 -! do i=1,shift2 -! overlap(k,i) = dabs(y(k,i)) -! enddo -! enddo -! do k=1,N_st -! coord = maxloc(overlap) -! order( coord(2) ) = coord(1) -! overlap(coord(1),coord(2)) = -1.d0 -! enddo -! overlap = y -! do k=1,N_st -! l = order(k) -! if (k /= l) then -! y(1:shift2,k) = overlap(1:shift2,l) -! endif -! enddo -! do k=1,N_st -! overlap(k,1) = lambda(k) -! overlap(k,2) = s2(k) -! enddo -! do k=1,N_st -! l = order(k) -! if (k /= l) then -! lambda(k) = overlap(l,1) -! s2(k) = overlap(l,2) -! endif -! enddo + if (state_following) then + + ! Compute overlap with U_in + ! ------------------------- + + integer :: order(N_st_diag) + double precision :: cmax + overlap = -1.d0 + do k=1,shift2 + do i=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo + enddo + do k=1,N_st + cmax = -1.d0 + do i=1,N_st + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,shift2 + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) + endif + enddo + do k=1,N_st + overlap(k,1) = lambda(k) + overlap(k,2) = s2(k) + enddo + do k=1,N_st + l = order(k) + if (k /= l) then + lambda(k) = overlap(l,1) + s2(k) = overlap(l,2) + endif + enddo + + endif ! Express eigenvectors of h in the determinant basis @@ -852,11 +910,31 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz ! ----------------------- do k=1,N_st_diag - do i=1,sze - U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & - * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & - )/max(H_jj(i) - lambda (k),1.d-2) - enddo +! if (state_ok(k)) then + do i=1,sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & + * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & + )/max(H_jj(i) - lambda (k),1.d-2) + enddo +! else +! ! Randomize components with bad +! do i=1,sze-2,2 +! call random_number(r1) +! call random_number(r2) +! r1 = dsqrt(-2.d0*dlog(r1)) +! r2 = dtwo_pi*r2 +! U(i,shift2+k) = r1*dcos(r2) +! U(i+1,shift2+k) = r1*dsin(r2) +! enddo +! do i=sze-2+1,sze +! call random_number(r1) +! call random_number(r2) +! r1 = dsqrt(-2.d0*dlog(r1)) +! r2 = dtwo_pi*r2 +! U(i,shift2+k) = r1*dcos(r2) +! enddo +! endif + if (k <= N_st) then residual_norm(k) = u_dot_u(U(1,shift2+k),sze) to_print(1,k) = lambda(k) + nuclear_repulsion @@ -879,20 +957,16 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz enddo - if (.not.converged) then - iter = itermax-1 - endif - ! Re-contract to u_in ! ----------- - do k=1,N_st_diag - energies(k) = lambda(k) - enddo + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) - call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0, & - U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + enddo + do k=1,N_st_diag + energies(k) = lambda(k) enddo write_buffer = '===== ' @@ -905,7 +979,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz deallocate ( & W, residual_norm, & - U, & + U, overlap, & c, S, & h, & y, s_, s_tmp, & @@ -968,10 +1042,11 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) + PROVIDE delta_ij_s2 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8, & - !$OMP N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij,istate_in) + !$OMP N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij, delta_ij_s2,istate_in) allocate(vt(N_st_8,n),st(N_st_8,n)) Vt = 0.d0 St = 0.d0 @@ -1056,6 +1131,8 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i do istate=1,N_st vt (istate,i) = vt (istate,i) + delta_ij(istate_in,jj,ii)*ut(istate,j) vt (istate,j) = vt (istate,j) + delta_ij(istate_in,jj,ii)*ut(istate,i) + st (istate,i) = st (istate,i) + delta_ij_s2(istate_in,jj,ii)*ut(istate,j) + st (istate,j) = st (istate,j) + delta_ij_s2(istate_in,jj,ii)*ut(istate,i) enddo enddo enddo diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index f28ccf25..d6b9cc79 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -33,6 +33,7 @@ END_PROVIDER if (ihpsi_current(k) == 0.d0) then ihpsi_current(k) = 1.d-32 endif +! lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) ) lambda_pert = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) if (lambda_pert / lambda_mrcc(k,i) < 0.5d0) then @@ -77,19 +78,6 @@ BEGIN_PROVIDER [ double precision, hij_mrcc, (N_det_non_ref,N_det_ref) ] END_PROVIDER -! BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ] -!&BEGIN_PROVIDER [ double precision, delta_ii, (N_states,N_det_ref) ] -! implicit none -! BEGIN_DOC -! ! Dressing matrix in N_det basis -! END_DOC -! integer :: i,j,m -! delta_ij = 0.d0 -! delta_ii = 0.d0 -! call H_apply_mrcc(delta_ij,delta_ii,N_states,N_det_non_ref,N_det_ref) -! -!END_PROVIDER - BEGIN_PROVIDER [ double precision, h_matrix_dressed, (N_det,N_det,N_states) ] implicit none @@ -149,8 +137,13 @@ END_PROVIDER allocate (eigenvectors(size(CI_eigenvectors_dressed,1),size(CI_eigenvectors_dressed,2)), & eigenvalues(size(CI_electronic_energy_dressed,1))) + do j=1,min(N_states,N_det) + do i=1,N_det + eigenvectors(i,j) = psi_coef(i,j) + enddo + enddo do mrcc_state=1,N_states - do j=1,min(N_states,N_det) + do j=mrcc_state,min(N_states,N_det) do i=1,N_det eigenvectors(i,j) = psi_coef(i,j) enddo @@ -161,17 +154,15 @@ END_PROVIDER output_determinants,mrcc_state) CI_eigenvectors_dressed(1:N_det,mrcc_state) = eigenvectors(1:N_det,mrcc_state) CI_electronic_energy_dressed(mrcc_state) = eigenvalues(mrcc_state) - if (mrcc_state == 1) then - do k=N_states+1,N_states_diag - CI_eigenvectors_dressed(1:N_det,k) = eigenvectors(1:N_det,k) - CI_electronic_energy_dressed(k) = eigenvalues(k) - enddo - endif - enddo - call u_0_S2_u_0(CI_eigenvectors_s2_dressed,CI_eigenvectors_dressed,N_det,psi_det,N_int,& + enddo + do k=N_states+1,N_states_diag + CI_eigenvectors_dressed(1:N_det,k) = eigenvectors(1:N_det,k) + CI_electronic_energy_dressed(k) = eigenvalues(k) + enddo + call u_0_S2_u_0(CI_eigenvectors_s2_dressed,CI_eigenvectors_dressed,N_det,psi_det,N_int,& N_states_diag,size(CI_eigenvectors_dressed,1)) - deallocate (eigenvectors,eigenvalues) + deallocate (eigenvectors,eigenvalues) else if (diag_algorithm == "Lapack") then @@ -649,14 +640,12 @@ END_PROVIDER allocate(rho_mrcc_init(N_det_non_ref)) allocate(x_new(hh_nex)) allocate(x(hh_nex), AtB(hh_nex)) - x = 0d0 - do s=1,N_states AtB(:) = 0.d0 !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, active_excitation_to_determinants_idx,& - !$OMP active_excitation_to_determinants_val, x, N_det_ref, hh_nex, N_det_non_ref) & + !$OMP active_excitation_to_determinants_val, N_det_ref, hh_nex, N_det_non_ref) & !$OMP private(at_row, a_col, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)& !$OMP shared(N_states,mrcc_col_shortcut, mrcc_N_col, AtB, mrcc_AtA_val, mrcc_AtA_ind, s, n_exc_active, active_pp_idx) @@ -712,21 +701,19 @@ END_PROVIDER end do deallocate(lref) + do i=1,N_det_non_ref + rho_mrcc(i,s) = rho_mrcc_init(i) + enddo + x_new = x double precision :: factor, resold factor = 1.d0 resold = huge(1.d0) - do k=0,100000 - !$OMP PARALLEL default(shared) private(cx, i, a_col, a_coll) - - !$OMP DO - do i=1,N_det_non_ref - rho_mrcc(i,s) = rho_mrcc_init(i) - enddo - !$OMP END DO NOWAIT - + do k=0,10*hh_nex + res = 0.d0 + !$OMP PARALLEL default(shared) private(cx, i, a_col, a_coll) reduction(+:res) !$OMP DO do a_coll = 1, n_exc_active a_col = active_pp_idx(a_coll) @@ -735,35 +722,38 @@ END_PROVIDER cx = cx + x(mrcc_AtA_ind(i)) * mrcc_AtA_val(s,i) end do x_new(a_col) = AtB(a_col) + cx * factor - end do - !$OMP END DO - - !$OMP END PARALLEL - - - res = 0.d0 - do a_coll=1,n_exc_active - a_col = active_pp_idx(a_coll) - do j=1,N_det_non_ref - i = active_excitation_to_determinants_idx(j,a_coll) - if (i==0) exit - rho_mrcc(i,s) = rho_mrcc(i,s) + active_excitation_to_determinants_val(s,j,a_coll) * X_new(a_col) - enddo res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col)) X(a_col) = X_new(a_col) end do + !$OMP END DO + !$OMP END PARALLEL + if (res > resold) then - factor = -factor * 0.5d0 + factor = factor * 0.5d0 endif resold = res - if(mod(k, 100) == 0) then + if(iand(k, 4095) == 0) then print *, "res ", k, res end if - if(res < 1d-9) exit + if(res < 1d-10) exit end do - + dIj_unique(1:size(X), s) = X(1:size(X)) + + enddo + + do s=1,N_states + + do a_coll=1,n_exc_active + a_col = active_pp_idx(a_coll) + do j=1,N_det_non_ref + i = active_excitation_to_determinants_idx(j,a_coll) + if (i==0) exit + rho_mrcc(i,s) = rho_mrcc(i,s) + active_excitation_to_determinants_val(s,j,a_coll) * dIj_unique(a_col,s) + enddo + end do + norm = 0.d0 do i=1,N_det_non_ref norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s) @@ -775,122 +765,11 @@ END_PROVIDER enddo ! Norm now contains the norm of Psi + A.X - print *, k, "res : ", res, "norm : ", sqrt(norm) - -!--------------- -! double precision :: e_0, overlap -! double precision, allocatable :: u_0(:) -! integer(bit_kind), allocatable :: keys_tmp(:,:,:) -! allocate (u_0(N_det), keys_tmp(N_int,2,N_det) ) -! k=0 -! overlap = 0.d0 -! do i=1,N_det_ref -! k = k+1 -! u_0(k) = psi_ref_coef(i,1) -! keys_tmp(:,:,k) = psi_ref(:,:,i) -! overlap += u_0(k)*psi_ref_coef(i,1) -! enddo -! norm = 0.d0 -! do i=1,N_det_non_ref -! k = k+1 -! u_0(k) = psi_non_ref_coef(i,1) -! keys_tmp(:,:,k) = psi_non_ref(:,:,i) -! overlap += u_0(k)*psi_non_ref_coef(i,1) -! enddo -! -! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det) -! print *, 'Energy of |Psi_CASSD> : ', e_0 + nuclear_repulsion, overlap -! -! k=0 -! overlap = 0.d0 -! do i=1,N_det_ref -! k = k+1 -! u_0(k) = psi_ref_coef(i,1) -! keys_tmp(:,:,k) = psi_ref(:,:,i) -! overlap += u_0(k)*psi_ref_coef(i,1) -! enddo -! norm = 0.d0 -! do i=1,N_det_non_ref -! k = k+1 -! ! f is such that f.\tilde{c_i} = c_i -! f = psi_non_ref_coef(i,1) / rho_mrcc(i,1) -! -! ! Avoid numerical instabilities -! f = min(f,2.d0) -! f = max(f,-2.d0) -! -! f = 1.d0 -! -! u_0(k) = rho_mrcc(i,1)*f -! keys_tmp(:,:,k) = psi_non_ref(:,:,i) -! norm += u_0(k)**2 -! overlap += u_0(k)*psi_non_ref_coef(i,1) -! enddo -! -! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det) -! print *, 'Energy of |(1+T)Psi_0> : ', e_0 + nuclear_repulsion, overlap -! -! f = 1.d0/norm -! norm = 1.d0 -! do i=1,N_det_ref -! norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s) -! enddo -! f = dsqrt(f*norm) -! overlap = norm -! do i=1,N_det_non_ref -! u_0(k) = rho_mrcc(i,1)*f -! overlap += u_0(k)*psi_non_ref_coef(i,1) -! enddo -! -! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det) -! print *, 'Energy of |(1+T)Psi_0> (normalized) : ', e_0 + nuclear_repulsion, overlap -! -! k=0 -! overlap = 0.d0 -! do i=1,N_det_ref -! k = k+1 -! u_0(k) = psi_ref_coef(i,1) -! keys_tmp(:,:,k) = psi_ref(:,:,i) -! overlap += u_0(k)*psi_ref_coef(i,1) -! enddo -! norm = 0.d0 -! do i=1,N_det_non_ref -! k = k+1 -! ! f is such that f.\tilde{c_i} = c_i -! f = psi_non_ref_coef(i,1) / rho_mrcc(i,1) -! -! ! Avoid numerical instabilities -! f = min(f,2.d0) -! f = max(f,-2.d0) -! -! u_0(k) = rho_mrcc(i,1)*f -! keys_tmp(:,:,k) = psi_non_ref(:,:,i) -! norm += u_0(k)**2 -! overlap += u_0(k)*psi_non_ref_coef(i,1) -! enddo -! -! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det) -! print *, 'Energy of |(1+T)Psi_0> (mu_i): ', e_0 + nuclear_repulsion, overlap -! -! f = 1.d0/norm -! norm = 1.d0 -! do i=1,N_det_ref -! norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s) -! enddo -! overlap = norm -! f = dsqrt(f*norm) -! do i=1,N_det_non_ref -! u_0(k) = rho_mrcc(i,1)*f -! overlap += u_0(k)*psi_non_ref_coef(i,1) -! enddo -! -! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det) -! print *, 'Energy of |(1+T)Psi_0> (normalized mu_i) : ', e_0 + nuclear_repulsion, overlap -! -! deallocate(u_0, keys_tmp) -! -!--------------- + print *, "norm : ", sqrt(norm) + enddo + + do s=1,N_states norm = 0.d0 double precision :: f do i=1,N_det_non_ref @@ -898,12 +777,16 @@ END_PROVIDER rho_mrcc(i,s) = 1.d-32 endif - ! f is such that f.\tilde{c_i} = c_i - f = psi_non_ref_coef(i,s) / rho_mrcc(i,s) + if (lambda_type == 2) then + f = 1.d0 + else + ! f is such that f.\tilde{c_i} = c_i + f = psi_non_ref_coef(i,s) / rho_mrcc(i,s) - ! Avoid numerical instabilities - f = min(f,2.d0) - f = max(f,-2.d0) + ! Avoid numerical instabilities + f = min(f,2.d0) + f = max(f,-2.d0) + endif norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s) rho_mrcc(i,s) = f @@ -938,7 +821,6 @@ END_PROVIDER ! rho_mrcc now contains the product of the scaling factors and the ! normalization constant - dIj_unique(1:size(X), s) = X(1:size(X)) end do END_PROVIDER @@ -950,17 +832,14 @@ BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ] integer :: s,i,j double precision, external :: get_dij_index print *, "computing amplitudes..." - !$OMP PARALLEL DEFAULT(shared) PRIVATE(s,i,j) do s=1, N_states - !$OMP DO do i=1, N_det_non_ref do j=1, N_det_ref + !DIR$ FORCEINLINE dij(j, i, s) = get_dij_index(j, i, s, N_int) end do end do - !$OMP END DO end do - !$OMP END PARALLEL print *, "done computing amplitudes" END_PROVIDER @@ -982,7 +861,7 @@ double precision function get_dij_index(II, i, s, Nint) else if(lambda_type == 2) then call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int) get_dij_index = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase - get_dij_index = get_dij_index + get_dij_index = get_dij_index * rho_mrcc(i,s) end if end function diff --git a/plugins/MRCC_Utils/multi_state.irp.f b/plugins/MRCC_Utils/multi_state.irp.f new file mode 100644 index 00000000..b4a2a3cb --- /dev/null +++ b/plugins/MRCC_Utils/multi_state.irp.f @@ -0,0 +1,101 @@ +subroutine multi_state(CI_electronic_energy_dressed_,CI_eigenvectors_dressed_,LDA) + implicit none + BEGIN_DOC + ! Multi-state mixing + END_DOC + integer, intent(in) :: LDA + double precision, intent(inout) :: CI_electronic_energy_dressed_(N_states) + double precision, intent(inout) :: CI_eigenvectors_dressed_(LDA,N_states) + double precision, allocatable :: h(:,:,:), s(:,:), Psi(:,:), H_Psi(:,:,:), H_jj(:) + + allocate( h(N_states,N_states,0:N_states), s(N_states,N_states) ) + allocate( Psi(LDA,N_states), H_Psi(LDA,N_states,0:N_states) ) + allocate (H_jj(LDA) ) + +! e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n) + + integer :: i,j,k,istate + double precision :: U(N_states,N_states), Vt(N_states,N_states), D(N_states) + double precision, external :: diag_H_mat_elem + do istate=1,N_states + do i=1,N_det + H_jj(i) = diag_H_mat_elem(psi_det(1,1,i),N_int) + enddo + + do i=1,N_det_ref + H_jj(idx_ref(i)) += delta_ii(istate,i) + enddo + + do k=1,N_states + do i=1,N_det + Psi(i,k) = CI_eigenvectors_dressed_(i,k) + enddo + enddo + call H_u_0_mrcc_nstates(H_Psi(1,1,istate),Psi,H_jj,N_det,psi_det,N_int,istate,N_states,LDA) + + do k=1,N_states + do i=1,N_states + double precision, external :: u_dot_v + h(i,k,istate) = u_dot_v(Psi(1,i), H_Psi(1,k,istate), N_det) + enddo + enddo + enddo + + do k=1,N_states + do i=1,N_states + s(i,k) = u_dot_v(Psi(1,i), Psi(1,k), N_det) + enddo + enddo + + print *, s(:,:) + print *, '' + + h(:,:,0) = h(:,:,1) + do istate=2,N_states + U(:,:) = h(:,:,0) + call dgemm('N','N',N_states,N_states,N_states,1.d0,& + U, size(U,1), h(1,1,istate), size(h,1), 0.d0, & + h(1,1,0), size(Vt,1)) + enddo + + call svd(h(1,1,0), size(h,1), U, size(U,1), D, Vt, size(Vt,1), N_states, N_states) + do k=1,N_states + D(k) = D(k)**(1./dble(N_states)) + if (D(k) > 0.d0) then + D(k) = -D(k) + endif + enddo + + do j=1,N_states + do i=1,N_states + h(i,j,0) = 0.d0 + do k=1,N_states + h(i,j,0) += U(i,k) * D(k) * Vt(k,j) + enddo + enddo + enddo + + print *, h(:,:,0) + print *,'' + + integer :: LWORK, INFO + double precision, allocatable :: WORK(:) + LWORK=3*N_states + allocate (WORK(LWORK)) + call dsygv(1, 'V', 'U', N_states, h(1,1,0), size(h,1), s, size(s,1), D, WORK, LWORK, INFO) + deallocate(WORK) + + do j=1,N_states + do i=1,N_det + CI_eigenvectors_dressed_(i,j) = 0.d0 + do k=1,N_states + CI_eigenvectors_dressed_(i,j) += Psi(i,k) * h(k,j,0) + enddo + enddo + CI_electronic_energy_dressed_(j) = D(j) + enddo + + + deallocate (h,s, H_jj) + deallocate( Psi, H_Psi ) +end diff --git a/src/MRPT_Utils/EZFIO.cfg b/plugins/MRPT_Utils/EZFIO.cfg similarity index 100% rename from src/MRPT_Utils/EZFIO.cfg rename to plugins/MRPT_Utils/EZFIO.cfg diff --git a/src/MRPT_Utils/H_apply.irp.f b/plugins/MRPT_Utils/H_apply.irp.f similarity index 100% rename from src/MRPT_Utils/H_apply.irp.f rename to plugins/MRPT_Utils/H_apply.irp.f diff --git a/src/MRPT_Utils/NEEDED_CHILDREN_MODULES b/plugins/MRPT_Utils/NEEDED_CHILDREN_MODULES similarity index 100% rename from src/MRPT_Utils/NEEDED_CHILDREN_MODULES rename to plugins/MRPT_Utils/NEEDED_CHILDREN_MODULES diff --git a/src/MRPT_Utils/README.rst b/plugins/MRPT_Utils/README.rst similarity index 100% rename from src/MRPT_Utils/README.rst rename to plugins/MRPT_Utils/README.rst diff --git a/src/MRPT_Utils/energies_cas.irp.f b/plugins/MRPT_Utils/energies_cas.irp.f similarity index 100% rename from src/MRPT_Utils/energies_cas.irp.f rename to plugins/MRPT_Utils/energies_cas.irp.f diff --git a/src/MRPT_Utils/excitations_cas.irp.f b/plugins/MRPT_Utils/excitations_cas.irp.f similarity index 100% rename from src/MRPT_Utils/excitations_cas.irp.f rename to plugins/MRPT_Utils/excitations_cas.irp.f diff --git a/plugins/MRPT_Utils/ezfio_interface.irp.f b/plugins/MRPT_Utils/ezfio_interface.irp.f new file mode 100644 index 00000000..6bd8931d --- /dev/null +++ b/plugins/MRPT_Utils/ezfio_interface.irp.f @@ -0,0 +1,23 @@ +! DO NOT MODIFY BY HAND +! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py +! from file /home/scemama/quantum_package/src/MRPT_Utils/EZFIO.cfg + + +BEGIN_PROVIDER [ logical, do_third_order_1h1p ] + implicit none + BEGIN_DOC +! If true, compute the third order contribution for the 1h1p + END_DOC + + logical :: has + PROVIDE ezfio_filename + + call ezfio_has_mrpt_utils_do_third_order_1h1p(has) + if (has) then + call ezfio_get_mrpt_utils_do_third_order_1h1p(do_third_order_1h1p) + else + print *, 'mrpt_utils/do_third_order_1h1p not found in EZFIO file' + stop 1 + endif + +END_PROVIDER diff --git a/src/MRPT_Utils/fock_like_operators.irp.f b/plugins/MRPT_Utils/fock_like_operators.irp.f similarity index 100% rename from src/MRPT_Utils/fock_like_operators.irp.f rename to plugins/MRPT_Utils/fock_like_operators.irp.f diff --git a/src/MRPT_Utils/give_2h2p.irp.f b/plugins/MRPT_Utils/give_2h2p.irp.f similarity index 100% rename from src/MRPT_Utils/give_2h2p.irp.f rename to plugins/MRPT_Utils/give_2h2p.irp.f diff --git a/src/MRPT_Utils/mrpt_dress.irp.f b/plugins/MRPT_Utils/mrpt_dress.irp.f similarity index 100% rename from src/MRPT_Utils/mrpt_dress.irp.f rename to plugins/MRPT_Utils/mrpt_dress.irp.f diff --git a/src/MRPT_Utils/mrpt_utils.irp.f b/plugins/MRPT_Utils/mrpt_utils.irp.f similarity index 100% rename from src/MRPT_Utils/mrpt_utils.irp.f rename to plugins/MRPT_Utils/mrpt_utils.irp.f diff --git a/src/MRPT_Utils/new_way.irp.f b/plugins/MRPT_Utils/new_way.irp.f similarity index 100% rename from src/MRPT_Utils/new_way.irp.f rename to plugins/MRPT_Utils/new_way.irp.f diff --git a/src/MRPT_Utils/new_way_second_order_coef.irp.f b/plugins/MRPT_Utils/new_way_second_order_coef.irp.f similarity index 100% rename from src/MRPT_Utils/new_way_second_order_coef.irp.f rename to plugins/MRPT_Utils/new_way_second_order_coef.irp.f diff --git a/src/MRPT_Utils/psi_active_prov.irp.f b/plugins/MRPT_Utils/psi_active_prov.irp.f similarity index 100% rename from src/MRPT_Utils/psi_active_prov.irp.f rename to plugins/MRPT_Utils/psi_active_prov.irp.f diff --git a/src/MRPT_Utils/second_order_new.irp.f b/plugins/MRPT_Utils/second_order_new.irp.f similarity index 100% rename from src/MRPT_Utils/second_order_new.irp.f rename to plugins/MRPT_Utils/second_order_new.irp.f diff --git a/src/MRPT_Utils/second_order_new_2p.irp.f b/plugins/MRPT_Utils/second_order_new_2p.irp.f similarity index 100% rename from src/MRPT_Utils/second_order_new_2p.irp.f rename to plugins/MRPT_Utils/second_order_new_2p.irp.f diff --git a/src/MRPT_Utils/utils_bitmask.irp.f b/plugins/MRPT_Utils/utils_bitmask.irp.f similarity index 100% rename from src/MRPT_Utils/utils_bitmask.irp.f rename to plugins/MRPT_Utils/utils_bitmask.irp.f diff --git a/plugins/Psiref_threshold/psi_ref.irp.f b/plugins/Psiref_threshold/psi_ref.irp.f index 5e722822..ee69ef5c 100644 --- a/plugins/Psiref_threshold/psi_ref.irp.f +++ b/plugins/Psiref_threshold/psi_ref.irp.f @@ -6,19 +6,22 @@ use bitmasks &BEGIN_PROVIDER [ integer, N_det_ref ] implicit none BEGIN_DOC - ! Reference wave function, defined as determinants with coefficients > 0.05 + ! Reference wave function, defined as determinants with amplitudes > 0.05 ! idx_ref gives the indice of the ref determinant in psi_det. END_DOC integer :: i, k, l logical :: good - double precision, parameter :: threshold=0.05d0 + double precision, parameter :: threshold=0.05d0 + double precision :: t(N_states) N_det_ref = 0 - t = threshold * abs_psi_coef_max + do l = 1, N_states + t(l) = threshold * abs_psi_coef_max(l) + enddo do i=1,N_det good = .False. - do l = 1, N_states + do l=1, N_states psi_ref_coef(i,l) = 0.d0 - good = good.or.(dabs(psi_coef(i,l)) > t) + good = good.or.(dabs(psi_coef(i,l)) > t(l)) enddo if (good) then N_det_ref = N_det_ref+1 diff --git a/plugins/QmcChem/e_curve_qmc.irp.f b/plugins/QmcChem/e_curve_qmc.irp.f index 4beed3fa..169db84e 100644 --- a/plugins/QmcChem/e_curve_qmc.irp.f +++ b/plugins/QmcChem/e_curve_qmc.irp.f @@ -1,10 +1,12 @@ program e_curve use bitmasks implicit none - integer :: i,j,k, nab, m, l + integer :: i,j,k, kk, nab, m, l double precision :: norm, E, hij, num, ci, cj integer, allocatable :: iorder(:) double precision , allocatable :: norm_sort(:) + PROVIDE mo_bielec_integrals_in_map + nab = n_det_alpha_unique+n_det_beta_unique allocate ( norm_sort(0:nab), iorder(0:nab) ) @@ -60,7 +62,7 @@ program e_curve num = 0.d0 norm = 0.d0 m = 0 - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,l,det_i,det_j,ci,cj,hij) REDUCTION(+:norm,m,num) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,kk,l,det_i,det_j,ci,cj,hij) REDUCTION(+:norm,m,num) allocate( det_i(N_int,2), det_j(N_int,2)) !$OMP DO SCHEDULE(guided) do k=1,n_det @@ -68,15 +70,19 @@ program e_curve cycle endif ci = psi_bilinear_matrix_values(k,1) - det_i(:,1) = psi_det_alpha_unique(:,psi_bilinear_matrix_rows(k)) - det_i(:,2) = psi_det_beta_unique(:,psi_bilinear_matrix_columns(k)) + do kk=1,N_int + det_i(kk,1) = psi_det_alpha_unique(kk,psi_bilinear_matrix_rows(k)) + det_i(kk,2) = psi_det_beta_unique(kk,psi_bilinear_matrix_columns(k)) + enddo do l=1,n_det if (psi_bilinear_matrix_values(l,1) == 0.d0) then cycle endif cj = psi_bilinear_matrix_values(l,1) - det_j(:,1) = psi_det_alpha_unique(:,psi_bilinear_matrix_rows(l)) - det_j(:,2) = psi_det_beta_unique(:,psi_bilinear_matrix_columns(l)) + do kk=1,N_int + det_j(kk,1) = psi_det_alpha_unique(kk,psi_bilinear_matrix_rows(l)) + det_j(kk,2) = psi_det_beta_unique(kk,psi_bilinear_matrix_columns(l)) + enddo call i_h_j(det_i, det_j, N_int, hij) num = num + ci*cj*hij enddo diff --git a/plugins/mrcc_selected/dressing.irp.f b/plugins/mrcc_selected/dressing.irp.f new file mode 100644 index 00000000..c772e2aa --- /dev/null +++ b/plugins/mrcc_selected/dressing.irp.f @@ -0,0 +1,1076 @@ +use bitmasks + + + + BEGIN_PROVIDER [ double precision, delta_ij_mrcc, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_mrcc, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_mrcc, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_mrcc, (N_states, N_det_ref) ] + use bitmasks + implicit none + integer :: gen, h, p, n, t, i, h1, h2, p1, p2, s1, s2, iproc + integer(bit_kind) :: mask(N_int, 2), omask(N_int, 2) + integer(bit_kind),allocatable :: buf(:,:,:) + logical :: ok + logical, external :: detEq + + delta_ij_mrcc = 0d0 + delta_ii_mrcc = 0d0 + delta_ij_s2_mrcc = 0d0 + delta_ii_s2_mrcc = 0d0 + PROVIDE dij + provide hh_shortcut psi_det_size! lambda_mrcc + !$OMP PARALLEL DO default(none) schedule(dynamic) & + !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) & + !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc, delta_ii_s2_mrcc, delta_ij_s2_mrcc) & + !$OMP private(h, n, mask, omask, buf, ok, iproc) + do gen= 1, N_det_generators + allocate(buf(N_int, 2, N_det_non_ref)) + iproc = omp_get_thread_num() + 1 + if(mod(gen, 1000) == 0) print *, "mrcc ", gen, "/", N_det_generators + do h=1, hh_shortcut(0) + call apply_hole_local(psi_det_generators(1,1,gen), hh_exists(1, h), mask, ok, N_int) + if(.not. ok) cycle + omask = 0_bit_kind + if(hh_exists(1, h) /= 0) omask = mask + n = 1 + do p=hh_shortcut(h), hh_shortcut(h+1)-1 + call apply_particle_local(mask, pp_exists(1, p), buf(1,1,n), ok, N_int) + if(ok) n = n + 1 + if(n > N_det_non_ref) stop "MRCC..." + end do + n = n - 1 + + if(n /= 0) then + call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask) + endif + + end do + deallocate(buf) + end do + !$OMP END PARALLEL DO +END_PROVIDER + + +! subroutine blit(b1, b2) +! double precision :: b1(N_states,N_det_non_ref,N_det_ref), b2(N_states,N_det_non_ref,N_det_ref) +! b1 = b1 + b2 +! end subroutine + + +subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask) + use bitmasks + implicit none + + integer, intent(in) :: i_generator,n_selected, Nint + double precision, intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref) + double precision, intent(inout) :: delta_ii_(N_states,N_det_ref) + double precision, intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) + double precision, intent(inout) :: delta_ii_s2_(N_states,N_det_ref) + + integer(bit_kind), intent(in) :: det_buffer(Nint,2,n_selected) + integer :: i,j,k,l,m + integer,allocatable :: idx_alpha(:), degree_alpha(:) + logical :: good, fullMatch + + integer(bit_kind),allocatable :: tq(:,:,:) + integer :: N_tq, c_ref ,degree + + double precision :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states) + double precision, allocatable :: dIa_hla(:,:), dIa_sla(:,:) + double precision :: haj, phase, phase2 + double precision :: f(N_states), ci_inv(N_states) + integer :: exc(0:2,2,2) + integer :: h1,h2,p1,p2,s1,s2 + integer(bit_kind) :: tmp_det(Nint,2) + integer :: iint, ipos + integer :: i_state, k_sd, l_sd, i_I, i_alpha + + integer(bit_kind),allocatable :: miniList(:,:,:) + integer(bit_kind),intent(in) :: key_mask(Nint, 2) + integer,allocatable :: idx_miniList(:) + integer :: N_miniList, ni, leng + double precision, allocatable :: hij_cache(:), sij_cache(:) + + integer(bit_kind), allocatable :: microlist(:,:,:), microlist_zero(:,:,:) + integer, allocatable :: idx_microlist(:), N_microlist(:), ptr_microlist(:), idx_microlist_zero(:) + integer :: mobiles(2), smallerlist + logical, external :: detEq, is_generable + !double precision, external :: get_dij, get_dij_index + + + leng = max(N_det_generators, N_det_non_ref) + allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref), sij_cache(N_det_non_ref)) + allocate(idx_alpha(0:psi_det_size), degree_alpha(psi_det_size)) + !create_minilist_find_previous(key_mask, fullList, miniList, N_fullList, N_miniList, fullMatch, Nint) + call create_minilist_find_previous(key_mask, psi_det_generators, miniList, i_generator-1, N_miniList, fullMatch, Nint) + +! if(fullMatch) then +! return +! end if + + allocate(ptr_microlist(0:mo_tot_num*2+1), & + N_microlist(0:mo_tot_num*2) ) + allocate( microlist(Nint,2,N_minilist*4), & + idx_microlist(N_minilist*4)) + + if(key_mask(1,1) /= 0) then + call create_microlist(miniList, N_minilist, key_mask, microlist, idx_microlist, N_microlist, ptr_microlist, Nint) + call filter_tq_micro(i_generator,n_selected,det_buffer,Nint,tq,N_tq,microlist,ptr_microlist,N_microlist,key_mask) + else + call filter_tq(i_generator,n_selected,det_buffer,Nint,tq,N_tq,miniList,N_minilist) + end if + + + + deallocate(microlist, idx_microlist) + + allocate (dIa_hla(N_states,N_det_non_ref), dIa_sla(N_states,N_det_non_ref)) + + ! |I> + + ! |alpha> + + if(N_tq > 0) then + call create_minilist(key_mask, psi_non_ref, miniList, idx_minilist, N_det_non_ref, N_minilist, Nint) + if(N_minilist == 0) return + + + if(key_mask(1,1) /= 0) then !!!!!!!!!!! PAS GENERAL !!!!!!!!! + allocate(microlist_zero(Nint,2,N_minilist), idx_microlist_zero(N_minilist)) + + allocate( microlist(Nint,2,N_minilist*4), & + idx_microlist(N_minilist*4)) + call create_microlist(miniList, N_minilist, key_mask, microlist, idx_microlist, N_microlist, ptr_microlist, Nint) + + + do i=0,mo_tot_num*2 + do k=ptr_microlist(i),ptr_microlist(i+1)-1 + idx_microlist(k) = idx_minilist(idx_microlist(k)) + end do + end do + + do l=1,N_microlist(0) + do k=1,Nint + microlist_zero(k,1,l) = microlist(k,1,l) + microlist_zero(k,2,l) = microlist(k,2,l) + enddo + idx_microlist_zero(l) = idx_microlist(l) + enddo + end if + end if + + + do i_alpha=1,N_tq + if(key_mask(1,1) /= 0) then + call getMobiles(tq(1,1,i_alpha), key_mask, mobiles, Nint) + + if(N_microlist(mobiles(1)) < N_microlist(mobiles(2))) then + smallerlist = mobiles(1) + else + smallerlist = mobiles(2) + end if + + + do l=0,N_microlist(smallerlist)-1 + microlist_zero(:,:,ptr_microlist(1) + l) = microlist(:,:,ptr_microlist(smallerlist) + l) + idx_microlist_zero(ptr_microlist(1) + l) = idx_microlist(ptr_microlist(smallerlist) + l) + end do + + call get_excitation_degree_vector(microlist_zero,tq(1,1,i_alpha),degree_alpha,Nint,N_microlist(smallerlist)+N_microlist(0),idx_alpha) + do j=1,idx_alpha(0) + idx_alpha(j) = idx_microlist_zero(idx_alpha(j)) + end do + + else + call get_excitation_degree_vector(miniList,tq(1,1,i_alpha),degree_alpha,Nint,N_minilist,idx_alpha) + do j=1,idx_alpha(0) + idx_alpha(j) = idx_miniList(idx_alpha(j)) + end do + end if + + + do l_sd=1,idx_alpha(0) + k_sd = idx_alpha(l_sd) + call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hij_cache(k_sd)) + call get_s2(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,sij_cache(k_sd)) + enddo + ! |I> + do i_I=1,N_det_ref + ! Find triples and quadruple grand parents + call get_excitation_degree(tq(1,1,i_alpha),psi_ref(1,1,i_I),degree,Nint) + if (degree > 4) then + cycle + endif + + do i_state=1,N_states + dIa(i_state) = 0.d0 + enddo + + ! |alpha> + do k_sd=1,idx_alpha(0) + ! Loop if lambda == 0 + logical :: loop +! loop = .True. +! do i_state=1,N_states +! if (lambda_mrcc(i_state,idx_alpha(k_sd)) /= 0.d0) then +! loop = .False. +! exit +! endif +! enddo +! if (loop) then +! cycle +! endif + + call get_excitation_degree(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(k_sd)),degree,Nint) + if (degree > 2) then + cycle + endif + + ! + ! + !hIk = hij_mrcc(idx_alpha(k_sd),i_I) + ! call i_h_j(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(k_sd)),Nint,hIk) + + do i_state=1,N_states + dIK(i_state) = dij(i_I, idx_alpha(k_sd), i_state) + !dIk(i_state) = get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,idx_alpha(k_sd)), N_int) !!hIk * lambda_mrcc(i_state,idx_alpha(k_sd)) + !dIk(i_state) = psi_non_ref_coef(idx_alpha(k_sd), i_state) / psi_ref_coef(i_I, i_state) + enddo + + + ! |l> = Exc(k -> alpha) |I> + call get_excitation(psi_non_ref(1,1,idx_alpha(k_sd)),tq(1,1,i_alpha),exc,degree,phase,Nint) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + do k=1,N_int + tmp_det(k,1) = psi_ref(k,1,i_I) + tmp_det(k,2) = psi_ref(k,2,i_I) + enddo + logical :: ok + call apply_excitation(psi_ref(1,1,i_I), exc, tmp_det, ok, Nint) + if(.not. ok) cycle + + ! + do i_state=1,N_states + dka(i_state) = 0.d0 + enddo + do l_sd=k_sd+1,idx_alpha(0) + call get_excitation_degree(tmp_det,psi_non_ref(1,1,idx_alpha(l_sd)),degree,Nint) + if (degree == 0) then + +! loop = .True. +! do i_state=1,N_states +! if (lambda_mrcc(i_state,idx_alpha(l_sd)) /= 0.d0) then +! loop = .False. +! exit +! endif +! enddo + loop = .false. + if (.not.loop) then + call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(l_sd)),exc,degree,phase2,Nint) + hIl = hij_mrcc(idx_alpha(l_sd),i_I) +! call i_h_j(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hIl) + do i_state=1,N_states + dka(i_state) = dij(i_I, idx_alpha(l_sd), i_state) * phase * phase2 + !dka(i_state) = get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,idx_alpha(l_sd)), N_int) * phase * phase2 !hIl * lambda_mrcc(i_state,idx_alpha(l_sd)) * phase * phase2 + !dka(i_state) = psi_non_ref_coef(idx_alpha(l_sd), i_state) / psi_ref_coef(i_I, i_state) * phase * phase2 + enddo + endif + + exit + endif + enddo + do i_state=1,N_states + dIa(i_state) = dIa(i_state) + dIk(i_state) * dka(i_state) + enddo + enddo + + do i_state=1,N_states + ci_inv(i_state) = psi_ref_coef_inv(i_I,i_state) + enddo + do l_sd=1,idx_alpha(0) + k_sd = idx_alpha(l_sd) + hla = hij_cache(k_sd) + sla = sij_cache(k_sd) +! call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hla) + do i_state=1,N_states + dIa_hla(i_state,k_sd) = dIa(i_state) * hla + dIa_sla(i_state,k_sd) = dIa(i_state) * sla + enddo + enddo + call omp_set_lock( psi_ref_lock(i_I) ) + do i_state=1,N_states + if(dabs(psi_ref_coef(i_I,i_state)).ge.1.d-3)then + do l_sd=1,idx_alpha(0) + k_sd = idx_alpha(l_sd) + delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd) + delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + dIa_sla(i_state,k_sd) + delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + enddo + else + delta_ii_(i_state,i_I) = 0.d0 + do l_sd=1,idx_alpha(0) + k_sd = idx_alpha(l_sd) + delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd) + delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + 0.5d0*dIa_sla(i_state,k_sd) + enddo + endif + enddo + call omp_unset_lock( psi_ref_lock(i_I) ) + enddo + enddo + deallocate (dIa_hla,dIa_sla,hij_cache,sij_cache) + deallocate(miniList, idx_miniList) +end + + + + + BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2, (N_states, N_det_ref) ] + use bitmasks + implicit none + integer :: i, j, i_state + + !mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc + + if(mrmode == 3) then + do i = 1, N_det_ref + do i_state = 1, N_states + delta_ii(i_state,i)= delta_ii_mrcc(i_state,i) + delta_ii_s2(i_state,i)= delta_ii_s2_mrcc(i_state,i) + enddo + do j = 1, N_det_non_ref + do i_state = 1, N_states + delta_ij(i_state,j,i) = delta_ij_mrcc(i_state,j,i) + delta_ij_s2(i_state,j,i) = delta_ij_s2_mrcc(i_state,j,i) + enddo + end do + end do + + ! =-=-= BEGIN STATE AVERAGE +! do i = 1, N_det_ref +! delta_ii(:,i)= delta_ii_mrcc(1,i) +! delta_ii_s2(:,i)= delta_ii_s2_mrcc(1,i) +! do i_state = 2, N_states +! delta_ii(:,i) += delta_ii_mrcc(i_state,i) +! delta_ii_s2(:,i) += delta_ii_s2_mrcc(i_state,i) +! enddo +! do j = 1, N_det_non_ref +! delta_ij(:,j,i) = delta_ij_mrcc(1,j,i) +! delta_ij_s2(:,j,i) = delta_ij_s2_mrcc(1,j,i) +! do i_state = 2, N_states +! delta_ij(:,j,i) += delta_ij_mrcc(i_state,j,i) +! delta_ij_s2(:,j,i) += delta_ij_s2_mrcc(i_state,j,i) +! enddo +! end do +! end do +! delta_ij = delta_ij * (1.d0/dble(N_states)) +! delta_ii = delta_ii * (1.d0/dble(N_states)) + ! =-=-= END STATE AVERAGE + ! + ! do i = 1, N_det_ref + ! delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) - delta_sub_ii(i,i_state) + ! do j = 1, N_det_non_ref + ! delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state) - delta_sub_ij(i,j,i_state) + ! end do + ! end do + else if(mrmode == 2) then + do i = 1, N_det_ref + do i_state = 1, N_states + delta_ii(i_state,i)= delta_ii_old(i_state,i) + delta_ii_s2(i_state,i)= delta_ii_s2_old(i_state,i) + enddo + do j = 1, N_det_non_ref + do i_state = 1, N_states + delta_ij(i_state,j,i) = delta_ij_old(i_state,j,i) + delta_ij_s2(i_state,j,i) = delta_ij_s2_old(i_state,j,i) + enddo + end do + end do + else if(mrmode == 1) then + do i = 1, N_det_ref + do i_state = 1, N_states + delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) + delta_ii_s2(i_state,i)= delta_mrcepa0_ii_s2(i,i_state) + enddo + do j = 1, N_det_non_ref + do i_state = 1, N_states + delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state) + delta_ij_s2(i_state,j,i) = delta_mrcepa0_ij_s2(i,j,i_state) + enddo + end do + end do + else + stop "invalid mrmode" + end if +END_PROVIDER + + +BEGIN_PROVIDER [ integer, HP, (2,N_det_non_ref) ] + integer :: i + do i=1,N_det_non_ref + call getHP(psi_non_ref(1,1,i), HP(1,i), HP(2,i), N_int) + end do +END_PROVIDER + + BEGIN_PROVIDER [ integer, cepa0_shortcut, (0:N_det_non_ref+1) ] +&BEGIN_PROVIDER [ integer, det_cepa0_idx, (N_det_non_ref) ] +&BEGIN_PROVIDER [ integer(bit_kind), det_cepa0_active, (N_int,2,N_det_non_ref) ] +&BEGIN_PROVIDER [ integer(bit_kind), det_ref_active, (N_int,2,N_det_ref) ] +&BEGIN_PROVIDER [ integer(bit_kind), active_sorb, (N_int,2) ] +&BEGIN_PROVIDER [ integer(bit_kind), det_cepa0, (N_int,2,N_det_non_ref) ] +&BEGIN_PROVIDER [ integer, nlink, (N_det_ref) ] +&BEGIN_PROVIDER [ integer, linked, (N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ integer, blokMwen, (N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, searchance, (N_det_ref) ] +&BEGIN_PROVIDER [ integer, child_num, (N_det_non_ref,N_det_ref) ] + + use bitmasks + implicit none + + integer(bit_kind),allocatable :: det_noactive(:,:,:) + integer, allocatable :: shortcut(:), idx(:) + integer(bit_kind) :: nonactive_sorb(N_int,2), det(N_int, 2) + integer i, II, j, k, n, ni, blok, degree + logical, external :: detEq + + allocate(det_noactive(N_int, 2, N_det_non_ref)) + allocate(idx(N_det_non_ref), shortcut(0:N_det_non_ref+1)) + print *, "pre start" + active_sorb(:,:) = 0_8 + nonactive_sorb(:,:) = not(0_8) + + if(N_det_ref > 1) then + do i=1, N_det_ref + do k=1, N_int + active_sorb(k,1) = ior(psi_ref(k,1,i), active_sorb(k,1)) + active_sorb(k,2) = ior(psi_ref(k,2,i), active_sorb(k,2)) + nonactive_sorb(k,1) = iand(psi_ref(k,1,i), nonactive_sorb(k,1)) + nonactive_sorb(k,2) = iand(psi_ref(k,2,i), nonactive_sorb(k,2)) + end do + end do + do k=1, N_int + active_sorb(k,1) = iand(active_sorb(k,1), not(nonactive_sorb(k,1))) + active_sorb(k,2) = iand(active_sorb(k,2), not(nonactive_sorb(k,2))) + end do + end if + + + do i=1, N_det_non_ref + do k=1, N_int + det_noactive(k,1,i) = iand(psi_non_ref(k,1,i), not(active_sorb(k,1))) + det_noactive(k,2,i) = iand(psi_non_ref(k,2,i), not(active_sorb(k,2))) + end do + end do + + call sort_dets_ab(det_noactive, det_cepa0_idx, cepa0_shortcut, N_det_non_ref, N_int) + + do i=1,N_det_non_ref + det_cepa0(:,:,i) = psi_non_ref(:,:,det_cepa0_idx(i)) + end do + + cepa0_shortcut(0) = 1 + cepa0_shortcut(1) = 1 + do i=2,N_det_non_ref + if(.not. detEq(det_noactive(1,1,i), det_noactive(1,1,i-1), N_int)) then + cepa0_shortcut(0) += 1 + cepa0_shortcut(cepa0_shortcut(0)) = i + end if + end do + cepa0_shortcut(cepa0_shortcut(0)+1) = N_det_non_ref+1 + + if(.true.) then + do i=1,cepa0_shortcut(0) + n = cepa0_shortcut(i+1) - cepa0_shortcut(i) + call sort_dets_ab(det_cepa0(1,1,cepa0_shortcut(i)), idx, shortcut, n, N_int) + do k=1,n + idx(k) = det_cepa0_idx(cepa0_shortcut(i)-1+idx(k)) + end do + det_cepa0_idx(cepa0_shortcut(i):cepa0_shortcut(i)+n-1) = idx(:n) + end do + end if + + + do i=1,N_det_ref + do k=1, N_int + det_ref_active(k,1,i) = iand(psi_ref(k,1,i), active_sorb(k,1)) + det_ref_active(k,2,i) = iand(psi_ref(k,2,i), active_sorb(k,2)) + end do + end do + + do i=1,N_det_non_ref + do k=1, N_int + det_cepa0_active(k,1,i) = iand(psi_non_ref(k,1,det_cepa0_idx(i)), active_sorb(k,1)) + det_cepa0_active(k,2,i) = iand(psi_non_ref(k,2,det_cepa0_idx(i)), active_sorb(k,2)) + end do + end do + + do i=1,N_det_non_ref + if(.not. detEq(psi_non_ref(1,1,det_cepa0_idx(i)), det_cepa0(1,1,i),N_int)) stop "STOOOP" + end do + + searchance = 0d0 + child_num = 0 + do J = 1, N_det_ref + nlink(J) = 0 + do blok=1,cepa0_shortcut(0) + do k=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1 + call get_excitation_degree(psi_ref(1,1,J),det_cepa0(1,1,k),degree,N_int) + if(degree <= 2) then + nlink(J) += 1 + linked(nlink(J),J) = k + child_num(k, J) = nlink(J) + blokMwen(nlink(J),J) = blok + searchance(J) += 1d0 + log(dfloat(cepa0_shortcut(blok+1) - cepa0_shortcut(blok))) + end if + end do + end do + end do + print *, "pre done" +END_PROVIDER + + +! BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ] +! use bitmasks +! implicit none +! integer :: i,j,k +! double precision :: Hjk, Hki, Hij, pre(N_det_ref), wall +! integer :: i_state, degree, npre, ipre(N_det_ref), npres(N_det_ref) +! +! ! provide lambda_mrcc +! npres = 0 +! delta_cas = 0d0 +! call wall_time(wall) +! print *, "dcas ", wall +! do i_state = 1, N_states +! !!$OMP PARALLEL DO default(none) schedule(dynamic) private(pre,npre,ipre,j,k,Hjk,Hki,degree) shared(npres,lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,N_det_ref) +! do k=1,N_det_non_ref +! if(lambda_mrcc(i_state, k) == 0d0) cycle +! npre = 0 +! do i=1,N_det_ref +! call i_h_j(psi_non_ref(1,1,k),psi_ref(1,1,i), N_int,Hki) +! if(Hki /= 0d0) then +! !!$OMP ATOMIC +! npres(i) += 1 +! npre += 1 +! ipre(npre) = i +! pre(npre) = Hki +! end if +! end do +! +! +! do i=1,npre +! do j=1,i +! !!$OMP ATOMIC +! delta_cas(ipre(i),ipre(j),i_state) += pre(i) * pre(j) * lambda_mrcc(i_state, k) +! end do +! end do +! end do +! !!$OMP END PARALLEL DO +! npre=0 +! do i=1,N_det_ref +! npre += npres(i) +! end do +! !stop +! do i=1,N_det_ref +! do j=1,i +! delta_cas(j,i,i_state) = delta_cas(i,j,i_state) +! end do +! end do +! end do +! +! call wall_time(wall) +! print *, "dcas", wall +! ! stop +! END_PROVIDER + + + BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ] +&BEGIN_PROVIDER [ double precision, delta_cas_s2, (N_det_ref, N_det_ref, N_states) ] + use bitmasks + implicit none + integer :: i,j,k + double precision :: Sjk,Hjk, Hki, Hij + !double precision, external :: get_dij + integer i_state, degree + + provide lambda_mrcc dIj + do i_state = 1, N_states + !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Sjk,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,delta_cas_s2,N_det_ref,dij) + do i=1,N_det_ref + do j=1,i + call get_excitation_degree(psi_ref(1,1,i), psi_ref(1,1,j), degree, N_int) + delta_cas(i,j,i_state) = 0d0 + delta_cas_s2(i,j,i_state) = 0d0 + do k=1,N_det_non_ref + + call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk) + call get_s2(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Sjk) + + delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k) + delta_cas_s2(i,j,i_state) += Sjk * dij(i, k, i_state) ! * Ski * lambda_mrcc(i_state, k) + end do + delta_cas(j,i,i_state) = delta_cas(i,j,i_state) + delta_cas_s2(j,i,i_state) = delta_cas_s2(i,j,i_state) + end do + end do + !$OMP END PARALLEL DO + end do + END_PROVIDER + + + + +logical function isInCassd(a,Nint) + use bitmasks + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: a(Nint,2) + integer(bit_kind) :: inac, virt + integer :: ni, i, deg + + + isInCassd = .false. + + deg = 0 + do i=1,2 + do ni=1,Nint + virt = iand(not(HF_bitmask(ni,i)), not(active_sorb(ni,i))) + deg += popcnt(iand(virt, a(ni,i))) + if(deg > 2) return + end do + end do + + deg = 0 + do i=1,2 + do ni=1,Nint + inac = iand(HF_bitmask(ni,i), not(active_sorb(ni,i))) + deg += popcnt(xor(iand(inac,a(ni,i)), inac)) + if(deg > 2) return + end do + end do + isInCassd = .true. +end function + + +subroutine getHP(a,h,p,Nint) + use bitmasks + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: a(Nint,2) + integer, intent(out) :: h, p + integer(bit_kind) :: inac, virt + integer :: ni, i, deg + + + !isInCassd = .false. + h = 0 + p = 0 + + deg = 0 + lp : do i=1,2 + do ni=1,Nint + virt = iand(not(HF_bitmask(ni,i)), not(active_sorb(ni,i))) + deg += popcnt(iand(virt, a(ni,i))) + if(deg > 2) exit lp + end do + end do lp + p = deg + + deg = 0 + lh : do i=1,2 + do ni=1,Nint + inac = iand(HF_bitmask(ni,i), not(active_sorb(ni,i))) + deg += popcnt(xor(iand(inac,a(ni,i)), inac)) + if(deg > 2) exit lh + end do + end do lh + h = deg + !isInCassd = .true. +end function + + + BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij, (N_det_ref,N_det_non_ref,N_states) ] +&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii, (N_det_ref,N_states) ] +&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij_s2, (N_det_ref,N_det_non_ref,N_states) ] +&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii_s2, (N_det_ref,N_states) ] + use bitmasks + implicit none + + integer :: i_state, i, i_I, J, k, degree, degree2, m, l, deg, ni + integer :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_, sortRefIdx(N_det_ref) + logical :: ok + double precision :: phase_iI, phase_Ik, phase_Jl, phase_IJ, phase_al, diI, hIi, hJi, delta_JI, dkI(1), HkI, ci_inv(1), dia_hla(1) + double precision :: contrib, contrib2, contrib_s2, contrib2_s2, HIIi, HJk, wall + integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ + integer(bit_kind) :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2) + integer(bit_kind),allocatable :: sortRef(:,:,:) + integer, allocatable :: idx_sorted_bit(:) + integer, external :: get_index_in_psi_det_sorted_bit, searchDet + logical, external :: is_in_wavefunction, detEq + !double precision, external :: get_dij + integer :: II, blok + integer*8, save :: notf = 0 + + call wall_time(wall) + allocate(idx_sorted_bit(N_det), sortRef(N_int,2,N_det_ref)) + + sortRef(:,:,:) = det_ref_active(:,:,:) + call sort_det(sortRef, sortRefIdx, N_det_ref, N_int) + + idx_sorted_bit(:) = -1 + do i=1,N_det_non_ref + idx_sorted_bit(get_index_in_psi_det_sorted_bit(psi_non_ref(1,1,i), N_int)) = i + enddo + + ! To provide everything + contrib = dij(1, 1, 1) + + delta_mrcepa0_ii(:,:) = 0d0 + delta_mrcepa0_ij(:,:,:) = 0d0 + delta_mrcepa0_ii_s2(:,:) = 0d0 + delta_mrcepa0_ij_s2(:,:,:) = 0d0 + + do i_state = 1, N_states + !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii, delta_mrcepa0_ij_s2, delta_mrcepa0_ii_s2) & + !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2,contrib_s2,contrib2_s2) & + !$OMP shared(active_sorb, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef, cepa0_shortcut, det_cepa0_active) & + !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas, delta_cas_s2) & + !$OMP shared(notf,i_state, sortRef, sortRefIdx, dij) + do blok=1,cepa0_shortcut(0) + do i=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1 + do II=1,N_det_ref + call get_excitation_degree(psi_ref(1,1,II),psi_non_ref(1,1,det_cepa0_idx(i)),degree,N_int) + if (degree > 2 ) cycle + + do ni=1,N_int + made_hole(ni,1) = iand(det_ref_active(ni,1,II), xor(det_cepa0_active(ni,1,i), det_ref_active(ni,1,II))) + made_hole(ni,2) = iand(det_ref_active(ni,2,II), xor(det_cepa0_active(ni,2,i), det_ref_active(ni,2,II))) + + made_particle(ni,1) = iand(det_cepa0_active(ni,1,i), xor(det_cepa0_active(ni,1,i), det_ref_active(ni,1,II))) + made_particle(ni,2) = iand(det_cepa0_active(ni,2,i), xor(det_cepa0_active(ni,2,i), det_ref_active(ni,2,II))) + end do + + + kloop: do k=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1 !i + !if(lambda_mrcc(i_state, det_cepa0_idx(k)) == 0d0) cycle + + do ni=1,N_int + if(iand(made_hole(ni,1), det_cepa0_active(ni,1,k)) /= 0) cycle kloop + if(iand(made_particle(ni,1), det_cepa0_active(ni,1,k)) /= made_particle(ni,1)) cycle kloop + if(iand(made_hole(ni,2), det_cepa0_active(ni,2,k)) /= 0) cycle kloop + if(iand(made_particle(ni,2), det_cepa0_active(ni,2,k)) /= made_particle(ni,2)) cycle kloop + end do + do ni=1,N_int + myActive(ni,1) = xor(det_cepa0_active(ni,1,k), made_hole(ni,1)) + myActive(ni,1) = xor(myActive(ni,1), made_particle(ni,1)) + myActive(ni,2) = xor(det_cepa0_active(ni,2,k), made_hole(ni,2)) + myActive(ni,2) = xor(myActive(ni,2), made_particle(ni,2)) + end do + + j = searchDet(sortRef, myActive, N_det_ref, N_int) + if(j == -1) then + cycle + end if + j = sortRefIdx(j) + !$OMP ATOMIC + notf = notf+1 + +! call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk) + contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) + contrib_s2 = delta_cas_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) + + if(dabs(psi_ref_coef(J,i_state)).ge.1.d-3) then + contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) + contrib2_s2 = contrib_s2 / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) + !$OMP ATOMIC + delta_mrcepa0_ii(J,i_state) -= contrib2 + delta_mrcepa0_ii_s2(J,i_state) -= contrib2_s2 + else + contrib = contrib * 0.5d0 + contrib_s2 = contrib_s2 * 0.5d0 + end if + !$OMP ATOMIC + delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib + delta_mrcepa0_ij_s2(J, det_cepa0_idx(i), i_state) += contrib_s2 + + end do kloop + end do + end do + end do + !$OMP END PARALLEL DO + end do + deallocate(idx_sorted_bit) + call wall_time(wall) + print *, "cepa0", wall, notf + +END_PROVIDER + + + BEGIN_PROVIDER [ double precision, delta_sub_ij, (N_det_ref,N_det_non_ref,N_states) ] +&BEGIN_PROVIDER [ double precision, delta_sub_ii, (N_det_ref, N_states) ] + use bitmasks + implicit none + + integer :: i_state, i, i_I, J, k, degree, degree2, l, deg, ni + integer :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_ + logical :: ok + double precision :: phase_Ji, phase_Ik, phase_Ii + double precision :: contrib, contrib2, delta_IJk, HJk, HIk, HIl + integer, dimension(0:2,2,2) :: exc_Ik, exc_Ji, exc_Ii + integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2) + integer, allocatable :: idx_sorted_bit(:) + integer, external :: get_index_in_psi_det_sorted_bit + + integer :: II, blok + + provide delta_cas lambda_mrcc + allocate(idx_sorted_bit(N_det)) + idx_sorted_bit(:) = -1 + do i=1,N_det_non_ref + idx_sorted_bit(get_index_in_psi_det_sorted_bit(psi_non_ref(1,1,i), N_int)) = i + enddo + + do i_state = 1, N_states + delta_sub_ij(:,:,:) = 0d0 + delta_sub_ii(:,:) = 0d0 + + provide mo_bielec_integrals_in_map + + + !$OMP PARALLEL DO default(none) schedule(dynamic,10) shared(delta_sub_ij, delta_sub_ii) & + !$OMP private(i, J, k, degree, degree2, l, deg, ni) & + !$OMP private(p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_) & + !$OMP private(ok, phase_Ji, phase_Ik, phase_Ii, contrib2, contrib, delta_IJk, HJk, HIk, HIl, exc_Ik, exc_Ji, exc_Ii) & + !$OMP private(det_tmp, det_tmp2, II, blok) & + !$OMP shared(idx_sorted_bit, N_det_non_ref, N_det_ref, N_int, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef) & + !$OMP shared(i_state,lambda_mrcc, hf_bitmask, active_sorb) + do i=1,N_det_non_ref + if(mod(i,1000) == 0) print *, i, "/", N_det_non_ref + do J=1,N_det_ref + call get_excitation(psi_ref(1,1,J),psi_non_ref(1,1,i),exc_Ji,degree,phase_Ji,N_int) + if(degree == -1) cycle + + + do II=1,N_det_ref + call apply_excitation(psi_ref(1,1,II),exc_Ji,det_tmp,ok,N_int) + + if(.not. ok) cycle + l = get_index_in_psi_det_sorted_bit(det_tmp, N_int) + if(l == 0) cycle + l = idx_sorted_bit(l) + + call i_h_j(psi_ref(1,1,II), det_tmp, N_int, HIl) + + do k=1,N_det_non_ref + if(lambda_mrcc(i_state, k) == 0d0) cycle + call get_excitation(psi_ref(1,1,II),psi_non_ref(1,1,k),exc_Ik,degree2,phase_Ik,N_int) + + det_tmp(:,:) = 0_bit_kind + det_tmp2(:,:) = 0_bit_kind + + ok = .true. + do ni=1,N_int + det_tmp(ni,1) = iand(xor(HF_bitmask(ni,1), psi_non_ref(ni,1,k)), not(active_sorb(ni,1))) + det_tmp(ni,2) = iand(xor(HF_bitmask(ni,1), psi_non_ref(ni,1,i)), not(active_sorb(ni,1))) + ok = ok .and. (popcnt(det_tmp(ni,1)) + popcnt(det_tmp(ni,2)) == popcnt(xor(det_tmp(ni,1), det_tmp(ni,2)))) + + det_tmp(ni,1) = iand(xor(HF_bitmask(ni,2), psi_non_ref(ni,2,k)), not(active_sorb(ni,2))) + det_tmp(ni,2) = iand(xor(HF_bitmask(ni,2), psi_non_ref(ni,2,i)), not(active_sorb(ni,2))) + ok = ok .and. (popcnt(det_tmp(ni,1)) + popcnt(det_tmp(ni,2)) == popcnt(xor(det_tmp(ni,1), det_tmp(ni,2)))) + end do + + if(ok) cycle + + + call i_h_j(psi_ref(1,1,J), psi_non_ref(1,1,k), N_int, HJk) + call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,k), N_int, HIk) + if(HJk == 0) cycle + !assert HIk == 0 + delta_IJk = HJk * HIk * lambda_mrcc(i_state, k) + call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int) + if(ok) cycle + contrib = delta_IJk * HIl * lambda_mrcc(i_state,l) + if(dabs(psi_ref_coef(II,i_state)).ge.1.d-3) then + contrib2 = contrib / psi_ref_coef(II, i_state) * psi_non_ref_coef(l,i_state) + !$OMP ATOMIC + delta_sub_ii(II,i_state) -= contrib2 + else + contrib = contrib * 0.5d0 + endif + !$OMP ATOMIC + delta_sub_ij(II, i, i_state) += contrib + end do + end do + end do + end do + !$OMP END PARALLEL DO + end do + deallocate(idx_sorted_bit) +END_PROVIDER + + +subroutine set_det_bit(det, p, s) + implicit none + integer(bit_kind),intent(inout) :: det(N_int, 2) + integer, intent(in) :: p, s + integer :: ni, pos + + ni = (p-1)/bit_kind_size + 1 + pos = mod(p-1, bit_kind_size) + det(ni,s) = ibset(det(ni,s), pos) +end subroutine + + + BEGIN_PROVIDER [ double precision, h_cache, (N_det_ref,N_det_non_ref) ] +&BEGIN_PROVIDER [ double precision, s2_cache, (N_det_ref,N_det_non_ref) ] + implicit none + integer :: i,j + do i=1,N_det_ref + do j=1,N_det_non_ref + call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_cache(i,j)) + call get_s2(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, s2_cache(i,j)) + end do + end do +END_PROVIDER + + + +subroutine filter_tq(i_generator,n_selected,det_buffer,Nint,tq,N_tq,miniList,N_miniList) + + use bitmasks + implicit none + + integer, intent(in) :: i_generator,n_selected, Nint + + integer(bit_kind), intent(in) :: det_buffer(Nint,2,n_selected) + integer :: i,j,k,m + logical :: is_in_wavefunction + integer,allocatable :: degree(:) + integer,allocatable :: idx(:) + logical :: good + + integer(bit_kind), intent(inout) :: tq(Nint,2,n_selected) !! intent(out) + integer, intent(out) :: N_tq + + integer :: nt,ni + logical, external :: is_connected_to, is_generable + + integer(bit_kind),intent(in) :: miniList(Nint,2,N_det_generators) + integer,intent(in) :: N_miniList + + allocate(degree(psi_det_size)) + allocate(idx(0:psi_det_size)) + N_tq = 0 + + i_loop : do i=1,N_selected + do k=1, N_minilist + if(is_generable(miniList(1,1,k), det_buffer(1,1,i), Nint)) cycle i_loop + end do + + ! Select determinants that are triple or quadruple excitations + ! from the ref + good = .True. + call get_excitation_degree_vector(psi_ref,det_buffer(1,1,i),degree,Nint,N_det_ref,idx) + !good=(idx(0) == 0) tant que degree > 2 pas retourné par get_excitation_degree_vector + do k=1,idx(0) + if (degree(k) < 3) then + good = .False. + exit + endif + enddo + if (good) then + if (.not. is_in_wavefunction(det_buffer(1,1,i),Nint)) then + N_tq += 1 + do k=1,N_int + tq(k,1,N_tq) = det_buffer(k,1,i) + tq(k,2,N_tq) = det_buffer(k,2,i) + enddo + endif + endif + enddo i_loop +end + + +subroutine filter_tq_micro(i_generator,n_selected,det_buffer,Nint,tq,N_tq,microlist,ptr_microlist,N_microlist,key_mask) + + use bitmasks + implicit none + + integer, intent(in) :: i_generator,n_selected, Nint + + integer(bit_kind), intent(in) :: det_buffer(Nint,2,n_selected) + integer :: i,j,k,m + logical :: is_in_wavefunction + integer,allocatable :: degree(:) + integer,allocatable :: idx(:) + logical :: good + + integer(bit_kind), intent(inout) :: tq(Nint,2,n_selected) !! intent(out) + integer, intent(out) :: N_tq + + integer :: nt,ni + logical, external :: is_connected_to, is_generable + + integer(bit_kind),intent(in) :: microlist(Nint,2,*) + integer,intent(in) :: ptr_microlist(0:*) + integer,intent(in) :: N_microlist(0:*) + integer(bit_kind),intent(in) :: key_mask(Nint, 2) + + integer :: mobiles(2), smallerlist + + + allocate(degree(psi_det_size)) + allocate(idx(0:psi_det_size)) + N_tq = 0 + + i_loop : do i=1,N_selected + call getMobiles(det_buffer(1,1,i), key_mask, mobiles, Nint) + if(N_microlist(mobiles(1)) < N_microlist(mobiles(2))) then + smallerlist = mobiles(1) + else + smallerlist = mobiles(2) + end if + + if(N_microlist(smallerlist) > 0) then + do k=ptr_microlist(smallerlist), ptr_microlist(smallerlist)+N_microlist(smallerlist)-1 + if(is_generable(microlist(1,1,k), det_buffer(1,1,i), Nint)) cycle i_loop + end do + end if + + if(N_microlist(0) > 0) then + do k=1, N_microlist(0) + if(is_generable(microlist(1,1,k), det_buffer(1,1,i), Nint)) cycle i_loop + end do + end if + + ! Select determinants that are triple or quadruple excitations + ! from the ref + good = .True. + call get_excitation_degree_vector(psi_ref,det_buffer(1,1,i),degree,Nint,N_det_ref,idx) + !good=(idx(0) == 0) tant que degree > 2 pas retourné par get_excitation_degree_vector + do k=1,idx(0) + if (degree(k) < 3) then + good = .False. + exit + endif + enddo + if (good) then + if (.not. is_in_wavefunction(det_buffer(1,1,i),Nint)) then + N_tq += 1 + do k=1,N_int + tq(k,1,N_tq) = det_buffer(k,1,i) + tq(k,2,N_tq) = det_buffer(k,2,i) + enddo + endif + endif + enddo i_loop +end + + + + diff --git a/plugins/mrcc_selected/dressing_slave.irp.f b/plugins/mrcc_selected/dressing_slave.irp.f new file mode 100644 index 00000000..c2e5dd55 --- /dev/null +++ b/plugins/mrcc_selected/dressing_slave.irp.f @@ -0,0 +1,601 @@ +subroutine mrsc2_dressing_slave_tcp(i) + implicit none + integer, intent(in) :: i + BEGIN_DOC +! Task for parallel MR-SC2 + END_DOC + call mrsc2_dressing_slave(0,i) +end + + +subroutine mrsc2_dressing_slave_inproc(i) + implicit none + integer, intent(in) :: i + BEGIN_DOC +! Task for parallel MR-SC2 + END_DOC + call mrsc2_dressing_slave(1,i) +end + +subroutine mrsc2_dressing_slave(thread,iproc) + use f77_zmq + + implicit none + BEGIN_DOC +! Task for parallel MR-SC2 + END_DOC + integer, intent(in) :: thread, iproc +! integer :: j,l + integer :: rc + + integer :: worker_id, task_id + character*(512) :: task + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + double precision, allocatable :: delta(:,:,:), delta_s2(:,:,:) + + + + integer :: i_state, i, i_I, J, k, k2, k1, kk, ll, degree, degree2, m, l, deg, ni, m2 + integer :: n(2) + integer :: p1,p2,h1,h2,s1,s2, blok, I_s, J_s, kn + logical :: ok + double precision :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al + double precision :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states) + double precision :: contrib, contrib_s2, wall, iwall + double precision, allocatable :: dleat(:,:,:), dleat_s2(:,:,:) + integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ + integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt + integer, external :: get_index_in_psi_det_sorted_bit, searchDet, detCmp + logical, external :: is_in_wavefunction, isInCassd, detEq + integer,allocatable :: komon(:) + logical :: komoned + !double precision, external :: get_dij + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_push = new_zmq_push_socket(thread) + + call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) + + allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2)) + allocate (dleat_s2(N_states, N_det_non_ref, 2), delta_s2(N_states,0:N_det_non_ref, 2)) + allocate(komon(0:N_det_non_ref)) + + do + call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) + if (task_id == 0) exit + read (task,*) i_I, J, k1, k2 + do i_state=1, N_states + ci_inv(i_state) = 1.d0 / psi_ref_coef(i_I,i_state) + cj_inv(i_state) = 1.d0 / psi_ref_coef(J,i_state) + end do + n = 0 + delta(:,0,:) = 0d0 + delta(:,:nlink(J),1) = 0d0 + delta(:,:nlink(i_I),2) = 0d0 + delta_s2(:,0,:) = 0d0 + delta_s2(:,:nlink(J),1) = 0d0 + delta_s2(:,:nlink(i_I),2) = 0d0 + komon(0) = 0 + komoned = .false. + + + + + do kk = k1, k2 + k = det_cepa0_idx(linked(kk, i_I)) + blok = blokMwen(kk, i_I) + + call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,k),exc_Ik,degree,phase_Ik,N_int) + + if(J /= i_I) then + call apply_excitation(psi_ref(1,1,J),exc_Ik,det_tmp2,ok,N_int) + if(.not. ok) cycle + + l = searchDet(det_cepa0(1,1,cepa0_shortcut(blok)), det_tmp2, cepa0_shortcut(blok+1)-cepa0_shortcut(blok), N_int) + if(l == -1) cycle + ll = cepa0_shortcut(blok)-1+l + l = det_cepa0_idx(ll) + ll = child_num(ll, J) + else + l = k + ll = kk + end if + + + if(.not. komoned) then + m = 0 + m2 = 0 + + do while(m < nlink(i_I) .and. m2 < nlink(J)) + m += 1 + m2 += 1 + if(linked(m, i_I) < linked(m2, J)) then + m2 -= 1 + cycle + else if(linked(m, i_I) > linked(m2, J)) then + m -= 1 + cycle + end if + i = det_cepa0_idx(linked(m, i_I)) + + if(h_cache(J,i) == 0.d0) cycle + if(h_cache(i_I,i) == 0.d0) cycle + + komon(0) += 1 + kn = komon(0) + komon(kn) = i + + do i_state = 1,N_states + dkI = h_cache(J,i) * dij(i_I, i, i_state) + dleat(i_state, kn, 1) = dkI + dleat(i_state, kn, 2) = dkI + + dkI = s2_cache(J,i) * dij(i_I, i, i_state) + dleat_s2(i_state, kn, 1) = dkI + dleat_s2(i_state, kn, 2) = dkI + end do + + end do + + komoned = .true. + end if + + integer :: hpmin(2) + hpmin(1) = 2 - HP(1,k) + hpmin(2) = 2 - HP(2,k) + + do m = 1, komon(0) + + i = komon(m) + if(HP(1,i) <= hpmin(1) .and. HP(2,i) <= hpmin(2) ) then + cycle + end if + + call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int) + if(.not. ok) cycle + + do i_state = 1, N_states + contrib = dij(i_I, k, i_state) * dleat(i_state, m, 2) + contrib_s2 = dij(i_I, k, i_state) * dleat_s2(i_state, m, 2) + delta(i_state,ll,1) += contrib + delta_s2(i_state,ll,1) += contrib_s2 + if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then + delta(i_state,0,1) -= contrib * ci_inv(i_state) * psi_non_ref_coef(l,i_state) + delta_s2(i_state,0,1) -= contrib_s2 * ci_inv(i_state) * psi_non_ref_coef(l,i_state) + endif + + if(I_i == J) cycle + contrib = dij(J, l, i_state) * dleat(i_state, m, 1) + contrib_s2 = dij(J, l, i_state) * dleat_s2(i_state, m, 1) + delta(i_state,kk,2) += contrib + delta_s2(i_state,kk,2) += contrib_s2 + if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then + delta(i_state,0,2) -= contrib * cj_inv(i_state) * psi_non_ref_coef(k,i_state) + delta_s2(i_state,0,2) -= contrib_s2 * cj_inv(i_state) * psi_non_ref_coef(k,i_state) + end if + enddo !i_state + end do ! while + end do ! kk + + + call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) + +! end if + + enddo + + deallocate(delta) + + call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + +end + + +subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) + use f77_zmq + implicit none + BEGIN_DOC +! Push integrals in the push socket + END_DOC + + integer, intent(in) :: i_I, J + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + double precision,intent(inout) :: delta(N_states, 0:N_det_non_ref, 2) + double precision,intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2) + integer, intent(in) :: task_id + integer :: rc , i_state, i, kk, li + integer,allocatable :: idx(:,:) + integer :: n(2) + logical :: ok + + allocate(idx(N_det_non_ref,2)) + rc = f77_zmq_send( zmq_socket_push, i_I, 4, ZMQ_SNDMORE) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, i_I, 4, ZMQ_SNDMORE)' + stop 'error' + endif + + rc = f77_zmq_send( zmq_socket_push, J, 4, ZMQ_SNDMORE) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, J, 4, ZMQ_SNDMORE)' + stop 'error' + endif + + + do kk=1,2 + n(kk)=0 + if(kk == 1) li = nlink(j) + if(kk == 2) li = nlink(i_I) + do i=1, li + ok = .false. + do i_state=1,N_states + if(delta(i_state, i, kk) /= 0d0) then + ok = .true. + exit + end if + end do + + if(ok) then + n(kk) += 1 +! idx(n,kk) = i + if(kk == 1) then + idx(n(1),1) = det_cepa0_idx(linked(i, J)) + else + idx(n(2),2) = det_cepa0_idx(linked(i, i_I)) + end if + + do i_state=1, N_states + delta(i_state, n(kk), kk) = delta(i_state, i, kk) + end do + end if + end do + + rc = f77_zmq_send( zmq_socket_push, n(kk), 4, ZMQ_SNDMORE) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, n, 4, ZMQ_SNDMORE)' + stop 'error' + endif + + if(n(kk) /= 0) then + rc = f77_zmq_send( zmq_socket_push, delta(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) ! delta(1,0,1) = delta_I delta(1,0,2) = delta_J + if (rc /= (n(kk)+1)*8*N_states) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, delta, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' + stop 'error' + endif + + rc = f77_zmq_send( zmq_socket_push, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) ! delta_s2(1,0,1) = delta_I delta_s2(1,0,2) = delta_J + if (rc /= (n(kk)+1)*8*N_states) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' + stop 'error' + endif + + rc = f77_zmq_send( zmq_socket_push, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) + if (rc /= n(kk)*4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, delta, 8*n(kk), ZMQ_SNDMORE)' + stop 'error' + endif + end if + end do + + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, task_id, 4, 0)' + stop 'error' + endif + +! ! Activate is zmq_socket_push is a REQ +! integer :: idummy +! rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) +! if (rc /= 4) then +! print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)' +! stop 'error' +! endif +end + + + +subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) + use f77_zmq + implicit none + BEGIN_DOC +! Push integrals in the push socket + END_DOC + + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + integer, intent(out) :: i_I, J, n(2) + double precision, intent(inout) :: delta(N_states, 0:N_det_non_ref, 2) + double precision, intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2) + integer, intent(out) :: task_id + integer :: rc , i, kk + integer,intent(inout) :: idx(N_det_non_ref,2) + logical :: ok + + rc = f77_zmq_recv( zmq_socket_pull, i_I, 4, ZMQ_SNDMORE) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, i_I, 4, ZMQ_SNDMORE)' + stop 'error' + endif + + rc = f77_zmq_recv( zmq_socket_pull, J, 4, ZMQ_SNDMORE) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, J, 4, ZMQ_SNDMORE)' + stop 'error' + endif + + do kk = 1, 2 + rc = f77_zmq_recv( zmq_socket_pull, n(kk), 4, ZMQ_SNDMORE) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, n, 4, ZMQ_SNDMORE)' + stop 'error' + endif + + if(n(kk) /= 0) then + rc = f77_zmq_recv( zmq_socket_pull, delta(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) + if (rc /= (n(kk)+1)*8*N_states) then + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, delta, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' + stop 'error' + endif + + rc = f77_zmq_recv( zmq_socket_pull, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) + if (rc /= (n(kk)+1)*8*N_states) then + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' + stop 'error' + endif + + rc = f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) + if (rc /= n(kk)*4) then + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE)' + stop 'error' + endif + end if + end do + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)' + stop 'error' + endif + + +! ! Activate is zmq_socket_pull is a REP +! integer :: idummy +! rc = f77_zmq_send( zmq_socket_pull, idummy, 4, 0) +! if (rc /= 4) then +! print *, irp_here, 'f77_zmq_send( zmq_socket_pull, idummy, 4, 0)' +! stop 'error' +! endif +end + + + +subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_,delta_ii_s2_,delta_ij_s2_) + use f77_zmq + implicit none + BEGIN_DOC +! Collects results from the AO integral calculation + END_DOC + + double precision,intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref) + double precision,intent(inout) :: delta_ii_(N_states,N_det_ref) + double precision,intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) + double precision,intent(inout) :: delta_ii_s2_(N_states,N_det_ref) + +! integer :: j,l + integer :: rc + + double precision, allocatable :: delta(:,:,:), delta_s2(:,:,:) + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull + + integer*8 :: control, accu + integer :: task_id, more + + integer :: I_i, J, l, i_state, n(2), kk + integer,allocatable :: idx(:,:) + + delta_ii_(:,:) = 0d0 + delta_ij_(:,:,:) = 0d0 + delta_ii_s2_(:,:) = 0d0 + delta_ij_s2_(:,:,:) = 0d0 + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() + + allocate ( delta(N_states,0:N_det_non_ref,2), delta_s2(N_states,0:N_det_non_ref,2) ) + + allocate(idx(N_det_non_ref,2)) + more = 1 + do while (more == 1) + + call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) + + + do l=1, n(1) + do i_state=1,N_states + delta_ij_(i_state,idx(l,1),i_I) += delta(i_state,l,1) + delta_ij_s2_(i_state,idx(l,1),i_I) += delta_s2(i_state,l,1) + end do + end do + + do l=1, n(2) + do i_state=1,N_states + delta_ij_(i_state,idx(l,2),J) += delta(i_state,l,2) + delta_ij_s2_(i_state,idx(l,2),J) += delta_s2(i_state,l,2) + end do + end do + + + if(n(1) /= 0) then + do i_state=1,N_states + delta_ii_(i_state,i_I) += delta(i_state,0,1) + delta_ii_s2_(i_state,i_I) += delta_s2(i_state,0,1) + end do + end if + + if(n(2) /= 0) then + do i_state=1,N_states + delta_ii_(i_state,J) += delta(i_state,0,2) + delta_ii_s2_(i_state,J) += delta_s2(i_state,0,2) + end do + end if + + + if (task_id /= 0) then + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) + endif + + + enddo + deallocate( delta, delta_s2 ) + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_pull_socket(zmq_socket_pull) + +end + + + + + BEGIN_PROVIDER [ double precision, delta_ij_old, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_old, (N_states,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_old, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_old, (N_states,N_det_ref) ] + implicit none + + integer :: i_state, i, i_I, J, k, kk, degree, degree2, m, l, deg, ni, m2 + integer :: p1,p2,h1,h2,s1,s2, blok, I_s, J_s, nex, nzer, ntot +! integer, allocatable :: linked(:,:), blokMwen(:, :), nlink(:) + logical :: ok + double precision :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al, diI, hIi, hJi, delta_JI, dkI(N_states), HkI, ci_inv(N_states), dia_hla(N_states) + double precision :: contrib, wall, iwall ! , searchance(N_det_ref) + integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ + integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt + integer, external :: get_index_in_psi_det_sorted_bit, searchDet, detCmp + logical, external :: is_in_wavefunction, isInCassd, detEq + character*(512) :: task + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer :: KKsize = 1000000 + + + call new_parallel_job(zmq_to_qp_run_socket,'mrsc2') + + + call wall_time(iwall) +! allocate(linked(N_det_non_ref, N_det_ref), blokMwen(N_det_non_ref, N_det_ref), nlink(N_det_ref)) + + +! searchance = 0d0 +! do J = 1, N_det_ref +! nlink(J) = 0 +! do blok=1,cepa0_shortcut(0) +! do k=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1 +! call get_excitation_degree(psi_ref(1,1,J),det_cepa0(1,1,k),degree,N_int) +! if(degree <= 2) then +! nlink(J) += 1 +! linked(nlink(J),J) = k +! blokMwen(nlink(J),J) = blok +! searchance(J) += 1d0 + log(dfloat(cepa0_shortcut(blok+1) - cepa0_shortcut(blok))) +! end if +! end do +! end do +! end do + + + +! stop + nzer = 0 + ntot = 0 + do nex = 3, 0, -1 + print *, "los ",nex + do I_s = N_det_ref, 1, -1 +! if(mod(I_s,1) == 0) then +! call wall_time(wall) +! wall = wall-iwall +! print *, I_s, "/", N_det_ref, wall * (dfloat(N_det_ref) / dfloat(I_s)), wall, wall * (dfloat(N_det_ref) / dfloat(I_s))-wall +! end if + + + do J_s = 1, I_s + + call get_excitation_degree(psi_ref(1,1,J_s), psi_ref(1,1,I_s), degree, N_int) + if(degree /= nex) cycle + if(nex == 3) nzer = nzer + 1 + ntot += 1 +! if(degree > 3) then +! deg += 1 +! cycle +! else if(degree == -10) then +! KKsize = 100000 +! else +! KKsize = 1000000 +! end if + + + + if(searchance(I_s) < searchance(J_s)) then + i_I = I_s + J = J_s + else + i_I = J_s + J = I_s + end if + + KKsize = nlink(1) + if(nex == 0) KKsize = int(float(nlink(1)) / float(nlink(i_I)) * (float(nlink(1)) / 64d0)) + + !if(KKsize == 0) stop "ZZEO" + + do kk = 1 , nlink(i_I), KKsize + write(task,*) I_i, J, kk, int(min(kk+KKsize-1, nlink(i_I))) + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + end do + + ! do kk = 1 , nlink(i_I) + ! k = linked(kk,i_I) + ! blok = blokMwen(kk,i_I) + ! write(task,*) I_i, J, k, blok + ! call add_task_to_taskserver(zmq_to_qp_run_socket,task) + ! + ! enddo !kk + enddo !J + + enddo !I + end do ! nex + print *, "tasked" +! integer(ZMQ_PTR) ∷ collector_thread +! external ∷ ao_bielec_integrals_in_map_collector +! rc = pthread_create(collector_thread, mrsc2_dressing_collector) + print *, nzer, ntot, float(nzer) / float(ntot) + provide nproc + !$OMP PARALLEL DEFAULT(none) SHARED(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old) PRIVATE(i) NUM_THREADS(nproc+1) + i = omp_get_thread_num() + if (i==0) then + call mrsc2_dressing_collector(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old) + else + call mrsc2_dressing_slave_inproc(i) + endif + !$OMP END PARALLEL + +! rc = pthread_join(collector_thread) + call end_parallel_job(zmq_to_qp_run_socket, 'mrsc2') + + +END_PROVIDER + + + diff --git a/plugins/mrcc_selected/ezfio_interface.irp.f b/plugins/mrcc_selected/ezfio_interface.irp.f new file mode 100644 index 00000000..062af449 --- /dev/null +++ b/plugins/mrcc_selected/ezfio_interface.irp.f @@ -0,0 +1,61 @@ +! DO NOT MODIFY BY HAND +! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py +! from file /home/scemama/quantum_package/src/mrcc_selected/EZFIO.cfg + + +BEGIN_PROVIDER [ double precision, thresh_dressed_ci ] + implicit none + BEGIN_DOC +! Threshold on the convergence of the dressed CI energy + END_DOC + + logical :: has + PROVIDE ezfio_filename + + call ezfio_has_mrcc_selected_thresh_dressed_ci(has) + if (has) then + call ezfio_get_mrcc_selected_thresh_dressed_ci(thresh_dressed_ci) + else + print *, 'mrcc_selected/thresh_dressed_ci not found in EZFIO file' + stop 1 + endif + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_it_max_dressed_ci ] + implicit none + BEGIN_DOC +! Maximum number of dressed CI iterations + END_DOC + + logical :: has + PROVIDE ezfio_filename + + call ezfio_has_mrcc_selected_n_it_max_dressed_ci(has) + if (has) then + call ezfio_get_mrcc_selected_n_it_max_dressed_ci(n_it_max_dressed_ci) + else + print *, 'mrcc_selected/n_it_max_dressed_ci not found in EZFIO file' + stop 1 + endif + +END_PROVIDER + +BEGIN_PROVIDER [ integer, lambda_type ] + implicit none + BEGIN_DOC +! lambda type + END_DOC + + logical :: has + PROVIDE ezfio_filename + + call ezfio_has_mrcc_selected_lambda_type(has) + if (has) then + call ezfio_get_mrcc_selected_lambda_type(lambda_type) + else + print *, 'mrcc_selected/lambda_type not found in EZFIO file' + stop 1 + endif + +END_PROVIDER diff --git a/plugins/mrcc_selected/mrcc_selected.irp.f b/plugins/mrcc_selected/mrcc_selected.irp.f new file mode 100644 index 00000000..91592e62 --- /dev/null +++ b/plugins/mrcc_selected/mrcc_selected.irp.f @@ -0,0 +1,19 @@ +program mrsc2sub + implicit none + double precision, allocatable :: energy(:) + allocate (energy(N_states)) + + !mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc + mrmode = 3 + + read_wf = .True. + SOFT_TOUCH read_wf + call print_cas_coefs + call set_generators_bitmasks_as_holes_and_particles + call run(N_states,energy) + if(do_pt2_end)then + call run_pt2(N_states,energy) + endif + deallocate(energy) +end + diff --git a/plugins/mrcc_selected/mrcepa0_general.irp.f b/plugins/mrcc_selected/mrcepa0_general.irp.f new file mode 100644 index 00000000..e3a2d1f5 --- /dev/null +++ b/plugins/mrcc_selected/mrcepa0_general.irp.f @@ -0,0 +1,245 @@ + + +subroutine run(N_st,energy) + implicit none + + integer, intent(in) :: N_st + double precision, intent(out) :: energy(N_st) + + integer :: i,j + + double precision :: E_new, E_old, delta_e + integer :: iteration + double precision :: E_past(4) + + integer :: n_it_mrcc_max + double precision :: thresh_mrcc + double precision, allocatable :: lambda(:) + allocate (lambda(N_states)) + + + thresh_mrcc = thresh_dressed_ci + n_it_mrcc_max = n_it_max_dressed_ci + + if(n_it_mrcc_max == 1) then + do j=1,N_states_diag + do i=1,N_det + psi_coef(i,j) = CI_eigenvectors_dressed(i,j) + enddo + enddo + SOFT_TOUCH psi_coef ci_energy_dressed + call write_double(6,ci_energy_dressed(1),"Final MRCC energy") + call ezfio_set_mrcepa0_energy(ci_energy_dressed(1)) + call save_wavefunction + energy(:) = ci_energy_dressed(:) + else + E_new = 0.d0 + delta_E = 1.d0 + iteration = 0 + lambda = 1.d0 + do while (delta_E > thresh_mrcc) + iteration += 1 + print *, '===========================' + print *, 'MRCEPA0 Iteration', iteration + print *, '===========================' + print *, '' + E_old = sum(ci_energy_dressed) + call write_double(6,ci_energy_dressed(1),"MRCEPA0 energy") + call diagonalize_ci_dressed(lambda) + E_new = sum(ci_energy_dressed) + delta_E = dabs(E_new - E_old) + call save_wavefunction + call ezfio_set_mrcepa0_energy(ci_energy_dressed(1)) + if (iteration >= n_it_mrcc_max) then + exit + endif + enddo + call write_double(6,ci_energy_dressed(1),"Final MRCEPA0 energy") + energy(:) = ci_energy_dressed(:) + endif +end + + +subroutine print_cas_coefs + implicit none + + integer :: i,j + print *, 'CAS' + print *, '===' + do i=1,N_det_cas + print *, (psi_cas_coef(i,j), j=1,N_states) + call debug_det(psi_cas(1,1,i),N_int) + enddo + call write_double(6,ci_energy(1),"Initial CI energy") + +end + + + + +subroutine run_pt2_old(N_st,energy) + implicit none + integer :: i,j,k + integer, intent(in) :: N_st + double precision, intent(in) :: energy(N_st) + double precision :: pt2_redundant(N_st), pt2(N_st) + double precision :: norm_pert(N_st),H_pert_diag(N_st) + + pt2_redundant = 0.d0 + pt2 = 0d0 + !if(lambda_mrcc_pt2(0) == 0) return + + print*,'Last iteration only to compute the PT2' + + print * ,'Computing the redundant PT2 contribution' + + if (mrmode == 1) then + + N_det_generators = lambda_mrcc_kept(0) + N_det_selectors = lambda_mrcc_kept(0) + + do i=1,N_det_generators + j = lambda_mrcc_kept(i) + do k=1,N_int + psi_det_generators(k,1,i) = psi_non_ref(k,1,j) + psi_det_generators(k,2,i) = psi_non_ref(k,2,j) + psi_selectors(k,1,i) = psi_non_ref(k,1,j) + psi_selectors(k,2,i) = psi_non_ref(k,2,j) + enddo + do k=1,N_st + psi_coef_generators(i,k) = psi_non_ref_coef(j,k) + psi_selectors_coef(i,k) = psi_non_ref_coef(j,k) + enddo + enddo + + else + + N_det_generators = N_det_non_ref + N_det_selectors = N_det_non_ref + + do i=1,N_det_generators + j = i + do k=1,N_int + psi_det_generators(k,1,i) = psi_non_ref(k,1,j) + psi_det_generators(k,2,i) = psi_non_ref(k,2,j) + psi_selectors(k,1,i) = psi_non_ref(k,1,j) + psi_selectors(k,2,i) = psi_non_ref(k,2,j) + enddo + do k=1,N_st + psi_coef_generators(i,k) = psi_non_ref_coef(j,k) + psi_selectors_coef(i,k) = psi_non_ref_coef(j,k) + enddo + enddo + + endif + + SOFT_TOUCH N_det_selectors psi_selectors_coef psi_selectors N_det_generators psi_det_generators psi_coef_generators ci_eigenvectors_dressed ci_eigenvectors_s2_dressed ci_electronic_energy_dressed + SOFT_TOUCH psi_ref_coef_diagonalized psi_ref_energy_diagonalized + + call H_apply_mrcepa_PT2(pt2_redundant, norm_pert, H_pert_diag, N_st) + + print * ,'Computing the remaining contribution' + + threshold_selectors = max(threshold_selectors,threshold_selectors_pt2) + threshold_generators = max(threshold_generators,threshold_generators_pt2) + + N_det_generators = N_det_non_ref + N_det_ref + N_det_selectors = N_det_non_ref + N_det_ref + + psi_det_generators(:,:,:N_det_ref) = psi_ref(:,:,:N_det_ref) + psi_selectors(:,:,:N_det_ref) = psi_ref(:,:,:N_det_ref) + psi_coef_generators(:N_det_ref,:) = psi_ref_coef(:N_det_ref,:) + psi_selectors_coef(:N_det_ref,:) = psi_ref_coef(:N_det_ref,:) + + do i=N_det_ref+1,N_det_generators + j = i-N_det_ref + do k=1,N_int + psi_det_generators(k,1,i) = psi_non_ref(k,1,j) + psi_det_generators(k,2,i) = psi_non_ref(k,2,j) + psi_selectors(k,1,i) = psi_non_ref(k,1,j) + psi_selectors(k,2,i) = psi_non_ref(k,2,j) + enddo + do k=1,N_st + psi_coef_generators(i,k) = psi_non_ref_coef(j,k) + psi_selectors_coef(i,k) = psi_non_ref_coef(j,k) + enddo + enddo + + SOFT_TOUCH N_det_selectors psi_selectors_coef psi_selectors N_det_generators psi_det_generators psi_coef_generators ci_eigenvectors_dressed ci_eigenvectors_s2_dressed ci_electronic_energy_dressed + SOFT_TOUCH psi_ref_coef_diagonalized psi_ref_energy_diagonalized + + call H_apply_mrcepa_PT2(pt2, norm_pert, H_pert_diag, N_st) + + + print *, "Redundant PT2 :",pt2_redundant + print *, "Full PT2 :",pt2 + print *, lambda_mrcc_kept(0), N_det, N_det_ref, psi_coef(1,1), psi_ref_coef(1,1) + pt2 = pt2 - pt2_redundant + + print *, 'Final step' + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + print *, 'PT2 = ', pt2 + print *, 'E = ', energy + print *, 'E+PT2 = ', energy+pt2 + print *, '-----' + + + call ezfio_set_mrcepa0_energy_pt2(energy(1)+pt2(1)) + +end + +subroutine run_pt2(N_st,energy) + implicit none + integer :: i,j,k + integer, intent(in) :: N_st + double precision, intent(in) :: energy(N_st) + double precision :: pt2(N_st) + double precision :: norm_pert(N_st),H_pert_diag(N_st) + + pt2 = 0d0 + !if(lambda_mrcc_pt2(0) == 0) return + + print*,'Last iteration only to compute the PT2' + + N_det_generators = N_det_cas + N_det_selectors = N_det_non_ref + + do i=1,N_det_generators + do k=1,N_int + psi_det_generators(k,1,i) = psi_ref(k,1,i) + psi_det_generators(k,2,i) = psi_ref(k,2,i) + enddo + do k=1,N_st + psi_coef_generators(i,k) = psi_ref_coef(i,k) + enddo + enddo + do i=1,N_det + do k=1,N_int + psi_selectors(k,1,i) = psi_det_sorted(k,1,i) + psi_selectors(k,2,i) = psi_det_sorted(k,2,i) + enddo + do k=1,N_st + psi_selectors_coef(i,k) = psi_coef_sorted(i,k) + enddo + enddo + + SOFT_TOUCH N_det_selectors psi_selectors_coef psi_selectors N_det_generators psi_det_generators psi_coef_generators ci_eigenvectors_dressed ci_eigenvectors_s2_dressed ci_electronic_energy_dressed + SOFT_TOUCH psi_ref_coef_diagonalized psi_ref_energy_diagonalized + + call H_apply_mrcepa_PT2(pt2, norm_pert, H_pert_diag, N_st) + +! call ezfio_set_full_ci_energy_pt2(energy+pt2) + + print *, 'Final step' + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + print *, 'PT2 = ', pt2 + print *, 'E = ', energy + print *, 'E+PT2 = ', energy+pt2 + print *, '-----' + + call ezfio_set_mrcepa0_energy_pt2(energy(1)+pt2(1)) + +end + diff --git a/plugins/mrcepa0/.gitignore b/plugins/mrcepa0/.gitignore deleted file mode 100644 index 7d9ee55d..00000000 --- a/plugins/mrcepa0/.gitignore +++ /dev/null @@ -1,36 +0,0 @@ -# Automatically created by $QP_ROOT/scripts/module/module_handler.py -.ninja_deps -.ninja_log -AO_Basis -Bitmask -Davidson -Determinants -Electrons -Ezfio_files -Generators_full -Hartree_Fock -IRPF90_man -IRPF90_temp -Integrals_Bielec -Integrals_Monoelec -MOGuess -MO_Basis -MRCC_Utils -Makefile -Makefile.depend -Nuclei -Perturbation -Properties -Pseudo -Psiref_CAS -Psiref_Utils -Selectors_full -Utils -ZMQ -ezfio_interface.irp.f -irpf90.make -irpf90_entities -mrcc -mrcepa0 -mrsc2 -tags \ No newline at end of file diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index 9f041cd3..c772e2aa 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -4,6 +4,8 @@ use bitmasks BEGIN_PROVIDER [ double precision, delta_ij_mrcc, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii_mrcc, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_mrcc, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_mrcc, (N_states, N_det_ref) ] use bitmasks implicit none integer :: gen, h, p, n, t, i, h1, h2, p1, p2, s1, s2, iproc @@ -14,11 +16,13 @@ use bitmasks delta_ij_mrcc = 0d0 delta_ii_mrcc = 0d0 - print *, "Dij", dij(1,1,1) + delta_ij_s2_mrcc = 0d0 + delta_ii_s2_mrcc = 0d0 + PROVIDE dij provide hh_shortcut psi_det_size! lambda_mrcc !$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) & - !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc) & + !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc, delta_ii_s2_mrcc, delta_ij_s2_mrcc) & !$OMP private(h, n, mask, omask, buf, ok, iproc) do gen= 1, N_det_generators allocate(buf(N_int, 2, N_det_non_ref)) @@ -37,7 +41,9 @@ use bitmasks end do n = n - 1 - if(n /= 0) call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc,gen,n,buf,N_int,omask) + if(n /= 0) then + call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask) + endif end do deallocate(buf) @@ -52,13 +58,15 @@ END_PROVIDER ! end subroutine -subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffer,Nint,key_mask) +subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask) use bitmasks implicit none integer, intent(in) :: i_generator,n_selected, Nint double precision, intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref) double precision, intent(inout) :: delta_ii_(N_states,N_det_ref) + double precision, intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) + double precision, intent(inout) :: delta_ii_s2_(N_states,N_det_ref) integer(bit_kind), intent(in) :: det_buffer(Nint,2,n_selected) integer :: i,j,k,l,m @@ -68,8 +76,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe integer(bit_kind),allocatable :: tq(:,:,:) integer :: N_tq, c_ref ,degree - double precision :: hIk, hla, hIl, dIk(N_states), dka(N_states), dIa(N_states) - double precision, allocatable :: dIa_hla(:,:) + double precision :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states) + double precision, allocatable :: dIa_hla(:,:), dIa_sla(:,:) double precision :: haj, phase, phase2 double precision :: f(N_states), ci_inv(N_states) integer :: exc(0:2,2,2) @@ -82,7 +90,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe integer(bit_kind),intent(in) :: key_mask(Nint, 2) integer,allocatable :: idx_miniList(:) integer :: N_miniList, ni, leng - double precision, allocatable :: hij_cache(:) + double precision, allocatable :: hij_cache(:), sij_cache(:) integer(bit_kind), allocatable :: microlist(:,:,:), microlist_zero(:,:,:) integer, allocatable :: idx_microlist(:), N_microlist(:), ptr_microlist(:), idx_microlist_zero(:) @@ -92,7 +100,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe leng = max(N_det_generators, N_det_non_ref) - allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref)) + allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref), sij_cache(N_det_non_ref)) allocate(idx_alpha(0:psi_det_size), degree_alpha(psi_det_size)) !create_minilist_find_previous(key_mask, fullList, miniList, N_fullList, N_miniList, fullMatch, Nint) call create_minilist_find_previous(key_mask, psi_det_generators, miniList, i_generator-1, N_miniList, fullMatch, Nint) @@ -117,7 +125,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe deallocate(microlist, idx_microlist) - allocate (dIa_hla(N_states,N_det_non_ref)) + allocate (dIa_hla(N_states,N_det_non_ref), dIa_sla(N_states,N_det_non_ref)) ! |I> @@ -185,6 +193,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hij_cache(k_sd)) + call get_s2(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,sij_cache(k_sd)) enddo ! |I> do i_I=1,N_det_ref @@ -282,31 +291,36 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) hla = hij_cache(k_sd) + sla = sij_cache(k_sd) ! call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hla) do i_state=1,N_states dIa_hla(i_state,k_sd) = dIa(i_state) * hla + dIa_sla(i_state,k_sd) = dIa(i_state) * sla enddo enddo call omp_set_lock( psi_ref_lock(i_I) ) do i_state=1,N_states - if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5)then + if(dabs(psi_ref_coef(i_I,i_state)).ge.1.d-3)then do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd) delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + dIa_sla(i_state,k_sd) + delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) enddo else delta_ii_(i_state,i_I) = 0.d0 do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd) + delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + 0.5d0*dIa_sla(i_state,k_sd) enddo endif enddo call omp_unset_lock( psi_ref_lock(i_I) ) enddo enddo - deallocate (dIa_hla,hij_cache) + deallocate (dIa_hla,dIa_sla,hij_cache,sij_cache) deallocate(miniList, idx_miniList) end @@ -315,6 +329,8 @@ end BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2, (N_states, N_det_ref) ] use bitmasks implicit none integer :: i, j, i_state @@ -325,13 +341,36 @@ end do i = 1, N_det_ref do i_state = 1, N_states delta_ii(i_state,i)= delta_ii_mrcc(i_state,i) + delta_ii_s2(i_state,i)= delta_ii_s2_mrcc(i_state,i) enddo do j = 1, N_det_non_ref do i_state = 1, N_states delta_ij(i_state,j,i) = delta_ij_mrcc(i_state,j,i) + delta_ij_s2(i_state,j,i) = delta_ij_s2_mrcc(i_state,j,i) enddo end do end do + + ! =-=-= BEGIN STATE AVERAGE +! do i = 1, N_det_ref +! delta_ii(:,i)= delta_ii_mrcc(1,i) +! delta_ii_s2(:,i)= delta_ii_s2_mrcc(1,i) +! do i_state = 2, N_states +! delta_ii(:,i) += delta_ii_mrcc(i_state,i) +! delta_ii_s2(:,i) += delta_ii_s2_mrcc(i_state,i) +! enddo +! do j = 1, N_det_non_ref +! delta_ij(:,j,i) = delta_ij_mrcc(1,j,i) +! delta_ij_s2(:,j,i) = delta_ij_s2_mrcc(1,j,i) +! do i_state = 2, N_states +! delta_ij(:,j,i) += delta_ij_mrcc(i_state,j,i) +! delta_ij_s2(:,j,i) += delta_ij_s2_mrcc(i_state,j,i) +! enddo +! end do +! end do +! delta_ij = delta_ij * (1.d0/dble(N_states)) +! delta_ii = delta_ii * (1.d0/dble(N_states)) + ! =-=-= END STATE AVERAGE ! ! do i = 1, N_det_ref ! delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) - delta_sub_ii(i,i_state) @@ -343,10 +382,12 @@ end do i = 1, N_det_ref do i_state = 1, N_states delta_ii(i_state,i)= delta_ii_old(i_state,i) + delta_ii_s2(i_state,i)= delta_ii_s2_old(i_state,i) enddo do j = 1, N_det_non_ref do i_state = 1, N_states delta_ij(i_state,j,i) = delta_ij_old(i_state,j,i) + delta_ij_s2(i_state,j,i) = delta_ij_s2_old(i_state,j,i) enddo end do end do @@ -354,10 +395,12 @@ end do i = 1, N_det_ref do i_state = 1, N_states delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) + delta_ii_s2(i_state,i)= delta_mrcepa0_ii_s2(i,i_state) enddo do j = 1, N_det_non_ref do i_state = 1, N_states delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state) + delta_ij_s2(i_state,j,i) = delta_mrcepa0_ij_s2(i,j,i_state) enddo end do end do @@ -547,28 +590,32 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ] +&BEGIN_PROVIDER [ double precision, delta_cas_s2, (N_det_ref, N_det_ref, N_states) ] use bitmasks implicit none integer :: i,j,k - double precision :: Hjk, Hki, Hij + double precision :: Sjk,Hjk, Hki, Hij !double precision, external :: get_dij integer i_state, degree provide lambda_mrcc dIj do i_state = 1, N_states - !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,N_det_ref,dij) + !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Sjk,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,delta_cas_s2,N_det_ref,dij) do i=1,N_det_ref do j=1,i call get_excitation_degree(psi_ref(1,1,i), psi_ref(1,1,j), degree, N_int) delta_cas(i,j,i_state) = 0d0 + delta_cas_s2(i,j,i_state) = 0d0 do k=1,N_det_non_ref call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk) + call get_s2(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Sjk) delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k) - !print *, Hjk * get_dij(psi_ref(1,1,i), psi_non_ref(1,1,k), N_int), Hki * get_dij(psi_ref(1,1,j), psi_non_ref(1,1,k), N_int) + delta_cas_s2(i,j,i_state) += Sjk * dij(i, k, i_state) ! * Ski * lambda_mrcc(i_state, k) end do delta_cas(j,i,i_state) = delta_cas(i,j,i_state) + delta_cas_s2(j,i,i_state) = delta_cas_s2(i,j,i_state) end do end do !$OMP END PARALLEL DO @@ -649,6 +696,8 @@ end function BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij, (N_det_ref,N_det_non_ref,N_states) ] &BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii, (N_det_ref,N_states) ] +&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij_s2, (N_det_ref,N_det_non_ref,N_states) ] +&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii_s2, (N_det_ref,N_states) ] use bitmasks implicit none @@ -656,7 +705,7 @@ end function integer :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_, sortRefIdx(N_det_ref) logical :: ok double precision :: phase_iI, phase_Ik, phase_Jl, phase_IJ, phase_al, diI, hIi, hJi, delta_JI, dkI(1), HkI, ci_inv(1), dia_hla(1) - double precision :: contrib, contrib2, HIIi, HJk, wall + double precision :: contrib, contrib2, contrib_s2, contrib2_s2, HIIi, HJk, wall integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ integer(bit_kind) :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2) integer(bit_kind),allocatable :: sortRef(:,:,:) @@ -681,14 +730,16 @@ end function ! To provide everything contrib = dij(1, 1, 1) - do i_state = 1, N_states - delta_mrcepa0_ii(:,:) = 0d0 - delta_mrcepa0_ij(:,:,:) = 0d0 + delta_mrcepa0_ii(:,:) = 0d0 + delta_mrcepa0_ij(:,:,:) = 0d0 + delta_mrcepa0_ii_s2(:,:) = 0d0 + delta_mrcepa0_ij_s2(:,:,:) = 0d0 - !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii) & - !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2) & + do i_state = 1, N_states + !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii, delta_mrcepa0_ij_s2, delta_mrcepa0_ii_s2) & + !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2,contrib_s2,contrib2_s2) & !$OMP shared(active_sorb, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef, cepa0_shortcut, det_cepa0_active) & - !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas) & + !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas, delta_cas_s2) & !$OMP shared(notf,i_state, sortRef, sortRefIdx, dij) do blok=1,cepa0_shortcut(0) do i=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1 @@ -731,16 +782,21 @@ end function ! call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk) contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) + contrib_s2 = delta_cas_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) - if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then + if(dabs(psi_ref_coef(J,i_state)).ge.1.d-3) then contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) + contrib2_s2 = contrib_s2 / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) !$OMP ATOMIC delta_mrcepa0_ii(J,i_state) -= contrib2 + delta_mrcepa0_ii_s2(J,i_state) -= contrib2_s2 else contrib = contrib * 0.5d0 + contrib_s2 = contrib_s2 * 0.5d0 end if !$OMP ATOMIC delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib + delta_mrcepa0_ij_s2(J, det_cepa0_idx(i), i_state) += contrib_s2 end do kloop end do @@ -751,7 +807,7 @@ end function deallocate(idx_sorted_bit) call wall_time(wall) print *, "cepa0", wall, notf - !stop + END_PROVIDER @@ -839,7 +895,7 @@ END_PROVIDER call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int) if(ok) cycle contrib = delta_IJk * HIl * lambda_mrcc(i_state,l) - if(dabs(psi_ref_coef(II,i_state)).ge.5.d-5) then + if(dabs(psi_ref_coef(II,i_state)).ge.1.d-3) then contrib2 = contrib / psi_ref_coef(II, i_state) * psi_non_ref_coef(l,i_state) !$OMP ATOMIC delta_sub_ii(II,i_state) -= contrib2 @@ -870,12 +926,14 @@ subroutine set_det_bit(det, p, s) end subroutine -BEGIN_PROVIDER [ double precision, h_, (N_det_ref,N_det_non_ref) ] + BEGIN_PROVIDER [ double precision, h_cache, (N_det_ref,N_det_non_ref) ] +&BEGIN_PROVIDER [ double precision, s2_cache, (N_det_ref,N_det_non_ref) ] implicit none integer :: i,j do i=1,N_det_ref do j=1,N_det_non_ref - call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_(i,j)) + call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_cache(i,j)) + call get_s2(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, s2_cache(i,j)) end do end do END_PROVIDER diff --git a/plugins/mrcepa0/dressing_slave.irp.f b/plugins/mrcepa0/dressing_slave.irp.f index f1d6f029..9e9fa65a 100644 --- a/plugins/mrcepa0/dressing_slave.irp.f +++ b/plugins/mrcepa0/dressing_slave.irp.f @@ -37,7 +37,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) integer(ZMQ_PTR), external :: new_zmq_push_socket integer(ZMQ_PTR) :: zmq_socket_push - double precision, allocatable :: delta(:,:,:) + double precision, allocatable :: delta(:,:,:), delta_s2(:,:,:) @@ -47,8 +47,8 @@ subroutine mrsc2_dressing_slave(thread,iproc) logical :: ok double precision :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al double precision :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states) - double precision :: contrib, wall, iwall - double precision, allocatable :: dleat(:,:,:) + double precision :: contrib, contrib_s2, wall, iwall + double precision, allocatable :: dleat(:,:,:), dleat_s2(:,:,:) integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt integer, external :: get_index_in_psi_det_sorted_bit, searchDet, detCmp @@ -63,6 +63,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2)) + allocate (dleat_s2(N_states, N_det_non_ref, 2), delta_s2(N_states,0:N_det_non_ref, 2)) allocate(komon(0:N_det_non_ref)) do @@ -74,10 +75,14 @@ subroutine mrsc2_dressing_slave(thread,iproc) cj_inv(i_state) = 1.d0 / psi_ref_coef(J,i_state) end do !delta = 0.d0 + !delta_s2 = 0.d0 n = 0 delta(:,0,:) = 0d0 delta(:,:nlink(J),1) = 0d0 delta(:,:nlink(i_I),2) = 0d0 + delta_s2(:,0,:) = 0d0 + delta_s2(:,:nlink(J),1) = 0d0 + delta_s2(:,:nlink(i_I),2) = 0d0 komon(0) = 0 komoned = .false. @@ -121,8 +126,8 @@ subroutine mrsc2_dressing_slave(thread,iproc) end if i = det_cepa0_idx(linked(m, i_I)) - if(h_(J,i) == 0.d0) cycle - if(h_(i_I,i) == 0.d0) cycle + if(h_cache(J,i) == 0.d0) cycle + if(h_cache(i_I,i) == 0.d0) cycle !ok = .false. !do i_state=1, N_states @@ -144,10 +149,13 @@ subroutine mrsc2_dressing_slave(thread,iproc) ! if(I_i == J) phase_Ii = phase_Ji do i_state = 1,N_states - dkI = h_(J,i) * dij(i_I, i, i_state)!get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,i), N_int) - !dkI = h_(J,i) * h_(i_I,i) * lambda_mrcc(i_state, i) + dkI = h_cache(J,i) * dij(i_I, i, i_state) dleat(i_state, kn, 1) = dkI dleat(i_state, kn, 2) = dkI + + dkI = s2_cache(J,i) * dij(i_I, i, i_state) + dleat_s2(i_state, kn, 1) = dkI + dleat_s2(i_state, kn, 2) = dkI end do end do @@ -173,26 +181,32 @@ subroutine mrsc2_dressing_slave(thread,iproc) !if(lambda_mrcc(i_state, i) == 0d0) cycle - !contrib = h_(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al + !contrib = h_cache(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al contrib = dij(i_I, k, i_state) * dleat(i_state, m, 2) + contrib_s2 = dij(i_I, k, i_state) * dleat_s2(i_state, m, 2) delta(i_state,ll,1) += contrib + delta_s2(i_state,ll,1) += contrib_s2 if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then delta(i_state,0,1) -= contrib * ci_inv(i_state) * psi_non_ref_coef(l,i_state) + delta_s2(i_state,0,1) -= contrib_s2 * ci_inv(i_state) * psi_non_ref_coef(l,i_state) endif if(I_i == J) cycle - !contrib = h_(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al + !contrib = h_cache(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al contrib = dij(J, l, i_state) * dleat(i_state, m, 1) + contrib_s2 = dij(J, l, i_state) * dleat_s2(i_state, m, 1) delta(i_state,kk,2) += contrib + delta_s2(i_state,kk,2) += contrib_s2 if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then delta(i_state,0,2) -= contrib * cj_inv(i_state) * psi_non_ref_coef(k,i_state) + delta_s2(i_state,0,2) -= contrib_s2 * cj_inv(i_state) * psi_non_ref_coef(k,i_state) end if enddo !i_state end do ! while end do ! kk - call push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) + call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) ! end if @@ -208,7 +222,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) end -subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) +subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) use f77_zmq implicit none BEGIN_DOC @@ -218,6 +232,7 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) integer, intent(in) :: i_I, J integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision,intent(inout) :: delta(N_states, 0:N_det_non_ref, 2) + double precision,intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2) integer, intent(in) :: task_id integer :: rc , i_state, i, kk, li integer,allocatable :: idx(:,:) @@ -278,6 +293,12 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) print *, irp_here, 'f77_zmq_send( zmq_socket_push, delta, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' stop 'error' endif + + rc = f77_zmq_send( zmq_socket_push, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) ! delta_s2(1,0,1) = delta_I delta_s2(1,0,2) = delta_J + if (rc /= (n(kk)+1)*8*N_states) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' + stop 'error' + endif rc = f77_zmq_send( zmq_socket_push, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) if (rc /= n(kk)*4) then @@ -305,7 +326,7 @@ end -subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) +subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) use f77_zmq implicit none BEGIN_DOC @@ -315,6 +336,7 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer, intent(out) :: i_I, J, n(2) double precision, intent(inout) :: delta(N_states, 0:N_det_non_ref, 2) + double precision, intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2) integer, intent(out) :: task_id integer :: rc , i, kk integer,intent(inout) :: idx(N_det_non_ref,2) @@ -346,9 +368,15 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) stop 'error' endif + rc = f77_zmq_recv( zmq_socket_pull, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) + if (rc /= (n(kk)+1)*8*N_states) then + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' + stop 'error' + endif + rc = f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) if (rc /= n(kk)*4) then - print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, delta, n(kk)*4, ZMQ_SNDMORE)' + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE)' stop 'error' endif end if @@ -372,7 +400,7 @@ end -subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) +subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_,delta_ii_s2_,delta_ij_s2_) use f77_zmq implicit none BEGIN_DOC @@ -381,11 +409,13 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) double precision,intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref) double precision,intent(inout) :: delta_ii_(N_states,N_det_ref) + double precision,intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) + double precision,intent(inout) :: delta_ii_s2_(N_states,N_det_ref) ! integer :: j,l integer :: rc - double precision, allocatable :: delta(:,:,:) + double precision, allocatable :: delta(:,:,:), delta_s2(:,:,:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -401,49 +431,47 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) delta_ii_(:,:) = 0d0 delta_ij_(:,:,:) = 0d0 + delta_ii_s2_(:,:) = 0d0 + delta_ij_s2_(:,:,:) = 0d0 zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_pull = new_zmq_pull_socket() - allocate ( delta(N_states,0:N_det_non_ref,2) ) + allocate ( delta(N_states,0:N_det_non_ref,2), delta_s2(N_states,0:N_det_non_ref,2) ) allocate(idx(N_det_non_ref,2)) more = 1 do while (more == 1) - call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) + call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) do l=1, n(1) do i_state=1,N_states delta_ij_(i_state,idx(l,1),i_I) += delta(i_state,l,1) + delta_ij_s2_(i_state,idx(l,1),i_I) += delta_s2(i_state,l,1) end do end do do l=1, n(2) do i_state=1,N_states delta_ij_(i_state,idx(l,2),J) += delta(i_state,l,2) + delta_ij_s2_(i_state,idx(l,2),J) += delta_s2(i_state,l,2) end do end do -! -! do l=1,nlink(J) -! do i_state=1,N_states -! delta_ij_(i_state,det_cepa0_idx(linked(l,J)),i_I) += delta(i_state,l,1) -! delta_ij_(i_state,det_cepa0_idx(linked(l,i_I)),j) += delta(i_state,l,2) -! end do -! end do -! if(n(1) /= 0) then do i_state=1,N_states delta_ii_(i_state,i_I) += delta(i_state,0,1) + delta_ii_s2_(i_state,i_I) += delta_s2(i_state,0,1) end do end if if(n(2) /= 0) then do i_state=1,N_states delta_ii_(i_state,J) += delta(i_state,0,2) + delta_ii_s2_(i_state,J) += delta_s2(i_state,0,2) end do end if @@ -454,7 +482,7 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) enddo - deallocate( delta ) + deallocate( delta, delta_s2 ) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_pull_socket(zmq_socket_pull) @@ -466,6 +494,8 @@ end BEGIN_PROVIDER [ double precision, delta_ij_old, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii_old, (N_states,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_old, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_old, (N_states,N_det_ref) ] implicit none integer :: i_state, i, i_I, J, k, kk, degree, degree2, m, l, deg, ni, m2 @@ -574,10 +604,10 @@ end ! rc = pthread_create(collector_thread, mrsc2_dressing_collector) print *, nzer, ntot, float(nzer) / float(ntot) provide nproc - !$OMP PARALLEL DEFAULT(none) SHARED(delta_ii_old,delta_ij_old) PRIVATE(i) NUM_THREADS(nproc+1) + !$OMP PARALLEL DEFAULT(none) SHARED(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() if (i==0) then - call mrsc2_dressing_collector(delta_ii_old,delta_ij_old) + call mrsc2_dressing_collector(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old) else call mrsc2_dressing_slave_inproc(i) endif diff --git a/plugins/mrcepa0/mrcc.irp.f b/plugins/mrcepa0/mrcc.irp.f index a28d4be3..a5614942 100644 --- a/plugins/mrcepa0/mrcc.irp.f +++ b/plugins/mrcepa0/mrcc.irp.f @@ -16,7 +16,7 @@ program mrsc2sub psi_coef(i,j) = CI_eigenvectors(i,j) enddo enddo - TOUCH psi_coef + SOFT_TOUCH psi_coef endif call run(N_states,energy) if(do_pt2_end)then diff --git a/plugins/mrcepa0/mrcepa0_general.irp.f b/plugins/mrcepa0/mrcepa0_general.irp.f index 09c35e52..1b2e2fcb 100644 --- a/plugins/mrcepa0/mrcepa0_general.irp.f +++ b/plugins/mrcepa0/mrcepa0_general.irp.f @@ -21,7 +21,7 @@ subroutine run(N_st,energy) n_it_mrcc_max = n_it_max_dressed_ci if(n_it_mrcc_max == 1) then - do j=1,N_states_diag + do j=1,N_states do i=1,N_det psi_coef(i,j) = CI_eigenvectors_dressed(i,j) enddo @@ -37,15 +37,21 @@ subroutine run(N_st,energy) lambda = 1.d0 do while (delta_E > thresh_mrcc) iteration += 1 - print *, '===========================' - print *, 'MRCEPA0 Iteration', iteration - print *, '===========================' + print *, '===============================================' + print *, 'MRCEPA0 Iteration', iteration, '/', n_it_mrcc_max + print *, '===============================================' print *, '' - E_old = sum(ci_energy_dressed) - call write_double(6,ci_energy_dressed(1),"MRCEPA0 energy") + E_old = sum(ci_energy_dressed(1:N_states)) + do i=1,N_st + call write_double(6,ci_energy_dressed(i),"MRCEPA0 energy") + enddo call diagonalize_ci_dressed(lambda) - E_new = sum(ci_energy_dressed) - delta_E = dabs(E_new - E_old) + E_new = sum(ci_energy_dressed(1:N_states)) + delta_E = (E_new - E_old)/dble(N_states) + print *, '' + call write_double(6,thresh_mrcc,"thresh_mrcc") + call write_double(6,delta_E,"delta_E") + delta_E = dabs(delta_E) call save_wavefunction call ezfio_set_mrcepa0_energy(ci_energy_dressed(1)) if (iteration >= n_it_mrcc_max) then diff --git a/src/Davidson/EZFIO.cfg b/src/Davidson/EZFIO.cfg index b7c67465..7724400f 100644 --- a/src/Davidson/EZFIO.cfg +++ b/src/Davidson/EZFIO.cfg @@ -15,3 +15,16 @@ type: Strictly_positive_int doc: Number of micro-iterations before re-contracting default: 10 interface: ezfio,provider,ocaml + +[state_following] +type: logical +doc: If true, the states are re-ordered to match the input states +default: False +interface: ezfio,provider,ocaml + +[disk_based_davidson] +type: logical +doc: If true, disk space is used to store the vectors +default: False +interface: ezfio,provider,ocaml + diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index c70a086c..dccc8ee5 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -45,8 +45,11 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d !$OMP END DO !$OMP END PARALLEL - call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) -! call davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) + if (disk_based_davidson) then + call davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) + else + call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) + endif do i=1,N_st_diag s2_out(i) = S2_jj(i) enddo @@ -84,8 +87,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(in) :: H_jj(sze) - double precision, intent(inout) :: S2_jj(sze) - integer, intent(in) :: iunit + double precision, intent(inout) :: S2_jj(sze) + integer, intent(in) :: iunit double precision, intent(inout) :: u_in(dim_in,N_st_diag) double precision, intent(out) :: energies(N_st_diag) @@ -99,7 +102,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer :: k_pairs, kl integer :: iter2 - double precision, allocatable :: W(:,:), U(:,:), S(:,:) + double precision, allocatable :: W(:,:), U(:,:), S(:,:), overlap(:,:) double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:) double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:) double precision :: diag_h_mat_elem @@ -108,17 +111,19 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s double precision :: to_print(3,N_st) double precision :: cpu, wall integer :: shift, shift2, itermax + double precision :: r1, r2 + logical :: state_ok(N_st_diag*davidson_sze_max) include 'constants.include.F' !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda if (N_st_diag*3 > sze) then - print *, 'error in Davidson :' - print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 - stop -1 + print *, 'error in Davidson :' + print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 + stop -1 endif - + PROVIDE nuclear_repulsion expected_s2 - + call write_time(iunit) call wall_time(wall) call cpu_time(cpu) @@ -137,7 +142,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s write(iunit,'(A)') trim(write_buffer) write_buffer = ' Iter' do i=1,N_st - write_buffer = trim(write_buffer)//' Energy S^2 Residual' + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' enddo write(iunit,'(A)') trim(write_buffer) write_buffer = '===== ' @@ -145,31 +150,32 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s write_buffer = trim(write_buffer)//' ================ =========== ===========' enddo write(iunit,'(A)') trim(write_buffer) - - integer, external :: align_double + + integer, external :: align_double sze_8 = align_double(sze) - - itermax = min(davidson_sze_max, sze/N_st_diag) + + itermax = max(3,min(davidson_sze_max, sze/N_st_diag)) allocate( & - W(sze_8,N_st_diag*itermax), & - U(sze_8,N_st_diag*itermax), & - S(sze_8,N_st_diag*itermax), & - h(N_st_diag*itermax,N_st_diag*itermax), & - y(N_st_diag*itermax,N_st_diag*itermax), & - s_(N_st_diag*itermax,N_st_diag*itermax), & - s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + W(sze_8,N_st_diag*itermax), & + U(sze_8,N_st_diag*itermax), & + S(sze_8,N_st_diag*itermax), & + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & residual_norm(N_st_diag), & - c(N_st_diag*itermax), & - s2(N_st_diag*itermax), & + c(N_st_diag*itermax), & + s2(N_st_diag*itermax), & + overlap(N_st_diag*itermax, N_st_diag*itermax), & lambda(N_st_diag*itermax)) - h = 0.d0 - s_ = 0.d0 - s_tmp = 0.d0 + h = 0.d0 U = 0.d0 W = 0.d0 S = 0.d0 y = 0.d0 + s_ = 0.d0 + s_tmp = 0.d0 ASSERT (N_st > 0) @@ -183,21 +189,21 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s converged = .False. - double precision :: r1, r2 do k=N_st+1,N_st_diag - do i=1,sze - call random_number(r1) - call random_number(r2) - r1 = dsqrt(-2.d0*dlog(r1)) - r2 = dtwo_pi*r2 - u_in(i,k) = r1*dcos(r2) - enddo + u_in(k,k) = 10.d0 + do i=1,sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) + enddo enddo do k=1,N_st_diag call normalize(u_in(1,k),sze) enddo - - + + do while (.not.converged) do k=1,N_st_diag @@ -205,12 +211,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s U(i,k) = u_in(i,k) enddo enddo - + do iter=1,itermax-1 shift = N_st_diag*(iter-1) shift2 = N_st_diag*iter - + call ortho_qr(U,size(U,1),sze,shift2) ! Compute |W_k> = \sum_i |i> @@ -233,8 +239,49 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s 0.d0, s_, size(s_,1)) +! ! Diagonalize S^2 +! ! --------------- +! +! call lapack_diag(s2,y,s_,size(s_,1),shift2) +! +! +! ! Rotate H in the basis of eigenfunctions of s2 +! ! --------------------------------------------- +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('T','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) +! +! ! Damp interaction between different spin states +! ! ------------------------------------------------ +! +! do k=1,shift2 +! do l=1,shift2 +! if (dabs(s2(k) - s2(l)) > 1.d0) then +! h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l)))) +! endif +! enddo +! enddo +! +! ! Rotate back H +! ! ------------- +! +! call dgemm('N','T',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) + + ! Diagonalize h ! ------------- + call lapack_diag(lambda,y,h,size(h,1),shift2) ! Compute S2 for each eigenvector @@ -255,24 +302,72 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s enddo if (s2_eig) then - logical :: state_ok(N_st_diag*davidson_sze_max) - do k=1,shift2 - state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + do k=1,shift2 + state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + enddo + else + do k=1,size(state_ok) + state_ok(k) = .True. enddo + endif + + do k=1,shift2 + if (.not. state_ok(k)) then + do l=k+1,shift2 + if (state_ok(l)) then + call dswap(shift2, y(1,k), 1, y(1,l), 1) + call dswap(1, s2(k), 1, s2(l), 1) + call dswap(1, lambda(k), 1, lambda(l), 1) + state_ok(k) = .True. + state_ok(l) = .False. + exit + endif + enddo + endif + enddo + + if (state_following) then + + integer :: order(N_st_diag) + double precision :: cmax + + overlap = -1.d0 do k=1,shift2 - if (.not. state_ok(k)) then - do l=k+1,shift2 - if (state_ok(l)) then - call dswap(shift2, y(1,k), 1, y(1,l), 1) - call dswap(1, s2(k), 1, s2(l), 1) - call dswap(1, lambda(k), 1, lambda(l), 1) - state_ok(k) = .True. - state_ok(l) = .False. - exit - endif - enddo + do i=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo + enddo + do k=1,N_st + cmax = -1.d0 + do i=1,N_st + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,N_st_diag + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) endif enddo + do k=1,N_st + overlap(k,1) = lambda(k) + overlap(k,2) = s2(k) + enddo + do k=1,N_st + l = order(k) + if (k /= l) then + lambda(k) = overlap(l,1) + s2(k) = overlap(l,2) + endif + enddo + endif @@ -291,10 +386,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s do k=1,N_st_diag do i=1,sze - U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & + U(i,shift2+k) = & + (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & - )/max(H_jj(i) - lambda (k),1.d-2) + )/max(H_jj(i) - lambda (k),1.d-2) enddo + if (k <= N_st) then residual_norm(k) = u_dot_u(U(1,shift2+k),sze) to_print(1,k) = lambda(k) + nuclear_repulsion @@ -339,7 +436,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s deallocate ( & W, residual_norm, & - U, & + U, overlap, & c, S, & h, & y, s_, s_tmp, & @@ -378,8 +475,8 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(in) :: H_jj(sze) - double precision, intent(inout) :: S2_jj(sze) - integer, intent(in) :: iunit + double precision, intent(inout) :: S2_jj(sze) + integer, intent(in) :: iunit double precision, intent(inout) :: u_in(dim_in,N_st_diag) double precision, intent(out) :: energies(N_st_diag) @@ -393,7 +490,7 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz integer :: k_pairs, kl integer :: iter2 - double precision, pointer :: W(:,:), U(:,:), S(:,:) + double precision, pointer :: W(:,:), U(:,:), S(:,:), overlap(:,:) double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:) double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:) double precision :: diag_h_mat_elem @@ -401,18 +498,19 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz character*(16384) :: write_buffer double precision :: to_print(3,N_st) double precision :: cpu, wall + logical :: state_ok(N_st_diag*davidson_sze_max) integer :: shift, shift2, itermax include 'constants.include.F' !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda if (N_st_diag*3 > sze) then - print *, 'error in Davidson :' - print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 - stop -1 + print *, 'error in Davidson :' + print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 + stop -1 endif - + PROVIDE nuclear_repulsion expected_s2 - + call write_time(iunit) call wall_time(wall) call cpu_time(cpu) @@ -431,7 +529,7 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz write(iunit,'(A)') trim(write_buffer) write_buffer = ' Iter' do i=1,N_st - write_buffer = trim(write_buffer)//' Energy S^2 Residual' + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' enddo write(iunit,'(A)') trim(write_buffer) write_buffer = '===== ' @@ -439,51 +537,52 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz write_buffer = trim(write_buffer)//' ================ =========== ===========' enddo write(iunit,'(A)') trim(write_buffer) - - integer, external :: align_double - integer :: fd(3) - type(c_ptr) :: c_pointer(3) + + integer, external :: align_double + integer :: fd(3) + type(c_ptr) :: c_pointer(3) sze_8 = align_double(sze) - + itermax = min(davidson_sze_max, sze/N_st_diag) - call mmap( & - trim(ezfio_work_dir)//'U', & - (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & + call mmap( & + trim(ezfio_work_dir)//'U', & + (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & 8, fd(1), .False., c_pointer(1)) call c_f_pointer(c_pointer(1), W, (/ sze_8,N_st_diag*itermax /) ) - call mmap( & - trim(ezfio_work_dir)//'W', & - (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & + call mmap( & + trim(ezfio_work_dir)//'W', & + (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & 8, fd(2), .False., c_pointer(2)) call c_f_pointer(c_pointer(2), U, (/ sze_8,N_st_diag*itermax /) ) - call mmap( & - trim(ezfio_work_dir)//'S', & - (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & + call mmap( & + trim(ezfio_work_dir)//'S', & + (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & 8, fd(3), .False., c_pointer(3)) call c_f_pointer(c_pointer(3), S, (/ sze_8,N_st_diag*itermax /) ) allocate( & - h(N_st_diag*itermax,N_st_diag*itermax), & - y(N_st_diag*itermax,N_st_diag*itermax), & - s_(N_st_diag*itermax,N_st_diag*itermax), & - s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + overlap(N_st_diag*itermax, N_st_diag*itermax), & residual_norm(N_st_diag), & - c(N_st_diag*itermax), & - s2(N_st_diag*itermax), & + c(N_st_diag*itermax), & + s2(N_st_diag*itermax), & lambda(N_st_diag*itermax)) - h = 0.d0 - s_ = 0.d0 - s_tmp = 0.d0 + h = 0.d0 U = 0.d0 W = 0.d0 S = 0.d0 y = 0.d0 - - + s_ = 0.d0 + s_tmp = 0.d0 + + ASSERT (N_st > 0) ASSERT (N_st_diag >= N_st) ASSERT (sze > 0) @@ -497,6 +596,7 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz double precision :: r1, r2 do k=N_st+1,N_st_diag + u_in(k,k) = 10.d0 do i=1,sze call random_number(r1) r1 = dsqrt(-2.d0*dlog(r1)) @@ -546,6 +646,45 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz 0.d0, s_(shift+1,1), size(s_,1)) enddo +! ! Diagonalize S^2 +! ! --------------- +! +! call lapack_diag(s2,y,s_,size(s_,1),shift2) +! +! +! ! Rotate H in the basis of eigenfunctions of s2 +! ! --------------------------------------------- +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('T','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) +! +! ! Damp interaction between different spin states +! ! ------------------------------------------------ +! +! do k=1,shift2 +! do l=1,shift2 +! if (dabs(s2(k) - s2(l)) > 1.d0) then +! h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l)))) +! endif +! enddo +! enddo +! +! ! Rotate back H +! ! ------------- +! +! call dgemm('N','T',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) + ! Diagonalize h ! ------------- @@ -568,25 +707,74 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz s2(k) = s_(k,k) + S_z2_Sz enddo + if (s2_eig) then - logical :: state_ok(N_st_diag*davidson_sze_max) + do k=1,shift2 + state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + enddo + else + state_ok(k) = .True. + endif + + do k=1,shift2 + if (.not. state_ok(k)) then + do l=k+1,shift2 + if (state_ok(l)) then + call dswap(shift2, y(1,k), 1, y(1,l), 1) + call dswap(1, s2(k), 1, s2(l), 1) + call dswap(1, lambda(k), 1, lambda(l), 1) + state_ok(k) = .True. + state_ok(l) = .False. + exit + endif + enddo + endif + enddo + + if (state_following) then + + ! Compute overlap with U_in + ! ------------------------- + + integer :: order(N_st_diag) + double precision :: cmax + overlap = -1.d0 do k=1,shift2 - state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + do i=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo enddo - do k=1,shift2 - if (.not. state_ok(k)) then - do l=k+1,shift2 - if (state_ok(l)) then - call dswap(shift2, y(1,k), 1, y(1,l), 1) - call dswap(1, s2(k), 1, s2(l), 1) - call dswap(1, lambda(k), 1, lambda(l), 1) - state_ok(k) = .True. - state_ok(l) = .False. - exit - endif - enddo + do k=1,N_st + cmax = -1.d0 + do i=1,shift2 + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,shift2 + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) endif enddo + do k=1,N_st + overlap(k,1) = lambda(k) + overlap(k,2) = s2(k) + enddo + do k=1,N_st + l = order(k) + if (k /= l) then + lambda(k) = overlap(l,1) + s2(k) = overlap(l,2) + endif + enddo + endif @@ -604,11 +792,31 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz ! ----------------------------------------- do k=1,N_st_diag - do i=1,sze - U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & - * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & - )/max(H_jj(i) - lambda (k),1.d-2) - enddo + if (state_ok(k)) then + do i=1,sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & + * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & + )/max(H_jj(i) - lambda (k),1.d-2) + enddo + else + ! Randomize components with bad + do i=1,sze-2,2 + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U(i,shift2+k) = r1*dcos(r2) + U(i+1,shift2+k) = r1*dsin(r2) + enddo + do i=sze-2+1,sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U(i,shift2+k) = r1*dcos(r2) + enddo + endif + if (k <= N_st) then residual_norm(k) = u_dot_u(U(1,shift2+k),sze) to_print(1,k) = lambda(k) + nuclear_repulsion @@ -665,7 +873,7 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz deallocate ( & residual_norm, & - c, & + c, overlap, & h, & y, s_, s_tmp, & lambda & diff --git a/src/Davidson/diagonalize_CI.irp.f b/src/Davidson/diagonalize_CI.irp.f index 3b2c9ed0..e1b67438 100644 --- a/src/Davidson/diagonalize_CI.irp.f +++ b/src/Davidson/diagonalize_CI.irp.f @@ -40,6 +40,7 @@ END_PROVIDER double precision, allocatable :: e_array(:) integer, allocatable :: iorder(:) + PROVIDE threshold_davidson ! Guess values for the "N_states" states of the CI_eigenvectors do j=1,min(N_states,N_det) do i=1,N_det diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index dd5ab1ab..117e704e 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -88,9 +88,12 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,1) - do sh2=sh,shortcut(0,1) - exa = 0 - do ni=1,Nint + do sh2=1,shortcut(0,1) + exa = popcnt(xor(version(1,sh,1), version(1,sh2,1))) + if(exa > 2) then + cycle + end if + do ni=2,Nint exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) end do if(exa > 2) then @@ -99,29 +102,27 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) do i=shortcut(sh,1),shortcut(sh+1,1)-1 org_i = sort_idx(i,1) - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1,1)-1 - end if do ni=1,Nint sorted_i(ni) = sorted(ni,i,1) enddo - do j=shortcut(sh2,1),endi + jloop: do j=shortcut(sh2,1),shortcut(sh2+1,1)-1 org_j = sort_idx(j,1) - ext = exa - do ni=1,Nint - ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) - end do - if(ext <= 4) then - call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) - do istate=1,N_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) - enddo + ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) + if(ext > 4) then + cycle jloop endif - enddo + do ni=2,Nint + ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) + if(ext > 4) then + cycle jloop + endif + end do + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + do istate=1,N_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) + enddo + enddo jloop enddo enddo enddo @@ -131,19 +132,19 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) - do j=shortcut(sh,2),i-1 + do j=shortcut(sh,2),shortcut(sh+1,2)-1 org_j = sort_idx(j,2) - ext = 0 - do ni=1,Nint + ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2))) + do ni=2,Nint ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2))) end do - if(ext == 4) then - call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) - do istate=1,N_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) - enddo - end if + if(ext /= 4) then + cycle + endif + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + do istate=1,N_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) + enddo end do end do enddo @@ -313,7 +314,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) integer :: blockb, blockb2, istep double precision :: ave_workload, workload, target_workload_inv - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st N_st_8 = align_double(N_st) @@ -328,49 +329,62 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) v_0 = 0.d0 s_0 = 0.d0 - do i=1,n - do istate=1,N_st - ut(istate,i) = u_0(i,istate) - enddo - enddo - call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& - !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) + !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) allocate(vt(N_st_8,n),st(N_st_8,n)) Vt = 0.d0 St = 0.d0 - !$OMP DO SCHEDULE(static,1) + !$OMP DO + do i=1,n + do istate=1,N_st + ut(istate,i) = u_0(sort_idx(i,2),istate) + enddo + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) - do j=shortcut(sh,2),i-1 + do j=shortcut(sh,2),shortcut(sh+1,2)-1 org_j = sort_idx(j,2) - ext = 0 - do ni=1,Nint + ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2))) + if (ext > 4) cycle + do ni=2,Nint ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2))) + if (ext > 4) exit end do if(ext == 4) then call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) - st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) enddo end if end do end do enddo - !$OMP END DO NOWAIT + !$OMP END DO + + !$OMP DO + do i=1,n + do istate=1,N_st + ut(istate,i) = u_0(sort_idx(i,1),istate) + enddo + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,1) - !$OMP DO SCHEDULE(static,1) - do sh2=sh,shortcut(0,1) + do sh2=1,shortcut(0,1) + if (sh==sh2) cycle + exa = 0 do ni=1,Nint exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) @@ -381,44 +395,102 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) do i=shortcut(sh,1),shortcut(sh+1,1)-1 org_i = sort_idx(i,1) - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1,1)-1 - end if do ni=1,Nint sorted_i(ni) = sorted(ni,i,1) enddo - do j=shortcut(sh2,1),endi - ext = exa - do ni=1,Nint + do j=shortcut(sh2,1),shortcut(sh2+1,1)-1 + ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) + if (ext > 4) cycle + do ni=2,Nint ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) + if (ext > 4) exit end do if(ext <= 4) then org_j = sort_idx(j,1) call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) if (hij /= 0.d0) then do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) enddo endif if (ext /= 2) then call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) if (s2 /= 0.d0) then do istate=1,n_st - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) - st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) enddo endif endif endif enddo + + enddo + enddo + + exa = 0 + + do i=shortcut(sh,1),shortcut(sh+1,1)-1 + org_i = sort_idx(i,1) + do ni=1,Nint + sorted_i(ni) = sorted(ni,i,1) + enddo + + do j=shortcut(sh,1),i-1 + ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) + if (ext > 4) cycle + do ni=2,Nint + ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) + if (ext > 4) exit + end do + if(ext <= 4) then + org_j = sort_idx(j,1) + call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) + if (hij /= 0.d0) then + do istate=1,n_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) + enddo + endif + if (ext /= 2) then + call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) + if (s2 /= 0.d0) then + do istate=1,n_st + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) + enddo + endif + endif + endif + enddo + + do j=i+1,shortcut(sh+1,1)-1 + if (i==j) cycle + ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) + if (ext > 4) cycle + do ni=2,Nint + ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) + if (ext > 4) exit + end do + if(ext <= 4) then + org_j = sort_idx(j,1) + call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) + if (hij /= 0.d0) then + do istate=1,n_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) + enddo + endif + if (ext /= 2) then + call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) + if (s2 /= 0.d0) then + do istate=1,n_st + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) + enddo + endif + endif + endif enddo enddo - !$OMP END DO NOWAIT enddo + !$OMP END DO !$OMP CRITICAL (u0Hu0) do istate=1,N_st diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index 39b0f58e..bed3327d 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -438,8 +438,12 @@ end do i=1,N_states psi_coef_min(i) = minval(psi_coef(:,i)) psi_coef_max(i) = maxval(psi_coef(:,i)) - abs_psi_coef_min(i) = dabs(psi_coef_min(i)) - abs_psi_coef_max(i) = dabs(psi_coef_max(i)) + abs_psi_coef_min(i) = minval( dabs(psi_coef(:,i)) ) + abs_psi_coef_max(i) = maxval( dabs(psi_coef(:,i)) ) + call write_double(6,psi_coef_max(i), 'Max coef') + call write_double(6,psi_coef_min(i), 'Min coef') + call write_double(6,abs_psi_coef_max(i), 'Max abs coef') + call write_double(6,abs_psi_coef_min(i), 'Min abs coef') enddo END_PROVIDER @@ -760,37 +764,85 @@ subroutine apply_excitation(det, exc, res, ok, Nint) ok = .false. degree = exc(0,1,1) + exc(0,1,2) - if(.not. (degree > 0 .and. degree <= 2)) then - print *, degree - print *, "apply ex" - STOP - endif - - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) +! call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) +! INLINE + select case(degree) + case(2) + if (exc(0,1,1) == 2) then + h1 = exc(1,1,1) + h2 = exc(2,1,1) + p1 = exc(1,2,1) + p2 = exc(2,2,1) + s1 = 1 + s2 = 1 + else if (exc(0,1,2) == 2) then + h1 = exc(1,1,2) + h2 = exc(2,1,2) + p1 = exc(1,2,2) + p2 = exc(2,2,2) + s1 = 2 + s2 = 2 + else + h1 = exc(1,1,1) + h2 = exc(1,1,2) + p1 = exc(1,2,1) + p2 = exc(1,2,2) + s1 = 1 + s2 = 2 + endif + case(1) + if (exc(0,1,1) == 1) then + h1 = exc(1,1,1) + h2 = 0 + p1 = exc(1,2,1) + p2 = 0 + s1 = 1 + s2 = 0 + else + h1 = exc(1,1,2) + h2 = 0 + p1 = exc(1,2,2) + p2 = 0 + s1 = 2 + s2 = 0 + endif + case(0) + h1 = 0 + p1 = 0 + h2 = 0 + p2 = 0 + s1 = 0 + s2 = 0 + case default + print *, degree + print *, "apply ex" + STOP + end select +! END INLINE + res = det - ii = (h1-1)/bit_kind_size + 1 - pos = mod(h1-1, 64)!iand(h1-1,bit_kind_size-1) ! mod 64 - if(iand(det(ii, s1), ishft(1_bit_kind, pos)) == 0_8) return + ii = ishft(h1-1,-bit_kind_shift) + 1 + pos = h1-1-ishft(ii-1,bit_kind_shift) + if(iand(det(ii, s1), ibset(0_bit_kind, pos)) == 0_8) return res(ii, s1) = ibclr(res(ii, s1), pos) - ii = (p1-1)/bit_kind_size + 1 - pos = mod(p1-1, 64)!iand(p1-1,bit_kind_size-1) + ii = ishft(p1-1,-bit_kind_shift) + 1 + pos = p1-1-ishft(ii-1,bit_kind_shift) if(iand(det(ii, s1), ishft(1_bit_kind, pos)) /= 0_8) return res(ii, s1) = ibset(res(ii, s1), pos) if(degree == 2) then - ii = (h2-1)/bit_kind_size + 1 - pos = mod(h2-1, 64)!iand(h2-1,bit_kind_size-1) + ii = ishft(h2-1,-bit_kind_shift) + 1 + pos = h2-1-ishft(ii-1,bit_kind_shift) if(iand(det(ii, s2), ishft(1_bit_kind, pos)) == 0_8) return res(ii, s2) = ibclr(res(ii, s2), pos) - ii = (p2-1)/bit_kind_size + 1 - pos = mod(p2-1, 64)!iand(p2-1,bit_kind_size-1) + ii = ishft(p2-1,-bit_kind_shift) + 1 + pos = p2-1-ishft(ii-1,bit_kind_shift) if(iand(det(ii, s2), ishft(1_bit_kind, pos)) /= 0_8) return res(ii, s2) = ibset(res(ii, s2), pos) endif - ok = .true. end subroutine @@ -809,14 +861,14 @@ subroutine apply_particles(det, s1, p1, s2, p2, res, ok, Nint) res = det if(p1 /= 0) then - ii = (p1-1)/bit_kind_size + 1 - pos = mod(p1-1, 64)!iand(p1-1,bit_kind_size-1) + ii = ishft(p1-1,-bit_kind_shift) + 1 + pos = p1-1-ishft(ii-1,bit_kind_shift) if(iand(det(ii, s1), ishft(1_bit_kind, pos)) /= 0_8) return res(ii, s1) = ibset(res(ii, s1), pos) end if - ii = (p2-1)/bit_kind_size + 1 - pos = mod(p2-1, 64)!iand(p2-1,bit_kind_size-1) + ii = ishft(p2-1,-bit_kind_shift) + 1 + pos = p2-1-ishft(ii-1,bit_kind_shift) if(iand(det(ii, s2), ishft(1_bit_kind, pos)) /= 0_8) return res(ii, s2) = ibset(res(ii, s2), pos) @@ -838,14 +890,14 @@ subroutine apply_holes(det, s1, h1, s2, h2, res, ok, Nint) res = det if(h1 /= 0) then - ii = (h1-1)/bit_kind_size + 1 - pos = mod(h1-1, 64)!iand(h1-1,bit_kind_size-1) + ii = ishft(h1-1,-bit_kind_shift) + 1 + pos = h1-1-ishft(ii-1,bit_kind_shift) if(iand(det(ii, s1), ishft(1_bit_kind, pos)) == 0_8) return res(ii, s1) = ibclr(res(ii, s1), pos) end if - ii = (h2-1)/bit_kind_size + 1 - pos = mod(h2-1, 64)!iand(h2-1,bit_kind_size-1) + ii = ishft(h2-1,-bit_kind_shift) + 1 + pos = h2-1-ishft(ii-1,bit_kind_shift) if(iand(det(ii, s2), ishft(1_bit_kind, pos)) == 0_8) return res(ii, s2) = ibclr(res(ii, s2), pos) @@ -865,8 +917,8 @@ subroutine apply_particle(det, s1, p1, res, ok, Nint) ok = .false. res = det - ii = (p1-1)/bit_kind_size + 1 - pos = mod(p1-1, 64)!iand(p1-1,bit_kind_size-1) + ii = ishft(p1-1,-bit_kind_shift) + 1 + pos = p1-1-ishft(ii-1,bit_kind_shift) if(iand(det(ii, s1), ishft(1_bit_kind, pos)) /= 0_8) return res(ii, s1) = ibset(res(ii, s1), pos) @@ -887,8 +939,8 @@ subroutine apply_hole(det, s1, h1, res, ok, Nint) ok = .false. res = det - ii = (h1-1)/bit_kind_size + 1 - pos = mod(h1-1, 64)!iand(h1-1,bit_kind_size-1) + ii = ishft(h1-1,-bit_kind_shift) + 1 + pos = h1-1-ishft(ii-1,bit_kind_shift) if(iand(det(ii, s1), ishft(1_bit_kind, pos)) == 0_8) return res(ii, s1) = ibclr(res(ii, s1), pos) diff --git a/src/MRPT_Utils/MRPT_Utils_main.irp.f b/src/MRPT_Utils/MRPT_Utils_main.irp.f deleted file mode 100644 index fb17f054..00000000 --- a/src/MRPT_Utils/MRPT_Utils_main.irp.f +++ /dev/null @@ -1,3 +0,0 @@ - program MRPT_Utils_main - print *, "I'm a core module, I need an main! (maybe a stupid rule)" - end program MRPT_Utils_main diff --git a/src/Utils/map_module.f90 b/src/Utils/map_module.f90 index 4a83582f..80260233 100644 --- a/src/Utils/map_module.f90 +++ b/src/Utils/map_module.f90 @@ -622,7 +622,7 @@ subroutine search_key_big_interval(key,X,sze,idx,ibegin_in,iend_in) istep = ishft(iend-ibegin,-1) idx = ibegin + istep - do while (istep > 16) + do while (istep > 64) idx = ibegin + istep ! TODO : Cache misses if (cache_key < X(idx)) then @@ -660,8 +660,8 @@ subroutine search_key_big_interval(key,X,sze,idx,ibegin_in,iend_in) endif enddo idx = ibegin - if (min(iend_in,sze) > ibegin+16) then - iend = ibegin+16 + if (min(iend_in,sze) > ibegin+64) then + iend = ibegin+64 do while (cache_key > X(idx)) idx = idx+1 end do @@ -730,7 +730,7 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in istep = ishft(iend-ibegin,-1) idx = ibegin + istep - do while (istep > 16) + do while (istep > 64) idx = ibegin + istep if (cache_key < X(idx)) then iend = idx @@ -771,8 +771,8 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in enddo idx = ibegin value = Y(idx) - if (min(iend_in,sze) > ibegin+16) then - iend = ibegin+16 + if (min(iend_in,sze) > ibegin+64) then + iend = ibegin+64 do while (cache_key > X(idx)) idx = idx+1 value = Y(idx) diff --git a/tests/bats/cassd.bats b/tests/bats/cassd.bats index a1f1a736..2a8fabc2 100644 --- a/tests/bats/cassd.bats +++ b/tests/bats/cassd.bats @@ -13,7 +13,7 @@ source $QP_ROOT/tests/bats/common.bats.sh qp_set_mo_class $INPUT -core "[1]" -inact "[2,5]" -act "[3,4,6,7]" -virt "[8-24]" qp_run cassd_zmq $INPUT energy="$(ezfio get cas_sd_zmq energy_pt2)" - eq $energy -76.2311177912495 2.E-5 + eq $energy -76.231084536315 5.E-5 ezfio set determinants n_det_max 2048 ezfio set determinants read_wf True @@ -21,6 +21,6 @@ source $QP_ROOT/tests/bats/common.bats.sh qp_run cassd_zmq $INPUT ezfio set determinants read_wf False energy="$(ezfio get cas_sd_zmq energy)" - eq $energy -76.2300888408526 2.E-5 + eq $energy -76.2300887947446 2.E-5 } diff --git a/tests/bats/mrcepa0.bats b/tests/bats/mrcepa0.bats index ed69681f..dc9e0bb4 100644 --- a/tests/bats/mrcepa0.bats +++ b/tests/bats/mrcepa0.bats @@ -16,7 +16,7 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.238562120457431 1.e-4 + eq $energy -76.23752746236 1.e-4 } @test "MRCC H2O cc-pVDZ" { @@ -28,12 +28,11 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set determinants threshold_generators 1. ezfio set determinants threshold_selectors 1. ezfio set determinants read_wf True - ezfio set determinants read_wf True ezfio set mrcepa0 lambda_type 0 ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.238527498388962 1.e-4 + eq $energy -76.237469267705 2.e-4 } @test "MRSC2 H2O cc-pVDZ" { @@ -45,11 +44,11 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set determinants threshold_generators 1. ezfio set determinants threshold_selectors 1. ezfio set determinants read_wf True - ezfio set mrcepa0 lambda_type 0 + ezfio set mrcepa0 lambda_type 1 ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.235833732594187 1.e-4 + eq $energy -76.2347764009137 2.e-4 } @test "MRCEPA0 H2O cc-pVDZ" { @@ -61,10 +60,10 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set determinants threshold_generators 1. ezfio set determinants threshold_selectors 1. ezfio set determinants read_wf True - ezfio set mrcepa0 lambda_type 0 + ezfio set mrcepa0 lambda_type 1 ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.2418799284763 1.e-4 + eq $energy -76.2406942855164 2.e-4 } diff --git a/tests/bats/pseudo.bats b/tests/bats/pseudo.bats index a20b0842..4b374d76 100644 --- a/tests/bats/pseudo.bats +++ b/tests/bats/pseudo.bats @@ -48,6 +48,6 @@ function run_FCI_ZMQ() { @test "FCI H2O VDZ pseudo" { qp_set_mo_class h2o_pseudo.ezfio -core "[1]" -act "[2-12]" -del "[13-23]" - run_FCI_ZMQ h2o_pseudo.ezfio 2000 -0.170399597228904E+02 -0.170400168816800E+02 + run_FCI_ZMQ h2o_pseudo.ezfio 2000 -17.0399584106077 -17.0400170044515 } diff --git a/tests/run_tests.sh b/tests/run_tests.sh index c56f5602..3ac452ad 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -11,7 +11,7 @@ mrcepa0.bats #foboci.bats -export QP_PREFIX="timeout -s 9 300" +export QP_PREFIX="timeout -s 9 600" #export QP_TASK_DEBUG=1 rm -rf work output