diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index 42710ce5..85daf7db 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -48,6 +48,7 @@ jobs: ./configure -i docopt || : ./configure -i resultsFile || : ./configure -i bats || : + ./configure -i trexio-nohdf5 || : ./configure -c ./config/gfortran_debug.cfg - name: Compilation run: | diff --git a/.github/workflows/configuration.yml b/.github/workflows/configuration.yml index 14019e5d..ba37f5dd 100644 --- a/.github/workflows/configuration.yml +++ b/.github/workflows/configuration.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config + sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config hdf5 - name: zlib run: | ./configure -i zlib || echo OK @@ -50,6 +50,12 @@ jobs: - name: bats run: | ./configure -i bats || echo OK + - name: trexio-nohdf5 + run: | + ./configure -i trexio-nohdf5 || echo OK + - name: trexio + run: | + ./configure -i trexio || echo OK - name: Final check run: | ./configure -c config/gfortran_debug.cfg diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index bec11f5e..00000000 --- a/.travis.yml +++ /dev/null @@ -1,52 +0,0 @@ -#sudo: true -#before_script: -# - sudo apt-get update -q -# - sudo apt-get remove curl -# - sudo apt-get remove zlib1g-dev -# - sudo apt-get install autoconf -# - sudo rm /usr/local/bin/bats - -os: linux - -dist: bionic - -sudo: false - -compiler: gfortran - -addons: - apt: - packages: - - gfortran - - gcc - - libatlas-base-dev -# - liblapack-dev -# - libblas-dev - - wget - -env: - - OPAMROOT=$HOME/.opam - -cache: - directories: - - $HOME/.opam/ - - $HOME/cache - -language: python -python: - - "3.7" - -stages: - - configuration - - compilation - - testing - -jobs: - include: - - stage: configuration - script: travis/configuration.sh - - stage: compilation - script: travis/compilation.sh - - stage: testing - script: travis/testing.sh - diff --git a/RELEASE_NOTES.org b/RELEASE_NOTES.org index 9b579146..3bd02898 100644 --- a/RELEASE_NOTES.org +++ b/RELEASE_NOTES.org @@ -9,15 +9,23 @@ - Configure adapted for ARM - Added many types of integrals - Accelerated four-index transformation - -*** TODO: take from dev - - [ ] Added GTOs with complex exponent - - Updated version of f77-zmq - Added transcorrelated SCF - Added transcorrelated CIPSI + - Added CCSD and CCSD(T) + - Added MO localization + - Changed coupling parameters for ROHF + - General Davidson algorithm + - Accelerated restore_symmetry + - Point charges in the Hamiltonian + - Removed cryptokit dependency in OCaml + - Using now standard convention in RDM + - Added molecular properties + - [ ] Added GTOs with complex exponent + +*** TODO: take from dev + - Updated version of f77-zmq - Started to introduce shells in AOs - Added ECMD UEG functional - - General Davidson algorithm * Version 2.2 diff --git a/bin/qp_test b/bin/qp_test index 288b7291..d3a188fb 100755 --- a/bin/qp_test +++ b/bin/qp_test @@ -46,7 +46,7 @@ def main(arguments): append_bats(dirname, filenames) else: for (dirname, _, filenames) in os.walk(os.getcwd(), followlinks=False): - if "IRPF90_temp" not in dirname: + if "IRPF90_temp" not in dirname and "external" not in dirname: append_bats(dirname, filenames) l_bats = [y for _, y in sorted(l_bats)] @@ -67,6 +67,7 @@ def main(arguments): os.system(test+" python3 bats_to_sh.py "+bats_file+ "| bash") else: +# print(" ".join(["bats", "--verbose-run", "--trace", bats_file])) subprocess.check_call(["bats", "--verbose-run", "--trace", bats_file], env=os.environ) diff --git a/config/ifort_2021_xHost.cfg b/config/ifort_2021_xHost.cfg index 1161833b..9170b059 100644 --- a/config/ifort_2021_xHost.cfg +++ b/config/ifort_2021_xHost.cfg @@ -6,7 +6,7 @@ # --align=32 : Align all provided arrays on a 32-byte boundary # [COMMON] -FC : ifort -fpic +FC : ifort -fpic -diag-disable 5462 LAPACK_LIB : -mkl=parallel IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=64 -DINTEL diff --git a/configure b/configure index 5b50d0d7..66bc9419 100755 --- a/configure +++ b/configure @@ -9,6 +9,8 @@ echo "QP_ROOT="$QP_ROOT unset CC unset CCXX +TREXIO_VERSION=2.3.2 + # Force GCC instead of ICC for dependencies export CC=gcc @@ -189,7 +191,7 @@ if [[ "${PACKAGES}.x" != ".x" ]] ; then fi if [[ ${PACKAGES} = all ]] ; then - PACKAGES="zlib ninja zeromq f77zmq gmp ocaml docopt resultsFile bats" + PACKAGES="zlib ninja zeromq f77zmq gmp ocaml docopt resultsFile bats trexio" fi @@ -203,6 +205,33 @@ for PACKAGE in ${PACKAGES} ; do mv ninja "\${QP_ROOT}"/bin/ EOF + elif [[ ${PACKAGE} = trexio-nohdf5 ]] ; then + + VERSION=$TREXIO_VERSION + execute << EOF + cd "\${QP_ROOT}"/external + wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz + tar -zxf trexio-${VERSION}.tar.gz + cd trexio-${VERSION} + ./configure --prefix=\${QP_ROOT} --without-hdf5 + make -j 8 && make -j 8 check && make -j 8 install + cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files + tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz + mv ninja "\${QP_ROOT}"/bin/ +EOF + elif [[ ${PACKAGE} = trexio ]] ; then + + VERSION=$TREXIO_VERSION + execute << EOF + cd "\${QP_ROOT}"/external + wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz + tar -zxf trexio-${VERSION}.tar.gz + cd trexio-${VERSION} + ./configure --prefix=\${QP_ROOT} + make -j 8 && make -j 8 check && make -j 8 install + cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files +EOF + elif [[ ${PACKAGE} = gmp ]] ; then @@ -232,7 +261,7 @@ EOF execute << EOF cd "\${QP_ROOT}"/external - tar --gunzip --extract --file qp2-dependencies/f77-zmq-4.3.2.tar.gz + tar --gunzip --extract --file qp2-dependencies/f77-zmq-4.3.?.tar.gz cd f77-zmq-* ./configure --prefix=\$QP_ROOT export ZMQ_H="\$QP_ROOT"/include/zmq.h @@ -338,6 +367,12 @@ if [[ ${ZEROMQ} = $(not_found) ]] ; then fail fi +TREXIO=$(find_lib -ltrexio) +if [[ ${TREXIO} = $(not_found) ]] ; then + error "TREXIO (trexio,trexio-nohdf5) is not installed. If you don't have HDF5, use trexio-nohdf5" + fail +fi + F77ZMQ=$(find_lib -lzmq -lf77zmq -lpthread) if [[ ${F77ZMQ} = $(not_found) ]] ; then error "Fortran binding of ZeroMQ (f77zmq) is not installed." diff --git a/data/basis/none b/data/basis/none new file mode 100644 index 00000000..df5d59f1 --- /dev/null +++ b/data/basis/none @@ -0,0 +1,5 @@ +$DATA + +HYDROGEN + +$END diff --git a/etc/qp.rc b/etc/qp.rc index c56661c7..9eec4570 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -110,6 +110,11 @@ function qp() unset COMMAND ;; + "test") + shift + qp_test $@ + ;; + *) which "qp_$1" &> /dev/null if [[ $? -eq 0 ]] ; then @@ -183,7 +188,7 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in * ; do [[ -f ${i}/ezfio/.version ]] && echo $i ; done)" -- ${cur} ) ) + COMPREPLY=( $(compgen -W "$(for i in */ $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) return 0 ;; plugins) @@ -215,10 +220,15 @@ _qp_Complete() return 0 ;; esac;; + test) + COMPREPLY=( $(compgen -W "-v -a " -- $cur ) ) + return 0 + ;; *) COMPREPLY=( $(compgen -W 'plugins set_file \ unset_file man \ create_ezfio \ + test \ convert_output_to_ezfio \ -h update' -- $cur ) ) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index f40bde09..e0d0e02e 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 +Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 diff --git a/ocaml/Input_ao_basis.ml b/ocaml/Input_ao_basis.ml index 95d37a7a..841089ea 100644 --- a/ocaml/Input_ao_basis.ml +++ b/ocaml/Input_ao_basis.ml @@ -247,8 +247,7 @@ end = struct let read () = - if (Ezfio.has_ao_basis_ao_basis ()) then - begin + try let result = { ao_basis = read_ao_basis (); ao_num = read_ao_num () ; @@ -267,9 +266,8 @@ end = struct |> MD5.to_string |> Ezfio.set_ao_basis_ao_md5 ; Some result - end - else - None + with + | _ -> (Ezfio.set_ao_basis_ao_md5 "None" ; None) ;; diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml index 4583b118..8e452762 100644 --- a/ocaml/qp_create_ezfio.ml +++ b/ocaml/qp_create_ezfio.ml @@ -478,6 +478,7 @@ let run ?o b au c d m p cart xyz_file = let nmax = Nucl_number.get_max () in + let rec do_work (accu:(Atom.t*Nucl_number.t) list) (n:int) = function | [] -> accu | e::tail -> @@ -520,141 +521,144 @@ let run ?o b au c d m p cart xyz_file = in let long_basis = Long_basis.of_basis basis in let ao_num = List.length long_basis in - Ezfio.set_ao_basis_ao_num ao_num; - Ezfio.set_ao_basis_ao_basis b; - Ezfio.set_basis_basis b; - let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc) long_basis - and ao_nucl = list_map (fun (_,_,n) -> Nucl_number.to_int n) long_basis - and ao_power= - let l = list_map (fun (x,_,_) -> x) long_basis in - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.x)) l)@ - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.y)) l)@ - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.z)) l) - in - let ao_prim_num_max = List.fold_left (fun s x -> - if x > s then x - else s) 0 ao_prim_num - in - let gtos = - list_map (fun (_,x,_) -> x) long_basis - in - - let create_expo_coef ec = - let coefs = - begin match ec with - | `Coefs -> list_map (fun x-> - list_map (fun (_,coef) -> - AO_coef.to_float coef) x.Gto.lc) gtos - | `Expos -> list_map (fun x-> - list_map (fun (prim,_) -> AO_expo.to_float - prim.GaussianPrimitive.expo) x.Gto.lc) gtos - end + if ao_num > 0 then + begin + Ezfio.set_ao_basis_ao_num ao_num; + Ezfio.set_ao_basis_ao_basis b; + Ezfio.set_basis_basis b; + let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc) long_basis + and ao_nucl = list_map (fun (_,_,n) -> Nucl_number.to_int n) long_basis + and ao_power= + let l = list_map (fun (x,_,_) -> x) long_basis in + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.x)) l)@ + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.y)) l)@ + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.z)) l) in - let rec get_n n accu = function - | [] -> List.rev accu - | h::tail -> - let y = - begin match List.nth_opt h n with - | Some x -> x - | None -> 0. + let ao_prim_num_max = List.fold_left (fun s x -> + if x > s then x + else s) 0 ao_prim_num + in + let gtos = + list_map (fun (_,x,_) -> x) long_basis + in + + let create_expo_coef ec = + let coefs = + begin match ec with + | `Coefs -> list_map (fun x-> + list_map (fun (_,coef) -> + AO_coef.to_float coef) x.Gto.lc) gtos + | `Expos -> list_map (fun x-> + list_map (fun (prim,_) -> AO_expo.to_float + prim.GaussianPrimitive.expo) x.Gto.lc) gtos end - in - get_n n (y::accu) tail + in + let rec get_n n accu = function + | [] -> List.rev accu + | h::tail -> + let y = + begin match List.nth_opt h n with + | Some x -> x + | None -> 0. + end + in + get_n n (y::accu) tail + in + let rec build accu = function + | n when n=ao_prim_num_max -> accu + | n -> build ( accu @ (get_n n [] coefs) ) (n+1) + in + build [] 0 in - let rec build accu = function - | n when n=ao_prim_num_max -> accu - | n -> build ( accu @ (get_n n [] coefs) ) (n+1) - in - build [] 0 - in - let ao_coef = create_expo_coef `Coefs - and ao_expo = create_expo_coef `Expos - in - let () = - let shell_num = List.length basis in - let lc : (GaussianPrimitive.t * Qptypes.AO_coef.t) list list = - list_map ( fun (g,_) -> g.Gto.lc ) basis - in - let ang_mom = - list_map (fun (l : (GaussianPrimitive.t * Qptypes.AO_coef.t) list) -> - let x, _ = List.hd l in - Angmom.to_l x.GaussianPrimitive.sym |> Qptypes.Positive_int.to_int - ) lc - in - let expo = - list_map (fun l -> list_map (fun (x,_) -> Qptypes.AO_expo.to_float x.GaussianPrimitive.expo) l ) lc - |> List.concat - in - let coef = - list_map (fun l -> - list_map (fun (_,x) -> Qptypes.AO_coef.to_float x) l - ) lc - |> List.concat - in - let shell_prim_num = - list_map List.length lc - in - let shell_idx = - let rec make_list n accu = function - | 0 -> accu - | i -> make_list n (n :: accu) (i-1) + let ao_coef = create_expo_coef `Coefs + and ao_expo = create_expo_coef `Expos in - let rec aux count accu = function - | [] -> List.rev accu - | l::rest -> - let new_l = make_list count accu (List.length l) in - aux (count+1) new_l rest - in - aux 1 [] lc - in - let prim_num = List.length coef in - Ezfio.set_basis_typ "Gaussian"; - Ezfio.set_basis_shell_num shell_num; - Ezfio.set_basis_prim_num prim_num ; - Ezfio.set_basis_shell_prim_num (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] ~data:shell_prim_num); - Ezfio.set_basis_shell_ang_mom (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] ~data:ang_mom ) ; - Ezfio.set_basis_shell_index (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:shell_idx) ; - Ezfio.set_basis_basis_nucleus_index (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] - ~data:( list_map (fun (_,n) -> Nucl_number.to_int n) basis) - ) ; - Ezfio.set_basis_nucleus_shell_num(Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| nucl_num |] - ~data:( - list_map (fun (_,n) -> Nucl_number.to_int n) basis - |> List.fold_left (fun accu i -> - match accu with - | [] -> [(1,i)] - | (h,j) :: rest -> if j == i then ((h+1,j)::rest) else ((1,i)::(h,j)::rest) - ) [] - |> List.rev - |> List.map fst - )) ; - Ezfio.set_basis_prim_coef (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:coef) ; - Ezfio.set_basis_prim_expo (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:expo) ; + let () = + let shell_num = List.length basis in + let lc : (GaussianPrimitive.t * Qptypes.AO_coef.t) list list = + list_map ( fun (g,_) -> g.Gto.lc ) basis + in + let ang_mom = + list_map (fun (l : (GaussianPrimitive.t * Qptypes.AO_coef.t) list) -> + let x, _ = List.hd l in + Angmom.to_l x.GaussianPrimitive.sym |> Qptypes.Positive_int.to_int + ) lc + in + let expo = + list_map (fun l -> list_map (fun (x,_) -> Qptypes.AO_expo.to_float x.GaussianPrimitive.expo) l ) lc + |> List.concat + in + let coef = + list_map (fun l -> + list_map (fun (_,x) -> Qptypes.AO_coef.to_float x) l + ) lc + |> List.concat + in + let shell_prim_num = + list_map List.length lc + in + let shell_idx = + let rec make_list n accu = function + | 0 -> accu + | i -> make_list n (n :: accu) (i-1) + in + let rec aux count accu = function + | [] -> List.rev accu + | l::rest -> + let new_l = make_list count accu (List.length l) in + aux (count+1) new_l rest + in + aux 1 [] lc + in + let prim_num = List.length coef in + Ezfio.set_basis_typ "Gaussian"; + Ezfio.set_basis_shell_num shell_num; + Ezfio.set_basis_prim_num prim_num ; + Ezfio.set_basis_shell_prim_num (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] ~data:shell_prim_num); + Ezfio.set_basis_shell_ang_mom (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] ~data:ang_mom ) ; + Ezfio.set_basis_shell_index (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:shell_idx) ; + Ezfio.set_basis_basis_nucleus_index (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] + ~data:( list_map (fun (_,n) -> Nucl_number.to_int n) basis) + ) ; + Ezfio.set_basis_nucleus_shell_num(Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| nucl_num |] + ~data:( + list_map (fun (_,n) -> Nucl_number.to_int n) basis + |> List.fold_left (fun accu i -> + match accu with + | [] -> [(1,i)] + | (h,j) :: rest -> if j == i then ((h+1,j)::rest) else ((1,i)::(h,j)::rest) + ) [] + |> List.rev + |> List.map fst + )) ; + Ezfio.set_basis_prim_coef (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:coef) ; + Ezfio.set_basis_prim_expo (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:expo) ; - Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; - Ezfio.set_ao_basis_ao_nucl(Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; - Ezfio.set_ao_basis_ao_power(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; 3 |] ~data:ao_power) ; - Ezfio.set_ao_basis_ao_coef(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_coef) ; - Ezfio.set_ao_basis_ao_expo(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_expo) ; - Ezfio.set_ao_basis_ao_cartesian(cart); - in - match Input.Ao_basis.read () with - | None -> failwith "Error in basis" - | Some x -> Input.Ao_basis.write x + Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; + Ezfio.set_ao_basis_ao_nucl(Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; + Ezfio.set_ao_basis_ao_power(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; 3 |] ~data:ao_power) ; + Ezfio.set_ao_basis_ao_coef(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_coef) ; + Ezfio.set_ao_basis_ao_expo(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_expo) ; + Ezfio.set_ao_basis_ao_cartesian(cart); + in + match Input.Ao_basis.read () with + | None -> failwith "Error in basis" + | Some x -> Input.Ao_basis.write x + end in let () = try write_file () with @@ -781,7 +785,7 @@ If a file with the same name as the basis set exists, this file will be read. O run ?o:output basis au charge dummy multiplicity pseudo cart xyz_filename ) with - | Failure txt -> Printf.eprintf "Fatal error: %s\n%!" txt +(* | Failure txt -> Printf.eprintf "Fatal error: %s\n%!" txt *) | Command_line.Error txt -> Printf.eprintf "Command line error: %s\n%!" txt diff --git a/scripts/Hn.py b/scripts/Hn.py new file mode 100644 index 00000000..0f938510 --- /dev/null +++ b/scripts/Hn.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +import sys +from math import * +arg = sys.argv +#f = open('data_dft','r') +n = int(sys.argv[1]) +r = float(sys.argv[2]) +f = open('H'+str(n)+'_'+str(r)+'.xyz','w') +string=str(n)+"\n" +f.write(string) +string="\n" +f.write(string) +for i in range(n): + x = r * cos(2.* i* pi/n) + y = r * sin(2.* i* pi/n) + z = 0. + string="H "+str(x)+" "+str(y)+" "+str(z)+"\n" + f.write(string) + +#lines = f.readlines() +#cipsi_dft= [] +# +#dissoc = [] +#dissoc.append(float(-76.0179223470363)) +#dissoc.append(float(-76.0592367866993)) +#dissoc.append(float(-76.0678739715659)) +#delta_e = [] +# +#for line in lines: +# data = line.split() +# if(len(data)>0): +# dft=float(data[1]) +# fci=float(data[2]) +# e=fci+dft +# cipsi_dft.append(e) +# +#print(*cipsi_dft,sep=" & ") +# +#for i in 0,1,2: +# delta_e.append(1000.*(dissoc[i] - cipsi_dft[i])) +# +#print(*delta_e,sep=" & ") +# diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index aad85778..606fd0f6 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -25,7 +25,7 @@ except ImportError: "quantum_package.rc")) print("\n".join(["", "Error:", "source %s" % f, ""])) - sys.exit(1) + raise # Compress path def comp_path(path): @@ -38,7 +38,7 @@ def comp_path(path): from qp_path import QP_ROOT, QP_SRC, QP_EZFIO -LIB = " -lz" +LIB = " -lz -ltrexio" EZFIO_LIB = join("$QP_ROOT", "lib", "libezfio_irp.a") ZMQ_LIB = join("$QP_ROOT", "lib", "libf77zmq.a") + " " + join("$QP_ROOT", "lib", "libzmq.a") + " -lstdc++ -lrt -ldl" ROOT_BUILD_NINJA = join("$QP_ROOT", "config", "build.ninja") diff --git a/scripts/ezfio_interface/ezfio_generate_provider.py b/scripts/ezfio_interface/ezfio_generate_provider.py index 6b49955b..a282b834 100755 --- a/scripts/ezfio_interface/ezfio_generate_provider.py +++ b/scripts/ezfio_interface/ezfio_generate_provider.py @@ -52,7 +52,7 @@ BEGIN_PROVIDER [ %(type)s, %(name)s %(size)s ] %(test_null_size)s call ezfio_has_%(ezfio_dir)s_%(ezfio_name)s(has) if (has) then - write(6,'(A)') '.. >>>>> [ IO READ: %(name)s ] <<<<< ..' +! write(6,'(A)') '.. >>>>> [ IO READ: %(name)s ] <<<<< ..' call ezfio_get_%(ezfio_dir)s_%(ezfio_name)s(%(name)s) else print *, '%(ezfio_dir)s/%(ezfio_name)s not found in EZFIO file' @@ -117,7 +117,7 @@ END_PROVIDER output = self.output name = self.name l_write = ["", - " call write_time(%(output)s)", + "! call write_time(%(output)s)", ""] self.write = "\n".join(l_write) % locals() @@ -129,7 +129,7 @@ END_PROVIDER write = self.write_correspondance[self.type] l_write = ["", - " call write_time(%(output)s)", + "! call write_time(%(output)s)", " call %(write)s(%(output)s, %(name)s, &", " '%(name)s')", ""] diff --git a/scripts/ezfio_interface/qp_edit_template b/scripts/ezfio_interface/qp_edit_template index 4218456d..fe718a50 100644 --- a/scripts/ezfio_interface/qp_edit_template +++ b/scripts/ezfio_interface/qp_edit_template @@ -172,25 +172,23 @@ let run check_only ?ndet ?state ezfio_filename = (* Reorder basis set *) begin - let aos = - match Input.Ao_basis.read() with - | Some x -> x - | _ -> assert false - in - let ordering = Input.Ao_basis.ordering aos in - let test = Array.copy ordering in - Array.sort compare test ; - if test <> ordering then - begin - Printf.eprintf "Warning: Basis set is not properly ordered. Redordering.\n"; - let new_aos = Input.Ao_basis.reorder aos in - Input.Ao_basis.write new_aos; - match Input.Mo_basis.read() with - | None -> () - | Some mos -> - let new_mos = Input.Mo_basis.reorder mos ordering in - Input.Mo_basis.write new_mos - end + match Input.Ao_basis.read() with + | Some aos -> + let ordering = Input.Ao_basis.ordering aos in + let test = Array.copy ordering in + Array.sort compare test ; + if test <> ordering then + begin + Printf.eprintf "Warning: Basis set is not properly ordered. Redordering.\n"; + let new_aos = Input.Ao_basis.reorder aos in + Input.Ao_basis.write new_aos; + match Input.Mo_basis.read() with + | None -> () + | Some mos -> + let new_mos = Input.Mo_basis.reorder mos ordering in + Input.Mo_basis.write new_mos + end + | _ -> () end; begin diff --git a/scripts/get_fci_conv.sh b/scripts/get_fci_conv.sh new file mode 100755 index 00000000..8b0f5ac2 --- /dev/null +++ b/scripts/get_fci_conv.sh @@ -0,0 +1,7 @@ +file=$1 +grep "N_det =" $1 | cut -d "=" -f 2 > N_det_tmp +grep "E =" $file | cut -d "=" -f 2 > E_tmp +grep "E+PT2 =" $file | cut -d "=" -f 2 | cut -d "+" -f 1 > E+PT2_tmp +grep "E+rPT2 =" $file | cut -d "=" -f 2 | cut -d "+" -f 1 > E+rPT2_tmp +paste N_det_tmp E_tmp E+PT2_tmp E+rPT2_tmp | column -s ' ' -t > $file.conv_fci +rm N_det_tmp E_tmp E+PT2_tmp E+rPT2_tmp diff --git a/scripts/get_fci_tc_conv.sh b/scripts/get_fci_tc_conv.sh new file mode 100755 index 00000000..643f3ac0 --- /dev/null +++ b/scripts/get_fci_tc_conv.sh @@ -0,0 +1,2 @@ +file=$1 +grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc diff --git a/scripts/qp_exc_energy.py b/scripts/qp_exc_energy.py new file mode 100755 index 00000000..7e7f1d67 --- /dev/null +++ b/scripts/qp_exc_energy.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python +# Computes the error on the excitation energy of a CIPSI run. + +def student(p,df): + import scipy + from scipy.stats import t + return t.ppf(p, df) + + +def chi2cdf(x,k): + import scipy + import scipy.stats + return scipy.stats.chi2.cdf(x,k) + + +def jarque_bera(data): + + n = max(len(data), 2) + norm = 1./ sum( [ w for (_,w) in data ] ) + + mu = sum( [ w* x for (x,w) in data ] ) * norm + sigma2 = sum( [ w*(x-mu)**2 for (x,w) in data ] ) * norm + if sigma2 > 0.: + S = ( sum( [ w*(x-mu)**3 for (x,w) in data ] ) * norm ) / sigma2**(3./2.) + K = ( sum( [ w*(x-mu)**4 for (x,w) in data ] ) * norm ) / sigma2**2 + else: + S = 0. + K = 0. + + # Value of the Jarque-Bera test + JB = n/6. * (S**2 + 1./4. * (K-3.)**2) + + # Probability that the data comes from a Gaussian distribution + p = 1. - chi2cdf(JB,2) + + return JB, mu, sqrt(sigma2/(n-1)), p + + + +to_eV = 27.2107362681 +import sys, os +import scipy +import scipy.stats +from math import sqrt, gamma, exp +import qp_json + + +def read_data(ezfio_filename,state): + """ Read energies and PT2 from input file """ + data = qp_json.load_last(ezfio_filename) + for method in data.keys(): + x = data[method] + lines = x + + print(f"State: {state}") + + gs = [] + es = [] + + for l in lines: + try: + pt2_0 = l['states'][0]['pt2'] + e_0 = l['states'][0]['energy'] + pt2_1 = l['states'][state]['pt2'] + e_1 = l['states'][state]['energy'] + gs.append( (e_0, pt2_0) ) + es.append( (e_1, pt2_1) ) + except: pass + + def f(p_1, p0, p1): + e, pt2 = p0 + y0, x0 = p_1 + y1, x1 = p1 + try: + alpha = (y1-y0)/(x0-x1) + except ZeroDivisionError: + alpha = 1. + return [e, pt2, alpha] + + for l in (gs, es): + p_1, p0, p1 = l[0], l[0], l[1] + l[0] = f(p_1, p0, p1) + + for i in range(1,len(l)-1): + p_1 = (l[i-1][0], l[i-1][1]) + p0 = l[i] + p1 = l[i+1] + l[i] = f(p_1, p0, p1) + + i = len(l)-1 + p_1 = (l[i-1][0], l[i-1][1]) + p0 = l[i] + p1 = l[-1] + l[i] = f(p_1, p0, p1) + + return [ x+y for x,y in zip(gs,es) ] + + +def compute(data): + + d = [] + for e0, p0, a0, e1, p1, a1 in data: + x = (e1+p1)-(e0+p0) + w = 1./sqrt(p0**2 + p1**2) + bias = (a1-1.)*p1 - (a0-1.)*p0 + d.append( (x,w,bias) ) + + x = [] + target = (scipy.stats.norm.cdf(1.)-0.5)*2 # = 0.6827 + + print("| %2s | %8s | %8s | %8s | %8s | %8s |"%( "N", "DE", "+/-", "bias", "P(G)", "J")) + print("|----+----------+----------+----------+----------+----------|") + xmax = (0.,0.,0.,0.,0.,0,0.) + for i in range(len(data)-1): + jb, mu, sigma, p = jarque_bera( [ (x,w) for (x,w,bias) in d[i:] ] ) + bias = sum ( [ w * e for (_,w,e) in d[i:] ] ) / sum ( [ w for (_,w,_) in d[i:] ] ) + mu = (mu+0.5*bias) * to_eV + sigma = sigma * to_eV + bias = bias * to_eV + n = len(data[i:]) + beta = student(0.5*(1.+target/p) ,n) + err = sigma * beta + 0.5*abs(bias) + print("| %2d | %8.3f | %8.3f | %8.3f | %8.3f | %8.3f |"%( n, mu, err, bias, p, jb)) + if n < 3 : + continue + y = (err, p, mu, err, jb,n,bias) + if p > xmax[1]: xmax = y + if p < 0.8: + continue + x.append(y) + + x = sorted(x) + + print("|----+----------+----------+----------+----------+----------|") + if x != []: + xmax = x[0] + _, p, mu, err, jb, n, bias = xmax + beta = student(0.5*(1.+target/p),n) + print("| %2d | %8.3f | %8.3f | %8.3f | %8.3f | %8.3f |\n"%(n, mu, err, bias, p, jb)) + + return mu, err, bias, p + +ezfio_filename = sys.argv[1] +print(ezfio_filename) +if len(sys.argv) > 2: + state = int(sys.argv[2]) +else: + state = 1 +data = read_data(ezfio_filename,state) +mu, err, bias, _ = compute(data) +print(" %s: %8.3f +/- %5.3f eV\n"%(ezfio_filename, mu, err)) + +import numpy as np +A = np.array( [ [ data[-1][1], 1. ], + [ data[-2][1], 1. ] ] ) +B = np.array( [ [ data[-1][0] ], + [ data[-2][0] ] ] ) +E0 = np.linalg.solve(A,B)[1] +A = np.array( [ [ data[-1][4], 1. ], + [ data[-2][4], 1. ] ] ) +B = np.array( [ [ data[-1][3] ], + [ data[-2][3] ] ] ) +E1 = np.linalg.solve(A,B)[1] +average_2 = (E1-E0)*to_eV + +A = np.array( [ [ data[-1][1], 1. ], + [ data[-2][1], 1. ], + [ data[-3][1], 1. ] ] ) +B = np.array( [ [ data[-1][0] ], + [ data[-2][0] ], + [ data[-3][0] ] ] ) +E0 = np.linalg.lstsq(A,B,rcond=None)[0][1] +A = np.array( [ [ data[-1][4], 1. ], + [ data[-2][4], 1. ], + [ data[-3][4], 1. ] ] ) +B = np.array( [ [ data[-1][3] ], + [ data[-2][3] ], + [ data[-3][3] ] ] ) +E1 = np.linalg.lstsq(A,B,rcond=None)[0][1] +average_3 = (E1-E0)*to_eV + +exc = ((data[-1][3] + data[-1][4]) - (data[-1][0] + data[-1][1])) * to_eV +error_2 = abs(average_2 - average_3) +error_3 = abs(average_3 - exc) +print(" 2-3 points: %.3f +/- %.3f "% (average_3, error_2)) +print(" largest wf: %.3f +/- %.3f "%(average_3, error_3)) + + diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py new file mode 100755 index 00000000..200ab7aa --- /dev/null +++ b/scripts/qp_extract_cipsi_data.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +import qp_json +import sys + +if len(sys.argv) == 1: + print(f"syntax: {sys.argv[0]} EZFIO_FILE") + +d = qp_json.load_all(sys.argv[1]) + +k = [ x for x in d.keys() ] +k.sort() + +print("# Energy PT2 PT2_err rPT2 rPT2_err exFCI\n") +for f in k: + try: + j = d[f]["fci"] + except: + continue + + print(f"# {f}") + for e in j: + + out = f" {e['n_det']:8d}" + + nstates = len(e["states"]) + for ee in e["states"]: + try: + exc_energy = ee['ex_energy'][0] + except: + exc_energy = 0. + out += f" {ee['energy']:16.8f} {ee['pt2']:e} {ee['pt2_err']:e} {ee['rpt2']:e} {ee['rpt2_err']:e} {exc_energy:16.8f}" + print(out) + + print("\n") + + diff --git a/scripts/script_fci_tc.sh b/scripts/script_fci_tc.sh new file mode 100755 index 00000000..26ef2aaf --- /dev/null +++ b/scripts/script_fci_tc.sh @@ -0,0 +1,33 @@ + source ~/qp2/quantum_package.rc + alpha=1.8 + input=O + basis=cc-pvdz + mult=3 + output=${input}_${basis}_al_${alpha} + qp create_ezfio -b ${basis} ${input}.xyz -m $mult + qp run scf + qp set perturbation pt2_max 0.0001 + qp set_frozen_core + +########## FCI CALCULATION FOR REFERENCE + qp run fci | tee ${EZFIO_FILE}.fci.out + qp run sort_wf + mv ${EZFIO_FILE}.wf_sorted ${EZFIO_FILE}_fci.wf_sorted +########### TC SCF CALCULATION + qp reset -d + qp set ao_two_e_erf_ints mu_erf 0.87 + qp set tc_keywords j1b_type 3 + qp set tc_keywords j1b_pen "[${alpha}]" + qp set tc_keywords bi_ortho True + qp set tc_keywords test_cycle_tc True + qp set tc_keywords write_tc_integ True + qp set tc_keywords read_tc_integ False + qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out + qp set tc_keywords write_tc_integ False + qp set tc_keywords read_tc_integ True +############ TC-FCI CALCULATION + qp run fci_tc_bi_ortho | tee ${EZFIO_FILE}.fci_tc_bi_ortho.out + grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" ${EZFIO_FILE}.fci_tc_bi_ortho.out | cut -d "=" -f 2 > data_al_$alpha + qp run sort_wf + mv ${EZFIO_FILE}.wf_sorted ${EZFIO_FILE}_tc_fci.wf_sorted + diff --git a/scripts/utility/qp_json.py b/scripts/utility/qp_json.py new file mode 100644 index 00000000..09ffe1be --- /dev/null +++ b/scripts/utility/qp_json.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +import os +import json + +def fix_json(s): + """Properly termitates an incomplete JSON file""" + + s = s.replace(' ','') + s = s.replace('\n','') + s = s.replace('\t','') + s = s.replace(",{}",'') + tmp = [ c for c in s if c in "[]{}" ] + tmp = "".join(tmp) + tmp_old = "" + while tmp != tmp_old: + tmp_old = tmp + tmp = tmp.replace("{}","") + tmp = tmp.replace("[]","") + while s[-1] in [ ',', '\n', ' ', '\t' ]: + s = s[:-1] + tmp = [ c for c in tmp ] + tmp.reverse() + for c in tmp: + if c == '[': s += "]" + elif c == '{': s += "}" + return s + + +def load(filename): + """Loads a JSON file after calling the fix_json function.""" + with open(filename,'r') as f: + data = f.read() + new_data = fix_json(data) + return json.loads(new_data) + + +def load_all(ezfio_filename): + """Loads all JSON files of an EZFIO.""" + d = {} + prefix = ezfio_filename+'/json/' + for filename in [ x for x in os.listdir(prefix) if x.endswith(".json")]: + d[filename] = load(prefix+filename) + return d + + +def load_last(ezfio_filename): + """Loads last JSON file of an EZFIO.""" + d = {} + prefix = ezfio_filename+'/json/' + l = [ x for x in os.listdir(prefix) if x.endswith(".json")] + l.sort() + filename = l[-1] + print(filename) + return load(prefix+filename) + + +def fix(ezfio_filename): + """Fixes all JSON files in an EZFIO.""" + d = load_all(ezfio_filename) + prefix = ezfio_filename+'/json/' + for filename in d.keys(): + with open(prefix+filename, 'w') as json_file: + json.dump(d[filename], json_file) + + + diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f index 8e253d75..14170ede 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f @@ -38,7 +38,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b, & - !$OMP ao_overlap_abs,sq_pi_3_2) + !$OMP ao_overlap_abs,sq_pi_3_2,thrsh_cycle_tc) !$OMP DO SCHEDULE(dynamic) do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -46,7 +46,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n r(3) = final_grid_points(3,ipoint) do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then + if(ao_overlap_abs(j,i) .lt. thrsh_cycle_tc) then cycle endif @@ -58,7 +58,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_1_erf_x_2(i_fit) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) - if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_gauss = overlap_gauss_r12_ao(r, expo_fit, i, j) int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss enddo @@ -81,8 +81,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef -! if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version - if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.thrsh_cycle_tc)cycle ! call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & ! expo_fit, i, j, int_fit_v, n_points_final_grid) int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) @@ -145,14 +144,14 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, big_array,& - !$OMP ao_abs_comb_b3_j1b,ao_overlap_abs) + !$OMP ao_abs_comb_b3_j1b,ao_overlap_abs,thrsh_cycle_tc) ! allocate(int_fit_v(n_points_final_grid)) !$OMP DO SCHEDULE(dynamic) do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then + if(ao_overlap_abs(j,i) .lt. thrsh_cycle_tc) then cycle endif @@ -161,7 +160,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) -! if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -243,7 +241,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2, & - !$OMP List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b) + !$OMP List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -260,11 +258,11 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ ! --- --- --- int_j1b = ao_abs_comb_b3_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle + if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_x_2(i_fit) coef_fit = coef_gauss_j_mu_x_2(i_fit) - if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += coef_fit * int_fit enddo @@ -278,7 +276,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -288,8 +286,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ coef_fit = coef_gauss_j_mu_x_2(i_fit) !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) -! if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version - if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit enddo @@ -350,7 +347,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & - !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2) + !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid @@ -369,7 +366,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle + if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -392,8 +389,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv) -! if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version - if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle +! if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. thrsh_cycle_tc) cycle call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) @@ -470,13 +466,13 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, ao_abs_comb_b3_j1b, & - !$OMP List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test) + !$OMP List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10) cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc) cycle r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -489,10 +485,10 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p ! --- --- --- int_j1b = ao_abs_comb_b3_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle +! if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_1_erf(i_fit) - if(dabs(int_j1b)*dsqpi_3_2*expo_fit**(-1.5d0).lt.1.d-15) cycle +! if(dabs(int_j1b)*dsqpi_3_2*expo_fit**(-1.5d0).lt.thrsh_cycle_tc) cycle coef_fit = coef_gauss_j_mu_1_erf(i_fit) int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r) tmp += coef_fit * int_fit @@ -507,7 +503,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -517,7 +513,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_1_erf(i_fit) call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) - if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle +! if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.thrsh_cycle_tc)cycle coef_fit = coef_gauss_j_mu_1_erf(i_fit) alpha_1s = beta + expo_fit @@ -527,9 +523,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist - if(expo_coef_1s .gt. 20.d0) cycle +! if(expo_coef_1s .gt. 20.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - if(dabs(coef_tmp) .lt. 1d-08) cycle +! if(dabs(coef_tmp) .lt. 1d-08) cycle int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f index 5c9f81e9..66a2b961 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f @@ -31,7 +31,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b, & !$OMP v_ij_erf_rk_cst_mu_j1b_test, mu_erf, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid @@ -41,7 +41,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 do i_1s = 1, List_comb_thr_b2_size(j,i) @@ -49,7 +49,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -110,7 +110,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,& !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent, & !$OMP x_v_ij_erf_rk_cst_mu_j1b_test, mu_erf,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,thrsh_cycle_tc) ! !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss) !$OMP DO do ipoint = 1, n_points_final_grid @@ -120,7 +120,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp_x = 0.d0 tmp_y = 0.d0 @@ -130,19 +130,11 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle + ! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) -! if(ao_prod_center(1,j,i).ne.10000.d0)then -! ! approximate 1 - erf(mu r12) by a gaussian * 10 -! !DIR$ FORCEINLINE -! call gaussian_product(expo_erfc_mu_gauss,r, & -! ao_prod_sigma(j,i),ao_prod_center(1,j,i), & -! factor_ij_1s,beta_ij,center_ij_1s) -! if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-1.5d0)).lt.1.d-10)cycle -! endif call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints ) call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, ints_coulomb) @@ -216,7 +208,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size, & !$OMP List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -225,7 +217,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 @@ -234,11 +226,11 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po ! --- --- --- int_j1b = ao_abs_comb_b2_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle + ! if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_x(i_fit) coef_fit = coef_gauss_j_mu_x(i_fit) - if(ao_overlap_abs_grid(j,i).lt.1.d-15) cycle + ! if(ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc) cycle int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += coef_fit * int_fit enddo @@ -251,7 +243,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -259,9 +251,9 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po expo_fit = expo_gauss_j_mu_x(i_fit) coef_fit = coef_gauss_j_mu_x(i_fit) coeftot = coef * coef_fit - if(dabs(coeftot).lt.1.d-15)cycle +! if(dabs(coeftot).lt.thrsh_cycle_tc)cycle call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u) - if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle +! if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit enddo @@ -325,7 +317,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size, & !$OMP List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -334,7 +326,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 @@ -343,7 +335,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, ! --- --- --- int_j1b = ao_abs_comb_b2_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle +! if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle expo_fit = expo_good_j_mu_1gauss int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += int_fit @@ -356,7 +348,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -364,9 +356,9 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, expo_fit = expo_good_j_mu_1gauss coef_fit = 1.d0 coeftot = coef * coef_fit - if(dabs(coeftot).lt.1.d-15)cycle + if(dabs(coeftot).lt.thrsh_cycle_tc)cycle call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u) - if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle + if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit ! enddo diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f index bf493fbb..9bcce449 100644 --- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f +++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f @@ -3,15 +3,16 @@ &BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size] implicit none integer :: i_1s,i,j,ipoint - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 List_comb_thr_b2_size = 0 + print*,'List_all_comb_b2_size = ',List_all_comb_b2_size +! pause do i = 1, ao_num do j = i, ao_num do i_1s = 1, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) - if(dabs(coef).lt.1.d-15)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle beta = List_all_comb_b2_expo (i_1s) beta = max(beta,1.d-12) center(1:3) = List_all_comb_b2_cent(1:3,i_1s) @@ -24,7 +25,7 @@ dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then List_comb_thr_b2_size(j,i) += 1 endif enddo @@ -40,6 +41,7 @@ list(i) = maxval(List_comb_thr_b2_size(:,i)) enddo max_List_comb_thr_b2_size = maxval(list) + print*,'max_List_comb_thr_b2_size = ',max_List_comb_thr_b2_size END_PROVIDER @@ -49,16 +51,15 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)] implicit none integer :: i_1s,i,j,ipoint,icount - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 ao_abs_comb_b2_j1b = 10000000.d0 do i = 1, ao_num do j = i, ao_num icount = 0 do i_1s = 1, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) - if(dabs(coef).lt.1.d-12)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle beta = List_all_comb_b2_expo (i_1s) center(1:3) = List_all_comb_b2_cent(1:3,i_1s) int_j1b = 0.d0 @@ -70,7 +71,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then icount += 1 List_comb_thr_b2_coef(icount,j,i) = coef List_comb_thr_b2_expo(icount,j,i) = beta @@ -98,17 +99,17 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size] implicit none integer :: i_1s,i,j,ipoint - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 List_comb_thr_b3_size = 0 + print*,'List_all_comb_b3_size = ',List_all_comb_b3_size do i = 1, ao_num do j = 1, ao_num do i_1s = 1, List_all_comb_b3_size coef = List_all_comb_b3_coef (i_1s) beta = List_all_comb_b3_expo (i_1s) center(1:3) = List_all_comb_b3_cent(1:3,i_1s) - if(dabs(coef).lt.thr)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle int_j1b = 0.d0 do ipoint = 1, n_points_extra_final_grid r(1:3) = final_grid_points_extra(1:3,ipoint) @@ -118,7 +119,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then List_comb_thr_b3_size(j,i) += 1 endif enddo @@ -144,9 +145,8 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)] implicit none integer :: i_1s,i,j,ipoint,icount - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 ao_abs_comb_b3_j1b = 10000000.d0 do i = 1, ao_num do j = 1, ao_num @@ -156,7 +156,7 @@ END_PROVIDER beta = List_all_comb_b3_expo (i_1s) beta = max(beta,1.d-12) center(1:3) = List_all_comb_b3_cent(1:3,i_1s) - if(dabs(coef).lt.thr)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle int_j1b = 0.d0 do ipoint = 1, n_points_extra_final_grid r(1:3) = final_grid_points_extra(1:3,ipoint) @@ -166,7 +166,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then icount += 1 List_comb_thr_b3_coef(icount,j,i) = coef List_comb_thr_b3_expo(icount,j,i) = beta @@ -177,15 +177,5 @@ END_PROVIDER enddo enddo -! do i = 1, ao_num -! do j = 1, i-1 -! do icount = 1, List_comb_thr_b3_size(j,i) -! List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j) -! List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j) -! List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j) -! enddo -! enddo -! enddo - END_PROVIDER diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index dfceddb5..d4c995e6 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -11,3 +11,8 @@ interface: ezfio,provider,ocaml default: False ezfio_name: direct +[do_ao_cholesky] +type: logical +doc: Perform Cholesky decomposition of AO integrals +interface: ezfio,provider,ocaml +default: False diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f new file mode 100644 index 00000000..d4c201aa --- /dev/null +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -0,0 +1,100 @@ +BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] + implicit none + BEGIN_DOC + ! Number of Cholesky vectors in AO basis + END_DOC + + integer :: i,j,k,l + double precision :: xnorm0, x, integral + double precision, external :: ao_two_e_integral + + cholesky_ao_num_guess = 0 + xnorm0 = 0.d0 + x = 0.d0 + do j=1,ao_num + do i=1,ao_num + integral = ao_two_e_integral(i,i,j,j) + if (integral > ao_integrals_threshold) then + cholesky_ao_num_guess += 1 + else + x += integral + endif + enddo + enddo + print *, 'Cholesky decomposition of AO integrals' + print *, '--------------------------------------' + print *, '' + print *, 'Estimated Error: ', x + print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)' + +END_PROVIDER + + BEGIN_PROVIDER [ integer, cholesky_ao_num ] +&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, cholesky_ao_num_guess) ] + use mmap_module + implicit none + BEGIN_DOC + ! Cholesky vectors in AO basis: (ik|a): + ! = (ik|jl) = sum_a (ik|a).(a|jl) + END_DOC + + type(c_ptr) :: ptr + integer :: fd, i,j,k,l, rank + double precision, pointer :: ao_integrals(:,:,:,:) + double precision, external :: ao_two_e_integral + + ! Store AO integrals in a memory mapped file + call mmap(trim(ezfio_work_dir)//'ao_integrals', & + (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & + 8, fd, .False., ptr) + call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) + + double precision :: integral + logical, external :: ao_two_e_integral_zero + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic) + do l=1,ao_num + do j=1,l + do k=1,ao_num + do i=1,k + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! Call Lapack + cholesky_ao_num = cholesky_ao_num_guess + call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao) + print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' + + ! Remove mmap + double precision, external :: getUnitAndOpen + call munmap( & + (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & + 8, fd, ptr) + open(unit=99,file=trim(ezfio_work_dir)//'ao_integrals') + close(99, status='delete') + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] + implicit none + BEGIN_DOC +! Transposed of the Cholesky vectors in AO basis set + END_DOC + integer :: i,j,k + do j=1,ao_num + do i=1,ao_num + do k=1,ao_num + cholesky_ao_transp(k,i,j) = cholesky_ao(i,j,k) + enddo + enddo + enddo +END_PROVIDER + diff --git a/src/ao_two_e_ints/map_integrals.irp.f b/src/ao_two_e_ints/map_integrals.irp.f index fa7c29cc..7d6a7da4 100644 --- a/src/ao_two_e_ints/map_integrals.irp.f +++ b/src/ao_two_e_ints/map_integrals.irp.f @@ -486,7 +486,7 @@ subroutine get_ao_two_e_integrals(j,k,l,sze,out_val) PROVIDE ao_two_e_integrals_in_map ao_integrals_map if (ao_one_e_integral_zero(j,l)) then - out_val = 0.d0 + out_val(1:sze) = 0.d0 return endif diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 853972f7..5afd49ab 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -27,7 +27,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -74,7 +74,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -121,7 +121,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -168,7 +168,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -214,7 +214,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -261,7 +261,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index bd5c4977..ae4c9bd5 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -26,7 +26,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -75,7 +75,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -124,7 +124,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -173,7 +173,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -222,7 +222,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -271,7 +271,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index 78047d1b..f5f5959a 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -57,6 +57,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE ao_tc_sym_two_e_pot_in_map + !!! TODO :: OPENMP do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num diff --git a/src/ccsd/80.ccsd_spin.bats b/src/ccsd/80.ccsd_spin.bats new file mode 100644 index 00000000..0b616871 --- /dev/null +++ b/src/ccsd/80.ccsd_spin.bats @@ -0,0 +1,225 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run() { + thresh1=1e-6 + thresh2=1e-6 + test_exe scf || skip + qp set_file $1 + qp edit --check + #qp run scf + qp set_frozen_core + qp set utils_cc cc_par_t true + qp set utils_cc cc_thresh_conv 1e-12 + file="$(echo $1 | sed 's/.ezfio//g')" + qp run ccsd_spin_orb | tee $file.ccsd.out + energy1="$(grep 'E(CCSD)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')" + energy2="$(grep 'E(T)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')" + #rm $file.ccsd.out + eq $energy1 $2 $thresh1 + eq $energy2 $3 $thresh2 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -49.136487344382 -0.003497589175 +} + +@test "be" { +run be.ezfio -14.623559003577 -0.000230982022 +} + +@test "c2h2" { +run c2h2.ezfio -12.394008897618 -0.010790491561 +} + +@test "ch4" { +run ch4.ezfio -40.390721785799 -0.004476100282 +} + +@test "clf" { +run clf.ezfio -559.186562904081 -0.006577143392 +} + +@test "clo" { +run clo.ezfio -534.564874409332 -0.007584571424 +} + +@test "co2" { +run co2.ezfio -188.129602527766 -0.018040668885 +} + +@test "dhno" { +run dhno.ezfio -130.816650109473 -0.012197331453 +} + +@test "f2" { +run f2.ezfio -199.287826338097 -0.017592872692 +} + +@test "f" { +run f.ezfio -99.616644511121 -0.003624525307 +} + +@test "h2o2" { +run h2o2.ezfio -151.182552729963 -0.009511682086 +} + +@test "h2o" { +run h2o.ezfio -76.237710276526 -0.003001800577 +} + +@test "h2s" { +run h2s.ezfio -398.861214015390 -0.003300559757 +} + +@test "h3coh" { +run h3coh.ezfio -115.221296424969 -0.003566171432 +} + +@test "hbo" { +run hbo.ezfio -100.213539770415 -0.006851489212 +} + +@test "hcn" { +run hcn.ezfio -93.190247992657 -0.013418135043 +} + +@test "hco" { +run hco.ezfio -113.405413962350 -0.007973455337 +} + +@test "lif" { +run lif.ezfio -107.270402903250 -0.007742969005 +} + +@test "n2" { +run n2.ezfio -109.355358930472 -0.018477744342 +} + +@test "n2h4" { +run n2h4.ezfio -111.556885923139 -0.009048077008 +} + +@test "nh3" { +run nh3.ezfio -56.465503060954 -0.007638273755 +} + +@test "oh" { +run oh.ezfio -75.614606132774 -0.004126661739 +} + +@test "sih2_3b1" { +run sih2_3b1.ezfio -290.016780973072 -0.000497825874 +} + +@test "sih3" { +run sih3.ezfio -5.575343504534 -0.002094123268 +} + +@test "so" { +run so.ezfio -26.035945178665 -0.010594351274 +} + +#@test "b2_stretched" { +#run b2_stretched.ezfio -49.136487344382 -49.139984933557 +#} +# +#@test "be" { +#run be.ezfio -14.623559003577 -14.623789985599 +#} +# +#@test "c2h2" { +#run c2h2.ezfio -12.394008897618 -12.404799389179 +#} +# +#@test "ch4" { +#run ch4.ezfio -40.390721784961 -40.395197884406 +#} +# +#@test "clf" { +#run clf.ezfio -559.186562906072 -559.193140046904 +#} +# +#@test "clo" { +#run clo.ezfio -534.564874409333 -534.572458980757 +#} +# +#@test "co2" { +#run co2.ezfio -188.129602511724 -188.147643198675 +#} +# +#@test "dhno" { +#run dhno.ezfio -130.816650109473 -130.828847440925 +#} +# +#@test "f2" { +#run f2.ezfio -199.287826338097 -199.305419210789 +#} +# +#@test "f" { +#run f.ezfio -99.616644511120 -99.620269036428 +#} +# +#@test "h2o2" { +#run h2o2.ezfio -151.182552729963 -151.192064412049 +#} +# +#@test "h2o" { +#run h2o.ezfio -76.237710276526 -76.240712077103 +#} +# +#@test "h2s" { +#run h2s.ezfio -398.861214015416 -398.864514575146 +#} +# +#@test "h3coh" { +#run h3coh.ezfio -115.221296424969 -115.224862596401 +#} +# +#@test "hbo" { +#run hbo.ezfio -100.213539770415 -100.220391259627 +#} +# +#@test "hcn" { +#run hcn.ezfio -93.190247983000 -93.203666131216 +#} +# +#@test "hco" { +#run hco.ezfio -113.405413962350 -113.413387417687 +#} +# +#@test "lif" { +#run lif.ezfio -107.270402903211 -107.278145872216 +#} +# +#@test "n2" { +#run n2.ezfio -109.355358930472 -109.373836674814 +#} +# +#@test "n2h4" { +#run n2h4.ezfio -111.556885922642 -111.565934000556 +#} +# +#@test "nh3" { +#run nh3.ezfio -56.465503060954 -56.473141334709 +#} +# +#@test "oh" { +#run oh.ezfio -75.614606131897 -75.618732794235 +#} +# +#@test "sih2_3b1" { +#run sih2_3b1.ezfio -290.016780973071 -290.017278798946 +#} +# +#@test "sih3" { +#run sih3.ezfio -5.575343504534 -5.577437627802 +#} +# +#@test "so" { +#run so.ezfio -26.035945181998 -26.046539528491 +#} + diff --git a/src/ccsd/81.ccsd_space.bats b/src/ccsd/81.ccsd_space.bats new file mode 100644 index 00000000..02e8e987 --- /dev/null +++ b/src/ccsd/81.ccsd_space.bats @@ -0,0 +1,225 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run() { + thresh1=1e-6 + thresh2=1e-6 + test_exe scf || skip + qp set_file $1 + qp edit --check + #qp run scf + qp set_frozen_core + qp set utils_cc cc_par_t true + qp set utils_cc cc_thresh_conv 1e-12 + file="$(echo $1 | sed 's/.ezfio//g')" + qp run ccsd_space_orb | tee $file.ccsd.out + energy1="$(grep 'E(CCSD)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')" + energy2="$(grep 'E(T)' $file.ccsd.out | tail -n 1 | awk '{printf $3}')" + #rm $file.ccsd.out + eq $energy1 $2 $thresh1 + eq $energy2 $3 $thresh2 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -49.136487344382 -0.003497589175 +} + +@test "be" { +run be.ezfio -14.623559003577 -0.000230982022 +} + +@test "c2h2" { +run c2h2.ezfio -12.394008897618 -0.010790491561 +} + +@test "ch4" { +run ch4.ezfio -40.390721785799 -0.004476100282 +} + +@test "clf" { +run clf.ezfio -559.186562904081 -0.006577143392 +} + +#@test "clo" { +#run clo.ezfio -534.564874409332 -0.007584571424 +#} + +@test "co2" { +run co2.ezfio -188.129602527766 -0.018040668885 +} + +#@test "dhno" { +#run dhno.ezfio -130.816650109473 -0.012197331453 +#} + +@test "f2" { +run f2.ezfio -199.287826338097 -0.017592872692 +} + +#@test "f" { +#run f.ezfio -99.616644511121 -0.003624525307 +#} + +@test "h2o2" { +run h2o2.ezfio -151.182552729963 -0.009511682086 +} + +@test "h2o" { +run h2o.ezfio -76.237710276526 -0.003001800577 +} + +@test "h2s" { +run h2s.ezfio -398.861214015390 -0.003300559757 +} + +@test "h3coh" { +run h3coh.ezfio -115.221296424969 -0.003566171432 +} + +@test "hbo" { +run hbo.ezfio -100.213539770415 -0.006851489212 +} + +@test "hcn" { +run hcn.ezfio -93.190247992657 -0.013418135043 +} + +#@test "hco" { +#run hco.ezfio -113.405413962350 -0.007973455337 +#} + +@test "lif" { +run lif.ezfio -107.270402903250 -0.007742969005 +} + +@test "n2" { +run n2.ezfio -109.355358930472 -0.018477744342 +} + +@test "n2h4" { +run n2h4.ezfio -111.556885923139 -0.009048077008 +} + +@test "nh3" { +run nh3.ezfio -56.465503060954 -0.007638273755 +} + +#@test "oh" { +#run oh.ezfio -75.614606132774 -0.004126661739 +#} + +#@test "sih2_3b1" { +#run sih2_3b1.ezfio -290.016780973072 -0.000497825874 +#} + +#@test "sih3" { +#run sih3.ezfio -5.575343504534 -0.002094123268 +#} + +#@test "so" { +#run so.ezfio -26.035945178665 -0.010594351274 +#} + +#@test "b2_stretched" { +#run b2_stretched.ezfio -49.136487344382 -49.139984933557 +#} +# +#@test "be" { +#run be.ezfio -14.623559003577 -14.623789985599 +#} +# +#@test "c2h2" { +#run c2h2.ezfio -12.394008897618 -12.404799389179 +#} +# +#@test "ch4" { +#run ch4.ezfio -40.390721784961 -40.395197884406 +#} +# +#@test "clf" { +#run clf.ezfio -559.186562906072 -559.193140046904 +#} +# +##@test "clo" { +##run clo.ezfio -534.564874409333 -534.572458980757 +##} +# +#@test "co2" { +#run co2.ezfio -188.129602511724 -188.147643198675 +#} +# +##@test "dhno" { +##run dhno.ezfio -130.816650109473 -130.828847440925 +##} +# +#@test "f2" { +#run f2.ezfio -199.287826338097 -199.305419210789 +#} +# +##@test "f" { +##run f.ezfio -99.616644511120 -99.620269036428 +##} +# +#@test "h2o2" { +#run h2o2.ezfio -151.182552729963 -151.192064412049 +#} +# +#@test "h2o" { +#run h2o.ezfio -76.237710276526 -76.240712077103 +#} +# +#@test "h2s" { +#run h2s.ezfio -398.861214015416 -398.864514575146 +#} +# +#@test "h3coh" { +#run h3coh.ezfio -115.221296424969 -115.224862596401 +#} +# +#@test "hbo" { +#run hbo.ezfio -100.213539770415 -100.220391259627 +#} +# +#@test "hcn" { +#run hcn.ezfio -93.190247983000 -93.203666131216 +#} +# +##@test "hco" { +##run hco.ezfio -113.405413962350 -113.413387417687 +##} +# +#@test "lif" { +#run lif.ezfio -107.270402903211 -107.278145872216 +#} +# +#@test "n2" { +#run n2.ezfio -109.355358930472 -109.373836674814 +#} +# +#@test "n2h4" { +#run n2h4.ezfio -111.556885922642 -111.565934000556 +#} +# +#@test "nh3" { +#run nh3.ezfio -56.465503060954 -56.473141334709 +#} +# +##@test "oh" { +##run oh.ezfio -75.614606131897 -75.618732794235 +##} +# +##@test "sih2_3b1" { +##run sih2_3b1.ezfio -290.016780973071 -290.017278798946 +##} +# +##@test "sih3" { +##run sih3.ezfio -5.575343504534 -5.577437627802 +##} +# +##@test "so" { +##run so.ezfio -26.035945181998 -26.046539528491 +##} + diff --git a/src/ccsd/NEED b/src/ccsd/NEED new file mode 100644 index 00000000..e6e6bc59 --- /dev/null +++ b/src/ccsd/NEED @@ -0,0 +1,2 @@ +hartree_fock +utils_cc diff --git a/src/ccsd/README.md b/src/ccsd/README.md new file mode 100644 index 00000000..fa59e8a6 --- /dev/null +++ b/src/ccsd/README.md @@ -0,0 +1,31 @@ +# CCSD in spin orbitals and spatial orbitals + +CCSD and CCSD(T) in spin orbitals for open and closed shell systems. +CCSD and CCSD(T) in spatial orbitals for closed shell systems. + +## Calculations +The program will automatically choose the version in spin or spatial orbitals +To run the general program: +``` +qp run ccsd +``` +Nevertheless, you can enforce the run in spin orbitals with +``` +qp run ccsd_spin_orb +``` + +## Settings +The settings can be changed with: +``` +qp set utils_cc cc_#param #val +``` +For more informations on the settings, look at the module utils_cc and its documentation. + +## Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh and +mv *.irp.f ../. +``` + diff --git a/src/ccsd/ccsd.irp.f b/src/ccsd/ccsd.irp.f new file mode 100644 index 00000000..035f50b8 --- /dev/null +++ b/src/ccsd/ccsd.irp.f @@ -0,0 +1,18 @@ +program ccsd + + implicit none + + BEGIN_DOC + ! CCSD program + END_DOC + + read_wf = .True. + touch read_wf + + if (.not. cc_ref_is_open_shell) then + call run_ccsd_space_orb + else + call run_ccsd_spin_orb + endif + +end diff --git a/src/ccsd/ccsd_space_orb.irp.f b/src/ccsd/ccsd_space_orb.irp.f new file mode 100644 index 00000000..53028ec0 --- /dev/null +++ b/src/ccsd/ccsd_space_orb.irp.f @@ -0,0 +1,12 @@ +! Code + +program ccsd + + implicit none + + read_wf = .True. + touch read_wf + + call run_ccsd_space_orb + +end diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f new file mode 100644 index 00000000..b63375cf --- /dev/null +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -0,0 +1,2078 @@ +subroutine run_ccsd_space_orb + + implicit none + + integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d + integer :: u,v,gam,beta,tmp_gam,tmp_beta + integer :: nb_iter + double precision :: get_two_e_integral + double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb + logical :: not_converged + + double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) + double precision, allocatable :: t1(:,:), r1(:,:) + double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + integer, allocatable :: list_occ(:), list_vir(:) + integer(bit_kind) :: det(N_int,2) + integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) + + PROVIDE mo_two_e_integrals_in_map + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Check that the reference is a closed shell determinant + if (cc_ref_is_open_shell) then + call abort + endif + + ! Number of occ/vir spatial orb + nO = nOa + nV = nVa + + allocate(list_occ(nO),list_vir(nV)) + list_occ = cc_list_occ + list_vir = cc_list_vir + ! Debug + !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) + !print*,'occ',list_occ + !print*,'vir',list_vir + + allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) + allocate(tau(nO,nO,nV,nV)) + allocate(t1(nO,nV), r1(nO,nV)) + allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) + + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + if (elec_alpha_num /= elec_beta_num) then + print*, 'Only for closed shell systems' + print*, 'elec_alpha_num=',elec_alpha_num + print*, 'elec_beta_num =',elec_beta_num + print*, 'abort' + call abort + endif + + ! Init + call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) + call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) + call update_tau_space(nO,nV,t1,t2,tau) + !print*,'hf_energy', hf_energy + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_space(nO,nV,tau,t1,energy) + print*,'Guess energy', uncorr_energy+energy, energy + + nb_iter = 0 + not_converged = .True. + max_r1 = 0d0 + max_r2 = 0d0 + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(ta) + + do while (not_converged) + + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo(nO,nV,t1,t2,H_vo) + + ! Residue + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + max_r = max(max_r1,max_r2) + + ! Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) + call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + + call update_tau_space(nO,nV,t1,t2,tau) + + ! Energy + call ccsd_energy_space(nO,nV,tau,t1,energy) + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. + endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocation + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + + deallocate(H_vv,H_oo,H_vo,r1,r2,tau) + + ! CCSD(T) + double precision :: e_t + + if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then + + ! Dumb way + !call wall_time(ta) + !call ccsd_par_t_space(nO,nV,t1,t2,e_t) + !call wall_time(tb) + !print*,'Time: ',tb-ta, ' s' + + !print*,'' + !write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' + !write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' + !write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + !print*,'' + + ! New + print*,'Computing (T) correction...' + call wall_time(ta) + call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & + ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + print*,'' + endif + + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(t1,t2) + +end + +! Energy + +subroutine ccsd_energy_space(nO,nV,tau,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau,t1,& + !$omp cc_space_f_vo,cc_space_w_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do i = 1, nO + do a = 1, nV + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end + +! Tau + +subroutine update_tau_space(nO,nV,t1,t2,tau) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! R1 + +subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r1(nO,nV), max_r1 + + ! internal + integer :: u,i,j,beta,a,b + + !$omp parallel & + !$omp shared(nO,nV,r1,cc_space_f_ov) & + !$omp private(u,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + r1(u,beta) = cc_space_f_ov(u,beta) + enddo + enddo + !$omp end do + !$omp end parallel + + ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a) + ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO) + ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV) + ! cc_space_f_vo(a,i) * t1(u,a) -> X1(nO,nO), O(nO*nO*nV) + ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_oo(:,:) + allocate(X_oo(nO,nO)) + call dgemm('N','N', nO, nO, nV, & + -2d0, t1 , size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, X_oo , size(X_oo,1)) + + call dgemm('T','N', nO, nV, nO, & + 1d0, X_oo, size(X_oo,2), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + deallocate(X_oo) + + ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) + !do beta = 1, nV + ! do u = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) + ! enddo + ! enddo + !enddo + call dgemm('N','N', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + H_vv, size(H_vv,1), & + 1d0, r1 , size(r1,1)) + + ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta) + ! enddo + ! enddo + !enddo + call dgemm('N','N', nO, nV, nO, & + -1d0, H_oo, size(H_oo,1), & + t1 , size(t1,1), & + 1d0, r1, size(r1,1)) + + !r1(u,beta) = r1(u,beta) + H_vo(a,i) * (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)) + ! <=> + ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * X(a,i,u,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * & + ! (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_voov(:,:,:,:) + allocate(X_voov(nV, nO, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_voov,t2,t1) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nV*nO, nO*nV, & + 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & + H_vo , 1, & + 1d0, r1 , 1) + + deallocate(X_voov) + + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) + ! <=> + ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_ovov(:,:,:,:) + allocate(X_ovov(nO, nV, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do a = 1, nv + do i = 1, nO + X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nO*nV, nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + t1 , 1, & + 1d0, r1 , 1) + + deallocate(X_ovov) + + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! do b = 1, nV + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) + allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & + !$omp private(b,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do u = 1, nO + T_vvoo(a,b,i,u) = tau(i,u,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nV,nO*nV*nV, & + 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & + W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_vvov,T_vvoo) + + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do j = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: W_oovo(:,:,:,:) + allocate(W_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & + !$omp private(u,a,i,j) & + !$omp default(none) + !$omp do collapse(3) + do u = 1, nO + do a = 1, nV + do j = 1, nO + do i = 1, nO + W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & + tau , size(tau,1) * size(tau,2) * size(tau,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_oovo) + + max_r1 = 0d0 + do a = 1, nV + do i = 1, nO + if (dabs(r1(i,a)) > max_r1) then + max_r1 = dabs(r1(i,a)) + endif + enddo + enddo + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r1) & + !$omp private(a,i) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + r1(i,a) = -r1(i,a) + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! H_oo + +subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_oo(nO, nO) + + integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u + + !H_oo = 0d0 + + !do i = 1, nO + ! do u = 1, nO + ! H_oo(u,i) = cc_space_f_oo(u,i) + + ! do j = 1, nO + ! do a = 1, nV + ! do b = 1, nV + ! !H_oo(u,i) = H_oo(u,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * tau(u,j,a,b) + ! !H_oo(u,i) = H_oo(u,i) + cc_space_w_vvoo(a,b,i,j) * tau(u,j,a,b) + ! H_oo(u,i) = H_oo(u,i) + cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + ! H_oo(u,i) = cc_space_f_oo(u,i) + !$omp parallel & + !$omp shared(nO,H_oo,cc_space_f_oo) & + !$omp private(i,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + H_oo(u,i) = cc_space_f_oo(u,i) + enddo + enddo + !$omp end do + !$omp end parallel + + ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b) + call dgemm('N','T', nO, nO, nO*nV*nV, & + 1d0, tau , size(tau,1), & + cc_space_w_oovv, size(cc_space_w_oovv,1), & + 1d0, H_oo , size(H_oo,1)) + +end + +! H_vv + +subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_vv(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vv = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + + ! do j = 1, nO + ! do i = 1, nO + ! do b = 1, nV + ! !H_vv(a,beta) = H_vv(a,beta) - (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(a,b,j,i)) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: tmp_tau(:,:,:,:) + + allocate(tmp_tau(nV,nO,nO,nV)) + + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + !$omp parallel & + !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + H_vv(a,beta) = cc_space_f_vv(a,beta) + enddo + enddo + !$omp end do nowait + + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) + + !$omp do collapse(3) + do beta = 1, nV + do j = 1, nO + do i = 1, nO + do b = 1, nV + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV,nV,nO*nO*nV, & + -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), & + tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), & + 1d0, H_vv , size(H_vv,1)) + + deallocate(tmp_tau) + +end + +! H_vo + +subroutine compute_H_vo(nO,nV,t1,t2,H_vo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: H_vo(nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vo = 0d0 + + !do i = 1, nO + ! do a = 1, nV + ! H_vo(a,i) = cc_space_f_vo(a,i) + + ! do j = 1, nO + ! do b = 1, nV + ! !H_vo(a,i) = H_vo(a,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: w(:,:,:,:) + + allocate(w(nV,nO,nO,nV)) + + !$omp parallel & + !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do i = 1, nO + do a = 1, nV + H_vo(a,i) = cc_space_f_vo(a,i) + enddo + enddo + !$omp end do nowait + + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('N',nV*nO, nO*nV, & + 1d0, w , size(w,1) * size(w,2), & + t1 , 1, & + 1d0, H_vo, 1) + + deallocate(w) + +end + +! R2 + +subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + + ! internal + double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:) + integer :: u,v,i,j,beta,gam,a,b + + allocate(g_occ(nO,nO), g_vir(nV,nV)) + allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) + allocate(A1(nO,nO,nO,nO)) + + call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) + call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) + call compute_A1(nO,nV,t1,t2,tau,A1) + call compute_J1(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvvo,cc_space_v_vvoo,J1) + call compute_K1(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,cc_space_v_vvov,K1) + + ! Residual + !r2 = 0d0 + + !$omp parallel & + !$omp shared(nO,nV,r2,cc_space_v_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do j = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + A1(u,v,i,j) * tau(i,j,beta,gam) + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1, size(A1,1) * size(A1,2), & + tau, size(tau,1) * size(tau,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do b = 1, nv + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + B1(a,b,beta,gam) * tau(u,v,a,b) + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + allocate(B1(nV,nV,nV,nV)) + call compute_B1(nO,nV,t1,t2,B1) + call dgemm('N','N',nO*nO,nV*nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1 , size(B1,1) * size(B1,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + deallocate(B1) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + g_vir(a,beta) * t2(u,v,a,gam) & + ! + g_vir(a,gam) * t2(v,u,a,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,t2,X_oovv) & + !$omp private(u,v,gam,a) & + !$omp default(none) + !$omp do collapse(3) + do a = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + X_oovv(u,v,gam,a) = t2(u,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + g_vir, size(g_vir,1), & + 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - g_occ(u,i) * t2(i,v,beta,gam) & + ! - g_occ(v,i) * t2(i,u,gam,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO,nO*nV*nV,nO, & + 1d0, g_occ , size(g_occ,1), & + t2 , size(t2,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_oovv) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + cc_space_v_ovvv(u,a,beta,gam) * t1(v,a) & + ! + cc_space_v_ovvv(v,a,gam,beta) * t1(u,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vovv(:,:,:,:) + allocate(X_vovv(nV,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & + !$omp private(u,a,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do u = 1, nO + do a = 1, nV + X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1 , size(t1,1), & + X_vovv, size(X_vovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_ovov(u,a,i,gam) * t1(i,beta) * t1(v,a) & + ! - cc_space_v_ovov(v,a,i,beta) * t1(i,gam) * t1(u,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & + !$omp private(u,v,gam,i) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do u = 1, nO + do a = 1, nV + X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV*nO*nV,nV,nO, & + 1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), & + t1 , size(t1,1), & + 0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3)) + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1, size(t1,1), & + Y_vovv, size(Y_vovv,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_vovv) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_oovo(u,v,beta,i) * t1(i,gam) & + ! - cc_space_v_oovo(v,u,gam,i) * t1(i,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO*nO*nV,nV,nO, & + 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_ovvo(u,a,beta,i) * t1(v,a) * t1(i,gam) & + ! - cc_space_v_ovvo(v,a,gam,i) * t1(u,a) * t1(i,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: Y_oovo(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & + !$omp private(a,v,gam,i) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do v = 1, nO + do a = 1, nV + X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nO,nV, & + 1d0, t1, size(t1,1), & + X_vovo, size(X_vovo,1), & + 0d0, Y_oovo, size(Y_oovo,1)) + + call dgemm('N','N',nO*nO*nV, nV, nO, & + 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_oovo) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) * & + ! (2d0 * t2(i,v,a,gam) - t2(i,v,gam,a)) & + ! + 0.5d0 * (2d0 * J1(v,a,gam,i) - K1(v,a,i,gam)) * & + ! (2d0 * t2(i,u,a,beta) - t2(i,u,beta,a)) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do a = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do gam = 1, nV + do v = 1, nO + do i = 1, nO + do a = 1, nV + Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_voov, size(Y_voov,1) * size(Y_voov,2), & + 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Y_voov) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - 0.5d0 * K1(u,a,i,beta) * t2(i,v,gam,a) & + ! - 0.5d0 * K1(v,a,i,gam) * t2(i,u,beta,a) !P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:) + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,a,i,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do a = 1, nV + do i = 1, nO + X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do gam = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,gam) = t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - K1(u,a,i,gam) * t2(i,v,beta,a) & + ! - K1(v,a,i,beta) * t2(i,u,gam,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + !$omp parallel & + !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do a = 1, nV + do i = 1, nO + do gam = 1, nV + do u = 1, nO + X_ovov(u,gam,i,a) = K1(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do beta = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovov,Y_ovov,Z_ovov) + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r2) & + !$omp private(i,j,a,b) & + !$omp default(none) + !$omp do collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = -r2(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + max_r2 = 0d0 + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + if (dabs(r2(i,j,a,b)) > max_r2) then + max_r2 = dabs(r2(i,j,a,b)) + endif + enddo + enddo + enddo + enddo + + deallocate(g_occ,g_vir,J1,K1,A1) + +end + +! A1 + +subroutine compute_A1(nO,nV,t1,t2,tau,A1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: A1(nO, nO, nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + + !A1 = 0d0 + + !do j = 1, nO + ! do i = 1, nO + ! do v = 1, nO + ! do u = 1, nO + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + + ! do a = 1, nV + ! A1(u,v,i,j) = A1(u,v,i,j) & + ! + cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + ! + cc_space_v_vooo(a,v,i,j) * t1(u,a) + ! + ! do b = 1, nV + ! A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) + allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) + + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + !$omp parallel & + !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do u = 1, nO + do a = 1, nV + X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + X_vooo, size(X_vooo,1), & + 0d0, Y_oooo, size(Y_oooo,1)) + + !$omp parallel & + !$omp shared(nO,nV,A1,Y_oooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vooo,Y_oooo) + + ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & + 1d0, A1 , size(A1,1)) + + ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + call dgemm('N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau , size(tau,1) * size(tau,2), & + cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & + 1d0, A1 , size(A1,1) * size(A1,2)) + +end + +! B1 + +subroutine compute_B1(nO,nV,t1,t2,B1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: B1(nV, nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !B1 = 0d0 + + !do gam = 1, nV + ! do beta = 1, nV + ! do b = 1, nV + ! do a = 1, nV + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + + ! do i = 1, nO + ! B1(a,b,beta,gam) = B1(a,b,beta,gam) & + ! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + ! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:) + allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV)) + + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + !$omp parallel & + !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & + !$omp private(a,b,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do b = 1, nV + do a = 1, nV + X_vvvo(a,b,gam,i) = cc_space_v_vvov(a,b,i,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + call dgemm('N','N', nV*nV*nV, nV, nO, & + -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & + t1 , size(t1,1), & + 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) + + + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + call dgemm('N','N', nV*nV*nV, nV, nO, & + -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), & + t1 , size(t1,1), & + 0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2) * size(Y_vvvv,3)) + + !$omp parallel & + !$omp shared(nV,B1,Y_vvvv) & + !$omp private(a,b,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = B1(a,b,beta,gam) + Y_vvvv(a,b,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vvvo,Y_vvvv) + +end + +! g_occ + +subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_occ(nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !g_occ = 0d0 + + !do i = 1, nO + ! do u = 1, nO + ! g_occ(u,i) = H_oo(u,i) + ! + ! do a = 1, nV + ! g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a) + ! + ! do j = 1, nO + ! g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + ! enddo + ! + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO,nO,nV, & + 1d0, t1, size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, g_occ, size(g_occ,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) & + !$omp private(i,j,a,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + H_oo(u,i) + enddo + enddo + !$omp end do + + !$omp do collapse(1) + do i = 1, nO + do j = 1, nO + do a = 1, nV + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! g_vir + +subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_vir(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !g_vir = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! g_vir(a,beta) = H_vv(a,beta) + ! + ! do i = 1, nO + ! g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta) + ! + ! do b = 1, nV + ! g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + ! enddo + ! + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nV,nV,nO, & + -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & + t1 , size(t1,1), & + 0d0, g_vir, size(g_vir,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & + !$omp private(i,b,a,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) + enddo + enddo + !$omp end do + + !$omp do collapse(1) + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! J1 + +subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: J1(nO, nV, nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !J1 = 0d0 + + !do i = 1, nO + ! do beta = 1, nV + ! do a = 1, nV + ! do u = 1, nO + ! J1(u,a,beta,i) = cc_space_v_ovvo(u,a,beta,i) + + ! do j = 1, nO + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! - cc_space_v_ovoo(u,a,j,i) * t1(j,beta) + ! enddo + + ! do b = 1, nV + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) + ! enddo + + ! do j = 1, nO + ! do b = 1, nV + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! - cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + ! + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) + allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & + !$omp private(i,j,a,u,beta) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = v_ovvo(u,a,beta,i) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do a = 1, nV + do u = 1, nO + X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & + t1 , size(t1,1), & + 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Y_ovov) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(X_ovoo) + + ! v_vvvo(b,a,beta,i) * t1(u,b) + call dgemm('N','N',nO,nV*nV*nO,nV, & + 1d0, t1 , size(t1,1), & + v_vvvo, size(v_vvvo,1), & + 1d0, J1 , size(J1,1)) + + !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) + allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & + !$omp private(i,beta,a,u,b,j) & + !$omp default(none) + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do beta = 1, nV + do u = 1, nO + Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,j,b) = v_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + X_voov, size(X_voov,1) * size(X_voov,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + deallocate(X_voov) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do i = 1, nO + do a = 1, nV + Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,b,j) = t2(u,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Z_ovvo,Y_ovov) + +end + +! K1 + +subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) + double precision, intent(in) :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: K1(nO, nV, nO, nV) + + double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !K1 = 0d0 + + !do beta = 1, nV + ! do i = 1, nO + ! do a = 1, nV + ! do u = 1, nO + ! K1(u,a,i,beta) = cc_space_v_ovov(u,a,i,beta) + + ! do j = 1, nO + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! - cc_space_v_ovoo(u,a,i,j) * t1(j,beta) + ! enddo + + ! do b = 1, nV + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) + ! enddo + + ! do j = 1, nO + ! do b = 1, nV + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do i = 1, nO + do a = 1, nV + do j = 1, nO + do b = 1, nV + X(b,j,a,i) = - v_vvoo(b,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do beta = 1, nV + do u = 1, nO + Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & + t1 , size(t1,1), & + 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + + call dgemm('N','N',nO,nV*nO*nV,nV, & + 1d0, t1 , size(t1,1), & + v_vvov, size(v_vvov,1), & + 1d0, K1 , size(K1,1)) + + ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) + call dgemm('N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y, size(Y,1) * size(Y,2), & + X, size(X,1) * size(X,2), & + 0d0, Z, size(Z,1) * size(Z,2)) + + !$omp parallel & + !$omp shared(nO,nV,K1,Z) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X,Y,Z) + +end diff --git a/src/ccsd/ccsd_spin_orb.irp.f b/src/ccsd/ccsd_spin_orb.irp.f new file mode 100644 index 00000000..6f2de11c --- /dev/null +++ b/src/ccsd/ccsd_spin_orb.irp.f @@ -0,0 +1,16 @@ +! Prog + +program ccsd + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + read_wf = .True. + touch read_wf + + call run_ccsd_spin_orb + +end diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f new file mode 100644 index 00000000..23e2cef1 --- /dev/null +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -0,0 +1,2301 @@ +! Code + +subroutine run_ccsd_spin_orb + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:) + double precision, allocatable :: r1(:,:), r2(:,:,:,:) + double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:) + double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:) + + double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:) + double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:) + double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:) + double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:) + double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:) + double precision, allocatable :: v_vvvv(:,:,:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + + logical :: not_converged + integer, allocatable :: list_occ(:,:), list_vir(:,:) + integer :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) + integer :: nb_iter, i,j,a,b + double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi + integer(bit_kind) :: det(N_int,2) + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Total number of occ/vir spin orb + nO = cc_nOab !nOa + nOb + nV = cc_nVab !nVa + nVb + ! Debug + !print*,nO,nV + + ! Number of occ/vir spin orb per spin + nO_S = cc_nO_S !(/nOa,nOb/) + nV_S = cc_nV_S !(/nVa,nVb/) + ! Debug + !print*,nO_S,nV_S + + ! Maximal number of occ/vir + nO_m = cc_nO_m !max(nOa, nOb) + nV_m = cc_nV_m !max(nVa, nVb) + ! Debug + !print*,nO_m,nV_m + + allocate(list_occ(nO_m,2), list_vir(nV_m,2)) + list_occ = cc_list_occ_spin + list_vir = cc_list_vir_spin + ! Debug + !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) + !print*,list_occ(:,1) + !print*,list_occ(:,2) + !print*,list_vir(:,1) + !print*,list_vir(:,2) + + ! Allocation + allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)) + allocate(r1(nO,nV), r2(nO,nO,nV,nV)) + allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV)) + allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV)) + allocate(v_oooo(nO,nO,nO,nO)) + !allocate(v_vooo(nV,nO,nO,nO)) + allocate(v_ovoo(nO,nV,nO,nO)) + allocate(v_oovo(nO,nO,nV,nO)) + allocate(v_ooov(nO,nO,nO,nV)) + allocate(v_vvoo(nV,nV,nO,nO)) + !allocate(v_vovo(nV,nO,nV,nO)) + !allocate(v_voov(nV,nO,nO,nV)) + allocate(v_ovvo(nO,nV,nV,nO)) + allocate(v_ovov(nO,nV,nO,nV)) + allocate(v_oovv(nO,nO,nV,nV)) + !allocate(v_vvvo(nV,nV,nV,nO)) + !allocate(v_vvov(nV,nV,nO,nV)) + !allocate(v_vovv(nV,nO,nV,nV)) + !allocate(v_ovvv(nO,nV,nV,nV)) + !allocate(v_vvvv(nV,nV,nV,nV)) + allocate(f_o(nO), f_v(nV)) + allocate(f_oo(nO, nO)) + allocate(f_ov(nO, nV)) + allocate(f_vv(nV, nV)) + + ! Allocation for the diis + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + ! Fock elements + call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo) + call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov) + call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv) + + ! Diag elements + do i = 1, nO + f_o(i) = f_oo(i,i) + enddo + do i = 1, nV + f_v(i) = f_vv(i,i) + enddo + + ! Bi electronic integrals from list + call wall_time(ti) + ! OOOO + call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo) + + ! OOO V + !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo) + call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo) + call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo) + call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov) + + ! OO VV + call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo) + !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo) + !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov) + call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo) + call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov) + call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv) + + ! O VVV + !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo) + !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov) + !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv) + !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv) + + ! VVVV + !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Load bi elec int:',tf-ti,'s' + endif + + ! Init of T + t1 = 0d0 + call guess_t1(nO,nV,f_o,f_v,f_ov,t1) + call guess_t2(nO,nV,f_o,f_v,v_oovv,t2) + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + ! Loop init + nb_iter = 0 + not_converged = .True. + r1 = 0d0 + r2 = 0d0 + max_r1 = 0d0 + max_r2 = 0d0 + + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + print*,'guess energy', uncorr_energy+energy, energy + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + + call wall_time(ta) + + ! Loop + do while (not_converged) + + ! Intermediates + call wall_time(tbi) + call wall_time(ti) + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + call wall_time(ti) + call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + ! Residuals + call wall_time(ti) + call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r1:',tf-ti,'s' + endif + call wall_time(ti) + call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r2:',tf-ti,'s' + endif + + ! Max elements in the residuals + max_r1 = maxval(abs(r1(:,:))) + max_r2 = maxval(abs(r2(:,:,:,:))) + max_r = max(max_r1,max_r2) + + call wall_time(ti) + ! Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,f_o,f_v,r1,t1) + call update_t2(nO,nV,f_o,f_v,r2,t2) + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + call wall_time(tf) + if (cc_dev) then + print*,'Update:',tf-ti,'s' + endif + + ! Print + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + call wall_time(tfi) + + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + if (cc_dev) then + print*,'Total:',tfi-tbi,'s' + endif + + ! Convergence + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. + endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocate + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + deallocate(tau,tau_t) + deallocate(r1,r2) + deallocate(cF_oo,cF_ov,cF_vv) + deallocate(cW_oooo,cW_ovvo)!,cW_vvvv) + deallocate(v_oooo) + deallocate(v_ovoo,v_oovo) + deallocate(v_ovvo,v_ovov,v_oovv) + + if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then + double precision :: t_corr + print*,'CCSD(T) calculation...' + call wall_time(ta) + !allocate(v_vvvo(nV,nV,nV,nO)) + !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + ! cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + ! cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + ! nV,nV,nV,nO, v_vvvo) + + !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) + call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr) + !print*,'Working on it...' + !call abort + call wall_time(tb) + print*,'Done' + print*,'Time: ',tb-ta, ' s' + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha' + print*,'' + endif + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(f_oo,f_ov,f_vv,f_o,f_v) + deallocate(v_ooov,v_vvoo,t1,t2) + !deallocate(v_ovvv,v_vvvo,v_vovv) + !deallocate(v_vvvv) + +end + +! Energy + +subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy) + + implicit none + + BEGIN_DOC + ! CCSD energy in spin orbitals + END_DOC + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: energy + + integer :: i,j,a,b + + + energy = 0d0 + + do i=1,nO + do a=1,nV + energy = energy + Fov(i,a) * t1(i,a) + end do + end do + + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + energy = energy & + + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) & + + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b) + end do + end do + end do + end do + +end + +! Tau + +subroutine compute_tau_spin(nO,nV,t1,t2,tau) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! Tau_t + +subroutine compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau_t(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau_t,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau_t(i,j,a,b) = t2(i,j,a,b) + 0.5d0*(t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a)) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! R1 + +subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_ovov(nO,nV,nO,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: r1(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + !double precision, allocatable :: X_vovv(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:) + double precision :: accu + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,Fov,cF_vv,cF_ov, & + !$OMP v_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + r1(i,a) = Fov(i,a) + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + do f=1,nV + do n=1,nO + r1(i,a) = r1(i,a) - t1(n,f)*v_ovov(n,a,i,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! r1(i,a) = r1(i,a) + t1(i,e)*cF_vv(a,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + cF_vv, size(cF_vv,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do f=1,nV + ! do e=1,nV + ! do m=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(i,m,e,f)*v_ovvv(m,a,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + + !allocate(X_vovv(nV,nO,nV,nV)) + double precision, allocatable :: v_ovvf(:,:,:), X_vovf(:,:,:) + allocate(v_ovvf(nO,nV,nV),X_vovf(nV,nO,nV)) + + do f = 1, nV + call gen_v_spin_3idx(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovvf) + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + + !$OMP DO collapse(3) + !do f = 1, nV + do e = 1, nV + do m = 1, nO + do a = 1, nV + !X_vovv(a,m,e,f) = v_ovvv(m,a,e,f) + X_vovf(a,m,e) = v_ovvf(m,a,e) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nO, nV, nO*nV, & + -0.5d0, t2(1,1,1,f), size(t2,1), & + X_vovf, size(X_vovf,1), & + 1d0 , r1 , size(r1,1)) + enddo + + !call dgemm('N','T', nO, nV, nO*nV*nV, & + ! -0.5d0, t2 , size(t2,1), & + ! X_vovv, size(X_vovv,1), & + ! 1d0 , r1 , size(r1,1)) + + deallocate(X_vovf) + !deallocate(X_vovv) + allocate(X_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_oovv, & + !$OMP f_o,f_v,v_oovo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! do m=1,nO + ! do n=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(m,n,a,e)*v_oovo(n,m,e,i) + ! end do + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(3) + do a = 1, nV + do e = 1, nV + do m = 1, nO + do n = 1, nO + X_oovv(n,m,e,a) = t2(m,n,a,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), & + X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + 1d0 , r1 , size(r1,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do a = 1, nV + do i = 1, nO + r1(i,a) = (f_o(i)-f_v(a)) * t1(i,a) - r1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oovv) + +end + +! R2 + +subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: cW_oooo(nO,nO,nO,nO) + !double precision,intent(in) :: cW_vvvv(nV,nV,nV,nV) + double precision,intent(in) :: cW_ovvo(nO,nV,nV,nO) + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_ovoo(nO,nV,nO,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_vvvo(nV,nV,nV,nO)!, v_vovv(nV,nO,nV,nV) + + double precision,intent(out) :: r2(nO,nO,nV,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_vvoo(:,:,:,:) + !double precision, allocatable :: A_vvov(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:), Y_oovv(:,:,:,:) + double precision, allocatable :: A_vvoo(:,:,:,:), B_ovoo(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: A_ovov(:,:,:,:), B_ovvo(:,:,:,:), X_ovvo(:,:,:,:) + double precision, allocatable :: A_vv(:,:) + double precision, allocatable :: A_oo(:,:), B_oovv(:,:,:,:) + double precision, allocatable :: A_vbov(:,:,:), X_vboo(:,:,:), v_vbvo(:,:,:) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + ! r2(i,j,a,b) = v_oovv(i,j,a,b) + ! end do + ! end do + ! end do + !end do + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(i,j,a,e)*cF_vv(b,e) + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,j,b,e)*cF_vv(a,e) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(X_oovv(nO,nO,nV,nV)) + call dgemm('N','T',nO*nO*nV, nV, nV, & + 1d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + cF_VV , size(cF_vv,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = v_oovv(i,j,a,b) + X_oovv(i,j,a,b) - X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !deallocate(X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_vv(nV,nV))!, X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(Y_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(Y_oovv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oo,B_oovv,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do n=1,nO + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(m,n,a,b)*cW_oooo(m,n,i,j) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nO*nO, nV*nV, nO*nO, & + 0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), & + tau , size(tau,1) * size(tau,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + !call dgemm('N','T', nO*nO, nV*nV, nV*nV, & + ! 0.5d0, tau , size(tau,1) * size(tau,2), & + ! cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), & + ! 1d0 , r2 , size(r2,1) * size(r2,2)) + double precision :: ti,tf + call wall_time(ti) + call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + call wall_time(tf) + if (cc_dev) then + print*,'cW_vvvv:',tf-ti,'s' + endif + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) & + ! + t2(i,m,a,e)*cW_ovvo(m,b,e,j) & + ! - t2(j,m,a,e)*cW_ovvo(m,b,e,i) & + ! - t2(i,m,b,e)*cW_ovvo(m,a,e,j) & + ! + t2(j,m,b,e)*cW_ovvo(m,a,e,i) & + ! - t1(i,e)*t1(m,a)*v_ovvo(m,b,e,j) & + ! + t1(j,e)*t1(m,a)*v_ovvo(m,b,e,i) & + ! + t1(i,e)*t1(m,b)*v_ovvo(m,a,e,j) & + ! - t1(j,e)*t1(m,b)*v_ovvo(m,a,e,i) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_ovov(nO,nV,nO,nV), B_ovvo(nO,nV,nV,nO), X_ovvo(nO,nV,nV,nO)) + !$OMP PARALLEL & + !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do a = 1, nV + do i = 1, nO + do e = 1, nV + do m = 1, nO + A_ovov(m,e,i,a) = t2(i,m,a,e) + end do + end do + end do + end do + !$OMP END DO NOWAIT + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do e = 1, nV + do m = 1, nO + B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nO*nV, & + 1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), & + B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), & + 0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = r2(i,j,a,b) + X_ovvo(i,a,b,j) - X_ovvo(j,a,b,i) & + - X_ovvo(i,b,a,j) + X_ovvo(j,b,a,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovov,B_ovvo,X_ovvo) + allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do m = 1, nO + do j = 1, nO + do b = 1, nV + do e = 1, nV + A_vvoo(e,b,j,m) = v_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nV*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + A_vvoo, size(A_vvoo,1), & + 0d0, B_ovoo, size(B_ovoo,1)) + + call dgemm('N','N', nO*nV*nO, nV, nO, & + 1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), & + t1 , size(t1,1), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2) * size(C_ovov,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,C_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - C_ovov(i,b,j,a) + C_ovov(j,b,i,a) & + + C_ovov(i,a,j,b) - C_ovov(j,a,i,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vvoo, B_ovoo, C_ovov) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t1(i,e)*v_vvvo(a,b,e,j) - t1(j,e)*v_vvvo(a,b,e,i) + ! end do + + ! end do + ! end do + ! end do + !end do + !allocate(A_vvov(nV,nV,nO,nV), X_vvoo(nV,nV,nO,nO)) + allocate(A_vbov(nV,nO,nV), X_vboo(nV,nO,nO), v_vbvo(nV,nV,nO)) + do b = 1, nV + + call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nO, v_vbvo) + + !$OMP PARALLEL & + !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + !do b = 1, nV + do a = 1, nV + !A_vvov(a,b,j,e) = v_vvvo(a,b,e,j) + A_vbov(a,j,e) = v_vbvo(a,e,j) + enddo + !enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nV*nO, nO, nV, & + 1d0, A_vbov, size(A_vbov,1) * size(A_vbov,2), & + t1 , size(t1,1), & + 0d0, X_vboo, size(X_vboo,1) * size(X_vboo,2)) + !call dgemm('N','T', nV*nV*nO, nO, nV, & + ! 1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), & + ! t1 , size(t1,1), & + ! 0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3)) + + !$OMP PARALLEL & + !$OMP SHARED(b,r2,X_vboo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + !do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, NO + !r2(i,j,a,b ) = r2(i,j,a,b) + X_vvoo(a,b,j,i) - X_vvoo(a,b,i,j) + r2(i,j,a,b) = r2(i,j,a,b) + X_vboo(a,j,i) - X_vboo(a,i,j) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + + !deallocate(A_vvov)!,X_vvoo) + deallocate(A_vbov, X_vboo, v_vbvo) + allocate(X_vvoo(nV,nV,nO,nO)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t1(m,a)*v_ovoo(m,b,i,j) + t1(m,b)*v_ovoo(m,a,i,j) + ! end do + + ! end do + ! end do + ! end do + !end do + !allocate(X_vvoo(nV,nV,nO,nO)) + + call dgemm('T','N', nV, nV*nO*nO, nO, & + 1d0, t1 , size(t1,1), & + v_ovoo, size(v_ovoo,1), & + 0d0, X_vvoo, size(X_vvoo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_vvoo(a,b,i,j) + X_vvoo(b,a,i,j) + end do + end do + end do + end do + !$OMP END DO + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = (f_o(i)+f_o(j)-f_v(a)-f_v(b)) * t2(i,j,a,b) - r2(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_vvoo) + +end + +! Use cF_oo + +subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) + + implicit none + + integer,intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV) + double precision, intent(in) :: F_oo(nO,nV), F_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + integer :: i,j,m,a,b + + allocate(cF_oo(nO,nO)) + + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! end do + !end do + + allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(cF_oo,X_oovv,Y_oovv) + +end + +! Use cF_ov + +subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_ov(:,:), A_oo(:,:), A_vv(:,:) + double precision, allocatable :: X_oovv(:,:,:,:), B_oovv(:,:,:,:) + integer :: i,j,a,b,e,m + + allocate(cF_ov(nO,nV)) + + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t2,cF_ov,nO,nV) & + !$OMP PRIVATE(i,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_vv(nV,nV), X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,r2,X_oovv) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(cF_ov,A_oo,B_oovv,X_oovv) + +end + +! Use cF_vv + +subroutine use_cF_vv(nO,nV,t1,t2,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_vv(:,:) + integer :: i,j,a,b,e,m + + allocate(cF_vv(nV,nV)) + + !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv) + + deallocate(cF_vv) + +end + +! Use cW_vvvd + +subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision, intent(in) :: v_vovv(nV,nO,nV,nV) + + double precision, intent(inout) :: r2(nO,nO,nV,nV) + + double precision, allocatable :: cW_vvvf(:,:,:), v_vvvf(:,:,:), tau_f(:,:,:), v_vovf(:,:,:) + integer :: i,j,e,f + double precision :: ti,tf + + allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV)) + + !PROVIDE cc_nVab + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + do f = 1, nV + call wall_time(ti) + !$OMP PARALLEL & + !$OMP SHARED(tau,tau_f,f,nO,nV) & + !$OMP PRIVATE(i,j,e) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + do i = 1, nO + tau_f(i,j,e) = tau(i,j,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'1st transpo', tf-ti + endif + + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nV,nV, v_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vvvf', tf-ti + endif + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nO_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nO_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nO,nV, v_vovf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vovf', tf-ti + endif + + call wall_time(ti) + call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'cW_vvvf', tf-ti + endif + + call wall_time(ti) + call dgemm('N','T', nO*nO, nV*nV, nV, & + 0.5d0, tau_f , size(tau_f,1) * size(tau_f,2), & + cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'last dgemm', tf-ti + endif + enddo + + deallocate(cW_vvvf,v_vvvf,v_vovf) + +end + +! cF_oo + +subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Foo(nO,nO) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_oo(nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision,external :: Kronecker_Delta + + !$OMP PARALLEL & + !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) & + !$OMP PRIVATE(i,m,n,e) & + !$OMP DEFAULT(NONE) + + !do i=1,nO + ! do m=1,nO + ! cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i) + ! end do + !end do + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = Foo(m,i) + end do + end do + !$OMP END DO + !$OMP DO + do i = 1, nO + cF_oo(i,i) = 0d0 + end do + !$OMP END DO + + do e=1,nV + do n=1,nO + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = cF_oo(m,i) + t1(n,e)*v_ooov(m,n,i,e) + end do + end do + !$OMP END DO + end do + end do + !$OMP END PARALLEL + + !do i=1,nO + ! do m=1,nO + ! do e=1,nV + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*t1(i,e)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nV,& + 0.5d0, Fov , size(Fov,1), & + t1 , size(t1,1), & + 1d0 , cF_oo, size(cF_oo,1)) + + !do i=1,nO + ! do m=1,nO + ! do f=1,nV + ! do e=1,nV + ! do n=1,nO + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*tau_t(i,n,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nO*nV*nV, & + 0.5d0, v_oovv, size(v_oovv,1), & + tau_t , size(tau_t,1), & + 1d0 , cF_oo , size(cF_oo,1)) + +end + +! cF_ov + +subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: Fov(nO,nV),v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_ov(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) & + !$OMP PRIVATE(i,a,m,n,e,f) & + !$OMP DEFAULT(NONE) + + !cF_ov = Fov + + !$OMP DO collapse(1) + do e=1,nV + do m=1,nO + cF_ov(m,e) = Fov(m,e) + do f=1,nV + do n=1,nO + cF_ov(m,e) = cF_ov(m,e) + t1(n,f)*v_oovv(m,n,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + +end + +! cF_vv + +subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: Fvv(nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cF_vv(nV,nV) + + double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:) + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_vv,Fvv,nO,nV) & + !$OMP PRIVATE(e,a) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do e=1,nV + do a=1,nV + cF_vv(a,e) = Fvv(a,e) + end do + end do + !$OMP END DO + !$OMP DO + do e = 1, nV + cF_vv(e,e) = 0d0 + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*t1(m,a)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('T','N', nV, nV, nO, & + -0.5d0, t1 , size(t1,1), & + Fov , size(Fov,1), & + 1d0 , cF_vv, size(cF_vv,1)) + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! do f=1,nV + ! cF_vv(a,e) = cF_vv(a,e) + t1(m,f)*v_ovvv(m,a,f,e) + ! end do + ! end do + ! end do + !end do + allocate(v_ovfv(nO,nV,nV),X_ovfv(nO,nV,nV)) + do f = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovfv) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,v_ovfv,X_ovfv,f) & + !$OMP PRIVATE(m,a,e) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do e = 1, nV + do a = 1, nV + do m = 1, nO + !X_ovfv(m,a,e) = v_ovvv(m,a,f,e) + X_ovfv(m,a,e) = v_ovfv(m,a,e) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv('T', nO, nV*nV, & + !1d0, v_ovvv(:,:,f,:), size(v_ovvv,1), & + 1d0, X_ovfv, size(X_ovfv,1), & + t1(1,f), 1, & + 1d0, cF_vv, 1) + enddo + deallocate(v_ovfv,X_ovfv) + + !do e=1,nV + ! do a=1,nV + ! do f=1,nV + ! do n=1,nO + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*tau_t(m,n,a,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + do f = 1, nV + call dgemm('T','N', nV, nV, nO*nO,& + -0.5d0, tau_t(1,1,1,f) , size(tau_t,1) * size(tau_t,2), & + v_oovv(1,1,1,f), size(v_oovv,1) * size(v_oovv,2), & + 1d0 , cF_vv, size(cF_vv,1)) + enddo + +end + +! cW_oooo + +subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oooo(nO,nO,nO,nO) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cW_oooo(nO,nO,nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_oooo(:,:,:,:) + + ! oooo block + + !cW_oooo = v_oooo + + !do j=1,nO + ! do i=1,nO + ! do n=1,nO + ! do m=1,nO + + ! do e=1,nV + ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + t1(j,e)*v_ooov(m,n,i,e) - t1(i,e)*v_ooov(m,n,j,e) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(X_oooo(nO,nO,nO,nO)) + + call dgemm('N','T', nO*nO*nO, nO, nV, & + 1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), & + t1 , size(t1,1), & + 0d0, X_oooo, size(X_oooo,1) * size(X_oooo,1) * size(X_oooo,3)) + !$OMP PARALLEL & + !$OMP SHARED(cW_oooo,v_oooo,X_oooo,nO,nV) & + !$OMP PRIVATE(i,j,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j=1,nO + do i=1,nO + do n=1,nO + do m=1,nO + cW_oooo(m,n,i,j) = v_oooo(m,n,i,j) + X_oooo(m,n,i,j) - X_oooo(m,n,j,i) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oooo) + + !do m=1,nO + ! do n=1,nO + ! do i=1,nO + ! do j=1,nO + ! + ! do e=1,nV + ! do f=1,nV + ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + call dgemm('N','T', nO*nO, nO*nO, nV*nV, & + 0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), & + tau , size(tau,1) * size(tau,2), & + 1.d0 , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2)) + +end + +! cW_ovvo + +subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cW_ovvo(nO,nV,nV,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: A_oovo(:,:,:,:), B_vovo(:,:,:,:) + double precision, allocatable :: A_voov(:,:,:,:), B_voov(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: v_ovev(:,:,:), cW_oveo(:,:,:) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,v_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do a = 1, nV + do i = 1, nO + cW_ovvo(i,a,b,j) = v_ovvo(i,a,b,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do m=1,nO + ! do b=1,nV + ! do e=1,nV + ! do j=1,nO + ! do f=1,nV + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + t1(j,f)*v_ovvv(m,b,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + allocate(v_ovev(nO,nV,nV),cW_oveo(nO,nV,nO)) + do e = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovev) + + call dgemm('N','T', nO*nV, nO, nV, & + 1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), & + t1 , size(t1,1), & + 0.d0, cW_oveo, size(cW_oveo,1) * size(cW_oveo,2)) + !$OMP PARALLEL & + !$OMP SHARED(e,cW_ovvo,cW_oveo,nO,nV) & + !$OMP PRIVATE(m,b,j) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do j = 1, nO + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + cW_oveo(m,b,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + deallocate(v_ovev,cW_oveo) + !call dgemm('N','T', nO*nV*nV, nO, nV, & + ! 1.d0, v_ovvv , size(v_ovvv,1) * size(v_ovvv,2) * size(v_ovvv,3), & + ! t1 , size(t1,1), & + ! 1.d0, cW_ovvo, size(cW_ovvo,1) * size(cW_ovvo,2) * size(cW_ovvo,3)) + + !do j=1,nO + ! do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - t1(n,b)*v_oovo(m,n,e,j) + ! end do + ! end do + ! end do + ! end do + !end do + + allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO)) + + !$OMP PARALLEL & + !$OMP SHARED(A_oovo,v_oovo,nO,nV) & + !$OMP PRIVATE(j,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do m=1,nO + do n=1,nO + A_oovo(n,m,e,j) = v_oovo(m,n,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nO*nV*nO, nO, & + 1d0, t1 , size(t1,1), & + A_oovo, size(A_oovo,1), & + 0d0, B_vovo, size(B_vovo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do b=1,nV + do m=1,nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - B_vovo(b,m,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oovo,B_vovo) + + !do j=1,nO + ! do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do f=1,nV + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) & + ! - ( 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) )*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + ! end do + !end do + allocate(A_voov(nV,nO,nO,nV), B_voov(nV,nO,nO,nV), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) & + !$OMP PRIVATE(f,n,m,e,j,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do j = 1, nO + do n = 1, nO + do f = 1, nV + A_voov(f,n,j,b) = 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP DO collapse(3) + do e = 1, nV + do m = 1, nO + do n = 1, nO + do f = 1, nV + B_voov(f,n,m,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nV*nO, & + 1d0, A_voov, size(A_voov,1) * size(A_voov,2), & + B_voov, size(B_voov,1) * size(B_voov,2), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2)) + + deallocate(A_voov,B_voov) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j = 1, nO + do e = 1, nV + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - C_ovov(j,b,m,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(C_ovov) + +end + +! cW_vvvv + +subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovv(nV,nO,nV,nV) + double precision,intent(in) :: v_vvvv(nV,nV,nV,nV) + + double precision,intent(out) :: cW_vvvv(nV,nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e,f + double precision, allocatable :: A_ovvv(:,:,:,:), B_vvvv(:,:,:,:) + + allocate(A_ovvv(nO,nV,nV,nV), B_vvvv(nV,nV,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,A_ovvv,v_vovv,v_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do d = 1, nV + do c = 1, nV + do b = 1, nV + do a = 1, nV + cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do a=1,nV + do m=1,nO + A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvv, size(A_ovvv,1), & + 0d0, B_vvvv, size(B_vvvv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,B_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do b=1,nV + do a=1,nV + cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovvv,B_vvvv) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nV*nV, nV*nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovv , size(v_oovv,1) * size(v_oovv,2), & + 1.d0 , cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2)) + +end + +! cW_vvvf + +subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + + implicit none + + integer,intent(in) :: nO,nV,f + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovf(nV,nO,nV) + double precision,intent(in) :: v_vvvf(nV,nV,nV) + + double precision,intent(out) :: cW_vvvf(nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e + double precision, allocatable :: A_ovvf(:,:,:), B_vvvf(:,:,:), v_oovf(:,:,:) + double precision :: ti,tf + + allocate(A_ovvf(nO,nV,nV), B_vvvf(nV,nV,nV)) + allocate(v_oovf(nO,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) & + !$OMP PRIVATE(a,b,c,d,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + cW_vvvf(a,b,c) = v_vvvf(a,b,c) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(2) + do e=1,nV + do a=1,nV + do m=1,nO + !A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + !A_ovvf(m,a,e) = v_vovv(a,m,e,f) + A_ovvf(m,a,e) = v_vovf(a,m,e) + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvf, size(A_ovvf,1), & + 0d0, B_vvvf, size(B_vvvf,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) & + !$OMP PRIVATE(a,b,c,d,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do e=1,nV + do b=1,nV + do a=1,nV + !cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + cW_vvvf(a,b,e) = cW_vvvf(a,b,e) - B_vvvf(b,a,e) + B_vvvf(a,b,e) + end do + end do + end do + !$OMP END DO NOWAIT + + !deallocate(A_ovvf,B_vvvf) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(2) + do e = 1, nV + do n = 1, nO + do m = 1, nO + v_oovf(m,n,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV*nV, nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovf , size(v_oovf,1) * size(v_oovf,2), & + 1.d0 , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2)) + + deallocate(v_oovf) + deallocate(A_ovvf,B_vvvf) + +end diff --git a/src/ccsd/ccsd_t_space_orb.irp.f b/src/ccsd/ccsd_t_space_orb.irp.f new file mode 100644 index 00000000..37f2b484 --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb.irp.f @@ -0,0 +1,513 @@ +! Dumb way + +subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + integer :: i,j,k,a,b,c + + allocate(W(nO,nO,nO,nV,nV,nV)) + allocate(V(nO,nO,nO,nV,nV,nV)) + + call form_w(nO,nV,t2,W) + call form_v(nO,nV,t1,W,V) + + energy = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + energy = energy / 3d0 + + deallocate(V,W) +end + +subroutine form_w(nO,nV,t2,W) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV) + + integer :: i,j,k,l,a,b,c,d + + W = 0d0 + do c = 1, nV + print*,'W:',c,'/',nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + + do d = 1, nV + W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + ! chem (bd|ai) + ! phys + + cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & + + cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj + + cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik + + cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij + + cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj + + cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik + enddo + + do l = 1, nO + W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + ! chem (ck|jl) + ! phys + - cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & + - cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj + - cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik + - cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij + - cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj + - cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik + enddo + + enddo + enddo + enddo + enddo + enddo + enddo + +end + +subroutine form_v(nO,nV,t1,w,v) + +implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: W(nO, nO, nO, nV, nV, nV) + double precision, intent(out) :: V(nO, nO, nO, nV, nV, nV) + + integer :: i,j,k,a,b,c + + V = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + + cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + + cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + + cc_space_v_vvoo(a,b,i,j) * t1(k,c) + enddo + enddo + enddo + enddo + enddo + enddo + +end + +! Main + +subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:) + double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) + double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb, delta, delta_ijk + + !allocate(W(nV,nV,nV,nO,nO,nO)) + !allocate(V(nV,nV,nV,nO,nO,nO)) + allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV)) + allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) + allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + + ! Temporary arrays + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + + !$OMP DO collapse(3) + do i = 1, nO + do a = 1, nV + do b = 1, nV + do d = 1, nV + X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do d = 1, nV + T_vvoo(d,c,k,j) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do c = 1, nV + do l = 1, nO + X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do l = 1, nO + T_ovvo(l,a,b,i) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vvoo(b,c,j,k) * t1(i,a) & + !X_vvoo(b,c,k,j) * T1_vo(a,i) & + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do b = 1, nV + X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(1) + do i = 1, nO + do a = 1, nV + T_vo(a,i) = t1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(ta) + energy = 0d0 + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta_ijk = f_o(i) + f_o(j) + f_o(k) + call form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_ijk) + call form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,W_ijk,V_ijk) + !$OMP PARALLEL & + !$OMP SHARED(energy,nV,i,j,k,W_ijk,V_ijk,f_o,f_v,delta_ijk) & + !$OMP PRIVATE(a,b,c,e,delta) & + !$OMP DEFAULT(NONE) + e = 0d0 + !$OMP DO + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta = 1d0 / (delta_ijk - f_v(a) - f_v(b) - f_v(c)) + !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + e = e + (4d0 * W_ijk(a,b,c) + W_ijk(b,c,a) + W_ijk(c,a,b)) & + * (V_ijk(a,b,c) - V_ijk(c,b,a)) * delta + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + !$OMP END PARALLEL + enddo + enddo + call wall_time(tb) + write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' + enddo + + energy = energy / 3d0 + + deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + !deallocate(V,W) +end + +! W_ijk + +subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) + + implicit none + + integer, intent(in) :: nO,nV,i,j,k + !double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) + double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO) + + integer :: l,a,b,c,d + double precision, allocatable, dimension(:,:,:) :: X, Y, Z + + !W = 0d0 + !do i = 1, nO + ! do j = 1, nO + ! do k = 1, nO + + allocate(X(nV,nV,nV)) + allocate(Y(nV,nV,nV)) + allocate(Z(nV,nV,nV)) + + !$OMP PARALLEL DO + do b = 1, nV + do a = 1, nV + do d = 1, nV + Z(d,a,b) = X_vvvo(d,b,a,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + Z, nV, T_vvoo(1,1,k,j), nV, 0.d0, W, nV*nV) + + !$OMP PARALLEL DO + do c = 1, nV + do a = 1, nV + do d = 1, nV + Z(d,a,c) = X_vvvo(d,c,a,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + Z, nV, T_vvoo(1,1,j,k), nV, 0.d0, Y, nV*nV) + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + X_vvvo(1,1,1,k), nV, T_vvoo(1,1,j,i), nV, 1.d0, Y, nV*nV) + + call dgemm('T','N',nV,nV*nV,nV, 1.d0, & + T_vvoo(1,1,i,j), nV, X_vvvo(1,1,1,k), nV, 1.d0, W, nV) + + call dgemm('T','N',nV,nV*nV,nV, 1.d0, & + T_vvoo(1,1,i,k), nV, X_vvvo(1,1,1,j), nV, 1.d0, Y, nV) + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + X_vvvo(1,1,1,j), nV, T_vvoo(1,1,k,i), nV, 1.d0, W, nV*nV) + + deallocate(Z) + + + allocate(Z(nO,nV,nV)) + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + T_ovvo(1,1,1,i), nO, X_ovoo(1,1,j,k), nO, 1.d0, W, nV*nV) + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + T_ovvo(1,1,1,i), nO, X_ovoo(1,1,k,j), nO, 1.d0, Y, nV*nV) + + !$OMP PARALLEL DO + do c = 1, nV + do a = 1, nV + do l = 1, nO + Z(l,a,c) = T_ovvo(l,c,a,k) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + Z, nO, X_ovoo(1,1,i,j), nO, 1.d0, Y, nV*nV) + + call dgemm('T','N',nV,nV*nV,nO, -1.d0, & + X_ovoo(1,1,j,i), nO, T_ovvo(1,1,1,k), nO, 1.d0, Y, nV) + + call dgemm('T','N',nV,nV*nV,nO, -1.d0, & + X_ovoo(1,1,k,i), nO, T_ovvo(1,1,1,j), nO, 1.d0, W, nV) + + !$OMP PARALLEL DO + do b = 1, nV + do a = 1, nV + do l = 1, nO + Z(l,a,b) = T_ovvo(l,b,a,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + Z, nO, X_ovoo(1,1,i,k), nO, 1.d0, W, nV*nV) + + !$OMP PARALLEL DO + do c = 1, nV + do b = 1, nV + do a = 1, nV + W(a,b,c) = W(a,b,c) + Y(a,c,b) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(X,Y,Z) + + +! !$OMP PARALLEL & +! !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & +! !$OMP PRIVATE(a,b,c,d,l) & +! !$OMP DEFAULT(NONE) +! +! !$OMP DO collapse(2) +! do c = 1, nV +! do b = 1, nV +! do a = 1, nV +! W(a,b,c) = 0.d0 +! +! do d = 1, nV +! !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & +! W(a,b,c) = W(a,b,c) & +! ! chem (bd|ai) +! ! phys +! !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & +! !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj +! !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik +! !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij +! !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj +! !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik +! + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & +! + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj +! + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik +! + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij +! + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj +! + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik +! enddo +! +! enddo +! enddo +! enddo +! !$OMP END DO nowait +! +! !$OMP DO collapse(2) +! do c = 1, nV +! do b = 1, nV +! do a = 1, nV +! +! do l = 1, nO +! !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & +! W(a,b,c) = W(a,b,c) & +! ! chem (ck|jl) +! ! phys +! !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & +! !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj +! !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik +! !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij +! !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj +! !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik +! - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & +! - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj +! - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik +! - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij +! - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj +! - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik +! enddo +! +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end + +! V_ijk + +subroutine form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,w,v) + +implicit none + + integer, intent(in) :: nO,nV,i,j,k + !double precision, intent(in) :: t1(nO,nV) + double precision, intent(in) :: T_vo(nV,nO) + double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: W(nV,nV,nV)!,nO,nO,nO) + double precision, intent(out) :: V(nV,nV,nV)!,nO,nO,nO) + + integer :: a,b,c + + !V = 0d0 + !do i = 1, nO + ! do j = 1, nO + ! do k = 1, nO + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) & + !$OMP PRIVATE(a,b,c) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + V(a,b,c) = W(a,b,c) & + !+ cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + !+ cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + !+ cc_space_v_vvoo(a,b,i,j) * t1(k,c) + + X_vvoo(b,c,k,j) * T_vo(a,i) & + + X_vvoo(a,c,k,i) * T_vo(b,j) & + + X_vvoo(a,b,j,i) * T_vo(c,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end + diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f new file mode 100644 index 00000000..3b762a06 --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -0,0 +1,252 @@ +! Main + +subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) + double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) + double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) + double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) + double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb, delta, delta_abc + + !allocate(W(nV,nV,nV,nO,nO,nO)) + !allocate(V(nV,nV,nV,nO,nO,nO)) + allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) + allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) + allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) + allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + + ! Temporary arrays + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + + !$OMP DO collapse(3) + do i = 1, nO + do a = 1, nV + do b = 1, nV + do d = 1, nV + X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do d = 1, nV + T_vvoo(d,c,k,j) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do c = 1, nV + do l = 1, nO + X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do l = 1, nO + T_ovvo(l,a,b,i) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vvoo(b,c,j,k) * t1(i,a) & + !X_vvoo(b,c,k,j) * T1_vo(a,i) & + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do b = 1, nV + X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(1) + do i = 1, nO + do a = 1, nV + T_vo(a,i) = t1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(ta) + energy = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + call form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) + call form_w_abc(nO,nV,b,c,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_bca) + call form_w_abc(nO,nV,c,a,b,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cab) + call form_w_abc(nO,nV,c,b,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cba) + + call form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W_abc,V_abc) + call form_v_abc(nO,nV,c,b,a,T_vo,X_vvoo,W_cba,V_cba) + !$OMP PARALLEL & + !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& + !$OMP PRIVATE(i,j,k,e,delta) & + !$OMP DEFAULT(NONE) + e = 0d0 + !$OMP DO + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& + * (V_abc(i,j,k) - V_cba(i,j,k)) * delta + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + !$OMP END PARALLEL + enddo + enddo + call wall_time(tb) + write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' + enddo + + energy = energy / 3d0 + + deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + !deallocate(V,W) +end + + +subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) + + implicit none + + integer, intent(in) :: nO,nV,a,b,c + !double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) + double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: W_abc(nO,nO,nO) + + integer :: l,i,j,k,d + + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) & + !$OMP PRIVATE(i,j,k,d,l) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do i = 1, nO + W_abc(i,j,k) = 0.d0 + + do d = 1, nV + W_abc(i,j,k) = W_abc(i,j,k) & + + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & + + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & + + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & + + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & + + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & + + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) + + enddo + + do l = 1, nO + W_abc(i,j,k) = W_abc(i,j,k) & + - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & + - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj + - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik + - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij + - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj + - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + +end + + +! V_abc + +subroutine form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W,V) + +implicit none + + integer, intent(in) :: nO,nV,a,b,c + !double precision, intent(in) :: t1(nO,nV) + double precision, intent(in) :: T_vo(nV,nO) + double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: W(nO,nO,nO) + double precision, intent(out) :: V(nO,nO,nO) + + integer :: i,j,k + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,a,b,c,T_vo,X_vvoo,W,V) & + !$OMP PRIVATE(i,j,k) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do k = 1, nO + do j = 1, nO + do i = 1, nO + !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + V(i,j,k) = W(i,j,k) & + + X_vvoo(b,c,k,j) * T_vo(a,i) & + + X_vvoo(a,c,k,i) * T_vo(b,j) & + + X_vvoo(a,b,j,i) * T_vo(c,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + diff --git a/src/ccsd/ccsd_t_spin_orb.irp.f b/src/ccsd/ccsd_t_spin_orb.irp.f new file mode 100644 index 00000000..3f79e4a0 --- /dev/null +++ b/src/ccsd/ccsd_t_spin_orb.irp.f @@ -0,0 +1,376 @@ +! v1 + +subroutine ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_vvvo(nV,nV,nV,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3(:,:,:,:,:,:), s(:,:) + double precision :: e_t, e_st, e_dt, delta_abc, delta + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3(nO,nO,nO,nV,nV,nV), s(nO,nV)) + + t3 = 0d0 + + ! T3 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + do e = 1, nV + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(j,k,a,e) * v_vvvo(b,c,e,i) & + - t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + - t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + - t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + - t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + + t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + + t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + + t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + + t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + enddo + do m = 1, nO + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(m,i,b,c) * v_ooov(j,k,m,a) & + - t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + - t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + - t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + - t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + + t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + enddo + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) * (1d0 / delta) + enddo + enddo + enddo + enddo + enddo + enddo + + + ! E_T + e_t = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t = e_t + t3(i,j,k,a,b,c) * delta * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_t = e_t / 36d0 + + ! E_ST + s = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + e_st = 0d0 + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + ! E_DT + e_dt = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt = e_dt + t2(i,j,a,b) * f_ov(k,c) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_dt = e_dt * 0.25d0 + + ! (T) + !print*,e_t,e_st,e_dt + energy = e_t + e_st + e_dt + + deallocate(t3,s) + +end + +! v2 + +subroutine ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3_bc(:,:,:,:), s(:,:), e_t(:), e_dt(:) + double precision, allocatable :: A_vovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), B_ooov(:,:,:,:) + double precision :: e_st, delta_abc, delta, ta, tb + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3_bc(nO,nO,nO,nV), s(nO,nV), e_t(nV), e_dt(nV)) + allocate(A_vovv(nV,nO,nV,nV),v_vvvo(nV,nV,nV,nO),T_voov(nV,nO,nO,nV),B_ooov(nO,nO,nO,nV)) + + call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nV,nO, v_vvvo) + + ! Init + s = 0d0 + e_t = 0d0 + e_st = 0d0 + e_dt = 0d0 + + call wall_time(ta) + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,m,a,b,c,e) & + !$OMP SHARED(A_vovv,ta,tb,t3_bc,s,e_t,e_st,e_dt,t2,v_vvvo,v_ooov, & + !$OMP v_vvoo,f_o,f_v,f_ov,delta,delta_abc,nO,nV,T_voov,B_ooov) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do c = 1, nV + do b = 1, nV + do i = 1, nO + do e = 1, nV + A_vovv(e,i,b,c) = v_vvvo(b,c,e,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do e = 1, nV + T_voov(e,j,k,a) = t2(j,k,a,e) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do m = 1, nO + B_ooov(m,j,k,a) = v_ooov(j,k,m,a) + enddo + enddo + enddo + enddo + !$omp end do + + do c = 1, nV + do b = 1, nV + + ! T3(:,:,:,:,b,c) + ! Init + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + t3_bc(i,j,k,a) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do e = 1, nV + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(j,k,a,e) * v_vvvo(b,c,e,i) & + !- t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + !- t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + !- t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + !- t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + !+ t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + !+ t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + !+ t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + !+ t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + + T_voov(e,j,k,a) * A_vovv(e,i,b,c) & + - T_voov(e,i,k,a) * A_vovv(e,j,b,c) & ! - P(ij) + - T_voov(e,j,i,a) * A_vovv(e,k,b,c) & ! - P(ik) + - T_voov(e,j,k,b) * A_vovv(e,i,a,c) & ! - P(ab) + - T_voov(e,j,k,c) * A_vovv(e,i,b,a) & ! - P(ac) + + T_voov(e,i,k,b) * A_vovv(e,j,a,c) & ! + P(ij) P(ab) + + T_voov(e,i,k,c) * A_vovv(e,j,b,a) & ! + P(ij) P(ac) + + T_voov(e,j,i,b) * A_vovv(e,k,a,c) & ! + P(ik) P(ab) + + T_voov(e,j,i,c) * A_vovv(e,k,b,a) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do m = 1, nO + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(m,i,b,c) * v_ooov(j,k,m,a) & + !- t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + !- t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + !- t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + !- t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + !+ t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + !+ t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + !+ t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + !+ t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + + t2(m,i,b,c) * B_ooov(m,j,k,a) & + - t2(m,j,b,c) * B_ooov(m,i,k,a) & ! - P(ij) + - t2(m,k,b,c) * B_ooov(m,j,i,a) & ! - P(ik) + - t2(m,i,a,c) * B_ooov(m,j,k,b) & ! - P(ab) + - t2(m,i,b,a) * B_ooov(m,j,k,c) & ! - P(ac) + + t2(m,j,a,c) * B_ooov(m,i,k,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * B_ooov(m,i,k,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * B_ooov(m,j,i,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * B_ooov(m,j,i,c) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) * (1d0 / delta) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! E_T + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t(a) = e_t(a) + t3_bc(i,j,k,a) * delta * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_ST + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_DT + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt(a) = e_dt(a) + t2(i,j,a,b) * f_ov(k,c) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO + enddo + !$OMP MASTER + call wall_time(tb) + write(*,'(A1,F6.2,A5,F10.2,A2)') ' ', dble(c)/dble(nV)*100d0, '% in ', tb-ta, ' s' + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + do a = 2, nV + e_t(1) = e_t(1) + e_t(a) + enddo + + do a = 2, nV + e_dt(1) = e_dt(1) + e_dt(a) + enddo + + e_t = e_t / 36d0 + + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + e_dt = e_dt * 0.25d0 + + ! (T) + !print*,e_t(1),e_st,e_dt(1) + energy = e_t(1) + e_st + e_dt(1) + + deallocate(t3_bc,s) + +end diff --git a/src/utils_trust_region/TANGLE_org_mode.sh b/src/ccsd/org/TANGLE_org_mode.sh similarity index 100% rename from src/utils_trust_region/TANGLE_org_mode.sh rename to src/ccsd/org/TANGLE_org_mode.sh diff --git a/src/ccsd/org/ccsd_space_orb.org b/src/ccsd/org/ccsd_space_orb.org new file mode 100644 index 00000000..a848fd26 --- /dev/null +++ b/src/ccsd/org/ccsd_space_orb.org @@ -0,0 +1,2121 @@ +* ccsd with spatial orbitals + +Scuseria, Gustavo E.; Janssen, Curtis L.; Schaefer, Henry +F. (1988). An efficient reformulation of the closed-shell coupled +cluster single and double excitation (CCSD) equations. The Journal of +Chemical Physics, 89(12), 7382–. doi:10.1063/1.455269 + +* Code +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb.irp.f +program ccsd + + implicit none + + read_wf = .True. + touch read_wf + + call run_ccsd_space_orb + +end +#+end_src + +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine run_ccsd_space_orb + + implicit none + + integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d + integer :: u,v,gam,beta,tmp_gam,tmp_beta + integer :: nb_iter + double precision :: get_two_e_integral + double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb + logical :: not_converged + + double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) + double precision, allocatable :: t1(:,:), r1(:,:) + double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + integer, allocatable :: list_occ(:), list_vir(:) + integer(bit_kind) :: det(N_int,2) + integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) + + PROVIDE mo_two_e_integrals_in_map + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Check that the reference is a closed shell determinant + if (cc_ref_is_open_shell) then + call abort + endif + + ! Number of occ/vir spatial orb + nO = nOa + nV = nVa + + allocate(list_occ(nO),list_vir(nV)) + list_occ = cc_list_occ + list_vir = cc_list_vir + ! Debug + !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) + !print*,'occ',list_occ + !print*,'vir',list_vir + + allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) + allocate(tau(nO,nO,nV,nV)) + allocate(t1(nO,nV), r1(nO,nV)) + allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) + + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + if (elec_alpha_num /= elec_beta_num) then + print*, 'Only for closed shell systems' + print*, 'elec_alpha_num=',elec_alpha_num + print*, 'elec_beta_num =',elec_beta_num + print*, 'abort' + call abort + endif + + ! Init + call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) + call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) + call update_tau_space(nO,nV,t1,t2,tau) + !print*,'hf_energy', hf_energy + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_space(nO,nV,tau,t1,energy) + print*,'Guess energy', uncorr_energy+energy, energy + + nb_iter = 0 + not_converged = .True. + max_r1 = 0d0 + max_r2 = 0d0 + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(ta) + + do while (not_converged) + + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo(nO,nV,t1,t2,H_vo) + + ! Residue + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + max_r = max(max_r1,max_r2) + + ! Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) + call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + + call update_tau_space(nO,nV,t1,t2,tau) + + ! Energy + call ccsd_energy_space(nO,nV,tau,t1,energy) + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. + endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocation + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + + deallocate(H_vv,H_oo,H_vo,r1,r2,tau) + + ! CCSD(T) + double precision :: e_t + + if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then + + ! Dumb way + !call wall_time(ta) + !call ccsd_par_t_space(nO,nV,t1,t2,e_t) + !call wall_time(tb) + !print*,'Time: ',tb-ta, ' s' + + !print*,'' + !write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' + !write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' + !write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + !print*,'' + + ! New + print*,'Computing (T) correction...' + call wall_time(ta) + call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & + ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + print*,'' + endif + + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(t1,t2) + +end +#+END_SRC + +* Energy +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine ccsd_energy_space(nO,nV,tau,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau,t1,& + !$omp cc_space_f_vo,cc_space_w_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do i = 1, nO + do a = 1, nV + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end +#+END_SRC + +* T +** Tau +#+begin_src f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine update_tau_space(nO,nV,t1,t2,tau) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +* Residual equations +** R1 +*** R1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r1(nO,nV), max_r1 + + ! internal + integer :: u,i,j,beta,a,b + + !$omp parallel & + !$omp shared(nO,nV,r1,cc_space_f_ov) & + !$omp private(u,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + r1(u,beta) = cc_space_f_ov(u,beta) + enddo + enddo + !$omp end do + !$omp end parallel + + ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a) + ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO) + ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV) + ! cc_space_f_vo(a,i) * t1(u,a) -> X1(nO,nO), O(nO*nO*nV) + ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) - 2d0 * cc_space_f_vo(a,i) * t1(i,beta) * t1(u,a) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_oo(:,:) + allocate(X_oo(nO,nO)) + call dgemm('N','N', nO, nO, nV, & + -2d0, t1 , size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, X_oo , size(X_oo,1)) + + call dgemm('T','N', nO, nV, nO, & + 1d0, X_oo, size(X_oo,2), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + deallocate(X_oo) + + ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) + !do beta = 1, nV + ! do u = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) + ! enddo + ! enddo + !enddo + call dgemm('N','N', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + H_vv, size(H_vv,1), & + 1d0, r1 , size(r1,1)) + + ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! r1(u,beta) = r1(u,beta) - H_oo(u,i) * t1(i,beta) + ! enddo + ! enddo + !enddo + call dgemm('N','N', nO, nV, nO, & + -1d0, H_oo, size(H_oo,1), & + t1 , size(t1,1), & + 1d0, r1, size(r1,1)) + + !r1(u,beta) = r1(u,beta) + H_vo(a,i) * (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)) + ! <=> + ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * X(a,i,u,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + H_vo(a,i) * & + ! (2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_voov(:,:,:,:) + allocate(X_voov(nV, nO, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_voov,t2,t1) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nV*nO, nO*nV, & + 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & + H_vo , 1, & + 1d0, r1 , 1) + + deallocate(X_voov) + + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) + ! <=> + ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_ovov(:,:,:,:) + allocate(X_ovov(nO, nV, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do a = 1, nv + do i = 1, nO + X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nO*nV, nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + t1 , 1, & + 1d0, r1 , 1) + + deallocate(X_ovov) + + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! do b = 1, nV + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) + allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & + !$omp private(b,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do u = 1, nO + T_vvoo(a,b,i,u) = tau(i,u,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nV,nO*nV*nV, & + 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & + W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_vvov,T_vvoo) + + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) + !do beta = 1, nV + ! do u = 1, nO + ! do i = 1, nO + ! do j = 1, nO + ! do a = 1, nV + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: W_oovo(:,:,:,:) + allocate(W_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & + !$omp private(u,a,i,j) & + !$omp default(none) + !$omp do collapse(3) + do u = 1, nO + do a = 1, nV + do j = 1, nO + do i = 1, nO + W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & + tau , size(tau,1) * size(tau,2) * size(tau,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_oovo) + + max_r1 = 0d0 + do a = 1, nV + do i = 1, nO + if (dabs(r1(i,a)) > max_r1) then + max_r1 = dabs(r1(i,a)) + endif + enddo + enddo + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r1) & + !$omp private(a,i) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + r1(i,a) = -r1(i,a) + enddo + enddo + !$omp end do + !$omp end parallel + +end +#+end_src + +*** Intermediates +**** H_oo +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_oo(nO, nO) + + integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u + + !H_oo = 0d0 + + !do i = 1, nO + ! do u = 1, nO + ! H_oo(u,i) = cc_space_f_oo(u,i) + + ! do j = 1, nO + ! do a = 1, nV + ! do b = 1, nV + ! !H_oo(u,i) = H_oo(u,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * tau(u,j,a,b) + ! !H_oo(u,i) = H_oo(u,i) + cc_space_w_vvoo(a,b,i,j) * tau(u,j,a,b) + ! H_oo(u,i) = H_oo(u,i) + cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + ! H_oo(u,i) = cc_space_f_oo(u,i) + !$omp parallel & + !$omp shared(nO,H_oo,cc_space_f_oo) & + !$omp private(i,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + H_oo(u,i) = cc_space_f_oo(u,i) + enddo + enddo + !$omp end do + !$omp end parallel + + ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b) + call dgemm('N','T', nO, nO, nO*nV*nV, & + 1d0, tau , size(tau,1), & + cc_space_w_oovv, size(cc_space_w_oovv,1), & + 1d0, H_oo , size(H_oo,1)) + +end +#+END_SRC + +**** H_vv +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_vv(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vv = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + + ! do j = 1, nO + ! do i = 1, nO + ! do b = 1, nV + ! !H_vv(a,beta) = H_vv(a,beta) - (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(a,b,j,i)) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! enddo + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: tmp_tau(:,:,:,:) + + allocate(tmp_tau(nV,nO,nO,nV)) + + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + !$omp parallel & + !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + H_vv(a,beta) = cc_space_f_vv(a,beta) + enddo + enddo + !$omp end do nowait + + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) + ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) + + !$omp do collapse(3) + do beta = 1, nV + do j = 1, nO + do i = 1, nO + do b = 1, nV + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV,nV,nO*nO*nV, & + -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), & + tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), & + 1d0, H_vv , size(H_vv,1)) + + deallocate(tmp_tau) + +end +#+END_SRC + +**** H_vo +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_H_vo(nO,nV,t1,t2,H_vo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: H_vo(nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + !H_vo = 0d0 + + !do i = 1, nO + ! do a = 1, nV + ! H_vo(a,i) = cc_space_f_vo(a,i) + + ! do j = 1, nO + ! do b = 1, nV + ! !H_vo(a,i) = H_vo(a,i) + (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! enddo + ! enddo + ! + ! enddo + !enddo + + double precision, allocatable :: w(:,:,:,:) + + allocate(w(nV,nO,nO,nV)) + + !$omp parallel & + !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do i = 1, nO + do a = 1, nV + H_vo(a,i) = cc_space_f_vo(a,i) + enddo + enddo + !$omp end do nowait + + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('N',nV*nO, nO*nV, & + 1d0, w , size(w,1) * size(w,2), & + t1 , 1, & + 1d0, H_vo, 1) + + deallocate(w) + +end +#+END_SRC + +** R2 +*** R2 +#+begin_src f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + + ! internal + double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:) + integer :: u,v,i,j,beta,gam,a,b + + allocate(g_occ(nO,nO), g_vir(nV,nV)) + allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) + allocate(A1(nO,nO,nO,nO)) + + call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) + call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) + call compute_A1(nO,nV,t1,t2,tau,A1) + call compute_J1(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvvo,cc_space_v_vvoo,J1) + call compute_K1(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,cc_space_v_vvov,K1) + + ! Residual + !r2 = 0d0 + + !$omp parallel & + !$omp shared(nO,nV,r2,cc_space_v_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do j = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + A1(u,v,i,j) * tau(i,j,beta,gam) + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1, size(A1,1) * size(A1,2), & + tau, size(tau,1) * size(tau,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do b = 1, nv + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + B1(a,b,beta,gam) * tau(u,v,a,b) + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + allocate(B1(nV,nV,nV,nV)) + call compute_B1(nO,nV,t1,t2,B1) + call dgemm('N','N',nO*nO,nV*nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1 , size(B1,1) * size(B1,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + deallocate(B1) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + g_vir(a,beta) * t2(u,v,a,gam) & + ! + g_vir(a,gam) * t2(v,u,a,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,t2,X_oovv) & + !$omp private(u,v,gam,a) & + !$omp default(none) + !$omp do collapse(3) + do a = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + X_oovv(u,v,gam,a) = t2(u,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + g_vir, size(g_vir,1), & + 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - g_occ(u,i) * t2(i,v,beta,gam) & + ! - g_occ(v,i) * t2(i,u,gam,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO,nO*nV*nV,nO, & + 1d0, g_occ , size(g_occ,1), & + t2 , size(t2,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_oovv) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + cc_space_v_ovvv(u,a,beta,gam) * t1(v,a) & + ! + cc_space_v_ovvv(v,a,gam,beta) * t1(u,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vovv(:,:,:,:) + allocate(X_vovv(nV,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & + !$omp private(u,a,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do u = 1, nO + do a = 1, nV + X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1 , size(t1,1), & + X_vovv, size(X_vovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_ovov(u,a,i,gam) * t1(i,beta) * t1(v,a) & + ! - cc_space_v_ovov(v,a,i,beta) * t1(i,gam) * t1(u,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & + !$omp private(u,v,gam,i) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do u = 1, nO + do a = 1, nV + X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV*nO*nV,nV,nO, & + 1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), & + t1 , size(t1,1), & + 0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3)) + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1, size(t1,1), & + Y_vovv, size(Y_vovv,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_vovv) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_oovo(u,v,beta,i) * t1(i,gam) & + ! - cc_space_v_oovo(v,u,gam,i) * t1(i,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO*nO*nV,nV,nO, & + 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do i = 1, nO + ! do a = 1, nV + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - cc_space_v_ovvo(u,a,beta,i) * t1(v,a) * t1(i,gam) & + ! - cc_space_v_ovvo(v,a,gam,i) * t1(u,a) * t1(i,beta) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: Y_oovo(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & + !$omp private(a,v,gam,i) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do v = 1, nO + do a = 1, nV + X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nO,nV, & + 1d0, t1, size(t1,1), & + X_vovo, size(X_vovo,1), & + 0d0, Y_oovo, size(Y_oovo,1)) + + call dgemm('N','N',nO*nO*nV, nV, nO, & + 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_oovo) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! + 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) * & + ! (2d0 * t2(i,v,a,gam) - t2(i,v,gam,a)) & + ! + 0.5d0 * (2d0 * J1(v,a,gam,i) - K1(v,a,i,gam)) * & + ! (2d0 * t2(i,u,a,beta) - t2(i,u,beta,a)) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do a = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do gam = 1, nV + do v = 1, nO + do i = 1, nO + do a = 1, nV + Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_voov, size(Y_voov,1) * size(Y_voov,2), & + 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Y_voov) + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - 0.5d0 * K1(u,a,i,beta) * t2(i,v,gam,a) & + ! - 0.5d0 * K1(v,a,i,gam) * t2(i,u,beta,a) !P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:) + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,a,i,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do u = 1, nO + do a = 1, nV + do i = 1, nO + X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do gam = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,gam) = t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !do gam = 1, nV + ! do beta = 1, nV + ! do v = 1, nO + ! do u = 1, nO + ! do a = 1, nV + ! do i = 1, nO + ! r2(u,v,beta,gam) = r2(u,v,beta,gam) & + ! - K1(u,a,i,gam) * t2(i,v,beta,a) & + ! - K1(v,a,i,beta) * t2(i,u,gam,a) ! P + ! enddo + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + !$omp parallel & + !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do collapse(3) + do a = 1, nV + do i = 1, nO + do gam = 1, nV + do u = 1, nO + X_ovov(u,gam,i,a) = K1(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do beta = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovov,Y_ovov,Z_ovov) + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r2) & + !$omp private(i,j,a,b) & + !$omp default(none) + !$omp do collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = -r2(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + max_r2 = 0d0 + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + if (dabs(r2(i,j,a,b)) > max_r2) then + max_r2 = dabs(r2(i,j,a,b)) + endif + enddo + enddo + enddo + enddo + + deallocate(g_occ,g_vir,J1,K1,A1) + +end +#+end_src + +*** Intermediates +**** A1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_A1(nO,nV,t1,t2,tau,A1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: A1(nO, nO, nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + + !A1 = 0d0 + + !do j = 1, nO + ! do i = 1, nO + ! do v = 1, nO + ! do u = 1, nO + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + + ! do a = 1, nV + ! A1(u,v,i,j) = A1(u,v,i,j) & + ! + cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + ! + cc_space_v_vooo(a,v,i,j) * t1(u,a) + ! + ! do b = 1, nV + ! A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) + allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) + + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + !$omp parallel & + !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do u = 1, nO + do a = 1, nV + X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + X_vooo, size(X_vooo,1), & + 0d0, Y_oooo, size(Y_oooo,1)) + + !$omp parallel & + !$omp shared(nO,nV,A1,Y_oooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vooo,Y_oooo) + + ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & + 1d0, A1 , size(A1,1)) + + ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + call dgemm('N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau , size(tau,1) * size(tau,2), & + cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & + 1d0, A1 , size(A1,1) * size(A1,2)) + +end +#+END_SRC + +**** B1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_B1(nO,nV,t1,t2,B1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: B1(nV, nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !B1 = 0d0 + + !do gam = 1, nV + ! do beta = 1, nV + ! do b = 1, nV + ! do a = 1, nV + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + + ! do i = 1, nO + ! B1(a,b,beta,gam) = B1(a,b,beta,gam) & + ! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + ! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:) + allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV)) + + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + !$omp parallel & + !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & + !$omp private(a,b,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp do collapse(3) + do i = 1, nO + do gam = 1, nV + do b = 1, nV + do a = 1, nV + X_vvvo(a,b,gam,i) = cc_space_v_vvov(a,b,i,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + call dgemm('N','N', nV*nV*nV, nV, nO, & + -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & + t1 , size(t1,1), & + 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) + + + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + call dgemm('N','N', nV*nV*nV, nV, nO, & + -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), & + t1 , size(t1,1), & + 0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2) * size(Y_vvvv,3)) + + !$omp parallel & + !$omp shared(nV,B1,Y_vvvv) & + !$omp private(a,b,beta,gam) & + !$omp default(none) + !$omp do collapse(3) + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = B1(a,b,beta,gam) + Y_vvvv(a,b,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vvvo,Y_vvvv) + +end +#+END_SRC + +**** g_occ +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_occ(nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !g_occ = 0d0 + + !do i = 1, nO + ! do u = 1, nO + ! g_occ(u,i) = H_oo(u,i) + ! + ! do a = 1, nV + ! g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a) + ! + ! do j = 1, nO + ! g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + ! enddo + ! + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nO,nO,nV, & + 1d0, t1, size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, g_occ, size(g_occ,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) & + !$omp private(i,j,a,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + H_oo(u,i) + enddo + enddo + !$omp end do + + !$omp do collapse(1) + do i = 1, nO + do j = 1, nO + do a = 1, nV + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end +#+END_SRC + +**** g_vir +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_vir(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !g_vir = 0d0 + + !do beta = 1, nV + ! do a = 1, nV + ! g_vir(a,beta) = H_vv(a,beta) + ! + ! do i = 1, nO + ! g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta) + ! + ! do b = 1, nV + ! g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + ! enddo + ! + ! enddo + ! enddo + !enddo + + call dgemm('N','N',nV,nV,nO, & + -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & + t1 , size(t1,1), & + 0d0, g_vir, size(g_vir,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & + !$omp private(i,b,a,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) + enddo + enddo + !$omp end do + + !$omp do collapse(1) + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end +#+END_SRC + +**** J1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: J1(nO, nV, nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !J1 = 0d0 + + !do i = 1, nO + ! do beta = 1, nV + ! do a = 1, nV + ! do u = 1, nO + ! J1(u,a,beta,i) = cc_space_v_ovvo(u,a,beta,i) + + ! do j = 1, nO + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! - cc_space_v_ovoo(u,a,j,i) * t1(j,beta) + ! enddo + + ! do b = 1, nV + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) + ! enddo + + ! do j = 1, nO + ! do b = 1, nV + ! J1(u,a,beta,i) = J1(u,a,beta,i) & + ! - cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + ! + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) + allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & + !$omp private(i,j,a,u,beta) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = v_ovvo(u,a,beta,i) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do i = 1, nO + do a = 1, nV + do u = 1, nO + X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & + t1 , size(t1,1), & + 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Y_ovov) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(X_ovoo) + + ! v_vvvo(b,a,beta,i) * t1(u,b) + call dgemm('N','N',nO,nV*nV*nO,nV, & + 1d0, t1 , size(t1,1), & + v_vvvo, size(v_vvvo,1), & + 1d0, J1 , size(J1,1)) + + !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) + allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & + !$omp private(i,beta,a,u,b,j) & + !$omp default(none) + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do beta = 1, nV + do u = 1, nO + Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,j,b) = v_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + X_voov, size(X_voov,1) * size(X_voov,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + deallocate(X_voov) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do i = 1, nO + do a = 1, nV + Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,b,j) = t2(u,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do i = 1, nO + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Z_ovvo,Y_ovov) + +end +#+END_SRC + +**** K1 +#+BEGIN_SRC f90 :comments org :tangle ccsd_space_orb_sub.irp.f +subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) + double precision, intent(in) :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: K1(nO, nV, nO, nV) + + double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + !K1 = 0d0 + + !do beta = 1, nV + ! do i = 1, nO + ! do a = 1, nV + ! do u = 1, nO + ! K1(u,a,i,beta) = cc_space_v_ovov(u,a,i,beta) + + ! do j = 1, nO + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! - cc_space_v_ovoo(u,a,i,j) * t1(j,beta) + ! enddo + + ! do b = 1, nV + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) + ! enddo + + ! do j = 1, nO + ! do b = 1, nV + ! K1(u,a,i,beta) = K1(u,a,i,beta) & + ! - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) + ! enddo + ! enddo + ! + ! enddo + ! enddo + ! enddo + !enddo + + allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do i = 1, nO + do a = 1, nV + do j = 1, nO + do b = 1, nV + X(b,j,a,i) = - v_vvoo(b,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do j = 1, nO + do b = 1, nV + do beta = 1, nV + do u = 1, nO + Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & + t1 , size(t1,1), & + 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + + call dgemm('N','N',nO,nV*nO*nV,nV, & + 1d0, t1 , size(t1,1), & + v_vvov, size(v_vvov,1), & + 1d0, K1 , size(K1,1)) + + ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) + call dgemm('N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y, size(Y,1) * size(Y,2), & + X, size(X,1) * size(X,2), & + 0d0, Z, size(Z,1) * size(Z,2)) + + !$omp parallel & + !$omp shared(nO,nV,K1,Z) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do collapse(3) + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X,Y,Z) + +end +#+END_SRC + diff --git a/src/ccsd/org/ccsd_spin_orb.org b/src/ccsd/org/ccsd_spin_orb.org new file mode 100644 index 00000000..8fb403c3 --- /dev/null +++ b/src/ccsd/org/ccsd_spin_orb.org @@ -0,0 +1,2352 @@ +* CCSD spin orb +** Ref +A direct product decomposition approach for symmetry exploitation in manybody +methods. I. Energy calculations +John F. Stanton, Jürgen Gauss, John D. Watts, and Rodney J. Bartlett +The Journal of Chemical Physics 94, 4334 (1991) +http://dx.doi.org/10.1063/1.460620A + +** Prog +#+begin_src f90 :comments org :tangle ccsd_spin_orb.irp.f +program ccsd + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + read_wf = .True. + touch read_wf + + call run_ccsd_spin_orb + +end +#+end_src + +** Code +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine run_ccsd_spin_orb + + implicit none + + BEGIN_DOC + ! CCSD in spin orbitals + END_DOC + + double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:) + double precision, allocatable :: r1(:,:), r2(:,:,:,:) + double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:) + double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:) + + double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:) + double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:) + double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:) + double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:) + double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:) + double precision, allocatable :: v_vvvv(:,:,:,:) + + double precision, allocatable :: all_err(:,:), all_t(:,:) + + logical :: not_converged + integer, allocatable :: list_occ(:,:), list_vir(:,:) + integer :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) + integer :: nb_iter, i,j,a,b + double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi + integer(bit_kind) :: det(N_int,2) + + det = psi_det(:,:,cc_ref) + print*,'Reference determinant:' + call print_det(det,N_int) + + ! Extract number of occ/vir alpha/beta spin orbitals + !call extract_n_spin(det,n_spin) + nOa = cc_nOa !n_spin(1) + nOb = cc_nOb !n_spin(2) + nVa = cc_nVa !n_spin(3) + nVb = cc_nVb !n_spin(4) + + ! Total number of occ/vir spin orb + nO = cc_nOab !nOa + nOb + nV = cc_nVab !nVa + nVb + ! Debug + !print*,nO,nV + + ! Number of occ/vir spin orb per spin + nO_S = cc_nO_S !(/nOa,nOb/) + nV_S = cc_nV_S !(/nVa,nVb/) + ! Debug + !print*,nO_S,nV_S + + ! Maximal number of occ/vir + nO_m = cc_nO_m !max(nOa, nOb) + nV_m = cc_nV_m !max(nVa, nVb) + ! Debug + !print*,nO_m,nV_m + + allocate(list_occ(nO_m,2), list_vir(nV_m,2)) + list_occ = cc_list_occ_spin + list_vir = cc_list_vir_spin + ! Debug + !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) + !print*,list_occ(:,1) + !print*,list_occ(:,2) + !print*,list_vir(:,1) + !print*,list_vir(:,2) + + ! Allocation + allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)) + allocate(r1(nO,nV), r2(nO,nO,nV,nV)) + allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV)) + allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV)) + allocate(v_oooo(nO,nO,nO,nO)) + !allocate(v_vooo(nV,nO,nO,nO)) + allocate(v_ovoo(nO,nV,nO,nO)) + allocate(v_oovo(nO,nO,nV,nO)) + allocate(v_ooov(nO,nO,nO,nV)) + allocate(v_vvoo(nV,nV,nO,nO)) + !allocate(v_vovo(nV,nO,nV,nO)) + !allocate(v_voov(nV,nO,nO,nV)) + allocate(v_ovvo(nO,nV,nV,nO)) + allocate(v_ovov(nO,nV,nO,nV)) + allocate(v_oovv(nO,nO,nV,nV)) + !allocate(v_vvvo(nV,nV,nV,nO)) + !allocate(v_vvov(nV,nV,nO,nV)) + !allocate(v_vovv(nV,nO,nV,nV)) + !allocate(v_ovvv(nO,nV,nV,nV)) + !allocate(v_vvvv(nV,nV,nV,nV)) + allocate(f_o(nO), f_v(nV)) + allocate(f_oo(nO, nO)) + allocate(f_ov(nO, nV)) + allocate(f_vv(nV, nV)) + + ! Allocation for the diis + if (cc_update_method == 'diis') then + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + all_err = 0d0 + all_t = 0d0 + endif + + ! Fock elements + call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo) + call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov) + call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv) + + ! Diag elements + do i = 1, nO + f_o(i) = f_oo(i,i) + enddo + do i = 1, nV + f_v(i) = f_vv(i,i) + enddo + + ! Bi electronic integrals from list + call wall_time(ti) + ! OOOO + call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo) + + ! OOO V + !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo) + call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo) + call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo) + call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov) + + ! OO VV + call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo) + !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo) + !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov) + call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo) + call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov) + call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv) + + ! O VVV + !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo) + !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov) + !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv) + !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv) + + ! VVVV + !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Load bi elec int:',tf-ti,'s' + endif + + ! Init of T + t1 = 0d0 + call guess_t1(nO,nV,f_o,f_v,f_ov,t1) + call guess_t2(nO,nV,f_o,f_v,v_oovv,t2) + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + ! Loop init + nb_iter = 0 + not_converged = .True. + r1 = 0d0 + r2 = 0d0 + max_r1 = 0d0 + max_r2 = 0d0 + + call det_energy(det,uncorr_energy) + print*,'Det energy', uncorr_energy + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + print*,'guess energy', uncorr_energy+energy, energy + + write(*,'(A77)') ' -----------------------------------------------------------------------------' + write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' + write(*,'(A77)') ' -----------------------------------------------------------------------------' + + call wall_time(ta) + + ! Loop + do while (not_converged) + + ! Intermediates + call wall_time(tbi) + call wall_time(ti) + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + call wall_time(ti) + call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + call wall_time(tf) + if (cc_dev) then + print*,'Compute cFs:',tf-ti,'s' + endif + + ! Residuals + call wall_time(ti) + call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r1:',tf-ti,'s' + endif + call wall_time(ti) + call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + call wall_time(tf) + if (cc_dev) then + print*,'Compute r2:',tf-ti,'s' + endif + + ! Max elements in the residuals + max_r1 = maxval(abs(r1(:,:))) + max_r2 = maxval(abs(r2(:,:,:,:))) + max_r = max(max_r1,max_r2) + + call wall_time(ti) + ! Update + if (cc_update_method == 'diis') then + !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + call update_t1(nO,nV,f_o,f_v,r1,t1) + call update_t2(nO,nV,f_o,f_v,r2,t2) + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + + call compute_tau_spin(nO,nV,t1,t2,tau) + call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + call wall_time(tf) + if (cc_dev) then + print*,'Update:',tf-ti,'s' + endif + + ! Print + call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + call wall_time(tfi) + + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + if (cc_dev) then + print*,'Total:',tfi-tbi,'s' + endif + + ! Convergence + nb_iter = nb_iter + 1 + if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then + not_converged = .False. + endif + + enddo + write(*,'(A77)') ' -----------------------------------------------------------------------------' + call wall_time(tb) + print*,'Time: ',tb-ta, ' s' + print*,'' + if (max_r < cc_thresh_conv) then + write(*,'(A30,I6,A11)') ' Successful convergence after ', nb_iter, ' iterations' + else + write(*,'(A26,I6,A11)') ' Failed convergence after ', nb_iter, ' iterations' + endif + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' + write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + print*,'' + + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + + ! Deallocate + if (cc_update_method == 'diis') then + deallocate(all_err,all_t) + endif + deallocate(tau,tau_t) + deallocate(r1,r2) + deallocate(cF_oo,cF_ov,cF_vv) + deallocate(cW_oooo,cW_ovvo)!,cW_vvvv) + deallocate(v_oooo) + deallocate(v_ovoo,v_oovo) + deallocate(v_ovvo,v_ovov,v_oovv) + + if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then + double precision :: t_corr + print*,'CCSD(T) calculation...' + call wall_time(ta) + !allocate(v_vvvo(nV,nV,nV,nO)) + !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + ! cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + ! cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + ! nV,nV,nV,nO, v_vvvo) + + !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) + call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr) + !print*,'Working on it...' + !call abort + call wall_time(tb) + print*,'Done' + print*,'Time: ',tb-ta, ' s' + print*,'' + write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' E(T) = ', t_corr, ' Ha' + write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha' + print*,'' + endif + print*,'Reference determinant:' + call print_det(det,N_int) + + deallocate(f_oo,f_ov,f_vv,f_o,f_v) + deallocate(v_ooov,v_vvoo,t1,t2) + !deallocate(v_ovvv,v_vvvo,v_vovv) + !deallocate(v_vvvv) + +end +#+end_src + +* Energy +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy) + + implicit none + + BEGIN_DOC + ! CCSD energy in spin orbitals + END_DOC + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: energy + + integer :: i,j,a,b + + + energy = 0d0 + + do i=1,nO + do a=1,nV + energy = energy + Fov(i,a) * t1(i,a) + end do + end do + + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + energy = energy & + + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) & + + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b) + end do + end do + end do + end do + +end +#+end_src + +* T +** Update +*** Tau +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_tau_spin(nO,nV,t1,t2,tau) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +*** Tau_t +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_tau_t_spin(nO,nV,t1,t2,tau_t) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + + double precision,intent(out) :: tau_t(nO,nO,nV,nV) + + integer :: i,j,k,l + integer :: a,b,c,d + + !$OMP PARALLEL & + !$OMP SHARED(tau_t,t1,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i=1,nO + do j=1,nO + do a=1,nV + do b=1,nV + tau_t(i,j,a,b) = t2(i,j,a,b) + 0.5d0*(t1(i,a)*t1(j,b) - t1(i,b)*t1(j,a)) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +* R +** R1 +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_ovov(nO,nV,nO,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: r1(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + !double precision, allocatable :: X_vovv(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:) + double precision :: accu + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,Fov,cF_vv,cF_ov, & + !$OMP v_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + r1(i,a) = Fov(i,a) + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + do f=1,nV + do n=1,nO + r1(i,a) = r1(i,a) - t1(n,f)*v_ovov(n,a,i,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! r1(i,a) = r1(i,a) + t1(i,e)*cF_vv(a,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + cF_vv, size(cF_vv,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do a=1,nV + ! do i=1,nO + ! do f=1,nV + ! do e=1,nV + ! do m=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(i,m,e,f)*v_ovvv(m,a,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + + !allocate(X_vovv(nV,nO,nV,nV)) + double precision, allocatable :: v_ovvf(:,:,:), X_vovf(:,:,:) + allocate(v_ovvf(nO,nV,nV),X_vovf(nV,nO,nV)) + + do f = 1, nV + call gen_v_spin_3idx(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovvf) + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + + !$OMP DO collapse(3) + !do f = 1, nV + do e = 1, nV + do m = 1, nO + do a = 1, nV + !X_vovv(a,m,e,f) = v_ovvv(m,a,e,f) + X_vovf(a,m,e) = v_ovvf(m,a,e) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nO, nV, nO*nV, & + -0.5d0, t2(1,1,1,f), size(t2,1), & + X_vovf, size(X_vovf,1), & + 1d0 , r1 , size(r1,1)) + enddo + + !call dgemm('N','T', nO, nV, nO*nV*nV, & + ! -0.5d0, t2 , size(t2,1), & + ! X_vovv, size(X_vovv,1), & + ! 1d0 , r1 , size(r1,1)) + + deallocate(X_vovf) + !deallocate(X_vovv) + allocate(X_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,t2,X_oovv, & + !$OMP f_o,f_v,v_oovo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + + !do a=1,nV + ! do i=1,nO + ! do e=1,nV + ! do m=1,nO + ! do n=1,nO + ! r1(i,a) = r1(i,a) - 0.5d0*t2(m,n,a,e)*v_oovo(n,m,e,i) + ! end do + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(3) + do a = 1, nV + do e = 1, nV + do m = 1, nO + do n = 1, nO + X_oovv(n,m,e,a) = t2(m,n,a,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), & + X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + 1d0 , r1 , size(r1,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,f,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do a = 1, nV + do i = 1, nO + r1(i,a) = (f_o(i)-f_v(a)) * t1(i,a) - r1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oovv) + +end +#+end_src + +** R2 +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: cF_oo(nO,nO) + double precision,intent(in) :: cF_ov(nO,nV) + double precision,intent(in) :: cF_vv(nV,nV) + double precision,intent(in) :: f_o(nO), f_v(nV) + double precision,intent(in) :: cW_oooo(nO,nO,nO,nO) + !double precision,intent(in) :: cW_vvvv(nV,nV,nV,nV) + double precision,intent(in) :: cW_ovvo(nO,nV,nV,nO) + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_ovoo(nO,nV,nO,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_vvvo(nV,nV,nV,nO)!, v_vovv(nV,nO,nV,nV) + + double precision,intent(out) :: r2(nO,nO,nV,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_vvoo(:,:,:,:) + !double precision, allocatable :: A_vvov(:,:,:,:) + double precision, allocatable :: X_oovv(:,:,:,:), Y_oovv(:,:,:,:) + double precision, allocatable :: A_vvoo(:,:,:,:), B_ovoo(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: A_ovov(:,:,:,:), B_ovvo(:,:,:,:), X_ovvo(:,:,:,:) + double precision, allocatable :: A_vv(:,:) + double precision, allocatable :: A_oo(:,:), B_oovv(:,:,:,:) + double precision, allocatable :: A_vbov(:,:,:), X_vboo(:,:,:), v_vbvo(:,:,:) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + ! r2(i,j,a,b) = v_oovv(i,j,a,b) + ! end do + ! end do + ! end do + !end do + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(i,j,a,e)*cF_vv(b,e) + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,j,b,e)*cF_vv(a,e) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(X_oovv(nO,nO,nV,nV)) + call dgemm('N','T',nO*nO*nV, nV, nV, & + 1d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + cF_VV , size(cF_vv,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = v_oovv(i,j,a,b) + X_oovv(i,j,a,b) - X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !deallocate(X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_vv(nV,nV))!, X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(Y_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(Y_oovv)!,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oo,B_oovv,X_oovv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do n=1,nO + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(m,n,a,b)*cW_oooo(m,n,i,j) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nO*nO, nV*nV, nO*nO, & + 0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), & + tau , size(tau,1) * size(tau,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + !call dgemm('N','T', nO*nO, nV*nV, nV*nV, & + ! 0.5d0, tau , size(tau,1) * size(tau,2), & + ! cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), & + ! 1d0 , r2 , size(r2,1) * size(r2,2)) + double precision :: ti,tf + call wall_time(ti) + call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + call wall_time(tf) + if (cc_dev) then + print*,'cW_vvvv:',tf-ti,'s' + endif + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) & + ! + t2(i,m,a,e)*cW_ovvo(m,b,e,j) & + ! - t2(j,m,a,e)*cW_ovvo(m,b,e,i) & + ! - t2(i,m,b,e)*cW_ovvo(m,a,e,j) & + ! + t2(j,m,b,e)*cW_ovvo(m,a,e,i) & + ! - t1(i,e)*t1(m,a)*v_ovvo(m,b,e,j) & + ! + t1(j,e)*t1(m,a)*v_ovvo(m,b,e,i) & + ! + t1(i,e)*t1(m,b)*v_ovvo(m,a,e,j) & + ! - t1(j,e)*t1(m,b)*v_ovvo(m,a,e,i) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_ovov(nO,nV,nO,nV), B_ovvo(nO,nV,nV,nO), X_ovvo(nO,nV,nV,nO)) + !$OMP PARALLEL & + !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do a = 1, nV + do i = 1, nO + do e = 1, nV + do m = 1, nO + A_ovov(m,e,i,a) = t2(i,m,a,e) + end do + end do + end do + end do + !$OMP END DO NOWAIT + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do e = 1, nV + do m = 1, nO + B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nO*nV, & + 1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), & + B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), & + 0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = r2(i,j,a,b) + X_ovvo(i,a,b,j) - X_ovvo(j,a,b,i) & + - X_ovvo(i,b,a,j) + X_ovvo(j,b,a,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovov,B_ovvo,X_ovvo) + allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do m = 1, nO + do j = 1, nO + do b = 1, nV + do e = 1, nV + A_vvoo(e,b,j,m) = v_ovvo(m,b,e,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nV*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + A_vvoo, size(A_vvoo,1), & + 0d0, B_ovoo, size(B_ovoo,1)) + + call dgemm('N','N', nO*nV*nO, nV, nO, & + 1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), & + t1 , size(t1,1), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2) * size(C_ovov,3)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,C_ovov,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - C_ovov(i,b,j,a) + C_ovov(j,b,i,a) & + + C_ovov(i,a,j,b) - C_ovov(j,a,i,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vvoo, B_ovoo, C_ovov) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + t1(i,e)*v_vvvo(a,b,e,j) - t1(j,e)*v_vvvo(a,b,e,i) + ! end do + + ! end do + ! end do + ! end do + !end do + !allocate(A_vvov(nV,nV,nO,nV), X_vvoo(nV,nV,nO,nO)) + allocate(A_vbov(nV,nO,nV), X_vboo(nV,nO,nO), v_vbvo(nV,nV,nO)) + do b = 1, nV + + call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nO, v_vbvo) + + !$OMP PARALLEL & + !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + !do b = 1, nV + do a = 1, nV + !A_vvov(a,b,j,e) = v_vvvo(a,b,e,j) + A_vbov(a,j,e) = v_vbvo(a,e,j) + enddo + !enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','T', nV*nO, nO, nV, & + 1d0, A_vbov, size(A_vbov,1) * size(A_vbov,2), & + t1 , size(t1,1), & + 0d0, X_vboo, size(X_vboo,1) * size(X_vboo,2)) + !call dgemm('N','T', nV*nV*nO, nO, nV, & + ! 1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), & + ! t1 , size(t1,1), & + ! 0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3)) + + !$OMP PARALLEL & + !$OMP SHARED(b,r2,X_vboo,nO,nV) & + !$OMP PRIVATE(i,j,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + !do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, NO + !r2(i,j,a,b ) = r2(i,j,a,b) + X_vvoo(a,b,j,i) - X_vvoo(a,b,i,j) + r2(i,j,a,b) = r2(i,j,a,b) + X_vboo(a,j,i) - X_vboo(a,i,j) + enddo + enddo + enddo + !enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + + !deallocate(A_vvov)!,X_vvoo) + deallocate(A_vbov, X_vboo, v_vbvo) + allocate(X_vvoo(nV,nV,nO,nO)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t1(m,a)*v_ovoo(m,b,i,j) + t1(m,b)*v_ovoo(m,a,i,j) + ! end do + + ! end do + ! end do + ! end do + !end do + !allocate(X_vvoo(nV,nV,nO,nO)) + + call dgemm('T','N', nV, nV*nO*nO, nO, & + 1d0, t1 , size(t1,1), & + v_ovoo, size(v_ovoo,1), & + 0d0, X_vvoo, size(X_vvoo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) & + !$OMP PRIVATE(i,j,a,b,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_vvoo(a,b,i,j) + X_vvoo(b,a,i,j) + end do + end do + end do + end do + !$OMP END DO + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = (f_o(i)+f_o(j)-f_v(a)-f_v(b)) * t2(i,j,a,b) - r2(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_vvoo) + +end +#+end_src + +* Use intermediates +** Use cF_oo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) + + implicit none + + integer,intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV) + double precision, intent(in) :: F_oo(nO,nV), F_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + integer :: i,j,m,a,b + + allocate(cF_oo(nO,nO)) + + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) + + !do a=1,nV + ! do i=1,nO + ! do m=1,nO + ! r1(i,a) = r1(i,a) - t1(m,a)*cF_oo(m,i) + ! end do + ! end do + !end do + call dgemm('T','N', nO, nV, nO, & + -1d0, cF_oo, size(cF_oo,1), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - t2(i,m,a,b)*cF_oo(m,j) + ! r2(i,j,a,b) = r2(i,j,a,b) + t2(j,m,a,b)*cF_oo(m,i) + ! end do + + ! end do + ! end do + ! end do + !end do + + allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + !$OMP PARALLEL & + !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do i=1,nO + do m=1,nO + X_oovv(m,i,a,b) = t2(i,m,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO, nO*nV*nV, nO, & + 1d0, cF_oo , size(cF_oo,1), & + X_oovv, size(X_oovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(cF_oo,X_oovv,Y_oovv) + +end +#+end_src + +** Use cF_ov +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_ov(:,:), A_oo(:,:), A_vv(:,:) + double precision, allocatable :: X_oovv(:,:,:,:), B_oovv(:,:,:,:) + integer :: i,j,a,b,e,m + + allocate(cF_ov(nO,nV)) + + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) + + !$OMP PARALLEL & + !$OMP SHARED(r1,t2,cF_ov,nO,nV) & + !$OMP PRIVATE(i,a,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(1) + do a=1,nV + do i=1,nO + do e=1,nV + do m=1,nO + r1(i,a) = r1(i,a) + t2(i,m,a,e)*cF_ov(m,e) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,j,a,e)*t1(m,b)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(i,j,b,e)*t1(m,a)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_vv(nV,nV), X_oovv(nO,nO,nV,nV)) + call dgemm('T','N', nV, nV, nO, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_vv , size(A_vv,1)) + + call dgemm('N','T', nO*nO*nV, nV, nV, & + 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & + A_vv , size(A_vv,1), & + 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,r2,X_oovv) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_vv) + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do e=1,nV + ! do m=1,nO + ! r2(i,j,a,b) = r2(i,j,a,b) - 0.5d0*t2(i,m,a,b)*t1(j,e)*cF_ov(m,e) + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*t2(j,m,a,b)*t1(i,e)*cF_ov(m,e) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) + + call dgemm('N','T', nO, nO, nV, & + 1d0, t1 , size(t1,1), & + cF_ov, size(cF_ov,1), & + 0d0, A_oo , size(A_oo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(t2,B_oovv,nO,nV) & + !$OMP PRIVATE(i,m,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do i = 1, nO + do m = 1, nO + B_oovv(m,i,a,b) = t2(i,m,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('N','N', nO, nO*nV*nV, nO, & + 0.5d0, A_oo, size(A_oo,1), & + B_oovv, size(B_oovv,1), & + 0d0 , X_oovv, size(X_oovv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(r2,X_oovv,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b=1,nV + do a=1,nV + do j=1,nO + do i=1,nO + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(j,i,a,b) + X_oovv(i,j,a,b) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(cF_ov,A_oo,B_oovv,X_oovv) + +end +#+end_src + +** Use cF_vv +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cF_vv(nO,nV,t1,t2,r1,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, allocatable :: cF_vv(:,:) + integer :: i,j,a,b,e,m + + allocate(cF_vv(nV,nV)) + + !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv) + + deallocate(cF_vv) + +end +#+end_src + +** Use cW_vvvd +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision, intent(in) :: v_vovv(nV,nO,nV,nV) + + double precision, intent(inout) :: r2(nO,nO,nV,nV) + + double precision, allocatable :: cW_vvvf(:,:,:), v_vvvf(:,:,:), tau_f(:,:,:), v_vovf(:,:,:) + integer :: i,j,e,f + double precision :: ti,tf + + allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV)) + + !PROVIDE cc_nVab + + !do b=1,nV + ! do a=1,nV + ! do j=1,nO + ! do i=1,nO + + ! do f=1,nV + ! do e=1,nV + ! r2(i,j,a,b) = r2(i,j,a,b) + 0.5d0*tau(i,j,e,f)*cW_vvvv(a,b,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + do f = 1, nV + call wall_time(ti) + !$OMP PARALLEL & + !$OMP SHARED(tau,tau_f,f,nO,nV) & + !$OMP PRIVATE(i,j,e) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do e = 1, nV + do j = 1, nO + do i = 1, nO + tau_f(i,j,e) = tau(i,j,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'1st transpo', tf-ti + endif + + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nV,nV, v_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vvvf', tf-ti + endif + call wall_time(ti) + call gen_v_spin_3idx(cc_nV_m,cc_nO_m,cc_nV_m,cc_nV_m, f, cc_nV_S,cc_nO_S,cc_nV_S,cc_nV_S, & + cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, & + nV,nO,nV, v_vovf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'vovf', tf-ti + endif + + call wall_time(ti) + call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'cW_vvvf', tf-ti + endif + + call wall_time(ti) + call dgemm('N','T', nO*nO, nV*nV, nV, & + 0.5d0, tau_f , size(tau_f,1) * size(tau_f,2), & + cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2), & + 1d0 , r2 , size(r2,1) * size(r2,2)) + call wall_time(tf) + if (cc_dev .and. f == 1) then + print*,'last dgemm', tf-ti + endif + enddo + + deallocate(cW_vvvf,v_vvvf,v_vovf) + +end +#+end_src + +* Intermediates +** cF +*** cF_oo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Foo(nO,nO) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_oo(nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision,external :: Kronecker_Delta + + !$OMP PARALLEL & + !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) & + !$OMP PRIVATE(i,m,n,e) & + !$OMP DEFAULT(NONE) + + !do i=1,nO + ! do m=1,nO + ! cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i) + ! end do + !end do + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = Foo(m,i) + end do + end do + !$OMP END DO + !$OMP DO + do i = 1, nO + cF_oo(i,i) = 0d0 + end do + !$OMP END DO + + do e=1,nV + do n=1,nO + !$OMP DO collapse(1) + do i=1,nO + do m=1,nO + cF_oo(m,i) = cF_oo(m,i) + t1(n,e)*v_ooov(m,n,i,e) + end do + end do + !$OMP END DO + end do + end do + !$OMP END PARALLEL + + !do i=1,nO + ! do m=1,nO + ! do e=1,nV + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*t1(i,e)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nV,& + 0.5d0, Fov , size(Fov,1), & + t1 , size(t1,1), & + 1d0 , cF_oo, size(cF_oo,1)) + + !do i=1,nO + ! do m=1,nO + ! do f=1,nV + ! do e=1,nV + ! do n=1,nO + ! cF_oo(m,i) = cF_oo(m,i) + 0.5d0*tau_t(i,n,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + call dgemm('N','T', nO, nO, nO*nV*nV, & + 0.5d0, v_oovv, size(v_oovv,1), & + tau_t , size(tau_t,1), & + 1d0 , cF_oo , size(cF_oo,1)) + +end +#+end_src + +*** cF_ov +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: Fov(nO,nV),v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cF_ov(nO,nV) + + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) & + !$OMP PRIVATE(i,a,m,n,e,f) & + !$OMP DEFAULT(NONE) + + !cF_ov = Fov + + !$OMP DO collapse(1) + do e=1,nV + do m=1,nO + cF_ov(m,e) = Fov(m,e) + do f=1,nV + do n=1,nO + cF_ov(m,e) = cF_ov(m,e) + t1(n,f)*v_oovv(m,n,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +*** cF_vv +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: tau_t(nO,nO,nV,nV) + double precision,intent(in) :: Fov(nO,nV) + double precision,intent(in) :: Fvv(nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cF_vv(nV,nV) + + double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:) + integer :: i,j,m,n + integer :: a,b,e,f + + !$OMP PARALLEL & + !$OMP SHARED(cF_vv,Fvv,nO,nV) & + !$OMP PRIVATE(e,a) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do e=1,nV + do a=1,nV + cF_vv(a,e) = Fvv(a,e) + end do + end do + !$OMP END DO + !$OMP DO + do e = 1, nV + cF_vv(e,e) = 0d0 + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*t1(m,a)*Fov(m,e) + ! end do + ! end do + !end do + call dgemm('T','N', nV, nV, nO, & + -0.5d0, t1 , size(t1,1), & + Fov , size(Fov,1), & + 1d0 , cF_vv, size(cF_vv,1)) + + !do e=1,nV + ! do a=1,nV + ! do m=1,nO + ! do f=1,nV + ! cF_vv(a,e) = cF_vv(a,e) + t1(m,f)*v_ovvv(m,a,f,e) + ! end do + ! end do + ! end do + !end do + allocate(v_ovfv(nO,nV,nV),X_ovfv(nO,nV,nV)) + do f = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, f, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovfv) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,v_ovfv,X_ovfv,f) & + !$OMP PRIVATE(m,a,e) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do e = 1, nV + do a = 1, nV + do m = 1, nO + !X_ovfv(m,a,e) = v_ovvv(m,a,f,e) + X_ovfv(m,a,e) = v_ovfv(m,a,e) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv('T', nO, nV*nV, & + !1d0, v_ovvv(:,:,f,:), size(v_ovvv,1), & + 1d0, X_ovfv, size(X_ovfv,1), & + t1(1,f), 1, & + 1d0, cF_vv, 1) + enddo + deallocate(v_ovfv,X_ovfv) + + !do e=1,nV + ! do a=1,nV + ! do f=1,nV + ! do n=1,nO + ! do m=1,nO + ! cF_vv(a,e) = cF_vv(a,e) - 0.5d0*tau_t(m,n,a,f)*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + do f = 1, nV + call dgemm('T','N', nV, nV, nO*nO,& + -0.5d0, tau_t(1,1,1,f) , size(tau_t,1) * size(tau_t,2), & + v_oovv(1,1,1,f), size(v_oovv,1) * size(v_oovv,2), & + 1d0 , cF_vv, size(cF_vv,1)) + enddo + +end +#+end_src + +** cW +*** cW_oooo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oooo(nO,nO,nO,nO) + double precision,intent(in) :: v_ooov(nO,nO,nO,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + + double precision,intent(out) :: cW_oooo(nO,nO,nO,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: X_oooo(:,:,:,:) + + ! oooo block + + !cW_oooo = v_oooo + + !do j=1,nO + ! do i=1,nO + ! do n=1,nO + ! do m=1,nO + + ! do e=1,nV + ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + t1(j,e)*v_ooov(m,n,i,e) - t1(i,e)*v_ooov(m,n,j,e) + ! end do + + ! end do + ! end do + ! end do + !end do + allocate(X_oooo(nO,nO,nO,nO)) + + call dgemm('N','T', nO*nO*nO, nO, nV, & + 1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), & + t1 , size(t1,1), & + 0d0, X_oooo, size(X_oooo,1) * size(X_oooo,1) * size(X_oooo,3)) + !$OMP PARALLEL & + !$OMP SHARED(cW_oooo,v_oooo,X_oooo,nO,nV) & + !$OMP PRIVATE(i,j,m,n) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j=1,nO + do i=1,nO + do n=1,nO + do m=1,nO + cW_oooo(m,n,i,j) = v_oooo(m,n,i,j) + X_oooo(m,n,i,j) - X_oooo(m,n,j,i) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(X_oooo) + + !do m=1,nO + ! do n=1,nO + ! do i=1,nO + ! do j=1,nO + ! + ! do e=1,nV + ! do f=1,nV + ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + call dgemm('N','T', nO*nO, nO*nO, nV*nV, & + 0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), & + tau , size(tau,1) * size(tau,2), & + 1.d0 , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2)) + +end +#+end_src + +*** cW_ovvo +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovo(nO,nO,nV,nO) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_ovvo(nO,nV,nV,nO) + !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) + + double precision,intent(out) :: cW_ovvo(nO,nV,nV,nO) + + integer :: i,j,m,n + integer :: a,b,e,f + double precision, allocatable :: A_oovo(:,:,:,:), B_vovo(:,:,:,:) + double precision, allocatable :: A_voov(:,:,:,:), B_voov(:,:,:,:), C_ovov(:,:,:,:) + double precision, allocatable :: v_ovev(:,:,:), cW_oveo(:,:,:) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,v_ovvo,nO,nV) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do j = 1, nO + do b = 1, nV + do a = 1, nV + do i = 1, nO + cW_ovvo(i,a,b,j) = v_ovvo(i,a,b,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !do m=1,nO + ! do b=1,nV + ! do e=1,nV + ! do j=1,nO + ! do f=1,nV + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + t1(j,f)*v_ovvv(m,b,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + allocate(v_ovev(nO,nV,nV),cW_oveo(nO,nV,nO)) + do e = 1, nV + + call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & + nO,nV,nV, v_ovev) + + call dgemm('N','T', nO*nV, nO, nV, & + 1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), & + t1 , size(t1,1), & + 0.d0, cW_oveo, size(cW_oveo,1) * size(cW_oveo,2)) + !$OMP PARALLEL & + !$OMP SHARED(e,cW_ovvo,cW_oveo,nO,nV) & + !$OMP PRIVATE(m,b,j) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do j = 1, nO + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) + cW_oveo(m,b,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + deallocate(v_ovev,cW_oveo) + !call dgemm('N','T', nO*nV*nV, nO, nV, & + ! 1.d0, v_ovvv , size(v_ovvv,1) * size(v_ovvv,2) * size(v_ovvv,3), & + ! t1 , size(t1,1), & + ! 1.d0, cW_ovvo, size(cW_ovvo,1) * size(cW_ovvo,2) * size(cW_ovvo,3)) + + !do j=1,nO + ! do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - t1(n,b)*v_oovo(m,n,e,j) + ! end do + ! end do + ! end do + ! end do + !end do + + allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO)) + + !$OMP PARALLEL & + !$OMP SHARED(A_oovo,v_oovo,nO,nV) & + !$OMP PRIVATE(j,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do m=1,nO + do n=1,nO + A_oovo(n,m,e,j) = v_oovo(m,n,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nO*nV*nO, nO, & + 1d0, t1 , size(t1,1), & + A_oovo, size(A_oovo,1), & + 0d0, B_vovo, size(B_vovo,1)) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j=1,nO + do e=1,nV + do b=1,nV + do m=1,nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - B_vovo(b,m,e,j) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + deallocate(A_oovo,B_vovo) + + !do j=1,nO + ! do e=1,nV + ! do b=1,nV + ! do m=1,nO + ! do f=1,nV + ! do n=1,nO + ! cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) & + ! - ( 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) )*v_oovv(m,n,e,f) + ! end do + ! end do + ! end do + ! end do + ! end do + !end do + allocate(A_voov(nV,nO,nO,nV), B_voov(nV,nO,nO,nV), C_ovov(nO,nV,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) & + !$OMP PRIVATE(f,n,m,e,j,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do b = 1, nV + do j = 1, nO + do n = 1, nO + do f = 1, nV + A_voov(f,n,j,b) = 0.5d0*t2(j,n,f,b) + t1(j,f)*t1(n,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP DO collapse(3) + do e = 1, nV + do m = 1, nO + do n = 1, nO + do f = 1, nV + B_voov(f,n,m,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nO*nV, nV*nO, nV*nO, & + 1d0, A_voov, size(A_voov,1) * size(A_voov,2), & + B_voov, size(B_voov,1) * size(B_voov,2), & + 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2)) + + deallocate(A_voov,B_voov) + + !$OMP PARALLEL & + !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) & + !$OMP PRIVATE(j,e,m,b) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do j = 1, nO + do e = 1, nV + do b = 1, nV + do m = 1, nO + cW_ovvo(m,b,e,j) = cW_ovvo(m,b,e,j) - C_ovov(j,b,m,e) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(C_ovov) + +end +#+end_src + +*** cW_vvvv +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + + implicit none + + integer,intent(in) :: nO,nV + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovv(nV,nO,nV,nV) + double precision,intent(in) :: v_vvvv(nV,nV,nV,nV) + + double precision,intent(out) :: cW_vvvv(nV,nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e,f + double precision, allocatable :: A_ovvv(:,:,:,:), B_vvvv(:,:,:,:) + + allocate(A_ovvv(nO,nV,nV,nV), B_vvvv(nV,nV,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,A_ovvv,v_vovv,v_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do d = 1, nV + do c = 1, nV + do b = 1, nV + do a = 1, nV + cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do a=1,nV + do m=1,nO + A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvv, size(A_ovvv,1), & + 0d0, B_vvvv, size(B_vvvv,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvv,B_vvvv) & + !$OMP PRIVATE(a,b,c,d,e,f,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do f=1,nV + do e=1,nV + do b=1,nV + do a=1,nV + cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + deallocate(A_ovvv,B_vvvv) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + call dgemm('T','N', nV*nV, nV*nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovv , size(v_oovv,1) * size(v_oovv,2), & + 1.d0 , cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2)) + +end +#+end_src + +*** cW_vvvf +#+begin_src f90 :comments org :tangle ccsd_spin_orb_sub.irp.f +subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) + + implicit none + + integer,intent(in) :: nO,nV,f + double precision,intent(in) :: t1(nO,nV) + double precision,intent(in) :: t2(nO,nO,nV,nV) + double precision,intent(in) :: tau(nO,nO,nV,nV) + double precision,intent(in) :: v_oovv(nO,nO,nV,nV) + double precision,intent(in) :: v_vovf(nV,nO,nV) + double precision,intent(in) :: v_vvvf(nV,nV,nV) + + double precision,intent(out) :: cW_vvvf(nV,nV,nV) + + integer :: i,j,m,n + integer :: a,b,c,d,e + double precision, allocatable :: A_ovvf(:,:,:), B_vvvf(:,:,:), v_oovf(:,:,:) + double precision :: ti,tf + + allocate(A_ovvf(nO,nV,nV), B_vvvf(nV,nV,nV)) + allocate(v_oovf(nO,nO,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) & + !$OMP PRIVATE(a,b,c,d,e,m) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !cW_vvvv(a,b,c,d) = v_vvvv(a,b,c,d) + cW_vvvf(a,b,c) = v_vvvf(a,b,c) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !do f=1,nV + ! do e=1,nV + ! do b=1,nV + ! do a=1,nV + ! do m=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - t1(m,b)*v_vovv(a,m,e,f) + t1(m,a)*v_vovv(b,m,e,f) + ! end do + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(2) + do e=1,nV + do a=1,nV + do m=1,nO + !A_ovvv(m,a,e,f) = v_vovv(a,m,e,f) + !A_ovvf(m,a,e) = v_vovv(a,m,e,f) + A_ovvf(m,a,e) = v_vovf(a,m,e) + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV, nV*nV, nO, & + 1d0, t1 , size(t1,1), & + A_ovvf, size(A_ovvf,1), & + 0d0, B_vvvf, size(B_vvvf,1)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) & + !$OMP PRIVATE(a,b,c,d,e,m,n) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do e=1,nV + do b=1,nV + do a=1,nV + !cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) - B_vvvv(b,a,e,f) + B_vvvv(a,b,e,f) + cW_vvvf(a,b,e) = cW_vvvf(a,b,e) - B_vvvf(b,a,e) + B_vvvf(a,b,e) + end do + end do + end do + !$OMP END DO NOWAIT + + !deallocate(A_ovvf,B_vvvf) + + !do a=1,nV + ! do b=1,nV + ! do e=1,nV + ! do f=1,nV + ! + ! do m=1,nO + ! do n=1,nO + ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) + ! end do + ! end do + + ! end do + ! end do + ! end do + !end do + + !$OMP DO collapse(2) + do e = 1, nV + do n = 1, nO + do m = 1, nO + v_oovf(m,n,e) = v_oovv(m,n,e,f) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', nV*nV, nV, nO*nO, & + 0.25d0, tau , size(tau,1) * size(tau,2), & + v_oovf , size(v_oovf,1) * size(v_oovf,2), & + 1.d0 , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2)) + + deallocate(v_oovf) + deallocate(A_ovvf,B_vvvf) + +end +#+end_src + diff --git a/src/ccsd/org/ccsd_t_space_orb.org b/src/ccsd/org/ccsd_t_space_orb.org new file mode 100644 index 00000000..8709d7be --- /dev/null +++ b/src/ccsd/org/ccsd_t_space_orb.org @@ -0,0 +1,428 @@ +Ref: +Integral-Direct and Parallel Implementation of the CCSD(T) Method: +Algorithmic Developments and Large-Scale Applications +László Gyevi-Nagy, Mihály Kállay, and Péter R. Nagy +J. Chem. Theory Comput. 2020, 16, 1, 366–384 +https://doi.org/10.1021/acs.jctc.9b00957 + +* Dumb way +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + integer :: i,j,k,a,b,c + + allocate(W(nO,nO,nO,nV,nV,nV)) + allocate(V(nO,nO,nO,nV,nV,nV)) + + call form_w(nO,nV,t2,W) + call form_v(nO,nV,t1,W,V) + + energy = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + energy = energy / 3d0 + + deallocate(V,W) +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine form_w(nO,nV,t2,W) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV) + + integer :: i,j,k,l,a,b,c,d + + W = 0d0 + do c = 1, nV + print*,'W:',c,'/',nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + + do d = 1, nV + W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + ! chem (bd|ai) + ! phys + + cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & + + cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj + + cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik + + cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij + + cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj + + cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik + enddo + + do l = 1, nO + W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + ! chem (ck|jl) + ! phys + - cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & + - cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj + - cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik + - cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij + - cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj + - cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik + enddo + + enddo + enddo + enddo + enddo + enddo + enddo + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine form_v(nO,nV,t1,w,v) + +implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: W(nO, nO, nO, nV, nV, nV) + double precision, intent(out) :: V(nO, nO, nO, nV, nV, nV) + + integer :: i,j,k,a,b,c + + V = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + + cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + + cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + + cc_space_v_vvoo(a,b,i,j) * t1(k,c) + enddo + enddo + enddo + enddo + enddo + enddo + +end +#+END_SRC + +* Better way +** Main +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:) + double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) + double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb, delta, delta_ijk + + !allocate(W(nV,nV,nV,nO,nO,nO)) + !allocate(V(nV,nV,nV,nO,nO,nO)) + allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV)) + allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) + allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + + ! Temporary arrays + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + + !$OMP DO collapse(3) + do i = 1, nO + do a = 1, nV + do b = 1, nV + do d = 1, nV + X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do d = 1, nV + T_vvoo(d,c,k,j) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do c = 1, nV + do l = 1, nO + X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do l = 1, nO + T_ovvo(l,a,b,i) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vvoo(b,c,j,k) * t1(i,a) & + !X_vvoo(b,c,k,j) * T1_vo(a,i) & + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do b = 1, nV + X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(1) + do i = 1, nO + do a = 1, nV + T_vo(a,i) = t1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(ta) + energy = 0d0 + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta_ijk = f_o(i) + f_o(j) + f_o(k) + call form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_ijk) + call form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,W_ijk,V_ijk) + !$OMP PARALLEL & + !$OMP SHARED(energy,nV,i,j,k,W_ijk,V_ijk,f_o,f_v,delta_ijk) & + !$OMP PRIVATE(a,b,c,e,delta) & + !$OMP DEFAULT(NONE) + e = 0d0 + !$OMP DO + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta = 1d0 / (delta_ijk - f_v(a) - f_v(b) - f_v(c)) + !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + e = e + (4d0 * W_ijk(a,b,c) + W_ijk(b,c,a) + W_ijk(c,a,b)) & + * (V_ijk(a,b,c) - V_ijk(c,b,a)) * delta + enddo + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + !$OMP END PARALLEL + enddo + enddo + call wall_time(tb) + write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' + enddo + + energy = energy / 3d0 + + deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + !deallocate(V,W) +end +#+END_SRC + +** W_ijk +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) + + implicit none + + integer, intent(in) :: nO,nV,i,j,k + !double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) + double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO) + + integer :: l,a,b,c,d + + !W = 0d0 + !do i = 1, nO + ! do j = 1, nO + ! do k = 1, nO + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & + !$OMP PRIVATE(a,b,c,d,l) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + W(a,b,c) = 0d0 + + do d = 1, nV + !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + W(a,b,c) = W(a,b,c) & + ! chem (bd|ai) + ! phys + !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj + !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik + !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij + !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj + !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik + + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & + + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj + + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik + + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij + + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj + + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + + do l = 1, nO + !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & + W(a,b,c) = W(a,b,c) & + ! chem (ck|jl) + ! phys + !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & + !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj + !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik + !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij + !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj + !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik + - X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + - X_ovoo(l,b,k,j) * T_ovvo(l,a,c,i) & ! bc kj + - X_ovoo(l,b,i,j) * T_ovvo(l,c,a,k) & ! prev ac ik + - X_ovoo(l,a,j,i) * T_ovvo(l,c,b,k) & ! prev ab ij + - X_ovoo(l,a,k,i) * T_ovvo(l,b,c,j) & ! prev bc kj + - X_ovoo(l,c,i,k) * T_ovvo(l,b,a,j) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end +#+END_SRC + +** V_ijk +#+BEGIN_SRC f90 :comments org :tangle ccsd_t_space_orb.irp.f +subroutine form_v_ijk(nO,nV,i,j,k,T_vo,X_vvoo,w,v) + +implicit none + + integer, intent(in) :: nO,nV,i,j,k + !double precision, intent(in) :: t1(nO,nV) + double precision, intent(in) :: T_vo(nV,nO) + double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: W(nV,nV,nV)!,nO,nO,nO) + double precision, intent(out) :: V(nV,nV,nV)!,nO,nO,nO) + + integer :: a,b,c + + !V = 0d0 + !do i = 1, nO + ! do j = 1, nO + ! do k = 1, nO + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) & + !$OMP PRIVATE(a,b,c) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do c = 1, nV + do b = 1, nV + do a = 1, nV + !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + V(a,b,c) = W(a,b,c) & + !+ cc_space_v_vvoo(b,c,j,k) * t1(i,a) & + !+ cc_space_v_vvoo(a,c,i,k) * t1(j,b) & + !+ cc_space_v_vvoo(a,b,i,j) * t1(k,c) + + X_vvoo(b,c,k,j) * T_vo(a,i) & + + X_vvoo(a,c,k,i) * T_vo(b,j) & + + X_vvoo(a,b,j,i) * T_vo(c,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! enddo + ! enddo + !enddo + +end +#+END_SRC diff --git a/src/ccsd/org/ccsd_t_spin_orb.org b/src/ccsd/org/ccsd_t_spin_orb.org new file mode 100644 index 00000000..c9a41abd --- /dev/null +++ b/src/ccsd/org/ccsd_t_spin_orb.org @@ -0,0 +1,385 @@ +* CCSD(T) spin orb + +Ref: +John D. Watts, Jürgen Gauss, and Rodney J. Bartlett +J. Chem. Phys. 98, 8718 (1993) +http://dx.doi.org/10.1063/1.464480 + +** v1 +#+begin_src f90 :comments org :tangle ccsd_t_spin_orb.irp.f +subroutine ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_vvvo(nV,nV,nV,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3(:,:,:,:,:,:), s(:,:) + double precision :: e_t, e_st, e_dt, delta_abc, delta + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3(nO,nO,nO,nV,nV,nV), s(nO,nV)) + + t3 = 0d0 + + ! T3 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + do e = 1, nV + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(j,k,a,e) * v_vvvo(b,c,e,i) & + - t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + - t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + - t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + - t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + + t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + + t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + + t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + + t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + enddo + do m = 1, nO + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) & + + t2(m,i,b,c) * v_ooov(j,k,m,a) & + - t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + - t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + - t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + - t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + + t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + enddo + t3(i,j,k,a,b,c) = t3(i,j,k,a,b,c) * (1d0 / delta) + enddo + enddo + enddo + enddo + enddo + enddo + + + ! E_T + e_t = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t = e_t + t3(i,j,k,a,b,c) * delta * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_t = e_t / 36d0 + + ! E_ST + s = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + + e_st = 0d0 + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + ! E_DT + e_dt = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt = e_dt + t2(i,j,a,b) * f_ov(k,c) * t3(i,j,k,a,b,c) + enddo + enddo + enddo + enddo + enddo + enddo + e_dt = e_dt * 0.25d0 + + ! (T) + !print*,e_t,e_st,e_dt + energy = e_t + e_st + e_dt + + deallocate(t3,s) + +end +#+end_src + +** v2 +#+begin_src f90 :comments org :tangle ccsd_t_spin_orb.irp.f +subroutine ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + double precision, intent(in) :: v_ooov(nO,nO,nO,nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: t3_bc(:,:,:,:), s(:,:), e_t(:), e_dt(:) + double precision, allocatable :: A_vovv(:,:,:,:), v_vvvo(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), B_ooov(:,:,:,:) + double precision :: e_st, delta_abc, delta, ta, tb + integer :: i,j,k,l,m,a,b,c,d,e + + allocate(t3_bc(nO,nO,nO,nV), s(nO,nV), e_t(nV), e_dt(nV)) + allocate(A_vovv(nV,nO,nV,nV),v_vvvo(nV,nV,nV,nO),T_voov(nV,nO,nO,nV),B_ooov(nO,nO,nO,nV)) + + call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & + cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + nV,nV,nV,nO, v_vvvo) + + ! Init + s = 0d0 + e_t = 0d0 + e_st = 0d0 + e_dt = 0d0 + + call wall_time(ta) + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,m,a,b,c,e) & + !$OMP SHARED(A_vovv,ta,tb,t3_bc,s,e_t,e_st,e_dt,t2,v_vvvo,v_ooov, & + !$OMP v_vvoo,f_o,f_v,f_ov,delta,delta_abc,nO,nV,T_voov,B_ooov) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do c = 1, nV + do b = 1, nV + do i = 1, nO + do e = 1, nV + A_vovv(e,i,b,c) = v_vvvo(b,c,e,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do e = 1, nV + T_voov(e,j,k,a) = t2(j,k,a,e) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do m = 1, nO + B_ooov(m,j,k,a) = v_ooov(j,k,m,a) + enddo + enddo + enddo + enddo + !$omp end do + + do c = 1, nV + do b = 1, nV + + ! T3(:,:,:,:,b,c) + ! Init + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + t3_bc(i,j,k,a) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do e = 1, nV + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(j,k,a,e) * v_vvvo(b,c,e,i) & + !- t2(i,k,a,e) * v_vvvo(b,c,e,j) & ! - P(ij) + !- t2(j,i,a,e) * v_vvvo(b,c,e,k) & ! - P(ik) + !- t2(j,k,b,e) * v_vvvo(a,c,e,i) & ! - P(ab) + !- t2(j,k,c,e) * v_vvvo(b,a,e,i) & ! - P(ac) + !+ t2(i,k,b,e) * v_vvvo(a,c,e,j) & ! + P(ij) P(ab) + !+ t2(i,k,c,e) * v_vvvo(b,a,e,j) & ! + P(ij) P(ac) + !+ t2(j,i,b,e) * v_vvvo(a,c,e,k) & ! + P(ik) P(ab) + !+ t2(j,i,c,e) * v_vvvo(b,a,e,k) ! + P(ik) P(ac) + + T_voov(e,j,k,a) * A_vovv(e,i,b,c) & + - T_voov(e,i,k,a) * A_vovv(e,j,b,c) & ! - P(ij) + - T_voov(e,j,i,a) * A_vovv(e,k,b,c) & ! - P(ik) + - T_voov(e,j,k,b) * A_vovv(e,i,a,c) & ! - P(ab) + - T_voov(e,j,k,c) * A_vovv(e,i,b,a) & ! - P(ac) + + T_voov(e,i,k,b) * A_vovv(e,j,a,c) & ! + P(ij) P(ab) + + T_voov(e,i,k,c) * A_vovv(e,j,b,a) & ! + P(ij) P(ac) + + T_voov(e,j,i,b) * A_vovv(e,k,a,c) & ! + P(ik) P(ab) + + T_voov(e,j,i,c) * A_vovv(e,k,b,a) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO collapse(3) + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + do m = 1, nO + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) & + !+ t2(m,i,b,c) * v_ooov(j,k,m,a) & + !- t2(m,j,b,c) * v_ooov(i,k,m,a) & ! - P(ij) + !- t2(m,k,b,c) * v_ooov(j,i,m,a) & ! - P(ik) + !- t2(m,i,a,c) * v_ooov(j,k,m,b) & ! - P(ab) + !- t2(m,i,b,a) * v_ooov(j,k,m,c) & ! - P(ac) + !+ t2(m,j,a,c) * v_ooov(i,k,m,b) & ! + P(ij) P(ab) + !+ t2(m,j,b,a) * v_ooov(i,k,m,c) & ! + P(ij) P(ac) + !+ t2(m,k,a,c) * v_ooov(j,i,m,b) & ! + P(ik) P(ab) + !+ t2(m,k,b,a) * v_ooov(j,i,m,c) ! + P(ik) P(ac) + + t2(m,i,b,c) * B_ooov(m,j,k,a) & + - t2(m,j,b,c) * B_ooov(m,i,k,a) & ! - P(ij) + - t2(m,k,b,c) * B_ooov(m,j,i,a) & ! - P(ik) + - t2(m,i,a,c) * B_ooov(m,j,k,b) & ! - P(ab) + - t2(m,i,b,a) * B_ooov(m,j,k,c) & ! - P(ac) + + t2(m,j,a,c) * B_ooov(m,i,k,b) & ! + P(ij) P(ab) + + t2(m,j,b,a) * B_ooov(m,i,k,c) & ! + P(ij) P(ac) + + t2(m,k,a,c) * B_ooov(m,j,i,b) & ! + P(ik) P(ab) + + t2(m,k,b,a) * B_ooov(m,j,i,c) ! + P(ik) P(ac) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + t3_bc(i,j,k,a) = t3_bc(i,j,k,a) * (1d0 / delta) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! E_T + !$OMP DO + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = f_o(i) + f_o(j) + f_o(k) - delta_abc + e_t(a) = e_t(a) + t3_bc(i,j,k,a) * delta * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_ST + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + s(i,a) = s(i,a) + v_vvoo(b,c,j,k) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + ! E_DT + !$OMP DO + do a = 1, nV + do k = 1, nO + do j = 1, nO + do i = 1, nO + e_dt(a) = e_dt(a) + t2(i,j,a,b) * f_ov(k,c) * t3_bc(i,j,k,a) + enddo + enddo + enddo + enddo + !$OMP END DO + enddo + !$OMP MASTER + call wall_time(tb) + write(*,'(A1,F6.2,A5,F10.2,A2)') ' ', dble(c)/dble(nV)*100d0, '% in ', tb-ta, ' s' + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + do a = 2, nV + e_t(1) = e_t(1) + e_t(a) + enddo + + do a = 2, nV + e_dt(1) = e_dt(1) + e_dt(a) + enddo + + e_t = e_t / 36d0 + + do a = 1, nV + do i = 1, nO + e_st = e_st + s(i,a) * t1(i,a) + enddo + enddo + e_st = e_st * 0.25d0 + + e_dt = e_dt * 0.25d0 + + ! (T) + !print*,e_t(1),e_st,e_dt(1) + energy = e_t(1) + e_st + e_dt(1) + + deallocate(t3_bc,s) + +end +#+end_src diff --git a/src/cipsi/NEED b/src/cipsi/NEED index 85d01f79..89c128ec 100644 --- a/src/cipsi/NEED +++ b/src/cipsi/NEED @@ -1,5 +1,7 @@ +json perturbation zmq mpi iterations csf +mol_properties diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index 6e715531..cf770049 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -16,7 +16,6 @@ subroutine run_cipsi double precision, external :: memory_of_double PROVIDE H_apply_buffer_allocated - N_iter = 1 threshold_generators = 1.d0 SOFT_TOUCH threshold_generators @@ -76,7 +75,6 @@ subroutine run_cipsi ) write(*,'(A)') '--------------------------------------------------------------------------------' - to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) to_select = max(N_states_diag, to_select) if (do_pt2) then @@ -106,9 +104,10 @@ subroutine run_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() - N_iter += 1 + call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit @@ -129,13 +128,13 @@ subroutine run_cipsi if (qp_stop()) exit enddo - if (.not.qp_stop()) then - if (N_det < N_det_max) then - call diagonalize_CI - call save_wavefunction - call save_energy(psi_energy_with_nucl_rep, zeros) - endif - + ! If stopped because N_det > N_det_max, do an extra iteration to compute the PT2 + if ((.not.qp_stop()).and. & + (N_det > N_det_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. & + (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and.& + (correlation_energy_ratio <= correlation_energy_ratio_max) & + ) then if (do_pt2) then call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) @@ -154,10 +153,13 @@ subroutine run_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) call print_summary(psi_energy_with_nucl_rep(1:N_states), & pt2_data, pt2_data_err, N_det,N_configuration,N_states,psi_s2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() + call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) endif call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) end + diff --git a/src/cipsi/environment.irp.f b/src/cipsi/environment.irp.f index 5c0e0820..363b8f1c 100644 --- a/src/cipsi/environment.irp.f +++ b/src/cipsi/environment.irp.f @@ -7,7 +7,9 @@ BEGIN_PROVIDER [ integer, nthreads_pt2 ] character*(32) :: env call getenv('QP_NTHREADS_PT2',env) if (trim(env) /= '') then + call lock_io() read(env,*) nthreads_pt2 + call unlock_io() call write_int(6,nthreads_pt2,'Target number of threads for PT2') endif END_PROVIDER diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 62d7c52c..6f40a809 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -312,9 +312,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d end do deallocate(indices) -! !$OMP CRITICAL -! print *, 'Step1: ', i_generator, preinteresting(0) -! !$OMP END CRITICAL allocate(banned(mo_num, mo_num,2), bannedOrb(mo_num, 2)) allocate (mat(N_states, mo_num, mo_num)) @@ -466,17 +463,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d fullinteresting(sze+1) = i end if end do - allocate (fullminilist (N_int, 2, fullinteresting(0)), & minilist (N_int, 2, interesting(0)) ) -! if(pert_2rdm)then -! allocate(coef_fullminilist_rev(N_states,fullinteresting(0))) -! do i=1,fullinteresting(0) -! do j = 1, N_states -! coef_fullminilist_rev(j,i) = psi_coef_sorted(fullinteresting(i),j) -! enddo -! enddo -! endif do i=1,fullinteresting(0) fullminilist(:,:,i) = psi_det_sorted(:,:,fullinteresting(i)) @@ -524,33 +512,19 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle -! !$OMP CRITICAL -! print *, 'Step3: ', i_generator, h1, interesting(0) -! !$OMP END CRITICAL call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) - -! if(.not.pert_2rdm)then - call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) -! else -! call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0)) -! endif + call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) end if enddo if(s1 /= s2) monoBdo = .false. enddo deallocate(fullminilist,minilist) -! if(pert_2rdm)then -! deallocate(coef_fullminilist_rev) -! endif enddo enddo deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) deallocate(banned, bannedOrb,mat) end subroutine - - - subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) use bitmasks use selection_types @@ -606,18 +580,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! to a determinant of the future. In that case, the determinant will be ! detected as already generated when generating in the future with a ! double excitation. -! -! if (.not.do_singles) then -! if ((h1 == p1) .or. (h2 == p2)) then -! cycle -! endif -! endif -! -! if (.not.do_doubles) then -! if ((h1 /= p1).and.(h2 /= p2)) then -! cycle -! endif -! endif ! ----- if(bannedOrb(p2, s2)) cycle @@ -974,13 +936,10 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere call get_mask_phase(psi_det_sorted(1,1,interesting(i)), phasemask,N_int) if(nt == 4) then -! call get_d2_reference(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then -! call get_d1_reference(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else -! call get_d0_reference(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) end if else if(nt == 4) then @@ -1540,8 +1499,6 @@ subroutine past_d2(banned, p, sp) end if end - - subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index 781fcda6..339f7084 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -15,7 +15,6 @@ subroutine run_stochastic_cipsi double precision, external :: memory_of_double PROVIDE H_apply_buffer_allocated distributed_davidson mo_two_e_integrals_in_map - N_iter = 1 threshold_generators = 1.d0 SOFT_TOUCH threshold_generators @@ -96,9 +95,10 @@ subroutine run_stochastic_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() - N_iter += 1 + call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit @@ -118,13 +118,13 @@ subroutine run_stochastic_cipsi if (qp_stop()) exit enddo - if (.not.qp_stop()) then - if (N_det < N_det_max) then - call diagonalize_CI - call save_wavefunction - call save_energy(psi_energy_with_nucl_rep, zeros) - endif - + ! If stopped because N_det > N_det_max, do an extra iteration to compute the PT2 + if ((.not.qp_stop()).and. & + (N_det > N_det_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. & + (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and.& + (correlation_energy_ratio <= correlation_energy_ratio_max) & + ) then call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) @@ -134,8 +134,10 @@ subroutine run_stochastic_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) call print_summary(psi_energy_with_nucl_rep, & pt2_data , pt2_data_err, N_det, N_configuration, N_states, psi_s2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() + call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) endif call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/src/cipsi/write_cipsi_json.irp.f b/src/cipsi/write_cipsi_json.irp.f new file mode 100644 index 00000000..98a402a2 --- /dev/null +++ b/src/cipsi/write_cipsi_json.irp.f @@ -0,0 +1,53 @@ +subroutine write_cipsi_json(pt2_data, pt2_data_err) + use selection_types + implicit none + BEGIN_DOC +! Writes JSON data for CIPSI runs + END_DOC + type(pt2_type), intent(in) :: pt2_data, pt2_data_err + integer :: i,j,k + + call lock_io + character*(64), allocatable :: fmtk(:) + integer :: N_states_p, N_iter_p + N_states_p = min(N_states,N_det) + N_iter_p = min(N_iter,8) + allocate(fmtk(0:N_iter_p)) + fmtk(:) = '('' '',E22.15,'','')' + fmtk(N_iter_p) = '('' '',E22.15)' + + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_int_fmt) 'n_det', N_det + if (s2_eig) then + write(json_unit, json_int_fmt) 'n_cfg', N_configuration + if (only_expected_s2) then + write(json_unit, json_int_fmt) 'n_csf', N_csf + endif + endif + write(json_unit, json_array_open_fmt) 'states' + do k=1,N_states_p + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_real_fmt) 'energy', psi_energy_with_nucl_rep(k) + write(json_unit, json_real_fmt) 's2', psi_s2(k) + write(json_unit, json_real_fmt) 'pt2', pt2_data % pt2(k) + write(json_unit, json_real_fmt) 'pt2_err', pt2_data_err % pt2(k) + write(json_unit, json_real_fmt) 'rpt2', pt2_data % rpt2(k) + write(json_unit, json_real_fmt) 'rpt2_err', pt2_data_err % rpt2(k) + write(json_unit, json_real_fmt) 'variance', pt2_data % variance(k) + write(json_unit, json_real_fmt) 'variance_err', pt2_data_err % variance(k) + write(json_unit, json_array_open_fmt) 'ex_energy' + do i=2,N_iter_p + write(json_unit, fmtk(i)) extrapolated_energy(i,k) + enddo + write(json_unit, json_array_close_fmtx) + if (k < N_states_p) then + write(json_unit, json_dict_close_fmt) + else + write(json_unit, json_dict_close_fmtx) + endif + enddo + write(json_unit, json_array_close_fmtx) + write(json_unit, json_dict_close_fmt) + deallocate(fmtk) + call unlock_io +end diff --git a/src/cipsi_tc_bi_ortho/NEED b/src/cipsi_tc_bi_ortho/NEED index 4dd1af36..8f05be69 100644 --- a/src/cipsi_tc_bi_ortho/NEED +++ b/src/cipsi_tc_bi_ortho/NEED @@ -1,6 +1,7 @@ +json mpi perturbation zmq -iterations_tc +iterations csf tc_bi_ortho diff --git a/src/cipsi_tc_bi_ortho/cipsi.irp.f b/src/cipsi_tc_bi_ortho/cipsi.irp.f index b1941068..fb907cb3 100644 --- a/src/cipsi_tc_bi_ortho/cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/cipsi.irp.f @@ -64,7 +64,7 @@ subroutine run_cipsi endif if (N_det > N_det_max) then - psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det) + psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det) psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) N_det = N_det_max soft_touch N_det psi_det psi_coef diff --git a/src/cipsi_tc_bi_ortho/pt2.irp.f b/src/cipsi_tc_bi_ortho/pt2.irp.f index e7dca456..13b4dff4 100644 --- a/src/cipsi_tc_bi_ortho/pt2.irp.f +++ b/src/cipsi_tc_bi_ortho/pt2.irp.f @@ -52,7 +52,7 @@ subroutine pt2_tc_bi_ortho ! call routine_save_right if (N_det > N_det_max) then - psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det) + psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det) psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) N_det = N_det_max soft_touch N_det psi_det psi_coef diff --git a/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f index 027b74c5..284b2bc8 100644 --- a/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f +++ b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f @@ -134,7 +134,6 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) PROVIDE psi_det_hii selection_weight pseudo_sym PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max PROVIDE excitation_beta_max excitation_alpha_max excitation_max - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp if (h0_type == 'CFG') then PROVIDE psi_configuration_hii det_to_configuration diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 13e6c510..4c271a4b 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -181,7 +181,6 @@ subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp_tc - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp PROVIDE banned_excitation @@ -464,15 +463,15 @@ subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock do i = 1, fullinteresting(0) do k = 1, N_int - fullminilist(k,1,i) = psi_det_sorted_tc(k,1,fullinteresting(i)) - fullminilist(k,2,i) = psi_det_sorted_tc(k,2,fullinteresting(i)) + fullminilist(k,1,i) = psi_selectors(k,1,fullinteresting(i)) + fullminilist(k,2,i) = psi_selectors(k,2,fullinteresting(i)) enddo enddo do i = 1, interesting(0) do k = 1, N_int - minilist(k,1,i) = psi_det_sorted_tc(k,1,interesting(i)) - minilist(k,2,i) = psi_det_sorted_tc(k,2,interesting(i)) + minilist(k,1,i) = psi_selectors(k,1,interesting(i)) + minilist(k,2,i) = psi_selectors(k,2,interesting(i)) enddo enddo @@ -616,7 +615,6 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere PROVIDE psi_selectors_coef_transp_tc psi_det_sorted_tc - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp mat = 0d0 @@ -628,7 +626,10 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere negMask(i,2) = not(mask(i,2)) end do +! print*,'in selection ' do i = 1, N_sel +! call debug_det(det(1,1,i),N_int) +! print*,i,dabs(psi_selectors_coef_transp_tc(1,2,i) * psi_selectors_coef_transp_tc(1,1,i)) if(interesting(i) < 0) then stop 'prefetch interesting(i) and det(i)' endif @@ -674,9 +675,6 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) end do -! call get_d3_h ( det(1,1,i), bannedOrb, banned, mat , mask, p, sp, psi_selectors_coef_transp_tc (1, interesting(i)) ) -! call get_d3_htc( det(1,1,i), bannedOrb, banned, mat_r, mat_l, mask, p, sp, psi_selectors_rcoef_bi_orth_transp(1, interesting(i)) & -! , psi_selectors_lcoef_bi_orth_transp(1, interesting(i)) ) call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int) call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int) @@ -916,8 +914,29 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d psi_h_alpha = mat_l(istate, p1, p2) alpha_h_psi = mat_r(istate, p1, p2) endif - coef(istate) = alpha_h_psi / delta_E - e_pert(istate) = coef(istate) * psi_h_alpha + val = 4.d0 * psi_h_alpha * alpha_h_psi + tmp = dsqrt(delta_E * delta_E + val) +! if (delta_E < 0.d0) then +! tmp = -tmp +! endif + e_pert(istate) = 0.25 * val / delta_E +! e_pert(istate) = 0.5d0 * (tmp - delta_E) + if(dsqrt(dabs(tmp)).gt.1.d-4.and.dabs(alpha_h_psi).gt.1.d-4)then + coef(istate) = e_pert(istate) / psi_h_alpha + else + coef(istate) = alpha_h_psi / delta_E + endif + + if(selection_tc == 1)then + if(e_pert(istate).lt.0.d0)then + e_pert(istate)=0.d0 + else + e_pert(istate)=-e_pert(istate) + endif + else if(selection_tc == -1)then + if(e_pert(istate).gt.0.d0)e_pert(istate)=0.d0 + endif + ! if(selection_tc == 1 )then ! if(e_pert(istate).lt.0.d0)then ! e_pert(istate) = 0.d0 @@ -930,7 +949,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d enddo - do_diag = sum(dabs(coef)) > 0.001d0 .and. N_states > 1 do istate = 1, N_states diff --git a/src/cipsi_tc_bi_ortho/selection_buffer.irp.f b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f index 10132086..0bd51464 100644 --- a/src/cipsi_tc_bi_ortho/selection_buffer.irp.f +++ b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f @@ -125,7 +125,11 @@ subroutine merge_selection_buffers(b1, b2) enddo b2%det => detmp b2%val => val - b2%mini = min(b2%mini,b2%val(b2%N)) +! if(selection_tc == 1)then +! b2%mini = max(b2%mini,b2%val(b2%N)) +! else + b2%mini = min(b2%mini,b2%val(b2%N)) +! endif b2%cur = nmwen end @@ -157,7 +161,11 @@ subroutine sort_selection_buffer(b) end do deallocate(b%det,iorder) b%det => detmp - b%mini = min(b%mini,b%val(b%N)) +! if(selection_tc == 1)then +! b%mini = max(b%mini,b%val(b%N)) +! else + b%mini = min(b%mini,b%val(b%N)) +! endif b%cur = nmwen end subroutine diff --git a/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f index c3a49280..6343bf8b 100644 --- a/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f @@ -17,7 +17,6 @@ end subroutine provide_everything PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp PROVIDE pt2_e0_denominator mo_num N_int ci_energy mpi_master zmq_state zmq_context PROVIDE psi_det psi_coef threshold_generators state_average_weight @@ -312,7 +311,6 @@ subroutine run_slave_main PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted_tc - PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp PROVIDE psi_det_hii selection_weight pseudo_sym pt2_min_parallel_tasks diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index c1e4af0c..a06f28e9 100644 --- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -54,7 +54,7 @@ subroutine run_stochastic_cipsi ! if (N_det > N_det_max) then -! psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det) +! psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det) ! psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) ! N_det = N_det_max ! soft_touch N_det psi_det psi_coef @@ -78,6 +78,8 @@ subroutine run_stochastic_cipsi (N_det < N_det_max) .and. & (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) & ) + print*,'maxval(abs(pt2_data % pt2(1:N_states)))',maxval(abs(pt2_data % pt2(1:N_states))) + print*,pt2_max write(*,'(A)') '--------------------------------------------------------------------------------' @@ -92,7 +94,15 @@ subroutine run_stochastic_cipsi call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection ! stop - N_iter += 1 + call print_summary(psi_energy_with_nucl_rep, & + pt2_data, pt2_data_err, N_det,N_configuration,N_states,psi_s2) + + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) + + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) + call print_extrapolated_energy() +! call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit @@ -106,6 +116,7 @@ subroutine run_stochastic_cipsi ept2(N_iter-1) = E_tc + nuclear_repulsion + (pt2_data % pt2(1))/norm pt1(N_iter-1) = dsqrt(pt2_data % overlap(1,1)) call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) +! stop if (qp_stop()) exit enddo ! print*,'data to extrapolate ' diff --git a/src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f b/src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f new file mode 100644 index 00000000..98a402a2 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f @@ -0,0 +1,53 @@ +subroutine write_cipsi_json(pt2_data, pt2_data_err) + use selection_types + implicit none + BEGIN_DOC +! Writes JSON data for CIPSI runs + END_DOC + type(pt2_type), intent(in) :: pt2_data, pt2_data_err + integer :: i,j,k + + call lock_io + character*(64), allocatable :: fmtk(:) + integer :: N_states_p, N_iter_p + N_states_p = min(N_states,N_det) + N_iter_p = min(N_iter,8) + allocate(fmtk(0:N_iter_p)) + fmtk(:) = '('' '',E22.15,'','')' + fmtk(N_iter_p) = '('' '',E22.15)' + + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_int_fmt) 'n_det', N_det + if (s2_eig) then + write(json_unit, json_int_fmt) 'n_cfg', N_configuration + if (only_expected_s2) then + write(json_unit, json_int_fmt) 'n_csf', N_csf + endif + endif + write(json_unit, json_array_open_fmt) 'states' + do k=1,N_states_p + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_real_fmt) 'energy', psi_energy_with_nucl_rep(k) + write(json_unit, json_real_fmt) 's2', psi_s2(k) + write(json_unit, json_real_fmt) 'pt2', pt2_data % pt2(k) + write(json_unit, json_real_fmt) 'pt2_err', pt2_data_err % pt2(k) + write(json_unit, json_real_fmt) 'rpt2', pt2_data % rpt2(k) + write(json_unit, json_real_fmt) 'rpt2_err', pt2_data_err % rpt2(k) + write(json_unit, json_real_fmt) 'variance', pt2_data % variance(k) + write(json_unit, json_real_fmt) 'variance_err', pt2_data_err % variance(k) + write(json_unit, json_array_open_fmt) 'ex_energy' + do i=2,N_iter_p + write(json_unit, fmtk(i)) extrapolated_energy(i,k) + enddo + write(json_unit, json_array_close_fmtx) + if (k < N_states_p) then + write(json_unit, json_dict_close_fmt) + else + write(json_unit, json_dict_close_fmtx) + endif + enddo + write(json_unit, json_array_close_fmtx) + write(json_unit, json_dict_close_fmt) + deallocate(fmtk) + call unlock_io +end diff --git a/src/cisd/30.cisd.bats b/src/cisd/30.cisd.bats index 69b862b0..6b8fddb6 100644 --- a/src/cisd/30.cisd.bats +++ b/src/cisd/30.cisd.bats @@ -10,6 +10,7 @@ function run() { qp set determinants n_states 2 qp set davidson threshold_davidson 1.e-12 qp set davidson n_states_diag 24 + qp run cis qp run cisd energy1="$(qp get cisd energy | tr '[]' ' ' | cut -d ',' -f 1)" energy2="$(qp get cisd energy | tr '[]' ' ' | cut -d ',' -f 2)" @@ -20,26 +21,31 @@ function run() { @test "B-B" { # qp set_file b2_stretched.ezfio + qp set_frozen_core run -49.120607088648597 -49.055152453388231 } @test "SiH2_3B1" { # 1.53842s 3.53856s qp set_file sih2_3b1.ezfio + qp set_frozen_core run -290.015949171697 -289.805036176618 } @test "HBO" { # 4.42968s 19.6099s qp set_file hbo.ezfio + qp set_frozen_core run -100.2019254455993 -99.79484127741013 } @test "HCO" { # 6.6077s 28.6801s qp set_file hco.ezfio + qp set_frozen_core run -113.39088802205114 -113.22204293108558 } @test "H2O" { # 7.0651s 30.6642s qp set_file h2o.ezfio + qp set_frozen_core run -76.22975602077072 -75.80609108747208 } @@ -50,6 +56,7 @@ function run() { @test "H2S" { # 7.42152s 32.5461s [[ -n $TRAVIS ]] && skip qp set_file h2s.ezfio + qp set_frozen_core run -398.853701416768 -398.519020035337 } @@ -70,6 +77,7 @@ function run() { @test "OH" { # 18.2159s 1.28453m [[ -n $TRAVIS ]] && skip qp set_file oh.ezfio + qp set_frozen_core run -75.6087472926588 -75.5370393736601 } @@ -83,6 +91,7 @@ function run() { @test "SiH3" { # 20.2202s 1.38648m [[ -n $TRAVIS ]] && skip qp set_file sih3.ezfio + qp set_frozen_core run -5.57096611856522 -5.30950347928823 } @@ -103,6 +112,7 @@ function run() { @test "H3COH" { # 24.7248s 1.85043m [[ -n $TRAVIS ]] && skip qp set_file h3coh.ezfio + qp set_frozen_core run -115.204958752377 -114.755913828245 } @@ -117,6 +127,7 @@ function run() { @test "ClF" { # 30.3225s [[ -n $TRAVIS ]] && skip qp set_file clf.ezfio + qp set_frozen_core run -559.162476603880 -558.792395927088 } @@ -130,6 +141,7 @@ function run() { @test "ClO" { # 37.6949s [[ -n $TRAVIS ]] && skip qp set_file clo.ezfio + qp set_frozen_core run -534.5404021326773 -534.3818725793897 } @@ -150,6 +162,7 @@ function run() { @test "SO" { # 51.2476s [[ -n $TRAVIS ]] && skip qp set_file so.ezfio + qp set_frozen_core run -26.0131812819785 -25.7053111980226 } diff --git a/src/cisd/cisd.irp.f b/src/cisd/cisd.irp.f index fca3b10e..5f167686 100644 --- a/src/cisd/cisd.irp.f +++ b/src/cisd/cisd.irp.f @@ -69,7 +69,9 @@ subroutine run do i = 1,N_states k = maxloc(dabs(psi_coef_sorted(1:N_det,i)),dim=1) delta_E = CI_electronic_energy(i) - diag_h_mat_elem(psi_det_sorted(1,1,k),N_int) - cisdq(i) = CI_energy(i) + delta_E * (1.d0 - psi_coef_sorted(k,i)**2) + if (elec_alpha_num + elec_beta_num >= 4) then + cisdq(i) = CI_energy(i) + delta_E * (1.d0 - psi_coef_sorted(k,i)**2) + endif enddo print *, 'N_det = ', N_det print*,'' @@ -78,26 +80,43 @@ subroutine run do i = 1,N_states print *, i, CI_energy(i) enddo - print*,'' - print*,'******************************' - print *, 'CISD+Q Energies' - do i = 1,N_states - print *, i, cisdq(i) - enddo + if (elec_alpha_num + elec_beta_num >= 4) then + print*,'' + print*,'******************************' + print *, 'CISD+Q Energies' + do i = 1,N_states + print *, i, cisdq(i) + enddo + endif if (N_states > 1) then - print*,'' - print*,'******************************' - print*,'Excitation energies (au) (CISD+Q)' - do i = 2, N_states - print*, i ,CI_energy(i) - CI_energy(1), cisdq(i) - cisdq(1) - enddo - print*,'' - print*,'******************************' - print*,'Excitation energies (eV) (CISD+Q)' - do i = 2, N_states - print*, i ,(CI_energy(i) - CI_energy(1))/0.0367502d0, & - (cisdq(i) - cisdq(1)) / 0.0367502d0 - enddo + if (elec_alpha_num + elec_beta_num >= 4) then + print*,'' + print*,'******************************' + print*,'Excitation energies (au) (CISD+Q)' + do i = 2, N_states + print*, i ,CI_energy(i) - CI_energy(1), cisdq(i) - cisdq(1) + enddo + print*,'' + print*,'******************************' + print*,'Excitation energies (eV) (CISD+Q)' + do i = 2, N_states + print*, i ,(CI_energy(i) - CI_energy(1)) * ha_to_ev, & + (cisdq(i) - cisdq(1)) * ha_to_ev + enddo + else + print*,'' + print*,'******************************' + print*,'Excitation energies (au) (CISD)' + do i = 2, N_states + print*, i ,CI_energy(i) - CI_energy(1) + enddo + print*,'' + print*,'******************************' + print*,'Excitation energies (eV) (CISD)' + do i = 2, N_states + print*, i ,(CI_energy(i) - CI_energy(1)) * ha_to_ev + enddo + endif endif end diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f index e627dfc9..399ab11b 100644 --- a/src/davidson/davidson_parallel.irp.f +++ b/src/davidson/davidson_parallel.irp.f @@ -150,7 +150,9 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, exit endif if(task_id == 0) exit + call lock_io() read (msg,*) imin, imax, ishift, istep + call unlock_io() integer :: k do k=imin,imax v_t(:,k) = 0.d0 @@ -555,7 +557,9 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ] character*(32) :: env call getenv('QP_NTHREADS_DAVIDSON',env) if (trim(env) /= '') then + call lock_io() read(env,*) nthreads_davidson + call unlock_io() call write_int(6,nthreads_davidson,'Target number of threads for ') endif END_PROVIDER diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index d37b7386..346f1cf9 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -466,6 +466,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ double precision, allocatable :: work(:) y = h +! y = h_p ! Doesn't work for non-singlets lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & diff --git a/src/davidson/input.irp.f b/src/davidson/input.irp.f index aba88ae9..b37c87d0 100644 --- a/src/davidson/input.irp.f +++ b/src/davidson/input.irp.f @@ -30,10 +30,5 @@ BEGIN_PROVIDER [ integer, n_states_diag ] endif IRP_ENDIF - call write_time(6) - if (mpi_master) then - write(6, *) 'Read n_states_diag' - endif - END_PROVIDER diff --git a/src/determinants/h_apply.irp.f b/src/determinants/h_apply.irp.f index d01ad1c7..078c2104 100644 --- a/src/determinants/h_apply.irp.f +++ b/src/determinants/h_apply.irp.f @@ -69,9 +69,15 @@ subroutine resize_H_apply_buffer(new_size,iproc) END_DOC PROVIDE H_apply_buffer_allocated + ASSERT (new_size > 0) ASSERT (iproc >= 0) ASSERT (iproc < nproc) + if (N_det < 0) call abort() !irp_here//': N_det < 0') + if (N_int <= 0) call abort() !irp_here//': N_int <= 0') + if (new_size <= 0) call abort() !irp_here//': new_size <= 0') + if (iproc < 0) call abort() !irp_here//': iproc < 0') + if (iproc >= nproc) call abort() !irp_here//': iproc >= nproc') allocate ( buffer_det(N_int,2,new_size), & buffer_coef(new_size,N_states), & @@ -126,31 +132,34 @@ subroutine copy_H_apply_buffer_to_wf ASSERT (N_int > 0) - ASSERT (N_det > 0) + ASSERT (N_det >= 0) - allocate ( buffer_det(N_int,2,N_det), buffer_coef(N_det,N_states) ) + N_det_old = N_det + if (N_det > 0) then + allocate ( buffer_det(N_int,2,N_det), buffer_coef(N_det,N_states) ) - ! Backup determinants - j=0 - do i=1,N_det - if (pruned(i)) cycle ! Pruned determinants - j+=1 - ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) - ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num) - buffer_det(:,:,j) = psi_det(:,:,i) - enddo - N_det_old = j + ! Backup determinants + j=0 + do i=1,N_det + if (pruned(i)) cycle ! Pruned determinants + j+=1 + ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) + ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num) + buffer_det(:,:,j) = psi_det(:,:,i) + enddo + N_det_old = j - ! Backup coefficients - do k=1,N_states - j=0 - do i=1,N_det - if (pruned(i)) cycle ! Pruned determinants - j += 1 - buffer_coef(j,k) = psi_coef(i,k) - enddo - ASSERT ( j == N_det_old ) - enddo + ! Backup coefficients + do k=1,N_states + j=0 + do i=1,N_det + if (pruned(i)) cycle ! Pruned determinants + j += 1 + buffer_coef(j,k) = psi_coef(i,k) + enddo + ASSERT ( j == N_det_old ) + enddo + endif ! Update N_det N_det = N_det_old @@ -164,17 +173,19 @@ subroutine copy_H_apply_buffer_to_wf TOUCH psi_det_size endif - ! Restore backup in resized array - do i=1,N_det_old - psi_det(:,:,i) = buffer_det(:,:,i) - ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) - ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num ) - enddo - do k=1,N_states + if (N_det_old > 0) then + ! Restore backup in resized array do i=1,N_det_old - psi_coef(i,k) = buffer_coef(i,k) + psi_det(:,:,i) = buffer_det(:,:,i) + ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) + ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num ) enddo - enddo + do k=1,N_states + do i=1,N_det_old + psi_coef(i,k) = buffer_coef(i,k) + enddo + enddo + endif ! Copy new buffers @@ -339,3 +350,33 @@ subroutine fill_H_apply_buffer_no_selection(n_selected,det_buffer,Nint,iproc) call omp_unset_lock(H_apply_buffer_lock(1,iproc)) end + +subroutine replace_wf(N_det_new, LDA, psi_coef_new, psi_det_new) + use omp_lib + implicit none + BEGIN_DOC +! Replaces the wave function. +! After calling this subroutine, N_det, psi_det and psi_coef need to be touched + END_DOC + integer, intent(in) :: N_det_new, LDA + double precision, intent(in) :: psi_coef_new(LDA,N_states) + integer(bit_kind), intent(in) :: psi_det_new(N_int,2,N_det_new) + + integer :: i,j + + PROVIDE H_apply_buffer_allocated + + if (N_det_new <= 0) call abort() !irp_here//': N_det_new <= 0') + if (N_int <= 0) call abort() !irp_here//': N_int <= 0') + if (LDA < N_det_new) call abort() !irp_here//': LDA < N_det_new') + + do j=0,nproc-1 + H_apply_buffer(j)%N_det = 0 + enddo + N_det = 0 + SOFT_TOUCH N_det + call fill_H_apply_buffer_no_selection(N_det_new,psi_det_new,N_int,0) + call copy_h_apply_buffer_to_wf + psi_coef(1:N_det_new,1:N_states) = psi_coef_new(1:N_det_new,1:N_states) + +end diff --git a/src/determinants/tr_density_matrix.irp.f b/src/determinants/tr_density_matrix.irp.f new file mode 100644 index 00000000..1e94edcb --- /dev/null +++ b/src/determinants/tr_density_matrix.irp.f @@ -0,0 +1,313 @@ +BEGIN_PROVIDER [double precision, one_e_tr_dm_mo, (mo_num, mo_num, N_states, N_states)] + + implicit none + + BEGIN_DOC + ! One body transition density matrix for all pairs of states n and m, < Psi^n | a_i^\dagger a_a | Psi^m > + END_DOC + + integer :: j,k,l,m,k_a,k_b,n + integer :: occ(N_int*bit_kind_size,2) + double precision :: ck, cl, ckl + double precision :: phase + integer :: h1,h2,p1,p2,s1,s2, degree + integer(bit_kind) :: tmp_det(N_int,2), tmp_det2(N_int) + integer :: exc(0:2,2),n_occ(2) + double precision, allocatable :: tmp_a(:,:,:,:), tmp_b(:,:,:,:) + integer :: krow, kcol, lrow, lcol + + PROVIDE psi_det + + one_e_tr_dm_mo = 0d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,k_a,k_b,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc,& + !$OMP tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)& + !$OMP SHARED(psi_det,psi_coef,N_int,N_states,elec_alpha_num, & + !$OMP elec_beta_num,one_e_tr_dm_mo,N_det,& + !$OMP mo_num,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns,& + !$OMP psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns,& + !$OMP psi_bilinear_matrix_order_reverse, psi_det_alpha_unique, psi_det_beta_unique,& + !$OMP psi_bilinear_matrix_values, psi_bilinear_matrix_transp_values,& + !$OMP N_det_alpha_unique,N_det_beta_unique,irp_here) + allocate(tmp_a(mo_num,mo_num,N_states,N_states), tmp_b(mo_num,mo_num,N_states,N_states) ) + tmp_a = 0.d0 + !$OMP DO SCHEDULE(dynamic,64) + do k_a=1,N_det + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + ! ------------- + + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + do m=1,N_states + do n = 1, N_states + ck = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(k_a,n) + do l=1,elec_alpha_num + j = occ(l,1) + tmp_a(j,j,m,n) += ck + enddo + enddo + enddo + + if (k_a == N_det) cycle + l = k_a+1 + lrow = psi_bilinear_matrix_rows(l) + lcol = psi_bilinear_matrix_columns(l) + ! Fix beta determinant, loop over alphas + do while ( lcol == kcol ) + tmp_det2(:) = psi_det_alpha_unique(:, lrow) + call get_excitation_degree_spin(tmp_det(1,1),tmp_det2,degree,N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,1),tmp_det2,exc,phase,N_int) + call decode_exc_spin(exc,h1,p1,h2,p2) + do m=1,N_states + do n = 1, N_states + ckl = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(l,n) * phase + tmp_a(h1,p1,m,n) += ckl + ckl = psi_bilinear_matrix_values(k_a,n)*psi_bilinear_matrix_values(l,m) * phase + tmp_a(p1,h1,m,n) += ckl + enddo + enddo + endif + l = l+1 + if (l>N_det) exit + lrow = psi_bilinear_matrix_rows(l) + lcol = psi_bilinear_matrix_columns(l) + enddo + + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + one_e_tr_dm_mo(:,:,:,:) = one_e_tr_dm_mo(:,:,:,:) + tmp_a(:,:,:,:) + !$OMP END CRITICAL + deallocate(tmp_a) + !$OMP BARRIER + + tmp_b = 0.d0 + !$OMP DO SCHEDULE(dynamic,64) + do k_b=1,N_det + krow = psi_bilinear_matrix_transp_rows(k_b) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_transp_columns(k_b) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + ! ------------- + + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + do m=1,N_states + do n = 1, N_states + ck = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(k_b,n) + do l=1,elec_beta_num + j = occ(l,2) + tmp_b(j,j,m,n) += ck + enddo + enddo + enddo + + if (k_b == N_det) cycle + l = k_b+1 + lrow = psi_bilinear_matrix_transp_rows(l) + lcol = psi_bilinear_matrix_transp_columns(l) + ! Fix beta determinant, loop over alphas + do while ( lrow == krow ) + tmp_det2(:) = psi_det_beta_unique(:, lcol) + call get_excitation_degree_spin(tmp_det(1,2),tmp_det2,degree,N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,2),tmp_det2,exc,phase,N_int) + call decode_exc_spin(exc,h1,p1,h2,p2) + do m=1,N_states + do n = 1, N_states + ckl = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(l,n) * phase + tmp_b(h1,p1,m,n) += ckl + ckl = psi_bilinear_matrix_transp_values(k_b,n)*psi_bilinear_matrix_transp_values(l,m) * phase + tmp_b(p1,h1,m,n) += ckl + enddo + enddo + endif + l = l+1 + if (l>N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l) + lcol = psi_bilinear_matrix_transp_columns(l) + enddo + + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + one_e_tr_dm_mo(:,:,:,:) = one_e_tr_dm_mo(:,:,:,:) + tmp_b(:,:,:,:) + !$OMP END CRITICAL + + deallocate(tmp_b) + !$OMP END PARALLEL + +END_PROVIDER + BEGIN_PROVIDER [ double precision, one_e_tr_dm_mo_alpha, (mo_num,mo_num,N_states,N_states) ] +&BEGIN_PROVIDER [ double precision, one_e_tr_dm_mo_beta, (mo_num,mo_num,N_states,N_states) ] + implicit none + BEGIN_DOC + ! $\alpha$ and $\beta$ one-body transition density matrices for all pairs of states + END_DOC + + integer :: j,k,l,m,n,k_a,k_b + integer :: occ(N_int*bit_kind_size,2) + double precision :: ck, cl, ckl + double precision :: phase + integer :: h1,h2,p1,p2,s1,s2, degree + integer(bit_kind) :: tmp_det(N_int,2), tmp_det2(N_int) + integer :: exc(0:2,2),n_occ(2) + double precision, allocatable :: tmp_a(:,:,:,:), tmp_b(:,:,:,:) + integer :: krow, kcol, lrow, lcol + + PROVIDE psi_det + + one_e_tr_dm_mo_alpha = 0.d0 + one_e_tr_dm_mo_beta = 0.d0 + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,k_a,k_b,l,m,n,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc,& + !$OMP tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)& + !$OMP SHARED(psi_det,psi_coef,N_int,N_states,elec_alpha_num, & + !$OMP elec_beta_num,one_e_tr_dm_mo_alpha,one_e_tr_dm_mo_beta,N_det,& + !$OMP mo_num,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns,& + !$OMP psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns,& + !$OMP psi_bilinear_matrix_order_reverse, psi_det_alpha_unique, psi_det_beta_unique,& + !$OMP psi_bilinear_matrix_values, psi_bilinear_matrix_transp_values,& + !$OMP N_det_alpha_unique,N_det_beta_unique,irp_here) + allocate(tmp_a(mo_num,mo_num,N_states,N_states), tmp_b(mo_num,mo_num,N_states,N_states) ) + tmp_a = 0.d0 + !$OMP DO SCHEDULE(dynamic,64) + do k_a=1,N_det + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + ! ------------- + + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + do m=1,N_states + do n = 1, N_states + ck = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(k_a,n) + do l=1,elec_alpha_num + j = occ(l,1) + tmp_a(j,j,m,n) += ck + enddo + enddo + enddo + + if (k_a == N_det) cycle + l = k_a+1 + lrow = psi_bilinear_matrix_rows(l) + lcol = psi_bilinear_matrix_columns(l) + ! Fix beta determinant, loop over alphas + do while ( lcol == kcol ) + tmp_det2(:) = psi_det_alpha_unique(:, lrow) + call get_excitation_degree_spin(tmp_det(1,1),tmp_det2,degree,N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,1),tmp_det2,exc,phase,N_int) + call decode_exc_spin(exc,h1,p1,h2,p2) + do m=1,N_states + do n = 1, N_states + ckl = psi_bilinear_matrix_values(k_a,m)*psi_bilinear_matrix_values(l,n) * phase + tmp_a(h1,p1,m,n) += ckl + tmp_a(p1,h1,m,n) += ckl + enddo + enddo + endif + l = l+1 + if (l>N_det) exit + lrow = psi_bilinear_matrix_rows(l) + lcol = psi_bilinear_matrix_columns(l) + enddo + + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + one_e_tr_dm_mo_alpha(:,:,:,:) = one_e_tr_dm_mo_alpha(:,:,:,:) + tmp_a(:,:,:,:) + !$OMP END CRITICAL + deallocate(tmp_a) + + tmp_b = 0.d0 + !$OMP DO SCHEDULE(dynamic,64) + do k_b=1,N_det + krow = psi_bilinear_matrix_transp_rows(k_b) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_transp_columns(k_b) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + ! ------------- + + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + do m=1,N_states + do n = 1, N_states + ck = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(k_b,n) + do l=1,elec_beta_num + j = occ(l,2) + tmp_b(j,j,m,n) += ck + enddo + enddo + enddo + + if (k_b == N_det) cycle + l = k_b+1 + lrow = psi_bilinear_matrix_transp_rows(l) + lcol = psi_bilinear_matrix_transp_columns(l) + ! Fix beta determinant, loop over alphas + do while ( lrow == krow ) + tmp_det2(:) = psi_det_beta_unique(:, lcol) + call get_excitation_degree_spin(tmp_det(1,2),tmp_det2,degree,N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,2),tmp_det2,exc,phase,N_int) + call decode_exc_spin(exc,h1,p1,h2,p2) + do m=1,N_states + do n = 1, N_states + ckl = psi_bilinear_matrix_transp_values(k_b,m)*psi_bilinear_matrix_transp_values(l,n) * phase + tmp_b(h1,p1,m,n) += ckl + tmp_b(p1,h1,m,n) += ckl + enddo + enddo + endif + l = l+1 + if (l>N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l) + lcol = psi_bilinear_matrix_transp_columns(l) + enddo + + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + one_e_tr_dm_mo_beta(:,:,:,:) = one_e_tr_dm_mo_beta(:,:,:,:) + tmp_b(:,:,:,:) + !$OMP END CRITICAL + + deallocate(tmp_b) + !$OMP END PARALLEL + +END_PROVIDER + diff --git a/src/ezfio_files/lock.irp.f b/src/ezfio_files/lock.irp.f index 53a99254..d28f7641 100644 --- a/src/ezfio_files/lock.irp.f +++ b/src/ezfio_files/lock.irp.f @@ -9,4 +9,21 @@ BEGIN_PROVIDER [ integer(omp_lock_kind), file_lock ] call omp_init_lock(file_lock) END_PROVIDER +! These functions need to be called because internal read and write are not thread safe. +subroutine lock_io() + implicit none + BEGIN_DOC +! Needs to be called because before doing I/O because internal read and write +! are not thread safe. + END_DOC + call omp_set_lock(file_lock) +end subroutine lock_io() +subroutine unlock_io() + implicit none + BEGIN_DOC +! Needs to be called because afterdoing I/O because internal read and write +! are not thread safe. + END_DOC + call omp_unset_lock(file_lock) +end subroutine lock_io() diff --git a/src/fci/fci.irp.f b/src/fci/fci.irp.f index 9d9c0b7d..bb2a93f8 100644 --- a/src/fci/fci.irp.f +++ b/src/fci/fci.irp.f @@ -39,12 +39,19 @@ program fci if (.not.is_zmq_slave) then PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + write(json_unit,json_array_open_fmt) 'fci' + if (do_pt2) then call run_stochastic_cipsi else call run_cipsi endif + write(json_unit,json_dict_uopen_fmt) + write(json_unit,json_dict_close_fmtx) + write(json_unit,json_array_close_fmtx) + call json_close + else PROVIDE mo_two_e_integrals_in_map pt2_min_parallel_tasks diff --git a/src/fci_tc_bi/NEED b/src/fci_tc_bi/NEED index 000b0deb..3bb9515a 100644 --- a/src/fci_tc_bi/NEED +++ b/src/fci_tc_bi/NEED @@ -1,3 +1,4 @@ +json tc_bi_ortho davidson_undressed cipsi_tc_bi_ortho diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f index 56c561ac..c8369e93 100644 --- a/src/fci_tc_bi/diagonalize_ci.irp.f +++ b/src/fci_tc_bi/diagonalize_ci.irp.f @@ -49,9 +49,8 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) psi_coef(i,j) = dabs(psi_l_coef_bi_ortho(i,j) * psi_r_coef_bi_ortho(i,j)) enddo enddo - SOFT_TOUCH eigval_left_tc_bi_orth eigval_right_tc_bi_orth leigvec_tc_bi_orth reigvec_tc_bi_orth norm_ground_left_right_bi_orth psi_coef psi_l_coef_bi_ortho psi_r_coef_bi_ortho - - + SOFT_TOUCH eigval_left_tc_bi_orth eigval_right_tc_bi_orth leigvec_tc_bi_orth reigvec_tc_bi_orth norm_ground_left_right_bi_orth + SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho psi_coef call save_tc_bi_ortho_wavefunction end diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index 84ac8166..ed75c882 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -62,6 +62,7 @@ subroutine run_cipsi_tc endif endif ! --- + write(json_unit,json_array_open_fmt) 'fci_tc' if (do_pt2) then call run_stochastic_cipsi @@ -69,6 +70,11 @@ subroutine run_cipsi_tc call run_cipsi endif + write(json_unit,json_dict_uopen_fmt) + write(json_unit,json_dict_close_fmtx) + write(json_unit,json_array_close_fmtx) + call json_close + else PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks if(elec_alpha_num+elec_beta_num.ge.3)then diff --git a/src/fci_tc_bi/generators.irp.f b/src/fci_tc_bi/generators.irp.f index 55c0cbb9..bf972423 100644 --- a/src/fci_tc_bi/generators.irp.f +++ b/src/fci_tc_bi/generators.irp.f @@ -43,9 +43,14 @@ END_PROVIDER ! For Single reference wave functions, the generator is the ! Hartree-Fock determinant END_DOC - psi_det_sorted_tc_gen = psi_det_sorted_tc + psi_det_sorted_tc_gen = psi_det_sorted_tc psi_coef_sorted_tc_gen = psi_coef_sorted_tc psi_det_sorted_tc_gen_order = psi_det_sorted_tc_order + integer :: i +! do i = 1,N_det +! print*,'i = ',i +! call debug_det(psi_det_sorted_tc(1,1,i),N_int) +! enddo END_PROVIDER diff --git a/src/fci_tc_bi/save_energy.irp.f b/src/fci_tc_bi/save_energy.irp.f index 7c41d00f..421ae5f8 100644 --- a/src/fci_tc_bi/save_energy.irp.f +++ b/src/fci_tc_bi/save_energy.irp.f @@ -4,6 +4,6 @@ subroutine save_energy(E,pt2) ! Saves the energy in |EZFIO|. END_DOC double precision, intent(in) :: E(N_states), pt2(N_states) - call ezfio_set_fci_tc_energy(E(1:N_states)) - call ezfio_set_fci_tc_energy_pt2(E(1:N_states)+pt2(1:N_states)) + call ezfio_set_fci_tc_bi_energy(E(1:N_states)) + call ezfio_set_fci_tc_bi_energy_pt2(E(1:N_states)+pt2(1:N_states)) end diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f index af1176d2..4d3de7d0 100644 --- a/src/fci_tc_bi/selectors.irp.f +++ b/src/fci_tc_bi/selectors.irp.f @@ -18,15 +18,6 @@ BEGIN_PROVIDER [ integer, N_det_selectors] double precision :: norm, norm_max call write_time(6) N_det_selectors = N_det - norm = 1.d0 - do i=1,N_det - norm = norm - psi_average_norm_contrib_tc(i) - if (norm - 1.d-10 < 1.d0 - threshold_selectors) then - N_det_selectors = i - exit - endif - enddo - N_det_selectors = max(N_det_selectors,N_det_generators) call write_int(6,N_det_selectors,'Number of selectors') END_PROVIDER @@ -47,11 +38,9 @@ END_PROVIDER enddo do k=1,N_states do i=1,N_det_selectors - psi_selectors_coef(i,k) = psi_coef_sorted_tc_gen(i,k) + psi_selectors_coef(i,k) = psi_coef_sorted_tc_gen(i,k) psi_selectors_coef_tc(i,1,k) = psi_l_coef_sorted_bi_ortho(i,k) psi_selectors_coef_tc(i,2,k) = psi_r_coef_sorted_bi_ortho(i,k) -! psi_selectors_coef_tc(i,1,k) = 1.d0 -! psi_selectors_coef_tc(i,2,k) = 1.d0 enddo enddo @@ -74,25 +63,6 @@ END_PROVIDER enddo END_PROVIDER - BEGIN_PROVIDER [ double precision, psi_selectors_rcoef_bi_orth_transp, (N_states, psi_det_size) ] -&BEGIN_PROVIDER [ double precision, psi_selectors_lcoef_bi_orth_transp, (N_states, psi_det_size) ] - - implicit none - integer :: i, k - - psi_selectors_rcoef_bi_orth_transp = 0.d0 - psi_selectors_lcoef_bi_orth_transp = 0.d0 - - print*,'N_det,N_det_selectors',N_det,N_det_selectors - do i = 1, N_det_selectors - do k = 1, N_states - psi_selectors_rcoef_bi_orth_transp(k,i) = psi_r_coef_sorted_bi_ortho(i,k) - psi_selectors_lcoef_bi_orth_transp(k,i) = psi_l_coef_sorted_bi_ortho(i,k) - enddo - enddo - -END_PROVIDER - BEGIN_PROVIDER [ integer, psi_selectors_size ] implicit none psi_selectors_size = psi_det_size diff --git a/src/hartree_fock/NEED b/src/hartree_fock/NEED index 2b3fa238..e168bd80 100644 --- a/src/hartree_fock/NEED +++ b/src/hartree_fock/NEED @@ -1,3 +1,4 @@ ao_one_e_ints ao_two_e_ints scf_utils +json diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index d7d8fa7d..8c6658c5 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -15,115 +15,59 @@ double precision, allocatable :: ao_two_e_integral_alpha_tmp(:,:) double precision, allocatable :: ao_two_e_integral_beta_tmp(:,:) - ao_two_e_integral_alpha = 0.d0 - ao_two_e_integral_beta = 0.d0 - if (do_direct_integrals) then + if (do_ao_cholesky) then ! Use Cholesky-decomposed integrals + ao_two_e_integral_alpha(:,:) = ao_two_e_integral_alpha_chol(:,:) + ao_two_e_integral_beta (:,:) = ao_two_e_integral_beta_chol (:,:) - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,keys,values,p,q,r,s,i0,j0,k0,l0, & - !$OMP ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, c0, c1, c2, & - !$OMP local_threshold)& - !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& - !$OMP ao_integrals_map,ao_integrals_threshold, ao_two_e_integral_schwartz, & - !$OMP ao_two_e_integral_alpha, ao_two_e_integral_beta) + else ! Use integrals in AO basis set - allocate(keys(1), values(1)) - allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & - ao_two_e_integral_beta_tmp(ao_num,ao_num)) - ao_two_e_integral_alpha_tmp = 0.d0 - ao_two_e_integral_beta_tmp = 0.d0 + ao_two_e_integral_alpha = 0.d0 + ao_two_e_integral_beta = 0.d0 + if (do_direct_integrals) then - q = ao_num*ao_num*ao_num*ao_num - !$OMP DO SCHEDULE(static,64) - do p=1_8,q - call two_e_integrals_index_reverse(kk,ii,ll,jj,p) - if ( (kk(1)>ao_num).or. & - (ii(1)>ao_num).or. & - (jj(1)>ao_num).or. & - (ll(1)>ao_num) ) then - cycle - endif - k = kk(1) - i = ii(1) - l = ll(1) - j = jj(1) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,keys,values,p,q,r,s,i0,j0,k0,l0,& + !$OMP ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, c0, c1, c2,& + !$OMP local_threshold) & + !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& + !$OMP ao_integrals_map,ao_integrals_threshold, ao_two_e_integral_schwartz,& + !$OMP ao_two_e_integral_alpha, ao_two_e_integral_beta) - logical, external :: ao_two_e_integral_zero - if (ao_two_e_integral_zero(i,k,j,l)) then - cycle - endif - local_threshold = ao_two_e_integral_schwartz(k,l)*ao_two_e_integral_schwartz(i,j) - if (local_threshold < ao_integrals_threshold) then - cycle - endif - i0 = i - j0 = j - k0 = k - l0 = l - values(1) = 0.d0 - local_threshold = ao_integrals_threshold/local_threshold - do k2=1,8 - if (kk(k2)==0) then - cycle - endif - i = ii(k2) - j = jj(k2) - k = kk(k2) - l = ll(k2) - c0 = SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l) - c1 = SCF_density_matrix_ao_alpha(k,i) - c2 = SCF_density_matrix_ao_beta(k,i) - if ( dabs(c0)+dabs(c1)+dabs(c2) < local_threshold) then - cycle - endif - if (values(1) == 0.d0) then - values(1) = ao_two_e_integral(k0,l0,i0,j0) - endif - integral = c0 * values(1) - ao_two_e_integral_alpha_tmp(i,j) += integral - ao_two_e_integral_beta_tmp (i,j) += integral - integral = values(1) - ao_two_e_integral_alpha_tmp(l,j) -= c1 * integral - ao_two_e_integral_beta_tmp (l,j) -= c2 * integral - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp - ao_two_e_integral_beta += ao_two_e_integral_beta_tmp - !$OMP END CRITICAL - deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) - !$OMP END PARALLEL - else - PROVIDE ao_two_e_integrals_in_map + allocate(keys(1), values(1)) + allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & + ao_two_e_integral_beta_tmp(ao_num,ao_num)) + ao_two_e_integral_alpha_tmp = 0.d0 + ao_two_e_integral_beta_tmp = 0.d0 - integer(omp_lock_kind) :: lck(ao_num) - integer(map_size_kind) :: i8 - integer :: ii(8), jj(8), kk(8), ll(8), k2 - integer(cache_map_size_kind) :: n_elements_max, n_elements - integer(key_kind), allocatable :: keys(:) - double precision, allocatable :: values(:) - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max, & - !$OMP n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)& - !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& - !$OMP ao_integrals_map, ao_two_e_integral_alpha, ao_two_e_integral_beta) - - call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max) - allocate(keys(n_elements_max), values(n_elements_max)) - allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & - ao_two_e_integral_beta_tmp(ao_num,ao_num)) - ao_two_e_integral_alpha_tmp = 0.d0 - ao_two_e_integral_beta_tmp = 0.d0 - - !$OMP DO SCHEDULE(static,1) - do i8=0_8,ao_integrals_map%map_size - n_elements = n_elements_max - call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) - do k1=1,n_elements - call two_e_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) + q = ao_num*ao_num*ao_num*ao_num + !$OMP DO SCHEDULE(static,64) + do p=1_8,q + call two_e_integrals_index_reverse(kk,ii,ll,jj,p) + if ( (kk(1)>ao_num).or. & + (ii(1)>ao_num).or. & + (jj(1)>ao_num).or. & + (ll(1)>ao_num) ) then + cycle + endif + k = kk(1) + i = ii(1) + l = ll(1) + j = jj(1) + logical, external :: ao_two_e_integral_zero + if (ao_two_e_integral_zero(i,k,j,l)) then + cycle + endif + local_threshold = ao_two_e_integral_schwartz(k,l)*ao_two_e_integral_schwartz(i,j) + if (local_threshold < ao_integrals_threshold) then + cycle + endif + i0 = i + j0 = j + k0 = k + l0 = l + values(1) = 0.d0 + local_threshold = ao_integrals_threshold/local_threshold do k2=1,8 if (kk(k2)==0) then cycle @@ -132,25 +76,162 @@ j = jj(k2) k = kk(k2) l = ll(k2) - integral = (SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l)) * values(k1) + c0 = SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l) + c1 = SCF_density_matrix_ao_alpha(k,i) + c2 = SCF_density_matrix_ao_beta(k,i) + if ( dabs(c0)+dabs(c1)+dabs(c2) < local_threshold) then + cycle + endif + if (values(1) == 0.d0) then + values(1) = ao_two_e_integral(k0,l0,i0,j0) + endif + integral = c0 * values(1) ao_two_e_integral_alpha_tmp(i,j) += integral ao_two_e_integral_beta_tmp (i,j) += integral - integral = values(k1) - ao_two_e_integral_alpha_tmp(l,j) -= SCF_density_matrix_ao_alpha(k,i) * integral - ao_two_e_integral_beta_tmp (l,j) -= SCF_density_matrix_ao_beta (k,i) * integral + integral = values(1) + ao_two_e_integral_alpha_tmp(l,j) -= c1 * integral + ao_two_e_integral_beta_tmp (l,j) -= c2 * integral enddo enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp - ao_two_e_integral_beta += ao_two_e_integral_beta_tmp - !$OMP END CRITICAL - deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) - !$OMP END PARALLEL + !$OMP END DO NOWAIT + !$OMP CRITICAL + ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp + ao_two_e_integral_beta += ao_two_e_integral_beta_tmp + !$OMP END CRITICAL + deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) + !$OMP END PARALLEL + else + PROVIDE ao_two_e_integrals_in_map + integer(omp_lock_kind) :: lck(ao_num) + integer(map_size_kind) :: i8 + integer :: ii(8), jj(8), kk(8), ll(8), k2 + integer(cache_map_size_kind) :: n_elements_max, n_elements + integer(key_kind), allocatable :: keys(:) + double precision, allocatable :: values(:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max,& + !$OMP n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)& + !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& + !$OMP ao_integrals_map, ao_two_e_integral_alpha, ao_two_e_integral_beta) + + call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max) + allocate(keys(n_elements_max), values(n_elements_max)) + allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & + ao_two_e_integral_beta_tmp(ao_num,ao_num)) + ao_two_e_integral_alpha_tmp = 0.d0 + ao_two_e_integral_beta_tmp = 0.d0 + + !$OMP DO SCHEDULE(static,1) + do i8=0_8,ao_integrals_map%map_size + n_elements = n_elements_max + call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) + do k1=1,n_elements + call two_e_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) + + do k2=1,8 + if (kk(k2)==0) then + cycle + endif + i = ii(k2) + j = jj(k2) + k = kk(k2) + l = ll(k2) + integral = (SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l)) * values(k1) + ao_two_e_integral_alpha_tmp(i,j) += integral + ao_two_e_integral_beta_tmp (i,j) += integral + integral = values(k1) + ao_two_e_integral_alpha_tmp(l,j) -= SCF_density_matrix_ao_alpha(k,i) * integral + ao_two_e_integral_beta_tmp (l,j) -= SCF_density_matrix_ao_beta (k,i) * integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp + ao_two_e_integral_beta += ao_two_e_integral_beta_tmp + !$OMP END CRITICAL + deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) + !$OMP END PARALLEL + + endif endif +END_PROVIDER + + BEGIN_PROVIDER [ double precision, ao_two_e_integral_alpha_chol, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta_chol , (ao_num, ao_num) ] + use map_module + implicit none + BEGIN_DOC + ! Alpha and Beta Fock matrices in AO basis set + END_DOC + + integer :: m,n,l,s,j + double precision :: integral + double precision, allocatable :: X(:), X2(:,:,:,:), X3(:,:,:,:) + + allocate (X(cholesky_ao_num)) + + + ! X(j) = \sum_{mn} SCF_density_matrix_ao(m,n) * cholesky_ao(m,n,j) + call dgemm('T','N',cholesky_ao_num,1,ao_num*ao_num,1.d0, & + cholesky_ao, ao_num*ao_num, & + SCF_density_matrix_ao, ao_num*ao_num,0.d0, & + X, cholesky_ao_num) +! + + ! ao_two_e_integral_alpha(m,n) = \sum_{j} cholesky_ao(m,n,j) * X(j) + call dgemm('N','N',ao_num*ao_num,1,cholesky_ao_num, 1.d0, & + cholesky_ao, ao_num*ao_num, & + X, cholesky_ao_num, 0.d0, & + ao_two_e_integral_alpha_chol, ao_num*ao_num) + + deallocate(X) + + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + + + allocate(X2(ao_num,ao_num,cholesky_ao_num,2)) + +! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) + + call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & + SCF_density_matrix_ao_alpha, ao_num, & + cholesky_ao, ao_num, 0.d0, & + X2(1,1,1,1), ao_num) + + call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & + SCF_density_matrix_ao_beta, ao_num, & + cholesky_ao, ao_num, 0.d0, & + X2(1,1,1,2), ao_num) + + allocate(X3(ao_num,cholesky_ao_num,ao_num,2)) + + do s=1,ao_num + do j=1,cholesky_ao_num + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + X3(m,j,s,2) = X2(m,s,j,2) + enddo + enddo + enddo + + deallocate(X2) + + call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & + cholesky_ao, ao_num, & + X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, & + ao_two_e_integral_alpha_chol, ao_num) + + call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & + cholesky_ao, ao_num, & + X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, & + ao_two_e_integral_beta_chol, ao_num) + + deallocate(X3) + END_PROVIDER BEGIN_PROVIDER [ double precision, Fock_matrix_ao_alpha, (ao_num, ao_num) ] diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f index a7ac9fe4..a361c04f 100644 --- a/src/hartree_fock/scf.irp.f +++ b/src/hartree_fock/scf.irp.f @@ -80,9 +80,14 @@ subroutine run mo_label = 'Orthonormalized' - call Roothaan_Hall_SCF - call ezfio_set_hartree_fock_energy(SCF_energy) + write(json_unit,json_array_open_fmt) 'scf' + call Roothaan_Hall_SCF + + write(json_unit,json_array_close_fmtx) + call json_close + + call ezfio_set_hartree_fock_energy(SCF_energy) end diff --git a/src/iterations/EZFIO.cfg b/src/iterations/EZFIO.cfg deleted file mode 100644 index 2a5e94a7..00000000 --- a/src/iterations/EZFIO.cfg +++ /dev/null @@ -1,24 +0,0 @@ -[n_iter] -interface: ezfio -doc: Number of saved iterations -type:integer -default: 1 - -[n_det_iterations] -interface: ezfio, provider -doc: Number of determinants at each iteration -type: integer -size: (100) - -[energy_iterations] -interface: ezfio, provider -doc: The variational energy at each iteration -type: double precision -size: (determinants.n_states,100) - -[pt2_iterations] -interface: ezfio, provider -doc: The |PT2| correction at each iteration -type: double precision -size: (determinants.n_states,100) - diff --git a/src/iterations/io.irp.f b/src/iterations/io.irp.f deleted file mode 100644 index 821f5e84..00000000 --- a/src/iterations/io.irp.f +++ /dev/null @@ -1,37 +0,0 @@ -BEGIN_PROVIDER [ integer, n_iter ] - implicit none - BEGIN_DOC -! number of iterations - END_DOC - - logical :: has - PROVIDE ezfio_filename - if (mpi_master) then - - double precision :: zeros(N_states,100) - integer :: izeros(100) - zeros = 0.d0 - izeros = 0 - call ezfio_set_iterations_n_iter(0) - call ezfio_set_iterations_energy_iterations(zeros) - call ezfio_set_iterations_pt2_iterations(zeros) - call ezfio_set_iterations_n_det_iterations(izeros) - n_iter = 1 - endif - IRP_IF MPI_DEBUG - print *, irp_here, mpi_rank - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - IRP_ENDIF - IRP_IF MPI - include 'mpif.h' - integer :: ierr - call MPI_BCAST( n_iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - stop 'Unable to read n_iter with MPI' - endif - IRP_ENDIF - - call write_time(6) - -END_PROVIDER - diff --git a/src/iterations/iterations.irp.f b/src/iterations/iterations.irp.f index 2c9faaf8..d06d1b6e 100644 --- a/src/iterations/iterations.irp.f +++ b/src/iterations/iterations.irp.f @@ -1,42 +1,65 @@ -BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter,N_states) ] - implicit none - BEGIN_DOC - ! Extrapolated energy, using E_var = f(PT2) where PT2=0 - END_DOC - integer :: i - do i=1,min(N_states,N_det) - call extrapolate_data(N_iter, & - energy_iterations(i,1:N_iter), & - pt2_iterations(i,1:N_iter), & - extrapolated_energy(1:N_iter,i)) - enddo -END_PROVIDER - - -subroutine save_iterations(e_, pt2_,n_) +BEGIN_PROVIDER [ integer, N_iter ] implicit none BEGIN_DOC -! Update the energy in the EZFIO file. +! Number of CIPSI iterations END_DOC - integer, intent(in) :: n_ - double precision, intent(in) :: e_(N_states), pt2_(N_states) - integer :: i - if (N_iter == 101) then - do i=2,N_iter-1 - energy_iterations(1:N_states,N_iter-1) = energy_iterations(1:N_states,N_iter) - pt2_iterations(1:N_states,N_iter-1) = pt2_iterations(1:N_states,N_iter) + N_iter = 0 +END_PROVIDER + +BEGIN_PROVIDER [ integer, N_iter_max ] + implicit none + BEGIN_DOC + ! Max number of iterations for extrapolations + END_DOC + N_iter_max = 8 +END_PROVIDER + + BEGIN_PROVIDER [ double precision, energy_iterations , (n_states,N_iter_max) ] +&BEGIN_PROVIDER [ double precision, pt2_iterations , (n_states,N_iter_max) ] +&BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter_max,N_states) ] + implicit none + BEGIN_DOC +! The energy at each iteration for the extrapolations + END_DOC + + energy_iterations = 0.d0 + pt2_iterations = 0.d0 + extrapolated_energy = 0.d0 +END_PROVIDER + +subroutine increment_n_iter(e, pt2_data) + use selection_types + implicit none + BEGIN_DOC +! Does what is necessary to increment n_iter + END_DOC + double precision, intent(in) :: e(*) + type(pt2_type), intent(in) :: pt2_data + integer :: k, i + + if (N_det < N_states) return + + if (N_iter < N_iter_max) then + N_iter += 1 + else + do k=2,N_iter + energy_iterations(1:N_states,k-1) = energy_iterations(1:N_states,k) + pt2_iterations(1:N_states,k-1) = pt2_iterations(1:N_states,k) enddo - N_iter = N_iter-1 - TOUCH N_iter endif + energy_iterations(1:N_states,N_iter) = e(1:N_states) + pt2_iterations(1:N_states,N_iter) = pt2_data % rpt2(1:N_states) - energy_iterations(1:N_states,N_iter) = e_(1:N_states) - pt2_iterations(1:N_states,N_iter) = pt2_(1:N_states) - n_det_iterations(N_iter) = n_ - call ezfio_set_iterations_N_iter(N_iter) - call ezfio_set_iterations_energy_iterations(energy_iterations) - call ezfio_set_iterations_pt2_iterations(pt2_iterations) - call ezfio_set_iterations_n_det_iterations(n_det_iterations) + if (N_iter < 2) then + extrapolated_energy(1,:) = energy_iterations(:,1) + pt2_iterations(:,1) + extrapolated_energy(2,:) = energy_iterations(:,2) + pt2_iterations(:,2) + else + do i=1,N_states + call extrapolate_data(N_iter, & + energy_iterations(i,1:N_iter), & + pt2_iterations(i,1:N_iter), & + extrapolated_energy(1:N_iter,i)) + enddo + endif end - diff --git a/src/iterations/print_extrapolation.irp.f b/src/iterations/print_extrapolation.irp.f index cb46fb67..a7f85693 100644 --- a/src/iterations/print_extrapolation.irp.f +++ b/src/iterations/print_extrapolation.irp.f @@ -5,10 +5,14 @@ subroutine print_extrapolated_energy END_DOC integer :: i,k + integer :: N_states_p, N_iter_p if (N_iter< 2) then return endif + N_states_p = min(N_states,N_det) + N_iter_p = min(N_iter, 8) + write(*,'(A)') '' write(*,'(A)') 'Extrapolated energies' write(*,'(A)') '------------------------' @@ -20,20 +24,20 @@ subroutine print_extrapolated_energy write(*,*) '=========== ', '===================' write(*,*) 'minimum PT2 ', 'Extrapolated energy' write(*,*) '=========== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter+1-k), extrapolated_energy(k,1) + do k=2,N_iter_p + write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter_p+1-k), extrapolated_energy(k,1) enddo write(*,*) '=========== ', '===================' - do i=2, min(N_states,N_det) + do i=2, N_states_p print *, '' print *, 'State ', i print *, '' write(*,*) '=========== ', '=================== ', '=================== ', '===================' write(*,*) 'minimum PT2 ', 'Extrapolated energy ', ' Excitation (a.u) ', ' Excitation (eV) ' write(*,*) '=========== ', '=================== ', '=================== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,N_iter+1-k), extrapolated_energy(k,i), & + do k=2,N_iter_p + write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,k), extrapolated_energy(k,i), & extrapolated_energy(k,i) - extrapolated_energy(k,1), & (extrapolated_energy(k,i) - extrapolated_energy(k,1) ) * 27.211396641308d0 enddo diff --git a/src/iterations_tc/EZFIO.cfg b/src/iterations_tc/EZFIO.cfg deleted file mode 100644 index 2a5e94a7..00000000 --- a/src/iterations_tc/EZFIO.cfg +++ /dev/null @@ -1,24 +0,0 @@ -[n_iter] -interface: ezfio -doc: Number of saved iterations -type:integer -default: 1 - -[n_det_iterations] -interface: ezfio, provider -doc: Number of determinants at each iteration -type: integer -size: (100) - -[energy_iterations] -interface: ezfio, provider -doc: The variational energy at each iteration -type: double precision -size: (determinants.n_states,100) - -[pt2_iterations] -interface: ezfio, provider -doc: The |PT2| correction at each iteration -type: double precision -size: (determinants.n_states,100) - diff --git a/src/iterations_tc/NEED b/src/iterations_tc/NEED deleted file mode 100644 index e69de29b..00000000 diff --git a/src/iterations_tc/io.irp.f b/src/iterations_tc/io.irp.f deleted file mode 100644 index 821f5e84..00000000 --- a/src/iterations_tc/io.irp.f +++ /dev/null @@ -1,37 +0,0 @@ -BEGIN_PROVIDER [ integer, n_iter ] - implicit none - BEGIN_DOC -! number of iterations - END_DOC - - logical :: has - PROVIDE ezfio_filename - if (mpi_master) then - - double precision :: zeros(N_states,100) - integer :: izeros(100) - zeros = 0.d0 - izeros = 0 - call ezfio_set_iterations_n_iter(0) - call ezfio_set_iterations_energy_iterations(zeros) - call ezfio_set_iterations_pt2_iterations(zeros) - call ezfio_set_iterations_n_det_iterations(izeros) - n_iter = 1 - endif - IRP_IF MPI_DEBUG - print *, irp_here, mpi_rank - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - IRP_ENDIF - IRP_IF MPI - include 'mpif.h' - integer :: ierr - call MPI_BCAST( n_iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - stop 'Unable to read n_iter with MPI' - endif - IRP_ENDIF - - call write_time(6) - -END_PROVIDER - diff --git a/src/iterations_tc/iterations.irp.f b/src/iterations_tc/iterations.irp.f deleted file mode 100644 index 2f1cf0c1..00000000 --- a/src/iterations_tc/iterations.irp.f +++ /dev/null @@ -1,43 +0,0 @@ -BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter,N_states) ] - implicit none - BEGIN_DOC - ! Extrapolated energy, using E_var = f(PT2) where PT2=0 - END_DOC -! integer :: i - extrapolated_energy = 0.D0 -END_PROVIDER - - subroutine get_extrapolated_energy(Niter,ept2,pt1,extrap_energy) - implicit none - integer, intent(in) :: Niter - double precision, intent(in) :: ept2(Niter),pt1(Niter),extrap_energy(Niter) - call extrapolate_data(Niter,ept2,pt1,extrap_energy) - end - -subroutine save_iterations(e_, pt2_,n_) - implicit none - BEGIN_DOC -! Update the energy in the EZFIO file. - END_DOC - integer, intent(in) :: n_ - double precision, intent(in) :: e_(N_states), pt2_(N_states) - integer :: i - - if (N_iter == 101) then - do i=2,N_iter-1 - energy_iterations(1:N_states,N_iter-1) = energy_iterations(1:N_states,N_iter) - pt2_iterations(1:N_states,N_iter-1) = pt2_iterations(1:N_states,N_iter) - enddo - N_iter = N_iter-1 - TOUCH N_iter - endif - - energy_iterations(1:N_states,N_iter) = e_(1:N_states) - pt2_iterations(1:N_states,N_iter) = pt2_(1:N_states) - n_det_iterations(N_iter) = n_ - call ezfio_set_iterations_N_iter(N_iter) - call ezfio_set_iterations_energy_iterations(energy_iterations) - call ezfio_set_iterations_pt2_iterations(pt2_iterations) - call ezfio_set_iterations_n_det_iterations(n_det_iterations) -end - diff --git a/src/iterations_tc/print_extrapolation.irp.f b/src/iterations_tc/print_extrapolation.irp.f deleted file mode 100644 index cb46fb67..00000000 --- a/src/iterations_tc/print_extrapolation.irp.f +++ /dev/null @@ -1,46 +0,0 @@ -subroutine print_extrapolated_energy - implicit none - BEGIN_DOC -! Print the extrapolated energy in the output - END_DOC - - integer :: i,k - - if (N_iter< 2) then - return - endif - write(*,'(A)') '' - write(*,'(A)') 'Extrapolated energies' - write(*,'(A)') '------------------------' - write(*,'(A)') '' - - print *, '' - print *, 'State ', 1 - print *, '' - write(*,*) '=========== ', '===================' - write(*,*) 'minimum PT2 ', 'Extrapolated energy' - write(*,*) '=========== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter+1-k), extrapolated_energy(k,1) - enddo - write(*,*) '=========== ', '===================' - - do i=2, min(N_states,N_det) - print *, '' - print *, 'State ', i - print *, '' - write(*,*) '=========== ', '=================== ', '=================== ', '===================' - write(*,*) 'minimum PT2 ', 'Extrapolated energy ', ' Excitation (a.u) ', ' Excitation (eV) ' - write(*,*) '=========== ', '=================== ', '=================== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,N_iter+1-k), extrapolated_energy(k,i), & - extrapolated_energy(k,i) - extrapolated_energy(k,1), & - (extrapolated_energy(k,i) - extrapolated_energy(k,1) ) * 27.211396641308d0 - enddo - write(*,*) '=========== ', '=================== ', '=================== ', '===================' - enddo - - print *, '' - -end subroutine - diff --git a/src/iterations_tc/print_summary.irp.f b/src/iterations_tc/print_summary.irp.f deleted file mode 100644 index 8e6285e2..00000000 --- a/src/iterations_tc/print_summary.irp.f +++ /dev/null @@ -1,104 +0,0 @@ -subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_configuration_,n_st,s2_) - use selection_types - implicit none - BEGIN_DOC -! Print the extrapolated energy in the output - END_DOC - - integer, intent(in) :: n_det_, n_configuration_, n_st - double precision, intent(in) :: e_(n_st), s2_(n_st) - type(pt2_type) , intent(in) :: pt2_data, pt2_data_err - integer :: i, k - integer :: N_states_p - character*(9) :: pt2_string - character*(512) :: fmt - - if (do_pt2) then - pt2_string = ' ' - else - pt2_string = '(approx)' - endif - - N_states_p = min(N_det_,n_st) - - print *, '' - print '(A,I12)', 'Summary at N_det = ', N_det_ - print '(A)', '-----------------------------------' - print *, '' - - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(13X,', N_states_p, '(6X,A7,1X,I6,10X))' - write(*,fmt) ('State',k, k=1,N_states_p) - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(A13,', N_states_p, '(1X,F14.8,15X))' - write(*,fmt) '# E ', e_(1:N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', e_(1:N_states_p)-e_(1) - write(*,fmt) '# Excit. (eV)', (e_(1:N_states_p)-e_(1))*27.211396641308d0 - endif - write(fmt,*) '(A13,', 2*N_states_p, '(1X,F14.8))' - write(*,fmt) '# PT2 '//pt2_string, (pt2_data % pt2(k), pt2_data_err % pt2(k), k=1,N_states_p) - write(*,fmt) '# rPT2'//pt2_string, (pt2_data % rpt2(k), pt2_data_err % rpt2(k), k=1,N_states_p) - write(*,'(A)') '#' - write(*,fmt) '# E+PT2 ', (e_(k)+pt2_data % pt2(k),pt2_data_err % pt2(k), k=1,N_states_p) - write(*,fmt) '# E+rPT2 ', (e_(k)+pt2_data % rpt2(k),pt2_data_err % rpt2(k), k=1,N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1)), & - dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1)), k=1,N_states_p) - write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1))*27.211396641308d0, & - dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1))*27.211396641308d0, k=1,N_states_p) - endif - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - print *, '' - - print *, 'N_det = ', N_det_ - print *, 'N_states = ', n_st - if (s2_eig) then - print *, 'N_cfg = ', N_configuration_ - if (only_expected_s2) then - print *, 'N_csf = ', N_csf - endif - endif - print *, '' - - do k=1, N_states_p - print*,'* State ',k - print *, '< S^2 > = ', s2_(k) - print *, 'E = ', e_(k) - print *, 'Variance = ', pt2_data % variance(k), ' +/- ', pt2_data_err % variance(k) - print *, 'PT norm = ', dsqrt(pt2_data % overlap(k,k)), ' +/- ', 0.5d0*dsqrt(pt2_data % overlap(k,k)) * pt2_data_err % overlap(k,k) / (pt2_data % overlap(k,k)) - print *, 'PT2 = ', pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) - print *, 'rPT2 = ', pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) - print *, 'E+PT2 '//pt2_string//' = ', e_(k)+pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) - print *, 'E+rPT2'//pt2_string//' = ', e_(k)+pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) - print *, '' - enddo - - print *, '-----' - if(n_st.gt.1)then - print *, 'Variational Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (e_(i) - e_(1)), & - (e_(i) - e_(1)) * 27.211396641308d0 - enddo - print *, '-----' - print*, 'Variational + perturbative Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))), & - (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))) * 27.211396641308d0 - enddo - print *, '-----' - print*, 'Variational + renormalized perturbative Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))), & - (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))) * 27.211396641308d0 - enddo - endif - -! call print_energy_components() - -end subroutine - diff --git a/src/json/EZFIO.cfg b/src/json/EZFIO.cfg new file mode 100644 index 00000000..7dc8d796 --- /dev/null +++ b/src/json/EZFIO.cfg @@ -0,0 +1,5 @@ +[empty] +type: logical +doc: Needed to create the json directory +interface: ezfio + diff --git a/src/json/NEED b/src/json/NEED new file mode 100644 index 00000000..5a3182ed --- /dev/null +++ b/src/json/NEED @@ -0,0 +1 @@ +ezfio_files diff --git a/src/json/README.rst b/src/json/README.rst new file mode 100644 index 00000000..3dd9ffbb --- /dev/null +++ b/src/json/README.rst @@ -0,0 +1,5 @@ +==== +json +==== + +JSON files to simplify getting output information from QP. diff --git a/src/json/json.irp.f b/src/json/json.irp.f new file mode 100644 index 00000000..1fc24eb6 --- /dev/null +++ b/src/json/json.irp.f @@ -0,0 +1,45 @@ +BEGIN_PROVIDER [ character*(128), json_filename ] + implicit none + BEGIN_DOC + ! Fortran unit of the JSON file + END_DOC + integer, external :: getUnitAndOpen + integer :: counter + character*(128) :: prefix + logical :: exists + + prefix = trim(ezfio_filename)//'/json/' + + call lock_io + exists = .True. + counter = 0 + do while (exists) + counter += 1 + write(json_filename, '(A,I5.5,A)') trim(prefix), counter, '.json' + INQUIRE(FILE=trim(json_filename), EXIST=exists) + enddo + call unlock_io + +END_PROVIDER + +BEGIN_PROVIDER [ integer, json_unit] + implicit none + BEGIN_DOC + ! Unit file for JSON output + END_DOC + integer, external :: getUnitAndOpen + call ezfio_set_json_empty(.False.) + call lock_io + json_unit = getUnitAndOpen(json_filename, 'w') + write(json_unit, '(A)') '{' + call unlock_io +END_PROVIDER + +subroutine json_close + call lock_io + write(json_unit, '(A)') '}' + close(json_unit) + call unlock_io + FREE json_unit +end + diff --git a/src/json/json_formats.irp.f b/src/json/json_formats.irp.f new file mode 100644 index 00000000..773114ba --- /dev/null +++ b/src/json/json_formats.irp.f @@ -0,0 +1,46 @@ + BEGIN_PROVIDER [ character*(64), json_int_fmt ] +&BEGIN_PROVIDER [ character*(64), json_int_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_real_fmt ] +&BEGIN_PROVIDER [ character*(64), json_real_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_str_fmt ] +&BEGIN_PROVIDER [ character*(64), json_str_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_true_fmt ] +&BEGIN_PROVIDER [ character*(64), json_true_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_false_fmt ] +&BEGIN_PROVIDER [ character*(64), json_false_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_array_open_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_close_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_close_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_close_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_dict_open_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_close_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_close_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_close_fmtx ] + implicit none + BEGIN_DOC + ! Formats for JSON output. + ! x: used to mark the last write (no comma) + END_DOC + json_int_fmt = '('' "'',A,''": '',I10,'','')' + json_int_fmtx = '('' "'',A,''": '',I10)' + json_real_fmt = '('' "'',A,''": '',E22.15,'','')' + json_real_fmtx = '('' "'',A,''": '',E22.15)' + json_str_fmt = '('' "'',A,''": "'',A,''",'')' + json_str_fmtx = '('' "'',A,''": "'',A,''"'')' + json_true_fmt = '('' "'',A,''": true,'')' + json_true_fmtx = '('' "'',A,''": true'')' + json_false_fmt = '('' "'',A,''": false,'')' + json_false_fmtx = '('' "'',A,''": false'')' + json_array_open_fmt = '('' "'',A,''": ['')' + json_array_uopen_fmt = '('' ['')' + json_array_close_fmt = '('' ],'')' + json_array_close_uopen_fmt = '('' ], ['')' + json_array_close_fmtx = '('' ]'')' + json_dict_open_fmt = '('' "'',A,''": {'')' + json_dict_uopen_fmt = '('' {'')' + json_dict_close_fmt = '('' },'')' + json_dict_close_uopen_fmt = '('' }, {'')' + json_dict_close_fmtx = '('' }'')' +END_PROVIDER diff --git a/src/kohn_sham/ks_scf.irp.f b/src/kohn_sham/ks_scf.irp.f index aa6efd52..85bfc333 100644 --- a/src/kohn_sham/ks_scf.irp.f +++ b/src/kohn_sham/ks_scf.irp.f @@ -90,7 +90,11 @@ subroutine run ! Choose SCF algorithm + write(json_unit,*) '"scf" : [' call Roothaan_Hall_SCF + write(json_unit,*) ']' + + call json_close end diff --git a/src/kohn_sham/print_mos.irp.f b/src/kohn_sham/print_mos.irp.f new file mode 100644 index 00000000..7105c989 --- /dev/null +++ b/src/kohn_sham/print_mos.irp.f @@ -0,0 +1,23 @@ +program print_mos + implicit none + integer :: i,nx + double precision :: r(3), xmax, dx, accu + double precision, allocatable :: mos_array(:) + double precision:: alpha,envelop + allocate(mos_array(mo_num)) + xmax = 5.d0 + nx = 1000 + dx=xmax/dble(nx) + r = 0.d0 + alpha = 0.5d0 + do i = 1, nx + call give_all_mos_at_r(r,mos_array) + accu = mos_array(3)**2+mos_array(4)**2+mos_array(5)**2 + accu = dsqrt(accu) + envelop = (1.d0 - dexp(-alpha * r(3)**2)) + write(33,'(100(F16.10,X))')r(3), mos_array(1), mos_array(2), accu, envelop + r(3) += dx + enddo + +end + diff --git a/src/kohn_sham_rs/rs_ks_scf.irp.f b/src/kohn_sham_rs/rs_ks_scf.irp.f index 84b85136..f28fd861 100644 --- a/src/kohn_sham_rs/rs_ks_scf.irp.f +++ b/src/kohn_sham_rs/rs_ks_scf.irp.f @@ -93,7 +93,10 @@ subroutine run level_shift += 1.d0 touch level_shift + write(json_unit,*) '"scf" : [' call Roothaan_Hall_SCF + write(json_unit,*) ']' + call json_close call ezfio_set_kohn_sham_rs_energy(SCF_energy) write(*, '(A22,X,F16.10)') 'one_e_energy = ',one_e_energy diff --git a/src/mo_basis/mo_class.irp.f b/src/mo_basis/mo_class.irp.f index 95fbb443..7705e414 100644 --- a/src/mo_basis/mo_class.irp.f +++ b/src/mo_basis/mo_class.irp.f @@ -1,8 +1,3 @@ -! DO NOT MODIFY BY HAND -! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py -! from file /home/eginer/programs/qp2/src/mo_basis/EZFIO.cfg - - BEGIN_PROVIDER [ character*(32), mo_class , (mo_num) ] implicit none BEGIN_DOC @@ -35,6 +30,4 @@ BEGIN_PROVIDER [ character*(32), mo_class , (mo_num) ] endif IRP_ENDIF - call write_time(6) - END_PROVIDER diff --git a/src/mo_basis/utils.irp.f b/src/mo_basis/utils.irp.f index 12c6c79d..5f664c41 100644 --- a/src/mo_basis/utils.irp.f +++ b/src/mo_basis/utils.irp.f @@ -3,7 +3,6 @@ subroutine save_mos double precision, allocatable :: buffer(:,:) integer :: i,j - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) call ezfio_set_mo_basis_mo_num(mo_num) call ezfio_set_mo_basis_mo_label(mo_label) call ezfio_set_mo_basis_ao_md5(ao_md5) @@ -27,7 +26,7 @@ subroutine save_mos_no_occ double precision, allocatable :: buffer(:,:) integer :: i,j - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) +! call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) !call ezfio_set_mo_basis_mo_num(mo_num) !call ezfio_set_mo_basis_mo_label(mo_label) !call ezfio_set_mo_basis_ao_md5(ao_md5) @@ -48,7 +47,7 @@ subroutine save_mos_truncated(n) double precision, allocatable :: buffer(:,:) integer :: i,j,n - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) +! call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) call ezfio_set_mo_basis_mo_num(n) call ezfio_set_mo_basis_mo_label(mo_label) diff --git a/src/mo_localization/84.mo_localization.bats b/src/mo_localization/84.mo_localization.bats new file mode 100644 index 00000000..b34c0bd5 --- /dev/null +++ b/src/mo_localization/84.mo_localization.bats @@ -0,0 +1,97 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + +zero () { + if [ -z "$1" ]; then echo 0.0; else echo $1; fi +} + +function run() { + thresh1=1e-10 + thresh2=1e-12 + thresh3=1e-4 + test_exe scf || skip + qp set_file $1 + qp edit --check + qp reset -d + qp set_frozen_core + qp set localization localization_method boys + file="$(echo $1 | sed 's/.ezfio//g')" + energy="$(cat $1/hartree_fock/energy)" + fb_err1="$(qp run debug_gradient_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + fb_err2="$(qp run debug_hessian_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + qp run localization > $file.loc.out + fb_energy="$(qp run print_energy | grep -A 1 'Nuclear repulsion energy' | tail -n 1 )" + fb_c="$(cat $file.loc.out | grep 'Criterion:Core' | tail -n 1 | awk '{print $3}')i" + fb_i="$(cat $file.loc.out | grep 'Criterion:Inactive' | tail -n 1 | awk '{print $3}')" + fb_a="$(cat $file.loc.out | grep 'Criterion:Active' | tail -n 1 | awk '{print $3}')" + fb_v="$(cat $file.loc.out | grep 'Criterion:Virtual' | tail -n 1 | awk '{print $3}')" + qp reset -a + qp run scf + qp set_frozen_core + qp set localization localization_method pipek + pm_err1="$(qp run debug_gradient_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + pm_err2="$(qp run debug_hessian_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + qp run localization > $file.loc.out + pm_c="$(cat $file.loc.out | grep 'Criterion:Core' | tail -n 1 | awk '{print $3}')i" + pm_i="$(cat $file.loc.out | grep 'Criterion:Inactive' | tail -n 1 | awk '{print $3}')" + pm_a="$(cat $file.loc.out | grep 'Criterion:Active' | tail -n 1 | awk '{print $3}')" + pm_v="$(cat $file.loc.out | grep 'Criterion:Virtual' | tail -n 1 | awk '{print $3}')" + pm_energy="$(qp run print_energy | grep -A 1 'Nuclear repulsion energy' | tail -n 1 )" + qp set localization localization_method boys + qp reset -a + qp run scf + qp set_frozen_core + eq $energy $fb_energy $thresh1 + eq $fb_err1 0.0 $thresh2 + eq $fb_err2 0.0 $thresh2 + eq $energy $pm_energy $thresh1 + eq $pm_err1 0.0 $thresh2 + eq $pm_err2 0.0 $thresh2 + fb_c=$(zero $fb_c) + fb_i=$(zero $fb_i) + fb_a=$(zero $fb_a) + fb_v=$(zero $fb_v) + pm_c=$(zero $pm_c) + pm_i=$(zero $pm_i) + pm_a=$(zero $pm_a) + pm_v=$(zero $pm_v) + eq $fb_c $2 $thresh3 + eq $fb_i $3 $thresh3 + eq $fb_a $4 $thresh3 + eq $fb_v $5 $thresh3 + eq $pm_c $6 $thresh3 + eq $pm_i $7 $thresh3 + eq $pm_a $8 $thresh3 + eq $pm_v $9 $thresh3 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -32.1357551678876 -47.0041982094667 0.0 -223.470015856259 -1.99990778964451 -2.51376723927071 0.0 -12.8490602539275 +} + +@test "clo" { +run clo.ezfio -44.1624001765291 -32.4386660941387 0.0 -103.666309287187 -5.99985418946811 -5.46871580225222 0.0 -20.2480064922275 +} + +@test "clf" { +run clf.ezfio -47.5143398826967 -35.7206886315104 0.0 -107.043029033468 -5.99994222062230 -6.63916513458470 0.0 -19.7035159913484 +} + +@test "h2o2" { +run h2o2.ezfio -7.76848143170524 -30.9694344369829 0.0 -175.898343829453 -1.99990497554575 -5.62980322957485 0.0 -33.5699813186666 +} + +@test "h2o" { +run h2o.ezfio 0.0 -2.52317434969591 0.0 -45.3136377925359 0.0 -3.01248365356981 0.0 -22.4470831240924 +} + +@test "h3coh" { +run h3coh.ezfio -3.66763692804590 -24.0463089480870 0.0 -111.485948435075 -1.99714061342078 -4.89242181322988 0.0 -23.6405412057679 +} + +@test "n2h4" { +run n2h4.ezfio -7.46608163002070 -35.7632174051822 0.0 -305.913449004632 -1.99989326143356 -4.62496615892268 0.0 -51.5171904685553 +} + diff --git a/src/mo_localization/EZFIO.cfg b/src/mo_localization/EZFIO.cfg new file mode 100644 index 00000000..d1b844a5 --- /dev/null +++ b/src/mo_localization/EZFIO.cfg @@ -0,0 +1,54 @@ +[localization_method] +type: character*(32) +doc: Method for the orbital localization. boys: Foster-Boys, pipek: Pipek-Mezey. +interface: ezfio,provider,ocaml +default: boys + +[localization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital localization. +interface: ezfio,provider,ocaml +default: 1000 + +[localization_use_hessian] +type: logical +doc: If true, it uses the trust region algorithm with the gradient and the diagonal of the hessian. Else it computes the rotation between each pair of MOs that should be applied to maximize/minimize the localization criterion. The last option is not easy to converge. +interface: ezfio,provider,ocaml +default: true + +[auto_mo_class] +type: logical +doc: If true, set automatically the classes. +interface: ezfio,provider,ocaml +default: true + +[thresh_loc_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad. +interface: ezfio,provider,ocaml +default: 1.e-6 + +[kick_in_mos] +type: logical +doc: If True, it applies a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization. +interface: ezfio,provider,ocaml +default: true + +[angle_pre_rot] +type: double precision +doc: To define the angle for the rotation of the MOs before the localization (in rad). +interface: ezfio,provider,ocaml +default: 0.1 + +[sort_mos_by_e] +type: logical +doc: If True, the MOs are sorted using the diagonal elements of the Fock matrix. +interface: ezfio,provider,ocaml +default: false + +[debug_hf] +type: logical +doc: If True, prints the HF energy before/after the different steps of the localization. Only for debugging. +interface: ezfio,provider,ocaml +default: false + diff --git a/src/mo_localization/NEED b/src/mo_localization/NEED new file mode 100644 index 00000000..b438f39d --- /dev/null +++ b/src/mo_localization/NEED @@ -0,0 +1,3 @@ +hartree_fock +utils_trust_region +determinants diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md new file mode 100644 index 00000000..c28a5ee1 --- /dev/null +++ b/src/mo_localization/README.md @@ -0,0 +1,113 @@ +# Orbital localisation +To localize the MOs: +``` +qp run localization +``` +By default, the different otbital classes are automatically set by splitting +the orbitales in the following classes: +- Core -> Core +- Active, doubly occupied -> Inactive +- Active, singly occupied -> Active +- Active, empty -> Virtual +- Deleted -> Deleted +The orbitals will be localized among each class, excpect the deleted ones. +If you want to choose another splitting, you can set +``` +qp set mo_localization auto_mo_class false +``` +and define the classes with +``` +qp set_mo_class -c [] -a [] -v [] -i [] -d [] +``` +for more information +``` +qp set_mo_class -q +``` +We don't care about the name of the +mo classes. The algorithm just localizes all the MOs of +a given class between them, for all the classes, except the deleted MOs. +If you are using the last option don't forget to reset the initial mo classes +after the localization. + +Before the localization, a kick is done for each mo class +(except the deleted ones) to break the MOs. This is done by +doing a given rotation between the MOs. +This feature can be removed by setting: +``` +qp set localization kick_in_mos false +``` +and the default angle for the rotation can be changed with: +``` +qp set localization angle_pre_rot 1e-3 # or something else +``` + +After the localization, the MOs of each class (except the deleted ones) +can be sorted between them using the diagonal elements of +the fock matrix with: +``` +qp set localization sort_mos_by_e true +``` + +You can check the Hartree-Fock energy before/during/after the localization +by putting (only for debugging): +``` +qp set localization debug_hf true +``` + +## Foster-Boys & Pipek-Mezey +Foster-Boys: +``` +qp set localization localization_method boys +``` + +Pipek-Mezey: +``` +qp set localization localization_method pipek +``` + +# Break the spatial symmetry of the MOs +This program work exactly as the localization. +To break the spatial symmetry of the MOs: +``` +qp run break_spatial_sym +``` +The default angle for the rotations is too big for this kind of +application, a value between 1e-3 and 1e-6 should break the spatial +symmetry with just a small change in the energy: +``` +qp set localization angle_pre_rot 1e-3 +``` + +# With or without hessian + trust region +With hessian + trust region +``` +qp set localization localisation_use_hessian true +``` +It uses the trust region algorithm with the diagonal of the hessian of the +localization criterion with respect to the MO rotations. + +Without the hessian and the trust region +``` +qp set localization localisation_use_hessian false +``` +By doing so it does not require to store the hessian but the +convergence is not easy, in particular for virtual MOs. +It seems that it not possible to converge with Pipek-Mezey +localization with this approach. + +# Parameters +Some other parameters are available for the localization (qp edit for more details). + +# Tests +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_localization/break_spatial_sym.irp.f b/src/mo_localization/break_spatial_sym.irp.f new file mode 100644 index 00000000..2048aca6 --- /dev/null +++ b/src/mo_localization/break_spatial_sym.irp.f @@ -0,0 +1,27 @@ +! ! A small program to break the spatial symmetry of the MOs. + +! ! You have to defined your MO classes or set security_mo_class to false +! ! with: +! ! qp set orbital_optimization security_mo_class false + +! ! The default angle for the rotations is too big for this kind of +! ! application, a value between 1e-3 and 1e-6 should break the spatial +! ! symmetry with just a small change in the energy. + + +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end diff --git a/src/mo_localization/debug_gradient_loc.irp.f b/src/mo_localization/debug_gradient_loc.irp.f new file mode 100644 index 00000000..d935e782 --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.irp.f @@ -0,0 +1,65 @@ +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end diff --git a/src/mo_localization/debug_hessian_loc.irp.f b/src/mo_localization/debug_hessian_loc.irp.f new file mode 100644 index 00000000..3ee4f0fa --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.irp.f @@ -0,0 +1,65 @@ +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:), H2(:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(H(n),H2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i)) > threshold) then + print*,H(i) + nb_error = nb_error + 1 + if (dabs(H(i)) > max_elem) then + max_elem = H(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end diff --git a/src/mo_localization/kick_the_mos.irp.f b/src/mo_localization/kick_the_mos.irp.f new file mode 100644 index 00000000..b6c77c9e --- /dev/null +++ b/src/mo_localization/kick_the_mos.irp.f @@ -0,0 +1,16 @@ +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end diff --git a/src/mo_localization/localization.irp.f b/src/mo_localization/localization.irp.f new file mode 100644 index 00000000..7ccb2f5a --- /dev/null +++ b/src/mo_localization/localization.irp.f @@ -0,0 +1,520 @@ +program localization + + implicit none + + call set_classes_loc + call run_localization + call unset_classes_loc + +end + + + + +! Variables: +! | pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +! | R(mo_num,mo_num) | double precision | Rotation matrix | +! | tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +! | prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +! | spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +! | criterion | double precision | Localization criterion | +! | prev_criterion | double precision | Previous criterion | +! | criterion_model | double precision | Estimated next criterion | +! | rho | double precision | Ratio to measure the agreement between the model | +! | | | and the reality | +! | delta | double precision | Radisu of the trust region | +! | norm_grad | double precision | Norm of the gradient | +! | info | integer | for dsyev from Lapack | +! | max_elem | double precision | maximal element in the gradient | +! | v_grad(:) | double precision | Gradient | +! | H(:,:) | double precision | Hessian (diagonal) | +! | e_val(:) | double precision | Eigenvalues of the hessian | +! | W(:,:) | double precision | Eigenvectors of the hessian | +! | tmp_x(:) | double precision | Step in 1D (in a subaspace) | +! | tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +! | tmp_list(:) | double precision | List of MOs in a mo_class | +! | i,j,k | integer | Indexes in the full MO space | +! | tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +! | l | integer | Index for the mo_class | +! | key(:) | integer | Key to sort the eigenvalues of the hessian | +! | nb_iter | integer | Number of iterations | +! | must_exit | logical | To exit the trust region loop | +! | cancel_step | logical | To cancel a step | +! | not_*converged | logical | To localize the different mo classes | +! | t* | double precision | To measure the time | +! | n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +! | tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +! | | | Number of dimension in the subspace | + +! Variables in qp_edit for the localization: +! | localization_method | +! | localization_max_nb_iter | +! | default_mo_class | +! | thresh_loc_max_elem_grad | +! | kick_in_mos | +! | angle_pre_rot | + +! + all the variables for the trust region + +! Cf. qp_edit orbital optimization + + +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:), tmp_m_x(:,:), tmp_x(:),W(:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + +! Loc + +! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + W(i) = dble(key(i)) + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,1, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + ! Seems unecessary + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f new file mode 100644 index 00000000..f5afed07 --- /dev/null +++ b/src/mo_localization/localization_sub.irp.f @@ -0,0 +1,2008 @@ +! Gathering +! Gradient/hessian/criterion for the localization: +! They are chosen in function of the localization method + +! Gradient: + +! qp_edit : +! | localization_method | method for the localization | + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + + +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end + + + +! Hessian: + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Criterion: + +! Output: +! | criterion | double precision | Criterion for the orbital localization | + + +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Subroutine to update the datas needed for the localization + +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end + + + +! Angles: + +! Output: +! | tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +! | max_elem | double precision | Maximal angle | + + + +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + +! Gradient +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'---End gradient_FB---' + +end subroutine + +! Gradient (OMP) + +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'---End gradient_FB_omp---' + +end subroutine + +! Hessian + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! Internal: +! | beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'---End hessian_FB---' + +end subroutine + +! Hessian (OMP) + +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do i = 1, tmp_n + H(i) = 0d0 + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'---End hessian_FB_omp---' + +end subroutine + +! Gradient v1 + +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek + +! Gradient + +! The gradient is + +! \begin{align*} +! \left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +! \end{align*} +! with +! \begin{align*} +! \gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +! \end{align*} + +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +! | tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +! | tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +! | | | product and compute tmp_int | +! | CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +! | tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the mo_class | +! | tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the nuclei | +! | tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +! | | | values depending of the nuclei | +! | a | | index to loop over the nuclei | +! | b | | index to loop over the AOs which belongs to the nuclei a | +! | mu | | index to refer to an AO which belongs to the nuclei a | +! | rho | | index to loop over all the AOs | + + +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'---End gradient_PM---' + +end + +! Hessian v1 + +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + +end + +! Hessian + +! The hessian is +! \begin{align*} +! \left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +! \end{align*} +! \begin{align*} +! \beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +! \end{align*} + +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + + +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'---End hessian_PM---' + +end + +! Criterion PM (old) + +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end + +! Criterion PM + +! The criterion is computed as +! \begin{align*} +! \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +! \end{align*} +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} + + +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + +end + +! Criterion PM v3 + +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + +end + +! Criterion FB (old) + +! The criterion is just computed as + +! \begin{align*} +! C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +! \end{align*} + +! The minus sign is here in order to minimize this criterion + +! Output: +! | criterion | double precision | criterion for the Foster-Boys localization | + + +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +! Criterion FB + +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end + +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end + +! Spatial extent + +! The spatial extent of an orbital $i$ is computed as +! \begin{align*} +! \sum_{\lambda=x,y,z}\sqrt{ - ^2} +! \end{align*} + +! From that we can also compute the average and the standard deviation + + +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end + +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end + +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end + +! Utils + + +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end + +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine + +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end + +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +subroutine set_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + call apply_hole(psi_det(1,1,1), 1, i, res, ok1, N_int) + call apply_hole(psi_det(1,1,1), 2, i, res, ok2, N_int) + if (ok1 .and. ok2) then + mo_class(i) = 'Inactive' + else if (.not. ok1 .and. .not. ok2) then + mo_class(i) = 'Virtual' + else + mo_class(i) = 'Active' + endif + enddo + touch mo_class + endif + +end + +subroutine unset_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + mo_class(i) = 'Active' + enddo + touch mo_class + endif + +end diff --git a/src/mo_localization/org/TANGLE_org_mode.sh b/src/mo_localization/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_localization/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_localization/org/break_spatial_sym.org b/src/mo_localization/org/break_spatial_sym.org new file mode 100644 index 00000000..d82f1c60 --- /dev/null +++ b/src/mo_localization/org/break_spatial_sym.org @@ -0,0 +1,28 @@ +! A small program to break the spatial symmetry of the MOs. + +! You have to defined your MO classes or set security_mo_class to false +! with: +! qp set orbital_optimization security_mo_class false + +! The default angle for the rotations is too big for this kind of +! application, a value between 1e-3 and 1e-6 should break the spatial +! symmetry with just a small change in the energy. + +#+BEGIN_SRC f90 :comments org :tangle break_spatial_sym.irp.f +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end +#+END_SRC diff --git a/src/mo_localization/org/debug_gradient_loc.org b/src/mo_localization/org/debug_gradient_loc.org new file mode 100644 index 00000000..6d147dd0 --- /dev/null +++ b/src/mo_localization/org/debug_gradient_loc.org @@ -0,0 +1,67 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_gradient_loc.irp.f +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end +#+END_SRC diff --git a/src/mo_localization/org/debug_hessian_loc.org b/src/mo_localization/org/debug_hessian_loc.org new file mode 100644 index 00000000..e47cf38d --- /dev/null +++ b/src/mo_localization/org/debug_hessian_loc.org @@ -0,0 +1,67 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_hessian_loc.irp.f +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:), H2(:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(H(n),H2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i)) > threshold) then + print*,H(i) + nb_error = nb_error + 1 + if (dabs(H(i)) > max_elem) then + max_elem = H(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end +#+END_SRC diff --git a/src/mo_localization/org/kick_the_mos.org b/src/mo_localization/org/kick_the_mos.org new file mode 100644 index 00000000..c0c6c02d --- /dev/null +++ b/src/mo_localization/org/kick_the_mos.org @@ -0,0 +1,18 @@ +#+BEGIN_SRC f90 :comments org :tangle kick_the_mos.irp.f +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end +#+END_SRC diff --git a/src/mo_localization/org/localization.org b/src/mo_localization/org/localization.org new file mode 100644 index 00000000..aaf9f18d --- /dev/null +++ b/src/mo_localization/org/localization.org @@ -0,0 +1,2899 @@ +* Orbital localization + +Molecular orbitals localization + +** Doc + +The program localizes the orbitals in function of their mo_class: +- core MOs +- inactive MOs +- active MOs +- virtual MOs +- deleted MOs -> no orbital localization + +Core MOs are localized with core MOs, inactives MOs are localized with +inactives MOs and so on. But deleted orbitals are not localized. + +WARNING: +- The user MUST SPECIFY THE MO CLASSES, otherwise if default mo class + is false the localization will be done for all the orbitals between + them, so the occupied and virtual MOs will be combined together + which is clearly not what we want to do. If default lpmo class is true + the localization will be done for the core, occupied and virtual + orbitals, but pay attention the mo_class are not deleted after... +- The mo class is not important (except "deleted") because it is not + link to the kind of MOs for CASSCF or CIPSI. It is just a way to + separate the MOs in order to localize them separetely, for example + to separate the core MOs, the occupied MOs and the virtuals MOs. +- The user MUST CHANGE THE MO CLASSES AFTER THE LOCALIZATION in order + to have the right mo class for his next calculation... + +For more information on the mo_class: +lpqp set_mo_class -h + +*** Foster-Boys localization +Foster-Boys localization: +- cite Foster +Boys, S. F., 1960, Rev. Mod. Phys. 32, 296. +DOI:https://doi.org/10.1103/RevModPhys.32.300 +Boys, S. F., 1966, in Quantum Theory of Atoms, Molecules, +and the Solid State, edited by P.-O. Löwdin (Academic +Press, New York), p. 253. +Daniel A. Kleier, Thomas A. Halgren, John H. Hall Jr., and William +N. Lipscomb, J. Chem. Phys. 61, 3905 (1974) +doi: 10.1063/1.1681683 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Comput. Chem. 2013, 34, +1456– 1462. DOI: 10.1002/jcc.23281 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Theory +Comput. 2012, 8, 9, 3137–3146 +DOI: https://doi.org/10.1021/ct300473g +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Phys. 137, 224114 +(2012) +DOI: https://doi.org/10.1063/1.4769866 +Nicola Marzari, Arash A. Mostofi, Jonathan R. Yates, Ivo Souza, and David Vanderbilt +Rev. Mod. Phys. 84, 1419 +https://doi.org/10.1103/RevModPhys.84.1419 + +The Foster-Boys localization is a method to generate localized MOs +(LMOs) by minimizing the Foster-Boys criterion: +$$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r | +\phi_i >^2 \right] $$. +In fact it is equivalent to maximise +$$ C_2 = \sum_{i>j, \ i=1}^N \left[ < \phi_i | r | \phi_i > - < +\phi_j | r | \phi_j > \left]^2$$ +or +$$ C_3 = \sum_{i=1}^N \left[ < \phi_i | r | \phi_i > \right]^2.$$ + +Noting +$$A_{ii} = < \phi_i | r^2 | \phi_i > $$ +$$B_{ii} = < \phi_i | r | \phi_i > $$ + +$$ \beta = (B_{pp} - B_{qq})^2 - 4 B_{pq}^2 $$ +$$ \gamma = 4 B_{pq} (B_{pp} - B_{qq}) $$ + +\begin{align*} +C_{FB}(\theta) &= \sum_{i=1}^N \left[ A_{ii} - B_{ii}^2 \right] \\ +&- \left[ A_{pp} - B_{pp}^2 + A_{qq} - B_{qq}^2 \right] \\ +&+ \left[ A_{pp} + A_{qq} - B_{pp}^2 - B_{qq}^2 ++ \frac{1}{4} [(1-\cos(4\theta) \beta + \sin(4\theta) \gamma] \right] \\ +&= C_1(\theta=0) + \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] +\end{align*} + +The derivatives are: +\begin{align*} +\frac{\partial C_{FB}(\theta)}{\partial \theta} = \beta \sin(4\theta) + \gamma \cos(4 \theta) +\end{align*} + +\begin{align*} +\frac{\partial^2 C_{FB}(\theta)}{\partial \theta^2} = 4 \beta \cos(4\theta) - 4 \gamma \sin(4 \theta) +\end{align*} + +Similarly: +\begin{align*} +C_3(\theta) &= \sum_{i=1}^N [B_{ii}^2] \\ +&- B_{pp}^2 - B_{qq}^2 \\ +&+ B_{pp}^2 + B_{qq}^2 - \frac{1}{4} [(1-\cos(4\theta) \beta + \sin(4\theta) \gamma] \\ +&= C_3(\theta=0) - \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] +\end{align*} + +The derivatives are: +\begin{align*} +\frac{\partial C_3(\theta)}{\partial \theta} = - \beta \sin(4\theta) - \gamma \cos(4 \theta) +\end{align*} + +\begin{align*} +\frac{\partial^2 C_3(\theta)}{\partial \theta^2} = - 4 \beta \cos(4\theta) + 4 \gamma \sin(4 \theta) +\end{align*} + +And since we compute the derivatives around $\theta = 0$ (around the +actual position) we have: +\begin{align*} +\left. \frac{\partial{C_{FB}(\theta)}}{\partial \theta}\right|_{\theta=0} = \gamma +\end{align*} + +\begin{align*} +\left. \frac{\partial^2 C_{FB}(\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta +\end{align*} + +Locality of the orbitals: +- cite Hoyvik +As the Foster-Boys method tries to minimize the sum of the second +moment MO spread, the locality of each MO can be expressed as the +second moment of the MO spread. For the MO i, the locality criterion is +\begin{align*} +\sigma_i &= \sqrt{ - ^2} \\ +&= \sqrt{ - ^2 + - ^2 + - ^2} +\end{align*} + + +*** Pipek-Mezey localization +-cite pipek mezey 1989 +J. Pipek, P. G. Mezey, J. Chem. Phys. 90, 4916 (1989) +DOI: 10.1063/1.456588 + +Foster-Boys localization does not preserve the $\sigma - \pi$ separation of the +MOs, it leads to "banana" orbitals. The Pipek-Mezey localization +normally preserves this separation. + +The optimum functional $\mathcal{P}$ is obtained for the maximum of +$D^{-1}$ +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} + +As for the Foster Boys localization, the change in the functional for +the rotation of two MOs can be obtained using very similar terms +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} +The matrix element of the operator $P_A$ are obtained using +\begin{align*} +<\rho | \tilde{\mu}> = \delta_{\rho \mu} +\end{align*} +which leads to +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +So similarly the first and second derivatives are + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} + +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} + +** Localization procedure + +Localization procedure: + +To do the localization we compute the gradient and the +diagonal hessian of the Foster-Boys criterion with respect to the MO +rotations and we minimize it with the Newton method. + +In order to avoid the problem of starting on a saddle point, the +localization procedure starts by giving a little kick in the MOs, by +putting "kick in mos" true, in order to break the symmetry and escape +from a possible saddle point. + +In order to speed up the iteration we compute the gradient, the +diagonal hessian and the step in temporary matrices of the size +(number MOs in mo class by number MOs in mo class) + +** Remarks + +Variables: + +The indexes i and j refere to the positions of the elements in +the "full space", i.e., the arrays containing elements for all the MOs, +but the indexes tmp_i and tmp_j to the positions of the elements in +the "reduced space/subspace", i.e., the arrays containing elements for +a restricted number of MOs. +Example: +The gradient for the localization of the core MOs can be expressed +as a vector of length mo_num*(mo_num-1)/2 with only +n_core_orb*(n_core_orb-1)/2 non zero elements, so it is more relevant +to use a vector of size n_act_orb*(n_core_orb-1)/2. +So here the gradient is a vector of size +tmp_list_size*(tmp_list_size)/2 where tmp_list_size is the number of +MOs is the corresponding mo class. +The same thing happened for the hessian, the matrix containing the +step and the rotation matrix, which are tmp_list_size by tmp_list_size +matrices. + +Ex gradient for 4 core orbitales: +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +0 \\ +\vdots \\ +0 \\ +\end{pmatrix} +\end{align*} + +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The same thing can be done if indexes of the orbitales are not +consecutives since it's done with lists of MOs: + +\begin{align*} +\begin{pmatrix} +0 & -a & 0 & -b & -d & \hdots & 0 \\ +a & 0 & 0 & -c & -e & \hdots & 0 \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +b & c & 0 & 0 & -f & \hdots & 0 \\ +d & e & 0 & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The dipoles are updated using the "ao to mo" subroutine without the +"restore symmetry" which is actually in N^4 but can be rewrite in N^2 +log(N^2). +The bottleneck of the program is normally N^3 with the matrix +multiplications/diagonalizations. The use of the full hessian can be +an improvement but it will scale in N^4... + +** Program + +#+BEGIN_SRC f90 org :tangle localization.irp.f +program localization + + implicit none + + call set_classes_loc + call run_localization + call unset_classes_loc + +end +#+END_SRC + + +Variables: +| pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +| R(mo_num,mo_num) | double precision | Rotation matrix | +| tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +| prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +| spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +| criterion | double precision | Localization criterion | +| prev_criterion | double precision | Previous criterion | +| criterion_model | double precision | Estimated next criterion | +| rho | double precision | Ratio to measure the agreement between the model | +| | | and the reality | +| delta | double precision | Radisu of the trust region | +| norm_grad | double precision | Norm of the gradient | +| info | integer | for dsyev from Lapack | +| max_elem | double precision | maximal element in the gradient | +| v_grad(:) | double precision | Gradient | +| H(:,:) | double precision | Hessian (diagonal) | +| e_val(:) | double precision | Eigenvalues of the hessian | +| W(:,:) | double precision | Eigenvectors of the hessian | +| tmp_x(:) | double precision | Step in 1D (in a subaspace) | +| tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +| tmp_list(:) | double precision | List of MOs in a mo_class | +| i,j,k | integer | Indexes in the full MO space | +| tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +| l | integer | Index for the mo_class | +| key(:) | integer | Key to sort the eigenvalues of the hessian | +| nb_iter | integer | Number of iterations | +| must_exit | logical | To exit the trust region loop | +| cancel_step | logical | To cancel a step | +| not_*converged | logical | To localize the different mo classes | +| t* | double precision | To measure the time | +| n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +| tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +| | | Number of dimension in the subspace | + +Variables in qp_edit for the localization: +| localization_method | +| localization_max_nb_iter | +| default_mo_class | +| thresh_loc_max_elem_grad | +| kick_in_mos | +| angle_pre_rot | + ++ all the variables for the trust region + +Cf. qp_edit orbital optimization + +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:), tmp_m_x(:,:), tmp_x(:),W(:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + +#+END_SRC + +** Loc +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + W(i) = dble(key(i)) + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,1, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + ! Seems unecessary + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end +#+END_SRC + +** Gathering +Gradient/hessian/criterion for the localization: +They are chosen in function of the localization method + +Gradient: + +qp_edit : +| localization_method | method for the localization | + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end +#+END_SRC + +Hessian: + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Criterion: + +Output: +| criterion | double precision | Criterion for the orbital localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Subroutine to update the datas needed for the localization +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end +#+END_SRC + +Angles: + +Output: +| tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +| max_elem | double precision | Maximal angle | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +** Foster-Boys +*** Gradient +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'---End gradient_FB---' + +end subroutine +#+END_SRC + +*** Gradient (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'---End gradient_FB_omp---' + +end subroutine +#+END_SRC + +*** Hessian + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +Internal: +| beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'---End hessian_FB---' + +end subroutine +#+END_SRC + +*** Hessian (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do i = 1, tmp_n + H(i) = 0d0 + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'---End hessian_FB_omp---' + +end subroutine +#+END_SRC + +** Pipek-Mezey +*** Gradient v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek +#+END_SRC + +*** Gradient + +The gradient is + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} +with +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} + +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +| tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +| tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +| | | product and compute tmp_int | +| CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +| tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the mo_class | +| tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the nuclei | +| tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +| | | values depending of the nuclei | +| a | | index to loop over the nuclei | +| b | | index to loop over the AOs which belongs to the nuclei a | +| mu | | index to refer to an AO which belongs to the nuclei a | +| rho | | index to loop over all the AOs | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'---End gradient_PM---' + +end +#+END_SRC + +*** Hessian v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + +end +#+END_SRC + +*** Hessian + +The hessian is +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} + +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'---End hessian_PM---' + +end + +#+END_SRC + +** Criterion +*** Criterion PM (old) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end +#+END_SRC + +*** Criterion PM + +The criterion is computed as +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + +end +#+END_SRC + +*** Criterion PM v3 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + +end +#+END_SRC + +*** Criterion FB (old) + +The criterion is just computed as + +\begin{align*} +C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +\end{align*} + +The minus sign is here in order to minimize this criterion + +Output: +| criterion | double precision | criterion for the Foster-Boys localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +*** Criterion FB +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +** Theta + +In: +| n | integer | number of MOs in the considered MO class | +| l | integer | list of MOs of the considered class | + +Out: +| m_x(n,n) | double precision | Matrix containing the rotation angle between all the different | +| | | pairs of MOs to apply the rotations (need a minus sign) | +| max_elem | double precision | Maximal angle in absolute value | + +$$\cos(4 \theta) = \frac{-A{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\sin(4 \theta) = \frac{B{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\tan(4 \theta) = \frac{\sin(4 \theta)}{\cos(4 \theta)}$$ +where $\theta$ is in fact $\theta_{ij}$ + +For Foster-Boys localization: +$$A_{ij} = ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = ( - )$$ + + +For Pipek-Mezey localization: +$$A_{ij} = \sum_A ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = \sum_A ( - )$$ +with +$$ = \frac{1}{2} \sum_\rho \sum_{\mu \in A} ( c_\rho^{i*} S_{\rho +\mu} c_\mu^j + c_\mu^{i*} S_{\mu \rho} c_\rho^j)$$ +$i,j$ MOs +$\mu, \rho$ AOs +$A$ nucleus +$S$ overlap matrix +$c$ MO coefficient +$r$ position operator + +#+begin_src f90 :tangle localization_sub.irp.f +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end +#+end_src + +#+begin_src f90 :comments org :tangle localization_sub.irp.f +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end +#+end_src + +** Spatial extent + +The spatial extent of an orbital $i$ is computed as +\begin{align*} +\sum_{\lambda=x,y,z}\sqrt{ - ^2} +\end{align*} + +From that we can also compute the average and the standard deviation + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end +#+END_SRC + +** Utils + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +#+END_SRC + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +subroutine set_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + call apply_hole(psi_det(1,1,1), 1, i, res, ok1, N_int) + call apply_hole(psi_det(1,1,1), 2, i, res, ok2, N_int) + if (ok1 .and. ok2) then + mo_class(i) = 'Inactive' + else if (.not. ok1 .and. .not. ok2) then + mo_class(i) = 'Virtual' + else + mo_class(i) = 'Active' + endif + enddo + touch mo_class + endif + +end + +subroutine unset_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + mo_class(i) = 'Active' + enddo + touch mo_class + endif + +end +#+END_SRC diff --git a/src/mo_optimization/83.mo_optimization.bats b/src/mo_optimization/83.mo_optimization.bats new file mode 100644 index 00000000..5bc3d313 --- /dev/null +++ b/src/mo_optimization/83.mo_optimization.bats @@ -0,0 +1,62 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run() { + thresh=2e-3 + test_exe scf || skip + qp set_file $1 + qp edit --check + qp reset -a + qp run scf + qp set_frozen_core + qp set determinants n_states 2 + qp set determinants read_wf true + qp set mo_two_e_ints io_mo_two_e_integrals None + file="$(echo $1 | sed 's/.ezfio//g')" + qp run cis + qp run debug_gradient_list_opt > $file.debug_g.out + err3="$(grep 'Max error:' $file.debug_g.out | awk '{print $3}')" + qp run debug_hessian_list_opt > $file.debug_h1.out + err1="$(grep 'Max error:' $file.debug_h1.out | awk '{print $3}')" + qp run orb_opt > $file.opt1.out + energy1="$(grep 'State average energy:' $file.opt1.out | tail -n 1 | awk '{print $4}')" + qp set orbital_optimization optimization_method diag + qp reset -d + qp run scf + qp run cis + qp run debug_hessian_list_opt > $file.debug_h2.out + err2="$(grep 'Max error_H:' $file.debug_h2.out | awk '{print $3}')" + qp run orb_opt > $file.opt2.out + energy2="$(grep 'State average energy:' $file.opt2.out | tail -n 1 | awk '{print $4}')" + qp set orbital_optimization optimization_method full + qp reset -d + qp run scf + eq $energy1 $2 $thresh + eq $energy2 $3 $thresh + eq $err1 0.0 1e-12 + eq $err2 0.0 1e-12 + eq $err3 0.0 1e-12 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -48.9852901484277 -48.9852937541510 +} + +@test "h2o" { +run h2o.ezfio -75.9025622449206 -75.8691844585879 +} + +@test "h2s" { +run h2s.ezfio -398.576255809878 -398.574145943928 +} + +@test "hbo" { +run hbo.ezfio -99.9234823022109 -99.9234763597840 +} + +@test "hco" { +run hco.ezfio -113.204915552241 -113.204905207050 +} diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg new file mode 100644 index 00000000..e6aa2d67 --- /dev/null +++ b/src/mo_optimization/EZFIO.cfg @@ -0,0 +1,23 @@ +[optimization_method] +type: character*(32) +doc: Define the kind of hessian for the orbital optimization full : full hessian, diag : diagonal hessian, none : no hessian +interface: ezfio,provider,ocaml +default: full + +[n_det_max_opt] +type: integer +doc: Maximal number of the determinants in the wf for the orbital optimization (to stop the optimization if n_det > n_det_max_opt) +interface: ezfio,provider,ocaml +default: 200000 + +[optimization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital optimization +interface: ezfio,provider,ocaml +default: 20 + +[thresh_opt_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the optimization exits when the biggest element in the gradient is smaller than thresh_optimization_max_elem_grad +interface: ezfio,provider,ocaml +default: 1.e-5 diff --git a/src/mo_optimization/NEED b/src/mo_optimization/NEED new file mode 100644 index 00000000..91f41ee3 --- /dev/null +++ b/src/mo_optimization/NEED @@ -0,0 +1,7 @@ +two_body_rdm +hartree_fock +cipsi +davidson_undressed +selectors_full +generators_full +utils_trust_region diff --git a/src/mo_optimization/README.md b/src/mo_optimization/README.md new file mode 100644 index 00000000..94f29aee --- /dev/null +++ b/src/mo_optimization/README.md @@ -0,0 +1,74 @@ +# Orbital optimization + +## Methods +Different methods are available: +- full hessian +``` +qp set orbital_optimization optimization_method full +``` +- diagonal hessian +``` +qp set orbital_optimization optimization_method diag +``` +- identity matrix +``` +qp set orbital_optimization optimization_method none +``` + +After the optimization the ezfio contains the optimized orbitals + +## For a fixed number of determinants +To optimize the MOs for the actual determinants: +``` +qp run orb_opt +``` + +## For a complete optimization, i.e, with a larger and larger wave function +To optimize the MOs with a larger and larger wave function: +``` +qp run optimization +``` + +The results are stored in the EZFIO in "mo_optimization/result_opt", +with the following format: +(1) (2) (3) (4) +1: Number of determinants in the wf, +2: Cispi energy before the optimization, +3: Cipsi energy after the optimization, +4: Energy difference between (2) and (3). + +The optimization process if the following: +- we do a first cipsi step to obtain a small number of determinants in the wf +- we run an orbital optimization for this wf +- we do a new cipsi step to double the number of determinants in the wf +- we run an orbital optimization for this wf +- ... +- we do that until the energy difference between (2) and (3) is + smaller than the targeted accuracy for the cispi (targeted_accuracy_cipsi in qp edit) + or the wf is larger than a given size (n_det_max_opt in qp_edit) +- after that you can reset your determinants (qp reset -d) and run a clean Cispi calculation + +### End of the optimization +You can choos the number of determinants after what the +optimization will stop: +``` +qp set orbital_optimization n_det_max_opt 1e5 # or any number +``` +## Weight of the states +You can change the weights of the differents states directly in qp edit. +It will affect ths weights used in the orbital optimization. + +# Tests +To run the tests: +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_optimization/class.irp.f b/src/mo_optimization/class.irp.f new file mode 100644 index 00000000..b4a68ac2 --- /dev/null +++ b/src/mo_optimization/class.irp.f @@ -0,0 +1,12 @@ + BEGIN_PROVIDER [ logical, do_only_1h1p ] +&BEGIN_PROVIDER [ logical, do_only_cas ] +&BEGIN_PROVIDER [ logical, do_ddci ] + implicit none + BEGIN_DOC + ! In the FCI case, all those are always false + END_DOC + do_only_1h1p = .False. + do_only_cas = .False. + do_ddci = .False. +END_PROVIDER + diff --git a/src/mo_optimization/constants.h b/src/mo_optimization/constants.h new file mode 100644 index 00000000..1cd00bda --- /dev/null +++ b/src/mo_optimization/constants.h @@ -0,0 +1 @@ + logical, parameter :: debug=.False. diff --git a/src/mo_optimization/debug_gradient_list_opt.irp.f b/src/mo_optimization/debug_gradient_list_opt.irp.f new file mode 100644 index 00000000..867e0105 --- /dev/null +++ b/src/mo_optimization/debug_gradient_list_opt.irp.f @@ -0,0 +1,78 @@ +! Debug the gradient + +! *Program to check the gradient* + +! The program compares the result of the first and last code for the +! gradient. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,i,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program diff --git a/src/mo_optimization/debug_gradient_opt.irp.f b/src/mo_optimization/debug_gradient_opt.irp.f new file mode 100644 index 00000000..8aeec18f --- /dev/null +++ b/src/mo_optimization/debug_gradient_opt.irp.f @@ -0,0 +1,76 @@ +! Debug the gradient + +! *Program to check the gradient* + +! The program compares the result of the first and last code for the +! gradient. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program diff --git a/src/mo_optimization/debug_hessian_list_opt.irp.f b/src/mo_optimization/debug_hessian_list_opt.irp.f new file mode 100644 index 00000000..d1aa79c4 --- /dev/null +++ b/src/mo_optimization/debug_hessian_list_opt.irp.f @@ -0,0 +1,147 @@ +! Debug the hessian + +! *Program to check the hessian matrix* + +! The program compares the result of the first and last code for the +! hessian. First of all the 4D hessian and after the 2D hessian. + +! Provided: +! | mo_num | integer | number of MOs | +! | optimization_method | string | Method for the orbital optimization: | +! | | | - 'full' -> full hessian | +! | | | - 'diag' -> diagonal hessian | +! | dim_list_act_orb | integer | number of active MOs | +! | list_act(dim_list_act_orb) | integer | list of the actives MOs | +! | | | | + +! Internal: +! | m | integer | number of MOs in the list | +! | | | (active MOs) | +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + ! Deallocation + deallocate(H, H2, h_f, h_f2) + + else + + print*, 'Use the diagonal hessian matrix' + allocate(H(n,1),H2(n,1)) + call diag_hessian_list_opt(n,m,list_act,H) + call first_diag_hessian_list_opt(n,m,list_act,H2) + + H = H - H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do i = 1, n + if (ABS(H(i,1)) > threshold) then + print*, H(i,1) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,1)) > ABS(max_error_H)) then + max_error_H = H(i,1) + endif + + endif + enddo + + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check of the full hessian' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + print*,'' + else + print*,'Check of the diagonal hessian' + endif + + print*,'Nb error_H:', nb_error_H + print*,'Max error_H:', max_error_H + +end program diff --git a/src/mo_optimization/debug_hessian_opt.irp.f b/src/mo_optimization/debug_hessian_opt.irp.f new file mode 100644 index 00000000..6d22cc01 --- /dev/null +++ b/src/mo_optimization/debug_hessian_opt.irp.f @@ -0,0 +1,171 @@ +! Debug the hessian + +! *Program to check the hessian matrix* + +! The program compares the result of the first and last code for the +! hessian. First of all the 4D hessian and after the 2D hessian. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + elseif (optimization_method == 'diag') then + + print*, 'Use the diagonal hessian matrix' + call diag_hessian_opt(n,H,h_f) + call first_diag_hessian_opt(n,H2,h_f2) + + h_f = h_f - h_f2 + max_error = 0d0 + nb_error = 0 + threshold = 1d-12 + + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + + if (ABS(h_f(i,j,k,l)) > threshold) then + + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + + endif + + enddo + enddo + enddo + enddo + + h=H-H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + else + print*,'Unknown optimization_method, please select full, diag' + call abort + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check the full hessian' + else + print*,'Check the diagonal hessian' + endif + + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + print*,'' + print*,'Nb error_H :', nb_error_H + print*,'Max error_H :', max_error_H + + ! Deallocation + deallocate(H,H2,h_f,h_f2) + +end program diff --git a/src/mo_optimization/diagonal_hessian_list_opt.irp.f b/src/mo_optimization/diagonal_hessian_list_opt.irp.f new file mode 100644 index 00000000..fe54fa7a --- /dev/null +++ b/src/mo_optimization/diagonal_hessian_list_opt.irp.f @@ -0,0 +1,1556 @@ +! Diagonal hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! Here for the diagonal of the hessian it's a little more complicated +! than for the hessian. It's not just compute the diagonal terms of the +! hessian because of the permutations. + +! The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +! with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +! a diagonal term, if : +! p = r and q = s, => (p,q,p,q) +! or +! q = r and p = s, => (p,q,q,p) + +! For that reason, we will use 2D temporary arrays to store the +! elements. One for the terms (p,q,p,q) and an other for the terms of +! kind (p,q,q,p). We will also use a 1D temporary array to store the +! terms of the kind (p,p,p,p) due to the kronoecker delta. + +! *Compute the diagonal hessian of energy with respects to orbital +! rotations* +! By diagonal hessian we mean, diagonal elements of the hessian + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | time to compute the hessian | +! | t4,t5,t6 | double precision | time to compute the differ each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +! | tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +! | tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +! | tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +! | tmp_accu_1(mo_num) | double precision | temporary array (private) | +! | tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +! | tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +! | tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +! | tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | + + +subroutine diag_hessian_list_opt(n, m, list, H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n, m, list(m) + + ! out + double precision, intent(out) :: H(n)!, h_tmpr(m,m,m,m) + + ! internal + !double precision, allocatable :: !hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: tmp_p,tmp_q,tmp_r,tmp_s,tmp_pq,tmp_rs + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- Diagonal_hessian_list_opt---' + + ! Allocation of shared arrays + !allocate(hessian(m,m,m,m))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(m),tmp_h_pqpq(m,m),tmp_h_pqqp(m,m)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,m)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,m)) + allocate(tmp_accu_1_shared(m),tmp_accu_shared(m,m)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n,m, mo_one_e_integrals, one_e_dm_mo, list, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_accu(m,m)) + +! Initialization of the arrays + +!!$OMP DO +!do tmp_s = 1,m +! do tmp_r = 1, m +! do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqpq(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqqp(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) +! = +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) + & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! \begin{align*} +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3(mo_num, mo_num, m),tmp_2rdm_3(mo_num, mo_num, m)) + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 1',t6 +!$OMP END MASTER + +! Line 2, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & +! + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) +& + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & +! + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) +& + tmp_bi_int_3(u,v,tmp_q) * tmp_2rdm_3(u,v,tmp_q) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! \begin{align*} +! \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & +! + get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +! Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +! Using u,v as one variable a matrix multiplication appears. +! $$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,tmp_q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,tmp_p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu(tmp_p,tmp_q) + tmp_accu(tmp_q,tmp_p) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & +! + get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +call wall_time(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3,tmp_2rdm_3) + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 1',t6 +!$OMP END MASTER + +! Line 3, term 2 + +! \begin{align*} +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & +! .or. ((p==s) .and. (q==r))) then + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & +! - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & +! - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) +! = +! - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) +! = +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!---------- +! Part 1.1 +!---------- +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + +allocate(tmp_bi_int_3(m, mo_num, m), tmp_2rdm_3(m, mo_num, m)) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3(tmp_q,u,tmp_p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_2rdm_3(tmp_q,u,tmp_p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do u = 1, mo_num + do tmp_q = 1, m + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(tmp_q,u,tmp_p) * tmp_2rdm_3(tmp_q,u,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3, tmp_2rdm_3) + + + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!-------- +! Part 1.2 +!-------- +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +allocate(tmp_bi_int_3(mo_num, m, m),tmp_2rdm_3(mo_num, m, m)) + + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do u = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_bi_int_3(t,tmp_q,tmp_p) = 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p= 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_2rdm_3(t,tmp_q,tmp_p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do tmp_p = 1, m + do t = 1, mo_num + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(t,tmp_q,tmp_p) * tmp_2rdm_3(t,tmp_q,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3,tmp_2rdm_3) + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & +! - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) +! = +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +! Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!---------- +! Part 2.1 +!---------- +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,tmp_q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_p = 1, m + do tmp_q = 1, m + + tmp_h_pqqp(tmp_q,tmp_p) = tmp_h_pqqp(tmp_q,tmp_p) - tmp_accu(tmp_q,tmp_p) - tmp_accu(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + + + +! Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!-------- +! Part 2.2 +!-------- +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,tmp_q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) - tmp_accu(tmp_p,tmp_q) - tmp_accu(tmp_q,tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t2 = t2 - t1 +print*, 'Time to compute the hessian :', t2 +!$OMP END MASTER + +! Deallocation of private arrays +! In the OMP section ! + +deallocate(tmp_accu) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!!$OMP DO +!do tmp_p = 1, m +! hessian(tmp_p,tmp_p,tmp_p,tmp_p) = hessian(tmp_p,tmp_p,tmp_p,tmp_p) + tmp_h_pppp(tmp_p) +!enddo +!!$OMP END DO + +!!$OMP DO +!do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_p,tmp_q) = hessian(tmp_p,tmp_q,tmp_p,tmp_q) + tmp_h_pqpq(tmp_p,tmp_q) +! enddo +!enddo +!!$OMP END DO +! +!!$OMP DO +!do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_q,tmp_p) = hessian(tmp_p,tmp_q,tmp_q,tmp_p) + tmp_h_pqqp(tmp_p,tmp_q) +! enddo +!enddo +!!$OMP END DO + +!!$OMP DO +!do tmp_s = 1, m +! do tmp_r = 1, m +! do tmp_q = 1, m +! do tmp_p = 1, m + +! h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & +! - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +!if (debug) then +! print*,'2D diag Hessian matrix' +! do tmp_pq = 1, n +! write(*,'(100(F10.5))') H(tmp_pq,:) +! enddo +!endif + +! Deallocation of shared arrays, end + + +!deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---End diagonal_hessian_list_opt---' + +end subroutine diff --git a/src/mo_optimization/diagonal_hessian_opt.irp.f b/src/mo_optimization/diagonal_hessian_opt.irp.f new file mode 100644 index 00000000..7688ec37 --- /dev/null +++ b/src/mo_optimization/diagonal_hessian_opt.irp.f @@ -0,0 +1,1511 @@ +! Diagonal hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! Here for the diagonal of the hessian it's a little more complicated +! than for the hessian. It's not just compute the diagonal terms of the +! hessian because of the permutations. + +! The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +! with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +! a diagonal term, if : +! p = r and q = s, => (p,q,p,q) +! or +! q = r and p = s, => (p,q,q,p) + +! For that reason, we will use 2D temporary arrays to store the +! elements. One for the terms (p,q,p,q) and an other for the terms of +! kind (p,q,q,p). We will also use a 1D temporary array to store the +! terms of the kind (p,p,p,p) due to the kronoecker delta. + +! *Compute the diagonal hessian of energy with respects to orbital +! rotations* +! By diagonal hessian we mean, diagonal elements of the hessian + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | time to compute the hessian | +! | t4,t5,t6 | double precision | time to compute the differ each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +! | tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +! | tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +! | tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +! | tmp_accu_1(mo_num) | double precision | temporary array (private) | +! | tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +! | tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +! | tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +! | tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | + + +subroutine diag_hessian_opt(n,H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n)!,n), h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + !double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: istate + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- diagonal hessian---' + print*,'Use the diagonal hessian' + + ! Allocation of shared arrays + !allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(mo_num),tmp_h_pqpq(mo_num,mo_num),tmp_h_pqqp(mo_num,mo_num)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_accu_1_shared(mo_num),tmp_accu_shared(mo_num,mo_num)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n, mo_one_e_integrals, one_e_dm_mo, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num),tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num)) + +! Initialization of the arrays + +!!$OMP DO +!do s = 1,mo_num +! do r = 1, mo_num +! do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,r,s) = 0d0 +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +!$OMP DO +do p = 1, mo_num + tmp_h_pppp(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqpq(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqqp(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) +! = +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! \begin{align*} +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do p =1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + & + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 1',t6 +!$OMP END MASTER + +! Line 2, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & +! + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) +& + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & +! + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do p = 1,mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) +& + tmp_bi_int_3(u,v,q) * tmp_2rdm_3(u,v,q) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! \begin{align*} +! \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & +! + get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +! Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +! Using u,v as one variable a matrix multiplication appears. +! $$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu(p,q) + tmp_accu(q,p) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & +! + get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +call wall_time(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 1',t6 +!$OMP END MASTER + +! Line 3, term 2 + +! \begin{align*} +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & +! .or. ((p==s) .and. (q==r))) then + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & +! - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & +! - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) +! = +! - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) +! = +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!---------- +! Part 1.1 +!---------- +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,u,p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_2rdm_3(q,u,p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(q,u,p) * tmp_2rdm_3(q,u,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo +enddo +!$OMP END DO + + + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!-------- +! Part 1.2 +!-------- +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do u = 1, mo_num + + do p = 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3(t,q,p) = 2d0*get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do p= 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3(t,q,p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + do t = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(t,q,p) * tmp_2rdm_3(t,q,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & +! - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) +! = +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +! Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!---------- +! Part 2.1 +!---------- +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + +!$OMP DO +do q = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do p = 1, mo_num + do q = 1, mo_num + + tmp_h_pqqp(q,p) = tmp_h_pqqp(q,p) - tmp_accu(q,p) - tmp_accu(p,q) + + enddo +enddo +!$OMP END DO + + + +! Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!-------- +! Part 2.2 +!-------- +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) - tmp_accu(p,q) - tmp_accu(q,p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t2 = t2 - t1 +print*, 'Time to compute the hessian :', t2 +!$OMP END MASTER + +! Deallocation of private arrays +! In the OMP section ! + +deallocate(tmp_2rdm_3,tmp_bi_int_3) +deallocate(tmp_accu) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!!$OMP DO +!do p = 1, mo_num +! hessian(p,p,p,p) = hessian(p,p,p,p) + tmp_h_pppp(p) +!enddo +!!$OMP END DO + +!!$OMP DO +!do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,p,q) = hessian(p,q,p,q) + tmp_h_pqpq(p,q) +! enddo +!enddo +!!$OMP END DO +! +!!$OMP DO +!do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,q,p) = hessian(p,q,q,p) + tmp_h_pqqp(p,q) +! enddo +!enddo +!!$OMP END DO + +!!$OMP DO +!do s = 1, mo_num +! do r = 1, mo_num +! do q = 1, mo_num +! do p = 1, mo_num + +! h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +!if (debug) then +! print*,'2D diag Hessian matrix' +! do pq = 1, n +! write(*,'(100(F10.5))') H(pq,:) +! enddo +!endif + +! Deallocation of shared arrays, end + + +!deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---diagonal_hessian' + +end subroutine diff --git a/src/mo_optimization/diagonalization_hessian.irp.f b/src/mo_optimization/diagonalization_hessian.irp.f new file mode 100644 index 00000000..e25879d9 --- /dev/null +++ b/src/mo_optimization/diagonalization_hessian.irp.f @@ -0,0 +1,136 @@ +! Diagonalization of the hessian + +! Just a matrix diagonalization using Lapack + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | H(n,n) | double precision | hessian | + +! Output: +! | e_val(n) | double precision | eigenvalues of the hessian | +! | w(n,n) | double precision | eigenvectors of the hessian | + +! Internal: +! | nb_negative_nv | integer | number of negative eigenvalues | +! | lwork | integer | for Lapack | +! | work(lwork,n) | double precision | temporary array for Lapack | +! | info | integer | if 0 -> ok, else problem in the diagonalization | +! | i,j | integer | dummy indexes | + + +subroutine diagonalization_hessian(n,H,e_val,w) + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: H(n,n) + + ! out + double precision, intent(out) :: e_val(n), w(n,n) + + ! internal + double precision, allocatable :: work(:,:) + integer, allocatable :: key(:) + integer :: info,lwork + integer :: i,j + integer :: nb_negative_vp + double precision :: t1,t2,t3,max_elem + + print*,'' + print*,'---Diagonalization_hessian---' + + call wall_time(t1) + + if (optimization_method == 'full') then + ! Allocation + ! For Lapack + lwork=3*n-1 + + allocate(work(lwork,n)) + + ! Calculation + + ! Copy the hessian matrix, the eigenvectors will be store in W + W=H + + ! Diagonalization of the hessian + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info) + + if (info /= 0) then + print*, 'Error diagonalization : diagonalization_hessian' + print*, 'info = ', info + call ABORT + endif + + if (debug) then + print *, 'vp Hess:' + write(*,'(100(F10.5))') real(e_val(:)) + endif + + ! Number of negative eigenvalues + max_elem = 0d0 + nb_negative_vp = 0 + do i = 1, n + if (e_val(i) < 0d0) then + nb_negative_vp = nb_negative_vp + 1 + if (e_val(i) < max_elem) then + max_elem = e_val(i) + endif + !print*,'e_val < 0 :', e_val(i) + endif + enddo + print*,'Number of negative eigenvalues:', nb_negative_vp + print*,'Lowest eigenvalue:',max_elem + + !nb_negative_vp = 0 + !do i = 1, n + ! if (e_val(i) < -thresh_eig) then + ! nb_negative_vp = nb_negative_vp + 1 + ! endif + !enddo + !print*,'Number of negative eigenvalues <', -thresh_eig,':', nb_negative_vp + + ! Deallocation + deallocate(work) + + elseif (optimization_method == 'diag') then + ! Diagonalization of the diagonal hessian by hands + allocate(key(n)) + + do i = 1, n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, n) + + ! Eigenvectors + W = 0d0 + do i = 1, n + j = key(i) + W(j,i) = 1d0 + enddo + + deallocate(key) + else + print*,'Diagonalization_hessian, abort' + call abort + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in diagonalization_hessian:', t3 + + print*,'---End diagonalization_hessian---' + +end subroutine diff --git a/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f b/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f new file mode 100644 index 00000000..58536993 --- /dev/null +++ b/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f @@ -0,0 +1,372 @@ +subroutine first_diag_hessian_list_opt(tmp_n,m,list,H)!, h_tmpr) + + include 'constants.h' + + implicit none + + !=========================================================================== + ! Compute the diagonal hessian of energy with respects to orbital rotations + !=========================================================================== + + !=========== + ! Variables + !=========== + + ! in + integer, intent(in) :: tmp_n, m, list(m) + ! tmp_n : integer, tmp_n = m*(m-1)/2 + + ! out + double precision, intent(out) :: H(tmp_n)!, h_tmpr(m,m,m,m) + ! H : n by n double precision matrix containing the 2D hessian + + ! internal + double precision, allocatable :: hessian(:,:,:,:), tmp(:,:),h_tmpr(:,:,:,:) + integer :: p,q, tmp_p,tmp_q + integer :: r,s,t,u,v,tmp_r,tmp_s,tmp_t,tmp_u,tmp_v + integer :: pq,rs,tmp_pq,tmp_rs + double precision :: t1,t2,t3 + ! hessian : mo_num 4D double precision matrix containing the hessian before the permutations + ! h_tmpr : mo_num 4D double precision matrix containing the hessian after the permutations + ! p,q,r,s : integer, indexes of the 4D hessian matrix + ! t,u,v : integer, indexes to compute hessian elements + ! pq,rs : integer, indexes for the conversion from 4D to 2D hessian matrix + ! t1,t2,t3 : double precision, t3 = t2 - t1, time to compute the hessian + + ! Function + double precision :: get_two_e_integral + ! get_two_e_integral : double precision function, two e integrals + + ! Provided : + ! mo_one_e_integrals : mono e- integrals + ! get_two_e_integral : two e- integrals + ! one_e_dm_mo_alpha, one_e_dm_mo_beta : one body density matrix + ! two_e_dm_mo : two body density matrix + + print*,'---first_diag_hess_list---' + + !============ + ! Allocation + !============ + + allocate(hessian(m,m,m,m),tmp(tmp_n,tmp_n),h_tmpr(mo_num,mo_num,mo_num,mo_num)) + + !============= + ! Calculation + !============= + + ! From Anderson et. al. (2014) + ! The Journal of Chemical Physics 141, 244104 (2014); doi: 10.1063/1.4904384 + + ! LaTeX formula : + + !\begin{align*} + !H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + !&= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + !+ \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_u^r)] + !-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + !&+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} +v_{uv}^{st} \Gamma_{pt}^{uv}) + !+ \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + !&+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{ps}^{uv}) \\ + !&- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) + !\end{align*} + + !================ + ! Initialization + !================ + hessian = 0d0 + + CALL wall_time(t1) + + !======================== + ! First line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================= + ! First line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! First line, third term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Second line, first term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================== + ! Second line, second term + !========================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! Third line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Third line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo + enddo + + CALL wall_time(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + + !============== + ! Permutations + !============== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + + h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p vector, transformation +! In addition there is a permutation in the gradient formula : +! \begin{equation} +! P_{pq} = 1 - (p <-> q) +! \end{equation} + +! We need a vector to use the gradient. Here the gradient is a +! antisymetric matrix so we can transform it in a vector of length +! mo_num*(mo_num-1)/2. + +! Here we do these two things at the same time. + + +do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) +enddo + +! Debug, diplay the vector containing the gradient elements +if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) +endif + +! Norm of the gradient +! The norm can be useful. + +norm = dnrm2(n,v_grad,1) +print*, 'Gradient norm : ', norm + +! Maximum element in the gradient +! The maximum element in the gradient is very important for the +! convergence criterion of the Newton method. + + +! Max element of the gradient +max_elem = 0d0 +do i = 1, n + if (DABS(v_grad(i)) > DABS(max_elem)) then + max_elem = v_grad(i) + endif +enddo + +print*,'Max element in the gradient :', max_elem + +! Debug, display the matrix containting the gradient elements +if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,m + do p=1,m + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, m + write(*,'(100(F10.5))') A(i,1:m) + enddo +endif + +! Deallocation of shared arrays and end + +deallocate(grad,A, tmp_mo_one_e_integrals,tmp_one_e_dm_mo) + +print*,'---End gradient---' + +end subroutine diff --git a/src/mo_optimization/gradient_opt.irp.f b/src/mo_optimization/gradient_opt.irp.f new file mode 100644 index 00000000..25be6b5a --- /dev/null +++ b/src/mo_optimization/gradient_opt.irp.f @@ -0,0 +1,346 @@ +! Gradient + +! The gradient of the CI energy with respects to the orbital rotation +! is: +! (C-c C-x C-l) +! $$ +! G(p,q) = \mathcal{P}_{pq} \left[ \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +! \sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +! \right] +! $$ + + +! $$ +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! $$ + +! $$ +! G(p,q) = \left[ +! \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +! \sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +! \right] - +! \left[ +! \sum_r (h_q^r \gamma_r^p - h_r^p \gamma_q^r) + +! \sum_{rst}(v_{qt}^{rs} \Gamma_{rs}^{pt} - v_{rs}^{pt} +! \Gamma_{qt}^{rs}) +! \right] +! $$ + +! Where p,q,r,s,t are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The gradient is a mo_num by mo_num matrix, p,q,r,s,t take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute $$G(p,q)$$ for all the pairs (p,q). + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo +! E. Scuseria + +! *Compute the gradient of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono_electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix | +! | two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | v_grad(n) | double precision | the gradient | +! | max_elem | double precision | maximum element of the gradient | + +! Internal: +! | grad(mo_num,mo_num) | double precison | gradient before the tranformation in a vector | +! | A((mo_num,mo_num) | doubre precision | gradient after the permutations | +! | norm | double precision | norm of the gradient | +! | p, q | integer | indexes of the element in the matrix grad | +! | i | integer | index for the tranformation in a vector | +! | r, s, t | integer | indexes dor the sums | +! | t1, t2, t3 | double precision | t3 = t2 - t1, time to compute the gradient | +! | t4, t5, t6 | double precission | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bi-electronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the two e- density matrix | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | +! | dnrm2 | double precision | (Lapack) norm | + + +subroutine gradient_opt(n,v_grad,max_elem) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: v_grad(n), max_elem + + ! internal + double precision, allocatable :: grad(:,:),A(:,:) + double precision :: norm + integer :: i,p,q,r,s,t + double precision :: t1,t2,t3,t4,t5,t6 + + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:) + + ! Functions + double precision :: get_two_e_integral, dnrm2 + + + print*,'' + print*,'---gradient---' + + ! Allocation of shared arrays + allocate(grad(mo_num,mo_num),A(mo_num,mo_num)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s,t, & + !$OMP tmp_accu, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(grad, one_e_dm_mo, mo_num,mo_one_e_integrals, & + !$OMP mo_integrals_map,t4,t5,t6) & + !$OMP DEFAULT(SHARED) + + ! Allocation of private arrays + allocate(tmp_accu(mo_num,mo_num)) + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num)) + +! Initialization + +!$OMP DO +do q = 1, mo_num + do p = 1,mo_num + grad(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +! Term 1 + +! Without optimization the term 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! grad(p,q) = grad(p,q) & +! + mo_one_e_integrals(p,r) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(r,q) * one_e_dm_mo(p,r) +! enddo +! enddo +! enddo + +! Since the matrix multiplication A.B is defined like : +! \begin{equation} +! c_{ij} = \sum_k a_{ik}.b_{kj} +! \end{equation} +! The previous equation can be rewritten as a matrix multplication + + +!**************** +! Opt first term +!**************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('N','N',mo_num,mo_num,mo_num,1d0,mo_one_e_integrals,& +mo_num,one_e_dm_mo,mo_num,0d0,tmp_accu,mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + (tmp_accu(p,q) - tmp_accu(q,p)) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'Gradient, first term (s) :', t6 +!$OMP END MASTER + +! Term 2 + +! Without optimization the second term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num +! do t= 1, mo_num + +! grad(p,q) = grad(p,q) & +! + get_two_e_integral(p,t,r,s,mo_integrals_map) * two_e_dm_mo(r,s,q,t) & +! - get_two_e_integral(r,s,q,t,mo_integrals_map) * two_e_dm_mo(p,t,r,s) +! enddo +! enddo +! enddo +! enddo +! enddo + +! Using the bielectronic integral properties : +! get_two_e_integral(p,t,r,s,mo_integrals_map) = get_two_e_integral(r,s,p,t,mo_integrals_map) + +! Using the two body matrix properties : +! two_e_dm_mo(p,t,r,s) = two_e_dm_mo(r,s,p,t) + +! t is one the right, we can put it on the external loop and create 3 +! indexes temporary array +! r,s can be seen as one index + +! By doing so, a matrix multiplication appears + + +!***************** +! Opt second term +!***************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_bi_int_3(r,s,p) = get_two_e_integral(r,s,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_2rdm_3(r,s,q) = two_e_dm_mo(r,s,q,t) + + enddo + enddo + enddo + + call dgemm('T','N',mo_num,mo_num,mo_num*mo_num,1d0,tmp_bi_int_3,& + mo_num*mo_num,tmp_2rdm_3,mo_num*mo_num,0d0,tmp_accu,mo_num) + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'Gradient second term (s) : ', t6 +!$OMP END MASTER + +! Deallocation of private arrays + +deallocate(tmp_bi_int_3,tmp_2rdm_3,tmp_accu) + +!$OMP END PARALLEL + +call omp_set_max_active_levels(4) + +! Permutation, 2D matrix -> vector, transformation +! In addition there is a permutation in the gradient formula : +! \begin{equation} +! P_{pq} = 1 - (p <-> q) +! \end{equation} + +! We need a vector to use the gradient. Here the gradient is a +! antisymetric matrix so we can transform it in a vector of length +! mo_num*(mo_num-1)/2. + +! Here we do these two things at the same time. + + +do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) +enddo + +! Debug, diplay the vector containing the gradient elements +if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) +endif + +! Norm of the gradient +! The norm can be useful. + +norm = dnrm2(n,v_grad,1) +print*, 'Gradient norm : ', norm + +! Maximum element in the gradient +! The maximum element in the gradient is very important for the +! convergence criterion of the Newton method. + + +! Max element of the gradient +max_elem = 0d0 +do i = 1, n + if (ABS(v_grad(i)) > ABS(max_elem)) then + max_elem = v_grad(i) + endif +enddo + +print*,'Max element in the gradient :', max_elem + +! Debug, display the matrix containting the gradient elements +if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,mo_num + do p=1,mo_num + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, mo_num + write(*,'(100(F10.5))') A(i,1:mo_num) + enddo +endif + +! Deallocation of shared arrays and end + +deallocate(grad,A) + +print*,'---End gradient---' + +end subroutine diff --git a/src/mo_optimization/hessian_list_opt.irp.f b/src/mo_optimization/hessian_list_opt.irp.f new file mode 100644 index 00000000..31af769a --- /dev/null +++ b/src/mo_optimization/hessian_list_opt.irp.f @@ -0,0 +1,1129 @@ +! Hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute all the pairs (pq,rs) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! *Compute the hessian of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +! | t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +! | ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | +! | tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bielectronic integrals | + + +subroutine hessian_list_opt(n,m,list,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,m,list(m) + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(m,m,m,m) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q,tmp_p,tmp_q,tmp_r,tmp_s + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:),ind_3_3(:,:,:) + double precision, allocatable :: tmp_bi_int_3_3(:,:,:), tmp_2rdm_3_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:),tmp_one_e_dm_mo(:,:),tmp_mo_one_e_integrals(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(m,m,m,m),tmp_one_e_dm_mo(mo_num,m),tmp_mo_one_e_integrals(mo_num,m)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s,p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3, tmp_bi_int_3_3,tmp_2rdm_3_3, ind_3_3 ) & + !$OMP SHARED(m,list,hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map, & + !$OMP t1,t2,t3,t4,t5,t6,& + !$OMP tmp_mo_one_e_integrals,tmp_one_e_dm_mo)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(m,m), tmp_accu_sym(mo_num,mo_num)) + +! Initialization of the arrays + +!$OMP MASTER +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP MASTER +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_sym(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP DO +do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + enddo + enddo + enddo +enddo +!$OMP ENDDO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! Without optimization the term 1 of the line 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + tmp_mo_one_e_integrals(u,tmp_p) = mo_one_e_integrals(u,p) + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_s = 1, m + s = list(tmp_s) + do u = 1, mo_num + tmp_one_e_dm_mo(u,tmp_s) = one_e_dm_mo(u,s) + enddo +enddo +!$OMP END DO + + +call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + +!$OMP DO +do tmp_s = 1, m + do tmp_p = 1, m + + tmp_accu_sym(tmp_p,tmp_s) = 0.5d0 * (tmp_accu(tmp_p,tmp_s) + tmp_accu(tmp_s,tmp_p)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_s = 1, m + do tmp_p = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + +!$OMP DO +do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo +enddo +!OMP END DO + +!$OMP DO +do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo +enddo +!OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! Without optimization the third term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + +! enddo +! enddo +! enddo +! enddo + +! We can just re-order the indexes + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_s = 1, m + s = list(tmp_s) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! Without optimization the fourth term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using bielectronic integral properties : +! get_two_e_integral(s,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,s,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +! There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +! terms like : hessian(p,r,r,s) + + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3, mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_p = 1, m + do tmp_s = 1, m + + tmp_accu_sym(tmp_s,tmp_p) = 0.5d0 * (tmp_accu(tmp_p,tmp_s)+tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_p = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6=t5-t4 +print*,'l2 1', t6 +!$OMP END MASTER + +! Line 2, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(q,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,q,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +! There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +! terms like : hessian(s,q,r,s) + + +!****************************** +! Opt Second line, second term +!****************************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + + + +!$OMP DO +do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3 , mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(u,v,p,r,mo_integrals_map) = +! get_two_e_integral(p,r,u,v,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +! With v on the external loop, using temporary arrays for each v and by +! taking p,r and q,s as one dimension a matrix multplication +! appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +! Part 1 + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!-------- +! part 1 +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) +!-------- + +allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_bi_int_3_3(tmp_p,tmp_r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_q,tmp_s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do tmp_s = 1, m + + call dgemm('N','N',m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_s),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_p,tmp_r,tmp_q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + + + +! With v on the external loop, using temporary arrays for each v and by +! taking q,s and p,r as one dimension a matrix multplication +! appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +! Part 2 + +!-------- +! part 2 +! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +!-------- + +allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3_3(tmp_q,tmp_s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_p,tmp_r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do tmp_r = 1, m + + call dgemm('N','N', m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_r),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_q,tmp_s,tmp_p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5 - t4 +print*,'l3 1', t6 +!$OMP END MASTER + +! Line 3, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 1 + +!-------- +! Part 1 +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) +!-------- + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 2 + +!-------- +! Part 2 +!- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 3 + +!-------- +! Part 3 +!- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 4 + +!-------- +! Part 4 +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t3 = t2 -t1 +print*,'Time to compute the hessian : ', t3 +!$OMP END MASTER + +! Deallocation of private arrays +! In the omp section ! + +deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, m + do r = 1, m + do q = 1, m + do p = 1, m + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'Time for permutations :',t6 +!$OMP END MASTER + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo +endif + +! Deallocation of shared arrays, end + +deallocate(hessian,tmp_one_e_dm_mo,tmp_mo_one_e_integrals)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine diff --git a/src/mo_optimization/hessian_opt.irp.f b/src/mo_optimization/hessian_opt.irp.f new file mode 100644 index 00000000..0b4312c6 --- /dev/null +++ b/src/mo_optimization/hessian_opt.irp.f @@ -0,0 +1,1043 @@ +! Hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute all the pairs (pq,rs) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! *Compute the hessian of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +! | t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +! | ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | +! | tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bielectronic integrals | + + +subroutine hessian_opt(n,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:), tmp_accu_shared(:,:),tmp_accu_sym_shared(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_accu_shared(mo_num,mo_num),tmp_accu_sym_shared(mo_num,mo_num)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3) & + !$OMP SHARED(hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map,tmp_accu_sym_shared, tmp_accu_shared, & + !$OMP t1,t2,t3,t4,t5,t6)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num), tmp_accu_sym(mo_num,mo_num)) + +! Initialization of the arrays + +!$OMP MASTER +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP MASTER +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_sym(p,q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP DO +do s=1,mo_num + do r=1,mo_num + do q=1,mo_num + do p=1,mo_num + hessian(p,q,r,s) = 0d0 + enddo + enddo + enddo +enddo +!$OMP ENDDO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! Without optimization the term 1 of the line 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + +!$OMP DO +do s = 1, mo_num + do p = 1, mo_num + + tmp_accu_sym_shared(p,s) = 0.5d0 * (tmp_accu_shared(p,s) + tmp_accu_shared(s,p)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do s = 1, mo_num + do p = 1, mo_num + do r = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym_shared(p,s) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + +!$OMP DO +do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym_shared(q,r) = 0.5d0 * (tmp_accu_shared(q,r) + tmp_accu_shared(r,q)) + + enddo +enddo +!OMP END DO + +!$OMP DO +do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym_shared(q,r) + + enddo + enddo +enddo +!OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! Without optimization the third term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + +! enddo +! enddo +! enddo +! enddo + +! We can just re-order the indexes + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! Without optimization the fourth term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using bielectronic integral properties : +! get_two_e_integral(s,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,s,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +! There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +! terms like : hessian(p,r,r,s) + + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3, mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do p = 1, mo_num + do s = 1, mo_num + + tmp_accu_sym(s,p) = 0.5d0 * (tmp_accu(p,s)+tmp_accu(s,p)) + + enddo + enddo + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym(p,s) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6=t5-t4 +print*,'l2 1', t6 +!$OMP END MASTER + +! Line 2, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(q,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,q,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +! There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +! terms like : hessian(s,q,r,s) + + +!****************************** +! Opt Second line, second term +!****************************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3 , mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym(q,r) = 0.5d0 * (tmp_accu(q,r) + tmp_accu(r,q)) + + enddo + enddo + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym(q,r) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(u,v,p,r,mo_integrals_map) = +! get_two_e_integral(p,r,u,v,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +! With v on the external loop, using temporary arrays for each v and by +! taking p,r and q,s as one dimension a matrix multplication +! appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +! Part 1 + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!-------- +! part 1 +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) +!-------- + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + tmp_bi_int_3(p,r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do s = 1, mo_num + do q = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,q,s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do s = 1, mo_num + + call dgemm('N','N',mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,s),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(p,r,q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With v on the external loop, using temporary arrays for each v and by +! taking q,s and p,r as one dimension a matrix multplication +! appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +! Part 2 + +!-------- +! part 2 +! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +!-------- + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do p = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,p,r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do r = 1, mo_num + call dgemm('N','N', mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,r),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(q,s,p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5 - t4 +print*,'l3 1', t6 +!$OMP END MASTER + +! Line 3, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 1 + +!-------- +! Part 1 +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) +!-------- + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 2 + +!-------- +! Part 2 +!- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 3 + +!-------- +! Part 3 +!- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 4 + +!-------- +! Part 4 +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t3 = t2 -t1 +print*,'Time to compute the hessian : ', t3 +!$OMP END MASTER + +! Deallocation of private arrays +! In the omp section ! + +deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'Time for permutations :',t6 +!$OMP END MASTER + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo +endif + +! Deallocation of shared arrays, end + +deallocate(hessian)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine diff --git a/src/mo_optimization/my_providers.irp.f b/src/mo_optimization/my_providers.irp.f new file mode 100644 index 00000000..7469ffd5 --- /dev/null +++ b/src/mo_optimization/my_providers.irp.f @@ -0,0 +1,141 @@ +! Dimensions of MOs + + +BEGIN_PROVIDER [ integer, n_mo_dim ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of MOs we can build, + ! with i>j + END_DOC + + n_mo_dim = mo_num*(mo_num-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_core ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of core MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_act ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of active MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_inact ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of inactive MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_virt ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of virtual MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 + +END_PROVIDER + +! Energies/criterions + +BEGIN_PROVIDER [ double precision, my_st_av_energy ] + implicit none + BEGIN_DOC + ! State average CI energy + END_DOC + + !call update_st_av_ci_energy(my_st_av_energy) + call state_average_energy(my_st_av_energy) + +END_PROVIDER + +! With all the MOs + +BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] +&BEGIN_PROVIDER [ double precision, my_CC1_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map + + call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(mo_num, mo_num, mo_num, mo_num)) + + call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) + +END_PROVIDER + +! With the list of active MOs +! Can be generalized to any mo_class by changing the list/dimension + +BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC2_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals + + call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) + + call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) + +END_PROVIDER diff --git a/src/mo_optimization/optimization.irp.f b/src/mo_optimization/optimization.irp.f new file mode 100644 index 00000000..9892b3e3 --- /dev/null +++ b/src/mo_optimization/optimization.irp.f @@ -0,0 +1,86 @@ +program optimization + + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + call run_optimization + +end + +subroutine run_optimization + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end diff --git a/src/mo_optimization/orb_opt.irp.f b/src/mo_optimization/orb_opt.irp.f new file mode 100644 index 00000000..71ff9262 --- /dev/null +++ b/src/mo_optimization/orb_opt.irp.f @@ -0,0 +1,22 @@ +! Orbital optimization program + +! This is an optimization program for molecular orbitals. It produces +! orbital rotations in order to lower the energy of a truncated wave +! function. +! This program just optimize the orbitals for a fixed number of +! determinants. This optimization process must be repeated for different +! number of determinants. + + + + +! Main program : orb_opt_trust + + +program orb_opt + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + io_mo_two_e_integrals = 'None' + TOUCH io_mo_two_e_integrals + call run_orb_opt_trust_v2 +end diff --git a/src/mo_optimization/org/TANGLE_org_mode.sh b/src/mo_optimization/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_optimization/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_optimization/org/TODO.org b/src/mo_optimization/org/TODO.org new file mode 100644 index 00000000..960b9ba6 --- /dev/null +++ b/src/mo_optimization/org/TODO.org @@ -0,0 +1,17 @@ +TODO: +** TODO Keep under surveillance the performance of rotation matrix +- is the fix ok ? +** DONE Provider state_average_weight +** DONE Diagonal hessian for orbital optimization with a list of MOs +** DONE Something to force the step cancellation if R.R^T > treshold +** TODO Iterative method to compute the rotation matrix +- doesn't work actually +** DONE Test trust region with polynomial functions +** DONE Optimization/Localization program using the template +** DONE Correction OMP hessian shared/private arrays +** DONE State average energy +** DONE Correction of Rho +** TODO Check the PROVIDE/FREE/TOUCH +** TODO research of lambda without the power 2 +** DONE Clean the OMP sections + diff --git a/src/mo_optimization/org/debug_gradient_list_opt.org b/src/mo_optimization/org/debug_gradient_list_opt.org new file mode 100644 index 00000000..3c6f98c0 --- /dev/null +++ b/src/mo_optimization/org/debug_gradient_list_opt.org @@ -0,0 +1,79 @@ +* Debug the gradient + +*Program to check the gradient* + +The program compares the result of the first and last code for the +gradient. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,i,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_gradient_opt.org b/src/mo_optimization/org/debug_gradient_opt.org new file mode 100644 index 00000000..101e1e8c --- /dev/null +++ b/src/mo_optimization/org/debug_gradient_opt.org @@ -0,0 +1,77 @@ +* Debug the gradient + +*Program to check the gradient* + +The program compares the result of the first and last code for the +gradient. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_hessian_list_opt.org b/src/mo_optimization/org/debug_hessian_list_opt.org new file mode 100644 index 00000000..76e8b337 --- /dev/null +++ b/src/mo_optimization/org/debug_hessian_list_opt.org @@ -0,0 +1,148 @@ +* Debug the hessian + +*Program to check the hessian matrix* + +The program compares the result of the first and last code for the +hessian. First of all the 4D hessian and after the 2D hessian. + +Provided: +| mo_num | integer | number of MOs | +| optimization_method | string | Method for the orbital optimization: | +| | | - 'full' -> full hessian | +| | | - 'diag' -> diagonal hessian | +| dim_list_act_orb | integer | number of active MOs | +| list_act(dim_list_act_orb) | integer | list of the actives MOs | +| | | | + +Internal: +| m | integer | number of MOs in the list | +| | | (active MOs) | +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + ! Deallocation + deallocate(H, H2, h_f, h_f2) + + else + + print*, 'Use the diagonal hessian matrix' + allocate(H(n,1),H2(n,1)) + call diag_hessian_list_opt(n,m,list_act,H) + call first_diag_hessian_list_opt(n,m,list_act,H2) + + H = H - H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do i = 1, n + if (ABS(H(i,1)) > threshold) then + print*, H(i,1) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,1)) > ABS(max_error_H)) then + max_error_H = H(i,1) + endif + + endif + enddo + + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check of the full hessian' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + print*,'' + else + print*,'Check of the diagonal hessian' + endif + + print*,'Nb error_H:', nb_error_H + print*,'Max error_H:', max_error_H + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_hessian_opt.org b/src/mo_optimization/org/debug_hessian_opt.org new file mode 100644 index 00000000..40f84c82 --- /dev/null +++ b/src/mo_optimization/org/debug_hessian_opt.org @@ -0,0 +1,172 @@ +* Debug the hessian + +*Program to check the hessian matrix* + +The program compares the result of the first and last code for the +hessian. First of all the 4D hessian and after the 2D hessian. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + elseif (optimization_method == 'diag') then + + print*, 'Use the diagonal hessian matrix' + call diag_hessian_opt(n,H,h_f) + call first_diag_hessian_opt(n,H2,h_f2) + + h_f = h_f - h_f2 + max_error = 0d0 + nb_error = 0 + threshold = 1d-12 + + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + + if (ABS(h_f(i,j,k,l)) > threshold) then + + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + + endif + + enddo + enddo + enddo + enddo + + h=H-H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + else + print*,'Unknown optimization_method, please select full, diag' + call abort + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check the full hessian' + else + print*,'Check the diagonal hessian' + endif + + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + print*,'' + print*,'Nb error_H :', nb_error_H + print*,'Max error_H :', max_error_H + + ! Deallocation + deallocate(H,H2,h_f,h_f2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/diagonal_hessian_list_opt.org b/src/mo_optimization/org/diagonal_hessian_list_opt.org new file mode 100644 index 00000000..a12ca981 --- /dev/null +++ b/src/mo_optimization/org/diagonal_hessian_list_opt.org @@ -0,0 +1,1561 @@ +* Diagonal hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +Here for the diagonal of the hessian it's a little more complicated +than for the hessian. It's not just compute the diagonal terms of the +hessian because of the permutations. + +The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +a diagonal term, if : +p = r and q = s, => (p,q,p,q) +or +q = r and p = s, => (p,q,q,p) + +For that reason, we will use 2D temporary arrays to store the +elements. One for the terms (p,q,p,q) and an other for the terms of +kind (p,q,q,p). We will also use a 1D temporary array to store the +terms of the kind (p,p,p,p) due to the kronoecker delta. + +*Compute the diagonal hessian of energy with respects to orbital +rotations* +By diagonal hessian we mean, diagonal elements of the hessian + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | time to compute the hessian | +| t4,t5,t6 | double precision | time to compute the differ each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +| tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +| tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +| tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +| tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +| tmp_accu_1(mo_num) | double precision | temporary array (private) | +| tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +| tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +| tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f +subroutine diag_hessian_list_opt(n, m, list, H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n, m, list(m) + + ! out + double precision, intent(out) :: H(n)!, h_tmpr(m,m,m,m) + + ! internal + !double precision, allocatable :: !hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: tmp_p,tmp_q,tmp_r,tmp_s,tmp_pq,tmp_rs + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- Diagonal_hessian_list_opt---' + + ! Allocation of shared arrays + !allocate(hessian(m,m,m,m))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(m),tmp_h_pqpq(m,m),tmp_h_pqqp(m,m)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,m)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,m)) + allocate(tmp_accu_1_shared(m),tmp_accu_shared(m,m)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n,m, mo_one_e_integrals, one_e_dm_mo, list, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_accu(m,m)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !!$OMP DO + !do tmp_s = 1,m + ! do tmp_r = 1, m + ! do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqpq(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqqp(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + enddo + enddo + enddo +enddo + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) + = + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) + & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +\begin{align*} +-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 1 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3(mo_num, mo_num, m),tmp_2rdm_3(mo_num, mo_num, m)) + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & ++ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) +& + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & ++ get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) +& + tmp_bi_int_3(u,v,tmp_q) * tmp_2rdm_3(u,v,tmp_q) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +\begin{align*} +\sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +With optimization + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & ++ get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) + = + 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +Using u,v as one variable a matrix multiplication appears. +$$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,tmp_q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,tmp_p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu(tmp_p,tmp_q) + tmp_accu(tmp_q,tmp_p) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & ++ get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) + = + 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + call wall_time(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3,tmp_2rdm_3) + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +\begin{align*} +- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & + - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & + - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) + = + - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + = + - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !---------- + ! Part 1.1 + !---------- + ! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + + allocate(tmp_bi_int_3(m, mo_num, m), tmp_2rdm_3(m, mo_num, m)) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3(tmp_q,u,tmp_p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_2rdm_3(tmp_q,u,tmp_p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do u = 1, mo_num + do tmp_q = 1, m + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(tmp_q,u,tmp_p) * tmp_2rdm_3(tmp_q,u,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3, tmp_2rdm_3) +#+END_SRC + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !-------- + ! Part 1.2 + !-------- + ! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + + allocate(tmp_bi_int_3(mo_num, m, m),tmp_2rdm_3(mo_num, m, m)) + + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do u = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_bi_int_3(t,tmp_q,tmp_p) = 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p= 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_2rdm_3(t,tmp_q,tmp_p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do tmp_p = 1, m + do t = 1, mo_num + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(t,tmp_q,tmp_p) * tmp_2rdm_3(t,tmp_q,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3,tmp_2rdm_3) +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & + - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) + = + - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !---------- + ! Part 2.1 + !---------- + ! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + ! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,tmp_q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_p = 1, m + do tmp_q = 1, m + + tmp_h_pqqp(tmp_q,tmp_p) = tmp_h_pqqp(tmp_q,tmp_p) - tmp_accu(tmp_q,tmp_p) - tmp_accu(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !-------- + ! Part 2.2 + !-------- + ! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + ! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,tmp_q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) - tmp_accu(tmp_p,tmp_q) - tmp_accu(tmp_q,tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the OMP section ! +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + deallocate(tmp_accu) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !!$OMP DO + !do tmp_p = 1, m + ! hessian(tmp_p,tmp_p,tmp_p,tmp_p) = hessian(tmp_p,tmp_p,tmp_p,tmp_p) + tmp_h_pppp(tmp_p) + !enddo + !!$OMP END DO + + !!$OMP DO + !do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_p,tmp_q) = hessian(tmp_p,tmp_q,tmp_p,tmp_q) + tmp_h_pqpq(tmp_p,tmp_q) + ! enddo + !enddo + !!$OMP END DO + ! + !!$OMP DO + !do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_q,tmp_p) = hessian(tmp_p,tmp_q,tmp_q,tmp_p) + tmp_h_pqqp(tmp_p,tmp_q) + ! enddo + !enddo + !!$OMP END DO + + !!$OMP DO + !do tmp_s = 1, m + ! do tmp_r = 1, m + ! do tmp_q = 1, m + ! do tmp_p = 1, m + + ! h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + ! - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !!$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + !if (debug) then + ! print*,'2D diag Hessian matrix' + ! do tmp_pq = 1, n + ! write(*,'(100(F10.5))') H(tmp_pq,:) + ! enddo + !endif +#+END_SRC + +** Deallocation of shared arrays, end + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---End diagonal_hessian_list_opt---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/diagonal_hessian_opt.org b/src/mo_optimization/org/diagonal_hessian_opt.org new file mode 100644 index 00000000..efd75065 --- /dev/null +++ b/src/mo_optimization/org/diagonal_hessian_opt.org @@ -0,0 +1,1516 @@ +* Diagonal hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +Here for the diagonal of the hessian it's a little more complicated +than for the hessian. It's not just compute the diagonal terms of the +hessian because of the permutations. + +The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +a diagonal term, if : +p = r and q = s, => (p,q,p,q) +or +q = r and p = s, => (p,q,q,p) + +For that reason, we will use 2D temporary arrays to store the +elements. One for the terms (p,q,p,q) and an other for the terms of +kind (p,q,q,p). We will also use a 1D temporary array to store the +terms of the kind (p,p,p,p) due to the kronoecker delta. + +*Compute the diagonal hessian of energy with respects to orbital +rotations* +By diagonal hessian we mean, diagonal elements of the hessian + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | time to compute the hessian | +| t4,t5,t6 | double precision | time to compute the differ each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +| tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +| tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +| tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +| tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +| tmp_accu_1(mo_num) | double precision | temporary array (private) | +| tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +| tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +| tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f +subroutine diag_hessian_opt(n,H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n)!,n), h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + !double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: istate + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- diagonal hessian---' + print*,'Use the diagonal hessian' + + ! Allocation of shared arrays + !allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(mo_num),tmp_h_pqpq(mo_num,mo_num),tmp_h_pqqp(mo_num,mo_num)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_accu_1_shared(mo_num),tmp_accu_shared(mo_num,mo_num)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n, mo_one_e_integrals, one_e_dm_mo, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num),tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !!$OMP DO + !do s = 1,mo_num + ! do r = 1, mo_num + ! do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,r,s) = 0d0 + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + + !$OMP DO + do p = 1, mo_num + tmp_h_pppp(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqpq(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqqp(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + enddo + enddo + enddo +enddo + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) + = + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +\begin{align*} +-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 1 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do p =1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + & + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & ++ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) +& + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & ++ get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1,mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) +& + tmp_bi_int_3(u,v,q) * tmp_2rdm_3(u,v,q) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +\begin{align*} +\sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +With optimization + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & ++ get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) + = + 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +Using u,v as one variable a matrix multiplication appears. +$$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu(p,q) + tmp_accu(q,p) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & ++ get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) + = + 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + call wall_time(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +\begin{align*} +- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & + - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & + - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) + = + - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + = + - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !---------- + ! Part 1.1 + !---------- + ! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,u,p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_2rdm_3(q,u,p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(q,u,p) * tmp_2rdm_3(q,u,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !-------- + ! Part 1.2 + !-------- + ! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do u = 1, mo_num + + do p = 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3(t,q,p) = 2d0*get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do p= 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3(t,q,p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + do t = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(t,q,p) * tmp_2rdm_3(t,q,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & + - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) + = + - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !---------- + ! Part 2.1 + !---------- + ! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + ! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + + !$OMP DO + do q = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do p = 1, mo_num + do q = 1, mo_num + + tmp_h_pqqp(q,p) = tmp_h_pqqp(q,p) - tmp_accu(q,p) - tmp_accu(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !-------- + ! Part 2.2 + !-------- + ! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + ! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) - tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the OMP section ! +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + deallocate(tmp_2rdm_3,tmp_bi_int_3) + deallocate(tmp_accu) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !!$OMP DO + !do p = 1, mo_num + ! hessian(p,p,p,p) = hessian(p,p,p,p) + tmp_h_pppp(p) + !enddo + !!$OMP END DO + + !!$OMP DO + !do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,p,q) = hessian(p,q,p,q) + tmp_h_pqpq(p,q) + ! enddo + !enddo + !!$OMP END DO + ! + !!$OMP DO + !do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,q,p) = hessian(p,q,q,p) + tmp_h_pqqp(p,q) + ! enddo + !enddo + !!$OMP END DO + + !!$OMP DO + !do s = 1, mo_num + ! do r = 1, mo_num + ! do q = 1, mo_num + ! do p = 1, mo_num + + ! h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !!$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + !if (debug) then + ! print*,'2D diag Hessian matrix' + ! do pq = 1, n + ! write(*,'(100(F10.5))') H(pq,:) + ! enddo + !endif +#+END_SRC + +** Deallocation of shared arrays, end + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---diagonal_hessian' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/diagonalization_hessian.org b/src/mo_optimization/org/diagonalization_hessian.org new file mode 100644 index 00000000..5eed7dd5 --- /dev/null +++ b/src/mo_optimization/org/diagonalization_hessian.org @@ -0,0 +1,138 @@ +* Diagonalization of the hessian + +Just a matrix diagonalization using Lapack + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| H(n,n) | double precision | hessian | + +Output: +| e_val(n) | double precision | eigenvalues of the hessian | +| w(n,n) | double precision | eigenvectors of the hessian | + +Internal: +| nb_negative_nv | integer | number of negative eigenvalues | +| lwork | integer | for Lapack | +| work(lwork,n) | double precision | temporary array for Lapack | +| info | integer | if 0 -> ok, else problem in the diagonalization | +| i,j | integer | dummy indexes | + +#+BEGIN_SRC f90 :comments org :tangle diagonalization_hessian.irp.f +subroutine diagonalization_hessian(n,H,e_val,w) + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: H(n,n) + + ! out + double precision, intent(out) :: e_val(n), w(n,n) + + ! internal + double precision, allocatable :: work(:,:) + integer, allocatable :: key(:) + integer :: info,lwork + integer :: i,j + integer :: nb_negative_vp + double precision :: t1,t2,t3,max_elem + + print*,'' + print*,'---Diagonalization_hessian---' + + call wall_time(t1) + + if (optimization_method == 'full') then + ! Allocation + ! For Lapack + lwork=3*n-1 + + allocate(work(lwork,n)) + + ! Calculation + + ! Copy the hessian matrix, the eigenvectors will be store in W + W=H + + ! Diagonalization of the hessian + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info) + + if (info /= 0) then + print*, 'Error diagonalization : diagonalization_hessian' + print*, 'info = ', info + call ABORT + endif + + if (debug) then + print *, 'vp Hess:' + write(*,'(100(F10.5))') real(e_val(:)) + endif + + ! Number of negative eigenvalues + max_elem = 0d0 + nb_negative_vp = 0 + do i = 1, n + if (e_val(i) < 0d0) then + nb_negative_vp = nb_negative_vp + 1 + if (e_val(i) < max_elem) then + max_elem = e_val(i) + endif + !print*,'e_val < 0 :', e_val(i) + endif + enddo + print*,'Number of negative eigenvalues:', nb_negative_vp + print*,'Lowest eigenvalue:',max_elem + + !nb_negative_vp = 0 + !do i = 1, n + ! if (e_val(i) < -thresh_eig) then + ! nb_negative_vp = nb_negative_vp + 1 + ! endif + !enddo + !print*,'Number of negative eigenvalues <', -thresh_eig,':', nb_negative_vp + + ! Deallocation + deallocate(work) + + elseif (optimization_method == 'diag') then + ! Diagonalization of the diagonal hessian by hands + allocate(key(n)) + + do i = 1, n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, n) + + ! Eigenvectors + W = 0d0 + do i = 1, n + j = key(i) + W(j,i) = 1d0 + enddo + + deallocate(key) + else + print*,'Diagonalization_hessian, abort' + call abort + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in diagonalization_hessian:', t3 + + print*,'---End diagonalization_hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/first_diagonal_hessian_list_opt.org b/src/mo_optimization/org/first_diagonal_hessian_list_opt.org new file mode 100644 index 00000000..391c6033 --- /dev/null +++ b/src/mo_optimization/org/first_diagonal_hessian_list_opt.org @@ -0,0 +1,376 @@ +* First diagonal hessian + +#+BEGIN_SRC f90 :comments :tangle first_diagonal_hessian_list_opt.irp.f +subroutine first_diag_hessian_list_opt(tmp_n,m,list,H)!, h_tmpr) + + include 'constants.h' + + implicit none + + !=========================================================================== + ! Compute the diagonal hessian of energy with respects to orbital rotations + !=========================================================================== + + !=========== + ! Variables + !=========== + + ! in + integer, intent(in) :: tmp_n, m, list(m) + ! tmp_n : integer, tmp_n = m*(m-1)/2 + + ! out + double precision, intent(out) :: H(tmp_n)!, h_tmpr(m,m,m,m) + ! H : n by n double precision matrix containing the 2D hessian + + ! internal + double precision, allocatable :: hessian(:,:,:,:), tmp(:,:),h_tmpr(:,:,:,:) + integer :: p,q, tmp_p,tmp_q + integer :: r,s,t,u,v,tmp_r,tmp_s,tmp_t,tmp_u,tmp_v + integer :: pq,rs,tmp_pq,tmp_rs + double precision :: t1,t2,t3 + ! hessian : mo_num 4D double precision matrix containing the hessian before the permutations + ! h_tmpr : mo_num 4D double precision matrix containing the hessian after the permutations + ! p,q,r,s : integer, indexes of the 4D hessian matrix + ! t,u,v : integer, indexes to compute hessian elements + ! pq,rs : integer, indexes for the conversion from 4D to 2D hessian matrix + ! t1,t2,t3 : double precision, t3 = t2 - t1, time to compute the hessian + + ! Function + double precision :: get_two_e_integral + ! get_two_e_integral : double precision function, two e integrals + + ! Provided : + ! mo_one_e_integrals : mono e- integrals + ! get_two_e_integral : two e- integrals + ! one_e_dm_mo_alpha, one_e_dm_mo_beta : one body density matrix + ! two_e_dm_mo : two body density matrix + + print*,'---first_diag_hess_list---' + + !============ + ! Allocation + !============ + + allocate(hessian(m,m,m,m),tmp(tmp_n,tmp_n),h_tmpr(mo_num,mo_num,mo_num,mo_num)) + + !============= + ! Calculation + !============= + + ! From Anderson et. al. (2014) + ! The Journal of Chemical Physics 141, 244104 (2014); doi: 10.1063/1.4904384 + + ! LaTeX formula : + + !\begin{align*} + !H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + !&= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + !+ \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_u^r)] + !-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + !&+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} +v_{uv}^{st} \Gamma_{pt}^{uv}) + !+ \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + !&+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{ps}^{uv}) \\ + !&- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) + !\end{align*} + + !================ + ! Initialization + !================ + hessian = 0d0 + + CALL wall_time(t1) + + !======================== + ! First line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================= + ! First line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! First line, third term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Second line, first term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================== + ! Second line, second term + !========================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! Third line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Third line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo + enddo + + CALL wall_time(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + + !============== + ! Permutations + !============== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + + h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p vector, transformation +In addition there is a permutation in the gradient formula : +\begin{equation} +P_{pq} = 1 - (p <-> q) +\end{equation} + +We need a vector to use the gradient. Here the gradient is a +antisymetric matrix so we can transform it in a vector of length +mo_num*(mo_num-1)/2. + +Here we do these two things at the same time. + +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) + enddo + + ! Debug, diplay the vector containing the gradient elements + if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) + endif +#+END_SRC + +*** Norm of the gradient +The norm can be useful. +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + norm = dnrm2(n,v_grad,1) + print*, 'Gradient norm : ', norm +#+END_SRC + +*** Maximum element in the gradient +The maximum element in the gradient is very important for the +convergence criterion of the Newton method. + +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + ! Max element of the gradient + max_elem = 0d0 + do i = 1, n + if (DABS(v_grad(i)) > DABS(max_elem)) then + max_elem = v_grad(i) + endif + enddo + + print*,'Max element in the gradient :', max_elem + + ! Debug, display the matrix containting the gradient elements + if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,m + do p=1,m + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, m + write(*,'(100(F10.5))') A(i,1:m) + enddo + endif +#+END_SRC + +*** Deallocation of shared arrays and end +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + deallocate(grad,A, tmp_mo_one_e_integrals,tmp_one_e_dm_mo) + + print*,'---End gradient---' + + end subroutine + +#+END_SRC + diff --git a/src/mo_optimization/org/gradient_opt.org b/src/mo_optimization/org/gradient_opt.org new file mode 100644 index 00000000..45c761e9 --- /dev/null +++ b/src/mo_optimization/org/gradient_opt.org @@ -0,0 +1,358 @@ +* Gradient + +The gradient of the CI energy with respects to the orbital rotation +is: +(C-c C-x C-l) +$$ +G(p,q) = \mathcal{P}_{pq} \left[ \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +\sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +\right] +$$ + + +$$ +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +$$ + +$$ +G(p,q) = \left[ +\sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +\sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +\right] - +\left[ +\sum_r (h_q^r \gamma_r^p - h_r^p \gamma_q^r) + +\sum_{rst}(v_{qt}^{rs} \Gamma_{rs}^{pt} - v_{rs}^{pt} +\Gamma_{qt}^{rs}) +\right] +$$ + +Where p,q,r,s,t are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The gradient is a mo_num by mo_num matrix, p,q,r,s,t take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute $$G(p,q)$$ for all the pairs (p,q). + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo +E. Scuseria + +*Compute the gradient of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono_electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix | +| two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| v_grad(n) | double precision | the gradient | +| max_elem | double precision | maximum element of the gradient | + +Internal: +| grad(mo_num,mo_num) | double precison | gradient before the tranformation in a vector | +| A((mo_num,mo_num) | doubre precision | gradient after the permutations | +| norm | double precision | norm of the gradient | +| p, q | integer | indexes of the element in the matrix grad | +| i | integer | index for the tranformation in a vector | +| r, s, t | integer | indexes dor the sums | +| t1, t2, t3 | double precision | t3 = t2 - t1, time to compute the gradient | +| t4, t5, t6 | double precission | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bi-electronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the two e- density matrix | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | +| dnrm2 | double precision | (Lapack) norm | + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f +subroutine gradient_opt(n,v_grad,max_elem) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: v_grad(n), max_elem + + ! internal + double precision, allocatable :: grad(:,:),A(:,:) + double precision :: norm + integer :: i,p,q,r,s,t + double precision :: t1,t2,t3,t4,t5,t6 + + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:) + + ! Functions + double precision :: get_two_e_integral, dnrm2 + + + print*,'' + print*,'---gradient---' + + ! Allocation of shared arrays + allocate(grad(mo_num,mo_num),A(mo_num,mo_num)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s,t, & + !$OMP tmp_accu, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(grad, one_e_dm_mo, mo_num,mo_one_e_integrals, & + !$OMP mo_integrals_map,t4,t5,t6) & + !$OMP DEFAULT(SHARED) + + ! Allocation of private arrays + allocate(tmp_accu(mo_num,mo_num)) + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num)) +#+END_SRC + +** Calculation +*** Initialization +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !$OMP DO + do q = 1, mo_num + do p = 1,mo_num + grad(p,q) = 0d0 + enddo + enddo + !$OMP END DO +#+END_SRC + +*** Term 1 + +Without optimization the term 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + grad(p,q) = grad(p,q) & + + mo_one_e_integrals(p,r) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(r,q) * one_e_dm_mo(p,r) + enddo + enddo +enddo + +Since the matrix multiplication A.B is defined like : +\begin{equation} +c_{ij} = \sum_k a_{ik}.b_{kj} +\end{equation} +The previous equation can be rewritten as a matrix multplication + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !**************** + ! Opt first term + !**************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('N','N',mo_num,mo_num,mo_num,1d0,mo_one_e_integrals,& + mo_num,one_e_dm_mo,mo_num,0d0,tmp_accu,mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + (tmp_accu(p,q) - tmp_accu(q,p)) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'Gradient, first term (s) :', t6 + !$OMP END MASTER +#+END_SRC + +*** Term 2 + +Without optimization the second term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + do t= 1, mo_num + + grad(p,q) = grad(p,q) & + + get_two_e_integral(p,t,r,s,mo_integrals_map) * two_e_dm_mo(r,s,q,t) & + - get_two_e_integral(r,s,q,t,mo_integrals_map) * two_e_dm_mo(p,t,r,s) + enddo + enddo + enddo + enddo +enddo + +Using the bielectronic integral properties : +get_two_e_integral(p,t,r,s,mo_integrals_map) = get_two_e_integral(r,s,p,t,mo_integrals_map) + +Using the two body matrix properties : +two_e_dm_mo(p,t,r,s) = two_e_dm_mo(r,s,p,t) + +t is one the right, we can put it on the external loop and create 3 +indexes temporary array +r,s can be seen as one index + +By doing so, a matrix multiplication appears + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !***************** + ! Opt second term + !***************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_bi_int_3(r,s,p) = get_two_e_integral(r,s,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_2rdm_3(r,s,q) = two_e_dm_mo(r,s,q,t) + + enddo + enddo + enddo + + call dgemm('T','N',mo_num,mo_num,mo_num*mo_num,1d0,tmp_bi_int_3,& + mo_num*mo_num,tmp_2rdm_3,mo_num*mo_num,0d0,tmp_accu,mo_num) + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'Gradient second term (s) : ', t6 + !$OMP END MASTER +#+END_SRC + +*** Deallocation of private arrays +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + deallocate(tmp_bi_int_3,tmp_2rdm_3,tmp_accu) + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) +#+END_SRC + +*** Permutation, 2D matrix -> vector, transformation +In addition there is a permutation in the gradient formula : +\begin{equation} +P_{pq} = 1 - (p <-> q) +\end{equation} + +We need a vector to use the gradient. Here the gradient is a +antisymetric matrix so we can transform it in a vector of length +mo_num*(mo_num-1)/2. + +Here we do these two things at the same time. + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) + enddo + + ! Debug, diplay the vector containing the gradient elements + if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) + endif +#+END_SRC + +*** Norm of the gradient +The norm can be useful. +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + norm = dnrm2(n,v_grad,1) + print*, 'Gradient norm : ', norm +#+END_SRC + +*** Maximum element in the gradient +The maximum element in the gradient is very important for the +convergence criterion of the Newton method. + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + ! Max element of the gradient + max_elem = 0d0 + do i = 1, n + if (ABS(v_grad(i)) > ABS(max_elem)) then + max_elem = v_grad(i) + endif + enddo + + print*,'Max element in the gradient :', max_elem + + ! Debug, display the matrix containting the gradient elements + if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,mo_num + do p=1,mo_num + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, mo_num + write(*,'(100(F10.5))') A(i,1:mo_num) + enddo + endif +#+END_SRC + +*** Deallocation of shared arrays and end +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + deallocate(grad,A) + + print*,'---End gradient---' + + end subroutine + +#+END_SRC + diff --git a/src/mo_optimization/org/hessian_list_opt.org b/src/mo_optimization/org/hessian_list_opt.org new file mode 100644 index 00000000..3df461cf --- /dev/null +++ b/src/mo_optimization/org/hessian_list_opt.org @@ -0,0 +1,1141 @@ +* Hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute all the pairs (pq,rs) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +*Compute the hessian of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +| t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +| ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | +| tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bielectronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f +subroutine hessian_list_opt(n,m,list,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,m,list(m) + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(m,m,m,m) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q,tmp_p,tmp_q,tmp_r,tmp_s + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:),ind_3_3(:,:,:) + double precision, allocatable :: tmp_bi_int_3_3(:,:,:), tmp_2rdm_3_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:),tmp_one_e_dm_mo(:,:),tmp_mo_one_e_integrals(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(m,m,m,m),tmp_one_e_dm_mo(mo_num,m),tmp_mo_one_e_integrals(mo_num,m)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s,p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3, tmp_bi_int_3_3,tmp_2rdm_3_3, ind_3_3 ) & + !$OMP SHARED(m,list,hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map, & + !$OMP t1,t2,t3,t4,t5,t6,& + !$OMP tmp_mo_one_e_integrals,tmp_one_e_dm_mo)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(m,m), tmp_accu_sym(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP MASTER + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_sym(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP DO + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + enddo + enddo + enddo + enddo + !$OMP ENDDO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +Without optimization the term 1 of the line 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + tmp_mo_one_e_integrals(u,tmp_p) = mo_one_e_integrals(u,p) + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_s = 1, m + s = list(tmp_s) + do u = 1, mo_num + tmp_one_e_dm_mo(u,tmp_s) = one_e_dm_mo(u,s) + enddo + enddo + !$OMP END DO + + + call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + + !$OMP DO + do tmp_s = 1, m + do tmp_p = 1, m + + tmp_accu_sym(tmp_p,tmp_s) = 0.5d0 * (tmp_accu(tmp_p,tmp_s) + tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_s = 1, m + do tmp_p = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + + !$OMP DO + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + !OMP END DO + + !$OMP DO + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +Without optimization the third term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + + enddo + enddo + enddo +enddo + +We can just re-order the indexes + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_s = 1, m + s = list(tmp_s) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER + +#+END_SRC + +** Line 2, term 1 + +Without optimization the fourth term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(s,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,s,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +terms like : hessian(p,r,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3, mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_p = 1, m + do tmp_s = 1, m + + tmp_accu_sym(tmp_s,tmp_p) = 0.5d0 * (tmp_accu(tmp_p,tmp_s)+tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_p = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6=t5-t4 + print*,'l2 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(q,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,q,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +terms like : hessian(s,q,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !****************************** + ! Opt Second line, second term + !****************************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + + + !$OMP DO + do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3 , mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(u,v,p,r,mo_integrals_map) = +get_two_e_integral(p,r,u,v,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +With v on the external loop, using temporary arrays for each v and by +taking p,r and q,s as one dimension a matrix multplication +appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !-------- + ! part 1 + ! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) + !-------- + + allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_bi_int_3_3(tmp_p,tmp_r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_q,tmp_s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do tmp_s = 1, m + + call dgemm('N','N',m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_s),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_p,tmp_r,tmp_q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) +#+END_SRC + +With v on the external loop, using temporary arrays for each v and by +taking q,s and p,r as one dimension a matrix multplication +appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! part 2 + ! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + !-------- + + allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3_3(tmp_q,tmp_s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_p,tmp_r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do tmp_r = 1, m + + call dgemm('N','N', m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_r),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_q,tmp_s,tmp_p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5 - t4 + print*,'l3 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + enddo + enddo + enddo +enddo + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 1 + ! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) + !-------- + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 2 + !- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 3 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 3 + !- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 4 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 4 + ! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t3 = t2 -t1 + print*,'Time to compute the hessian : ', t3 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the omp section ! +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, m + do r = 1, m + do q = 1, m + do p = 1, m + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'Time for permutations :',t6 + !$OMP END MASTER +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo + endif +#+END_SRC + +** Deallocation of shared arrays, end +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + deallocate(hessian,tmp_one_e_dm_mo,tmp_mo_one_e_integrals)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/hessian_opt.org b/src/mo_optimization/org/hessian_opt.org new file mode 100644 index 00000000..5b0642e3 --- /dev/null +++ b/src/mo_optimization/org/hessian_opt.org @@ -0,0 +1,1056 @@ +* Hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute all the pairs (pq,rs) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +*Compute the hessian of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +| t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +| ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | +| tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bielectronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f +subroutine hessian_opt(n,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:), tmp_accu_shared(:,:),tmp_accu_sym_shared(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_accu_shared(mo_num,mo_num),tmp_accu_sym_shared(mo_num,mo_num)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3) & + !$OMP SHARED(hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map,tmp_accu_sym_shared, tmp_accu_shared, & + !$OMP t1,t2,t3,t4,t5,t6)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num), tmp_accu_sym(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP MASTER + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_sym(p,q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP DO + do s=1,mo_num + do r=1,mo_num + do q=1,mo_num + do p=1,mo_num + hessian(p,q,r,s) = 0d0 + enddo + enddo + enddo + enddo + !$OMP ENDDO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +Without optimization the term 1 of the line 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + + !$OMP DO + do s = 1, mo_num + do p = 1, mo_num + + tmp_accu_sym_shared(p,s) = 0.5d0 * (tmp_accu_shared(p,s) + tmp_accu_shared(s,p)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do s = 1, mo_num + do p = 1, mo_num + do r = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym_shared(p,s) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + + !$OMP DO + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym_shared(q,r) = 0.5d0 * (tmp_accu_shared(q,r) + tmp_accu_shared(r,q)) + + enddo + enddo + !OMP END DO + + !$OMP DO + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym_shared(q,r) + + enddo + enddo + enddo + !OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +Without optimization the third term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + + enddo + enddo + enddo +enddo + +We can just re-order the indexes + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER + +#+END_SRC + +** Line 2, term 1 + +Without optimization the fourth term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(s,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,s,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +terms like : hessian(p,r,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3, mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do p = 1, mo_num + do s = 1, mo_num + + tmp_accu_sym(s,p) = 0.5d0 * (tmp_accu(p,s)+tmp_accu(s,p)) + + enddo + enddo + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym(p,s) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6=t5-t4 + print*,'l2 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(q,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,q,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +terms like : hessian(s,q,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !****************************** + ! Opt Second line, second term + !****************************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3 , mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym(q,r) = 0.5d0 * (tmp_accu(q,r) + tmp_accu(r,q)) + + enddo + enddo + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym(q,r) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(u,v,p,r,mo_integrals_map) = +get_two_e_integral(p,r,u,v,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +With v on the external loop, using temporary arrays for each v and by +taking p,r and q,s as one dimension a matrix multplication +appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !-------- + ! part 1 + ! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) + !-------- + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + tmp_bi_int_3(p,r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do s = 1, mo_num + do q = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,q,s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do s = 1, mo_num + + call dgemm('N','N',mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,s),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(p,r,q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + +#+END_SRC + +With v on the external loop, using temporary arrays for each v and by +taking q,s and p,r as one dimension a matrix multplication +appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! part 2 + ! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + !-------- + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do p = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,p,r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do r = 1, mo_num + call dgemm('N','N', mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,r),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(q,s,p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5 - t4 + print*,'l3 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + enddo + enddo + enddo +enddo + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 1 + ! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) + !-------- + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 2 + !- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 3 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 3 + !- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 4 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 4 + ! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t3 = t2 -t1 + print*,'Time to compute the hessian : ', t3 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the omp section ! +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'Time for permutations :',t6 + !$OMP END MASTER +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo + endif +#+END_SRC + +** Deallocation of shared arrays, end +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + deallocate(hessian)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/my_providers.org b/src/mo_optimization/org/my_providers.org new file mode 100644 index 00000000..b88cbd62 --- /dev/null +++ b/src/mo_optimization/org/my_providers.org @@ -0,0 +1,308 @@ +* Providers +** Dimensions of MOs + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of MOs we can build, + ! with i>j + END_DOC + + n_mo_dim = mo_num*(mo_num-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_core ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of core MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_act ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of active MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_inact ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of inactive MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_virt ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of virtual MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 + +END_PROVIDER +#+END_SRC + +** Energies/criterions +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_st_av_energy ] + implicit none + BEGIN_DOC + ! State average CI energy + END_DOC + + !call update_st_av_ci_energy(my_st_av_energy) + call state_average_energy(my_st_av_energy) + +END_PROVIDER +#+END_SRC + +** Gradient/hessian +*** Orbital optimization +**** With all the MOs +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] +&BEGIN_PROVIDER [ double precision, my_CC1_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map + + call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(mo_num, mo_num, mo_num, mo_num)) + + call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) + +END_PROVIDER +#+END_SRC + +**** With the list of active MOs +Can be generalized to any mo_class by changing the list/dimension +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC2_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals + + call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) + + call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) + +END_PROVIDER +#+END_SRC + +*** Orbital localization +**** Gradient +***** Core MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_core, (n_mo_dim_core) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_core ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the core MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_core, dim_list_core_orb, list_core, my_gradient_loc_core, my_CC_loc_core , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Active MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_act, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_act ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the active MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_loc_act, my_CC_loc_act , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Inactive MOs +#+BEGIN_SRC f90 :comments org ! +:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_inact, (n_mo_dim_inact) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_inact ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the inactive MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_inact, dim_list_inact_orb, list_inact, my_gradient_loc_inact, my_CC_loc_inact , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Virtual MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_virt, (n_mo_dim_virt) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_virt ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the virtual MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_virt, dim_list_virt_orb, list_virt, my_gradient_loc_virt, my_CC_loc_virt , norm_grad) + +END_PROVIDER +#+END_SRC + +**** Hessian +***** Core MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_core, (n_mo_dim_core) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the core MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_core, dim_list_core_orb, list_core, my_hessian_loc_core) + +END_PROVIDER +#+END_SRC + +***** Active MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_act, (n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the active MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_loc_act) + +END_PROVIDER +#+END_SRC + +***** Inactive MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_inact, (n_mo_dim_inact) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the inactive MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_inact, dim_list_inact_orb, list_inact, my_hessian_loc_inact) + +END_PROVIDER +#+END_SRC + +***** Virtual MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_virt, (n_mo_dim_virt) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the virtual MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_virt, dim_list_virt_orb, list_virt, my_hessian_loc_virt) + +END_PROVIDER +#+END_SRC + diff --git a/src/mo_optimization/org/optimization.org b/src/mo_optimization/org/optimization.org new file mode 100644 index 00000000..cbb7b752 --- /dev/null +++ b/src/mo_optimization/org/optimization.org @@ -0,0 +1,91 @@ +#+BEGIN_SRC f90 :comments org :tangle optimization.irp.f +program optimization + + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + call run_optimization + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle optimization.irp.f +subroutine run_optimization + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end + +#+END_SRC diff --git a/src/mo_optimization/org/orb_opt_trust_v2.org b/src/mo_optimization/org/orb_opt_trust_v2.org new file mode 100644 index 00000000..dbcd3c19 --- /dev/null +++ b/src/mo_optimization/org/orb_opt_trust_v2.org @@ -0,0 +1,349 @@ +* Orbital optimization program + +This is an optimization program for molecular orbitals. It produces +orbital rotations in order to lower the energy of a truncated wave +function. +This program just optimize the orbitals for a fixed number of +determinants. This optimization process must be repeated for different +number of determinants. + +#+BEGIN_SRC f90 :comments org :tangle orb_opt.irp.f +#+END_SRC + +* Main program : orb_opt_trust + +#+BEGIN_SRC f90 :comments org :tangle orb_opt.irp.f +program orb_opt + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + io_mo_two_e_integrals = 'None' + TOUCH io_mo_two_e_integrals + call run_orb_opt_trust_v2 +end +#+END_SRC + +* Subroutine : run_orb_opt_trust + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + +#+END_SRC + +Subroutine to optimize the MOs using a trust region algorithm: +- choice of the method +- initialization +- optimization until convergence + +The optimization use the trust region algorithm, the different parts +are explained in the corresponding subroutine files. + +qp_edit: +| thresh_opt_max_elem_grad | +| optimization_max_nb_iter | +| optimization_method | + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| N_states | integer | number of states | +| ci_energy(N_states) | double precision | CI energies | +| state_average_weight(N_states) | double precision | Weight of the different states | + +Variables: +| m | integer | number of active MOs | +| tmp_n | integer | m*(m-1)/2, number of MO parameters | +| tmp_n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| v_grad(tmp_n) | double precision | gradient | +| H(tmp_n,tmp_n) | double precision | hessian (2D) | +| h_f(m,m,m,m) | double precision | hessian (4D) | +| e_val(m) | double precision | eigenvalues of the hessian | +| w(m,m) | double precision | eigenvectors of the hessian | +| x(m) | double precision | step given by the trust region | +| m_x(m,m) | double precision | step given by the trust region after | +| tmp_R(m,m) | double precision | rotation matrix for active MOs | +| R(mo_num,mo_num) | double precision | full rotation matrix | +| prev_mos(ao_num,mo_num) | double precision | previous MOs (before the rotation) | +| new_mos(ao_num,mo_num) | double precision | new MOs (after the roration) | +| delta | double precision | radius of the trust region | +| rho | double precision | agreement between the model and the exact function | +| max_elem | double precision | maximum element in the gradient | +| i | integer | index | +| tmp_i,tmp_j | integer | indexes in the subspace containing only | +| | | the active MOs | +| converged | logical | convergence of the algorithm | +| cancel_step | logical | if the step must be cancelled | +| nb_iter | integer | number of iterations (accepted) | +| nb_diag | integer | number of diagonalizations of the CI matrix | +| nb_cancel | integer | number of cancelled steps for the actual iteration | +| nb_cancel_tot | integer | total number of cancel steps | +| info | integer | if 0 ok, else problem in the diagonalization of | +| | | the hessian with the Lapack routine | +| criterion | double precision | energy at a given step | +| prev_criterion | double precision | energy before the rotation | +| criterion_model | double precision | estimated energy after the rotation using | +| | | a Taylor series | +| must_exit | logical | To exit the trust region algorithm when | +| | | criterion - criterion_model is too small | +| enforce_step_cancellation | logical | To force the cancellation of the step if the | +| | | error in the rotation matrix is too large | + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f +subroutine run_orb_opt_trust_v2 + + include 'constants.h' + + implicit none + + BEGIN_DOC + ! Orbital optimization + END_DOC + + ! Variables + + double precision, allocatable :: R(:,:) + double precision, allocatable :: H(:,:),h_f(:,:,:,:) + double precision, allocatable :: v_grad(:) + double precision, allocatable :: prev_mos(:,:),new_mos(:,:) + integer :: info + integer :: n + integer :: i,j,p,q,k + double precision :: max_elem_grad, delta, rho, norm_grad, normalization_factor + logical :: cancel_step + integer :: nb_iter, nb_diag, nb_cancel, nb_cancel_tot, nb_sub_iter + double precision :: t1, t2, t3 + double precision :: prev_criterion, criterion, criterion_model + logical :: not_converged, must_exit, enforce_step_cancellation + integer :: m, tmp_n, tmp_i, tmp_j, tmp_k, tmp_n2 + integer,allocatable :: tmp_list(:), key(:) + double precision, allocatable :: tmp_m_x(:,:),tmp_R(:,:), tmp_x(:), W(:,:), e_val(:) + + PROVIDE mo_two_e_integrals_in_map ci_energy psi_det psi_coef +#+END_SRC + +** Allocation + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + allocate(R(mo_num,mo_num)) ! rotation matrix + allocate(prev_mos(ao_num,mo_num), new_mos(ao_num,mo_num)) ! old and new MOs + + ! Definition of m and tmp_n + m = dim_list_act_orb + tmp_n = m*(m-1)/2 + + allocate(tmp_list(m)) + allocate(tmp_R(m,m), tmp_m_x(m,m), tmp_x(tmp_n)) + allocate(e_val(tmp_n),key(tmp_n),v_grad(tmp_n)) + +#+END_SRC + +** Method + There are three different methods : + - the "full" hessian, which uses all the elements of the hessian + matrix" + - the "diagonal" hessian, which uses only the diagonal elements of the + hessian + - without the hessian (hessian = identity matrix) + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + !Display the method + print*, 'Method :', optimization_method + if (optimization_method == 'full') then + print*, 'Full hessian' + allocate(H(tmp_n,tmp_n), h_f(m,m,m,m),W(tmp_n,tmp_n)) + tmp_n2 = tmp_n + elseif (optimization_method == 'diag') then + print*,'Diagonal hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 + elseif (optimization_method == 'none') then + print*,'No hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 + else + print*,'Unknown optimization_method, please select full, diag or none' + call abort + endif + print*, 'Absolute value of the hessian:', absolute_eig +#+END_SRC + +** Calculations +*** Algorithm + +Here is the main algorithm of the optimization: +- First of all we initialize some parameters and we compute the + criterion (the ci energy) before doing any MO rotations +- We compute the gradient and the hessian for the active MOs +- We diagonalize the hessian +- We compute a step and loop to reduce the radius of the + trust region (and the size of the step by the way) until the step is + accepted +- We repeat the process until the convergence + NB: the convergence criterion can be changed + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + ! Loop until the convergence of the optimization + ! call diagonalize_ci + + !### Initialization ### + nb_iter = 0 + rho = 0.5d0 + not_converged = .True. + tmp_list = list_act ! Optimization of the active MOs + nb_cancel_tot = 0 + + ! Renormalization of the weights of the states + call state_weight_normalization + + ! Compute the criterion before the loop + call state_average_energy(prev_criterion) + + do while (not_converged) + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! Gradient + call gradient_list_opt(tmp_n, m, tmp_list, v_grad, max_elem_grad, norm_grad) + + ! Hessian + if (optimization_method == 'full') then + ! Full hessian + call hessian_list_opt(tmp_n, m, tmp_list, H, h_f) + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H, e_val, w) + + elseif (optimization_method == 'diag') then + ! Diagonal hessian + call diag_hessian_list_opt(tmp_n, m, tmp_list, H) + else + ! Identity matrix + do tmp_i = 1, tmp_n + H(tmp_i,1) = 1d0 + enddo + endif + + if (optimization_method /= 'full') then + ! Sort + do tmp_i = 1, tmp_n + key(tmp_i) = tmp_i + e_val(tmp_i) = H(tmp_i,1) + enddo + call dsort(e_val,key,tmp_n) + + ! Eigenvalues and eigenvectors + do tmp_i = 1, tmp_n + w(tmp_i,1) = dble(key(tmp_i)) + enddo + + endif + + ! Init before the internal loop + cancel_step = .True. ! To enter in the loop just after + nb_cancel = 0 + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'' + print*,'-----------------------------' + print*,'Iteration: ', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem_grad + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) + + if (must_exit) then + print*,'step_in_trust_region sends: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, m, tmp_x, tmp_m_x) + + ! Rotation matrix for the active MOs + call rotation_matrix(tmp_m_x, m, tmp_R, m, m, info, enforce_step_cancellation) + + ! Security to ensure an unitary transformation + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(m, tmp_list, tmp_R, R) + + ! MO rotations + call apply_mo_rotation(R, prev_mos) + + ! Update of the energy before the diagonalization of the hamiltonian + call clear_mo_map + TOUCH mo_coef psi_det psi_coef ci_energy two_e_dm_mo + call state_average_energy(criterion) + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step if necessary + if (cancel_step) then + mo_coef = prev_mos + call save_mos() + nb_cancel = nb_cancel + 1 + nb_cancel_tot = nb_cancel_tot + 1 + else + ! Diagonalization of the hamiltonian + FREE ci_energy! To enforce the recomputation + call diagonalize_ci + call save_wavefunction_unsorted + + ! Energy obtained after the diagonalization of the CI matrix + call state_average_energy(prev_criterion) + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem_grad) < thresh_opt_max_elem_grad) then + print*,'Converged: DABS(max_elem_grad) < thresh_opt_max_elem_grad' + not_converged = .False. + endif + if (nb_iter >= optimization_max_nb_iter) then + print*,'Not converged: nb_iter >= optimization_max_nb_iter' + not_converged = .False. + endif + + if (.not. not_converged) then + print*,'#############################' + print*,' End of the optimization' + print*,'#############################' + endif + enddo + +#+END_SRC + +** Deallocation, end + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + deallocate(v_grad,H,R,W,e_val) + deallocate(prev_mos,new_mos) + if (optimization_method == 'full') then + deallocate(h_f) + endif + +end +#+END_SRC + diff --git a/src/mo_optimization/org/state_average_energy.org b/src/mo_optimization/org/state_average_energy.org new file mode 100644 index 00000000..b779845a --- /dev/null +++ b/src/mo_optimization/org/state_average_energy.org @@ -0,0 +1,73 @@ +* State average energy + +Calculation of the state average energy from the integrals and the +density matrices. + +\begin{align*} +E = \sum_{ij} h_{ij} \gamma_{ij} + \frac{1}{2} v_{ij}^{kl} \Gamma_{ij}^{kl} +\end{align*} +$h_{ij}$: mono-electronic integral +$\gamma_{ij}$: one electron density matrix +$v_{ij}^{kl}$: bi-electronic integral +$\Gamma_{ij}^{kl}$: two electrons density matrix + +TODO: OMP version + +PROVIDED: +| mo_one_e_integrals | double precision | mono-electronic integrals | +| get_two_e_integral | double precision | bi-electronic integrals | +| one_e_dm_mo | double precision | one electron density matrix | +| two_e_dm_mo | double precision | two electrons density matrix | +| nuclear_repulsion | double precision | nuclear repulsion | +| mo_num | integer | number of MOs | + +Output: +| energy | double precision | state average energy | + +Internal: +| mono_e | double precision | mono-electronic energy | +| bi_e | double precision | bi-electronic energy | +| i,j,k,l | integer | indexes to loop over the MOs | + +#+BEGIN_SRC f90 :comments org :tangle state_average_energy.irp.f +subroutine state_average_energy(energy) + + implicit none + + double precision, intent(out) :: energy + + double precision :: get_two_e_integral + double precision :: mono_e, bi_e + integer :: i,j,k,l + + ! mono electronic part + mono_e = 0d0 + do j = 1, mo_num + do i = 1, mo_num + mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) + enddo + enddo + + ! bi electronic part + bi_e = 0d0 + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + bi_e = bi_e + get_two_e_integral(i,j,k,l,mo_integrals_map) * two_e_dm_mo(i,j,k,l) + enddo + enddo + enddo + enddo + + ! State average energy + energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + + ! Check + !call print_energy_components + + print*,'State average energy:', energy + !print*,ci_energy + +end +#+END_SRC diff --git a/src/mo_optimization/org/state_weight_normalization.org b/src/mo_optimization/org/state_weight_normalization.org new file mode 100644 index 00000000..492ad3d4 --- /dev/null +++ b/src/mo_optimization/org/state_weight_normalization.org @@ -0,0 +1,31 @@ +#+BEGIN_SRC f90 :comments org :tangle state_weight_normalization.irp.f +subroutine state_weight_normalization + + implicit none + + BEGIN_DOC + ! Renormalization of the state weights or enforcing state average + ! weights for orbital optimization + END_DOC + + integer :: i + double precision :: normalization_factor + + ! To normalize the sum of the state weights + normalization_factor = 0d0 + do i = 1, N_states + normalization_factor = normalization_factor + state_average_weight(i) + enddo + normalization_factor = 1d0 / normalization_factor + + do i = 1, N_states + state_average_weight(i) = state_average_weight(i) * normalization_factor + enddo + TOUCH state_average_weight + + print*, 'Number of states:', N_states + print*, 'State average weights:' + print*, state_average_weight(:) + +end +#+END_SRC diff --git a/src/mo_optimization/org/update_parameters.org b/src/mo_optimization/org/update_parameters.org new file mode 100644 index 00000000..cd9b9595 --- /dev/null +++ b/src/mo_optimization/org/update_parameters.org @@ -0,0 +1,16 @@ +Subroutine toupdate the parameters. +Ex: TOUCH mo_coef ... + +#+BEGIN_SRC f90 :comments org :tangle update_parameters.irp.f +subroutine update_parameters() + + implicit none + + !### TODO + ! Touch yours parameters + call clear_mo_map + TOUCH mo_coef psi_det psi_coef + call diagonalize_ci + call save_wavefunction_unsorted +end +#+END_SRC diff --git a/src/mo_optimization/org/update_st_av_ci_energy.org b/src/mo_optimization/org/update_st_av_ci_energy.org new file mode 100644 index 00000000..2dc7f3ee --- /dev/null +++ b/src/mo_optimization/org/update_st_av_ci_energy.org @@ -0,0 +1,26 @@ +* Update the CI state average energy + +Computes the state average energy +\begin{align*} +E =\sum_{i=1}^{N_{states}} E_i . w_i +\end{align*} + +$E_i$: energy of state i +$w_i$: weight of state i + +#+BEGIN_SRC f90 :comments org :tangle update_st_av_ci_energy.irp.f +subroutine update_st_av_ci_energy(energy) + + implicit none + + double precision, intent(out) :: energy + integer :: i + + energy = 0d0 + do i = 1, N_states + energy = energy + ci_energy(i) * state_average_weight(i) + enddo + + print*, 'ci_energy :', energy +end +#+END_SRC diff --git a/src/mo_optimization/run_orb_opt_trust_v2.irp.f b/src/mo_optimization/run_orb_opt_trust_v2.irp.f new file mode 100644 index 00000000..e1431255 --- /dev/null +++ b/src/mo_optimization/run_orb_opt_trust_v2.irp.f @@ -0,0 +1,317 @@ +! Subroutine : run_orb_opt_trust + + + + + + +! Subroutine to optimize the MOs using a trust region algorithm: +! - choice of the method +! - initialization +! - optimization until convergence + +! The optimization use the trust region algorithm, the different parts +! are explained in the corresponding subroutine files. + +! qp_edit: +! | thresh_opt_max_elem_grad | +! | optimization_max_nb_iter | +! | optimization_method | + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | N_states | integer | number of states | +! | ci_energy(N_states) | double precision | CI energies | +! | state_average_weight(N_states) | double precision | Weight of the different states | + +! Variables: +! | m | integer | number of active MOs | +! | tmp_n | integer | m*(m-1)/2, number of MO parameters | +! | tmp_n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | v_grad(tmp_n) | double precision | gradient | +! | H(tmp_n,tmp_n) | double precision | hessian (2D) | +! | h_f(m,m,m,m) | double precision | hessian (4D) | +! | e_val(m) | double precision | eigenvalues of the hessian | +! | w(m,m) | double precision | eigenvectors of the hessian | +! | x(m) | double precision | step given by the trust region | +! | m_x(m,m) | double precision | step given by the trust region after | +! | tmp_R(m,m) | double precision | rotation matrix for active MOs | +! | R(mo_num,mo_num) | double precision | full rotation matrix | +! | prev_mos(ao_num,mo_num) | double precision | previous MOs (before the rotation) | +! | new_mos(ao_num,mo_num) | double precision | new MOs (after the roration) | +! | delta | double precision | radius of the trust region | +! | rho | double precision | agreement between the model and the exact function | +! | max_elem | double precision | maximum element in the gradient | +! | i | integer | index | +! | tmp_i,tmp_j | integer | indexes in the subspace containing only | +! | | | the active MOs | +! | converged | logical | convergence of the algorithm | +! | cancel_step | logical | if the step must be cancelled | +! | nb_iter | integer | number of iterations (accepted) | +! | nb_diag | integer | number of diagonalizations of the CI matrix | +! | nb_cancel | integer | number of cancelled steps for the actual iteration | +! | nb_cancel_tot | integer | total number of cancel steps | +! | info | integer | if 0 ok, else problem in the diagonalization of | +! | | | the hessian with the Lapack routine | +! | criterion | double precision | energy at a given step | +! | prev_criterion | double precision | energy before the rotation | +! | criterion_model | double precision | estimated energy after the rotation using | +! | | | a Taylor series | +! | must_exit | logical | To exit the trust region algorithm when | +! | | | criterion - criterion_model is too small | +! | enforce_step_cancellation | logical | To force the cancellation of the step if the | +! | | | error in the rotation matrix is too large | + + +subroutine run_orb_opt_trust_v2 + + include 'constants.h' + + implicit none + + BEGIN_DOC + ! Orbital optimization + END_DOC + + ! Variables + + double precision, allocatable :: R(:,:) + double precision, allocatable :: H(:,:),h_f(:,:,:,:) + double precision, allocatable :: v_grad(:) + double precision, allocatable :: prev_mos(:,:),new_mos(:,:) + integer :: info + integer :: n + integer :: i,j,p,q,k + double precision :: max_elem_grad, delta, rho, norm_grad, normalization_factor + logical :: cancel_step + integer :: nb_iter, nb_diag, nb_cancel, nb_cancel_tot, nb_sub_iter + double precision :: t1, t2, t3 + double precision :: prev_criterion, criterion, criterion_model + logical :: not_converged, must_exit, enforce_step_cancellation + integer :: m, tmp_n, tmp_i, tmp_j, tmp_k, tmp_n2 + integer,allocatable :: tmp_list(:), key(:) + double precision, allocatable :: tmp_m_x(:,:),tmp_R(:,:), tmp_x(:), W(:,:), e_val(:) + + PROVIDE mo_two_e_integrals_in_map ci_energy psi_det psi_coef + +! Allocation + + +allocate(R(mo_num,mo_num)) ! rotation matrix +allocate(prev_mos(ao_num,mo_num), new_mos(ao_num,mo_num)) ! old and new MOs + +! Definition of m and tmp_n +m = dim_list_act_orb +tmp_n = m*(m-1)/2 + +allocate(tmp_list(m)) +allocate(tmp_R(m,m), tmp_m_x(m,m), tmp_x(tmp_n)) +allocate(e_val(tmp_n),key(tmp_n),v_grad(tmp_n)) + +! Method +! There are three different methods : +! - the "full" hessian, which uses all the elements of the hessian +! matrix" +! - the "diagonal" hessian, which uses only the diagonal elements of the +! hessian +! - without the hessian (hessian = identity matrix) + + +!Display the method + print*, 'Method :', optimization_method +if (optimization_method == 'full') then + print*, 'Full hessian' + allocate(H(tmp_n,tmp_n), h_f(m,m,m,m),W(tmp_n,tmp_n)) + tmp_n2 = tmp_n +elseif (optimization_method == 'diag') then + print*,'Diagonal hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 +elseif (optimization_method == 'none') then + print*,'No hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 +else + print*,'Unknown optimization_method, please select full, diag or none' + call abort +endif +print*, 'Absolute value of the hessian:', absolute_eig + +! Algorithm + +! Here is the main algorithm of the optimization: +! - First of all we initialize some parameters and we compute the +! criterion (the ci energy) before doing any MO rotations +! - We compute the gradient and the hessian for the active MOs +! - We diagonalize the hessian +! - We compute a step and loop to reduce the radius of the +! trust region (and the size of the step by the way) until the step is +! accepted +! - We repeat the process until the convergence +! NB: the convergence criterion can be changed + + +! Loop until the convergence of the optimization +! call diagonalize_ci + +!### Initialization ### +nb_iter = 0 +rho = 0.5d0 +not_converged = .True. +tmp_list = list_act ! Optimization of the active MOs +nb_cancel_tot = 0 + +! Renormalization of the weights of the states +call state_weight_normalization + +! Compute the criterion before the loop +call state_average_energy(prev_criterion) + +do while (not_converged) + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! Gradient + call gradient_list_opt(tmp_n, m, tmp_list, v_grad, max_elem_grad, norm_grad) + + ! Hessian + if (optimization_method == 'full') then + ! Full hessian + call hessian_list_opt(tmp_n, m, tmp_list, H, h_f) + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H, e_val, w) + + elseif (optimization_method == 'diag') then + ! Diagonal hessian + call diag_hessian_list_opt(tmp_n, m, tmp_list, H) + else + ! Identity matrix + do tmp_i = 1, tmp_n + H(tmp_i,1) = 1d0 + enddo + endif + + if (optimization_method /= 'full') then + ! Sort + do tmp_i = 1, tmp_n + key(tmp_i) = tmp_i + e_val(tmp_i) = H(tmp_i,1) + enddo + call dsort(e_val,key,tmp_n) + + ! Eigenvalues and eigenvectors + do tmp_i = 1, tmp_n + w(tmp_i,1) = dble(key(tmp_i)) + enddo + + endif + + ! Init before the internal loop + cancel_step = .True. ! To enter in the loop just after + nb_cancel = 0 + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'' + print*,'-----------------------------' + print*,'Iteration: ', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem_grad + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) + + if (must_exit) then + print*,'step_in_trust_region sends: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, m, tmp_x, tmp_m_x) + + ! Rotation matrix for the active MOs + call rotation_matrix(tmp_m_x, m, tmp_R, m, m, info, enforce_step_cancellation) + + ! Security to ensure an unitary transformation + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(m, tmp_list, tmp_R, R) + + ! MO rotations + call apply_mo_rotation(R, prev_mos) + + ! Update of the energy before the diagonalization of the hamiltonian + call clear_mo_map + TOUCH mo_coef psi_det psi_coef ci_energy two_e_dm_mo + call state_average_energy(criterion) + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step if necessary + if (cancel_step) then + mo_coef = prev_mos + call save_mos() + nb_cancel = nb_cancel + 1 + nb_cancel_tot = nb_cancel_tot + 1 + else + ! Diagonalization of the hamiltonian + FREE ci_energy! To enforce the recomputation + call diagonalize_ci + call save_wavefunction_unsorted + + ! Energy obtained after the diagonalization of the CI matrix + call state_average_energy(prev_criterion) + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem_grad) < thresh_opt_max_elem_grad) then + print*,'Converged: DABS(max_elem_grad) < thresh_opt_max_elem_grad' + not_converged = .False. + endif + if (nb_iter >= optimization_max_nb_iter) then + print*,'Not converged: nb_iter >= optimization_max_nb_iter' + not_converged = .False. + endif + + if (.not. not_converged) then + print*,'#############################' + print*,' End of the optimization' + print*,'#############################' + endif +enddo + +! Deallocation, end + + +deallocate(v_grad,H,R,W,e_val) + deallocate(prev_mos,new_mos) + if (optimization_method == 'full') then + deallocate(h_f) + endif + +end diff --git a/src/mo_optimization/save_energy.irp.f b/src/mo_optimization/save_energy.irp.f new file mode 100644 index 00000000..5dac8da9 --- /dev/null +++ b/src/mo_optimization/save_energy.irp.f @@ -0,0 +1,9 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_fci_energy(E(1:N_states)) + call ezfio_set_fci_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization/state_average_energy.irp.f new file mode 100644 index 00000000..2cd063da --- /dev/null +++ b/src/mo_optimization/state_average_energy.irp.f @@ -0,0 +1,72 @@ +! State average energy + +! Calculation of the state average energy from the integrals and the +! density matrices. + +! \begin{align*} +! E = \sum_{ij} h_{ij} \gamma_{ij} + \frac{1}{2} v_{ij}^{kl} \Gamma_{ij}^{kl} +! \end{align*} +! $h_{ij}$: mono-electronic integral +! $\gamma_{ij}$: one electron density matrix +! $v_{ij}^{kl}$: bi-electronic integral +! $\Gamma_{ij}^{kl}$: two electrons density matrix + +! TODO: OMP version + +! PROVIDED: +! | mo_one_e_integrals | double precision | mono-electronic integrals | +! | get_two_e_integral | double precision | bi-electronic integrals | +! | one_e_dm_mo | double precision | one electron density matrix | +! | two_e_dm_mo | double precision | two electrons density matrix | +! | nuclear_repulsion | double precision | nuclear repulsion | +! | mo_num | integer | number of MOs | + +! Output: +! | energy | double precision | state average energy | + +! Internal: +! | mono_e | double precision | mono-electronic energy | +! | bi_e | double precision | bi-electronic energy | +! | i,j,k,l | integer | indexes to loop over the MOs | + + +subroutine state_average_energy(energy) + + implicit none + + double precision, intent(out) :: energy + + double precision :: get_two_e_integral + double precision :: mono_e, bi_e + integer :: i,j,k,l + + ! mono electronic part + mono_e = 0d0 + do j = 1, mo_num + do i = 1, mo_num + mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) + enddo + enddo + + ! bi electronic part + bi_e = 0d0 + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + bi_e = bi_e + get_two_e_integral(i,j,k,l,mo_integrals_map) * two_e_dm_mo(i,j,k,l) + enddo + enddo + enddo + enddo + + ! State average energy + energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + + ! Check + !call print_energy_components + + print*,'State average energy:', energy + !print*,ci_energy + +end diff --git a/src/mo_optimization/state_weight_normalization.irp.f b/src/mo_optimization/state_weight_normalization.irp.f new file mode 100644 index 00000000..27d30af7 --- /dev/null +++ b/src/mo_optimization/state_weight_normalization.irp.f @@ -0,0 +1,29 @@ +subroutine state_weight_normalization + + implicit none + + BEGIN_DOC + ! Renormalization of the state weights or enforcing state average + ! weights for orbital optimization + END_DOC + + integer :: i + double precision :: normalization_factor + + ! To normalize the sum of the state weights + normalization_factor = 0d0 + do i = 1, N_states + normalization_factor = normalization_factor + state_average_weight(i) + enddo + normalization_factor = 1d0 / normalization_factor + + do i = 1, N_states + state_average_weight(i) = state_average_weight(i) * normalization_factor + enddo + TOUCH state_average_weight + + print*, 'Number of states:', N_states + print*, 'State average weights:' + print*, state_average_weight(:) + +end diff --git a/src/mo_optimization/update_parameters.irp.f b/src/mo_optimization/update_parameters.irp.f new file mode 100644 index 00000000..88e8fc34 --- /dev/null +++ b/src/mo_optimization/update_parameters.irp.f @@ -0,0 +1,15 @@ +! Subroutine toupdate the parameters. +! Ex: TOUCH mo_coef ... + + +subroutine update_parameters() + + implicit none + + !### TODO + ! Touch yours parameters + call clear_mo_map + TOUCH mo_coef psi_det psi_coef + call diagonalize_ci + call save_wavefunction_unsorted +end diff --git a/src/mo_optimization/update_st_av_ci_energy.irp.f b/src/mo_optimization/update_st_av_ci_energy.irp.f new file mode 100644 index 00000000..18b72502 --- /dev/null +++ b/src/mo_optimization/update_st_av_ci_energy.irp.f @@ -0,0 +1,25 @@ +! Update the CI state average energy + +! Computes the state average energy +! \begin{align*} +! E =\sum_{i=1}^{N_{states}} E_i . w_i +! \end{align*} + +! $E_i$: energy of state i +! $w_i$: weight of state i + + +subroutine update_st_av_ci_energy(energy) + + implicit none + + double precision, intent(out) :: energy + integer :: i + + energy = 0d0 + do i = 1, N_states + energy = energy + ci_energy(i) * state_average_weight(i) + enddo + + print*, 'ci_energy :', energy +end diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f new file mode 100644 index 00000000..14d3c696 --- /dev/null +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -0,0 +1,16 @@ +BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + integer :: k + + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_ao_num + call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 272916e3..ada256a2 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -377,6 +377,7 @@ integer function load_mo_integrals(filename) integer*8 :: n, j load_mo_integrals = 1 open(unit=66,file=filename,FORM='unformatted',STATUS='UNKNOWN') + call lock_io() read(66,err=98,end=98) iknd, kknd if (iknd /= integral_kind) then print *, 'Wrong integrals kind in file :', iknd @@ -399,6 +400,7 @@ integer function load_mo_integrals(filename) n = mo_integrals_map%map(i)%n_elements read(66,err=99,end=99) (key(j), j=1,n), (val(j), j=1,n) enddo + call unlock_io() call map_sort(mo_integrals_map) load_mo_integrals = 0 return diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index ae299e9f..b7ef901d 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -50,13 +50,16 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] call cpu_time(cpu_1) if(no_vvvv_integrals)then -! call four_idx_novvvv call four_idx_novvvv_old else - if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then - call four_idx_dgemm + if (do_ao_cholesky) then + call add_integrals_to_map_cholesky else - call add_integrals_to_map(full_ijkl_bitmask_4) + if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then + call four_idx_dgemm + else + call add_integrals_to_map(full_ijkl_bitmask_4) + endif endif endif @@ -175,7 +178,7 @@ subroutine add_integrals_to_map(mask_ijkl) implicit none BEGIN_DOC - ! Adds integrals to tha MO map according to some bitmask + ! Adds integrals to the MO map according to some bitmask END_DOC integer(bit_kind), intent(in) :: mask_ijkl(N_int,4) @@ -450,13 +453,72 @@ subroutine add_integrals_to_map(mask_ijkl) end +subroutine add_integrals_to_map_cholesky + use bitmasks + implicit none + + BEGIN_DOC + ! Adds integrals to the MO map using Cholesky vectors + END_DOC + + integer :: i,j,k,l,m + integer :: size_buffer, n_integrals + size_buffer = min(mo_num*mo_num*mo_num,16000000) + + double precision, allocatable :: Vtmp(:,:,:,:) + integer(key_kind) , allocatable :: buffer_i(:) + real(integral_kind), allocatable :: buffer_value(:) + + if (.True.) then + ! In-memory transformation + + allocate (Vtmp(mo_num,mo_num,mo_num,mo_num)) + + call dgemm('N','T',mo_num*mo_num,mo_num*mo_num,cholesky_ao_num,1.d0, & + cholesky_mo, mo_num*mo_num, & + cholesky_mo, mo_num*mo_num, 0.d0, & + Vtmp, mo_num*mo_num) + + !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i) + allocate (buffer_i(size_buffer), buffer_value(size_buffer)) + n_integrals = 0 + !$OMP DO + do l=1,mo_num + do k=1,l + do j=1,mo_num + do i=1,j + if (abs(Vtmp(i,j,k,l)) > mo_integrals_threshold) then + n_integrals += 1 + buffer_value(n_integrals) = Vtmp(i,j,k,l) + !DIR$ FORCEINLINE + call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) + if (n_integrals == size_buffer) then + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + n_integrals = 0 + endif + endif + enddo + enddo + enddo + enddo + !$OMP END DO + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + deallocate(buffer_i, buffer_value) + !$OMP END PARALLEL + + deallocate(Vtmp) + call map_unique(mo_integrals_map) + + endif + +end subroutine add_integrals_to_map_three_indices(mask_ijk) use bitmasks implicit none BEGIN_DOC - ! Adds integrals to tha MO map according to some bitmask + ! Adds integrals to the MO map according to some bitmask END_DOC integer(bit_kind), intent(in) :: mask_ijk(N_int,3) diff --git a/src/mol_properties/EZFIO.cfg b/src/mol_properties/EZFIO.cfg new file mode 100644 index 00000000..35a095fb --- /dev/null +++ b/src/mol_properties/EZFIO.cfg @@ -0,0 +1,23 @@ +[print_all_transitions] +type: logical +doc: If true, print the transition between all the states +interface: ezfio,provider,ocaml +default: false + +[calc_dipole_moment] +type: logical +doc: If true, the electric dipole moment will be computed +interface: ezfio,provider,ocaml +default: false + +[calc_tr_dipole_moment] +type: logical +doc: If true and N_states > 1, the transition electric dipole moment will be computed +interface: ezfio,provider,ocaml +default: false + +[calc_osc_str] +type: logical +doc: If true and N_states > 1, the oscillator strength will be computed +interface: ezfio,provider,ocaml +default: false diff --git a/src/mol_properties/NEED b/src/mol_properties/NEED new file mode 100644 index 00000000..8d89a452 --- /dev/null +++ b/src/mol_properties/NEED @@ -0,0 +1,2 @@ +determinants +davidson_undressed diff --git a/src/mol_properties/README.md b/src/mol_properties/README.md new file mode 100644 index 00000000..637b76d7 --- /dev/null +++ b/src/mol_properties/README.md @@ -0,0 +1,25 @@ +# Molecular properties + +Available quantities: +- Electric dipole moment +- Electric transition dipole moment +- Oscillator strength + +They are not computed by default. To compute them: +``` +qp set mol_properties calc_dipole_moment true +qp set mol_properties calc_tr_dipole_moment true +qp set mol_properties calc_osc_str true +``` +If you are interested in transitions between two excited states: +``` +qp set mol_properties print_all_transitions true +``` +They can be obtained by running +``` +qp run properties +``` +or at each step of a cipsi calculation with +``` +qp run fci +``` diff --git a/src/mol_properties/ci_energy_no_diag.irp.f b/src/mol_properties/ci_energy_no_diag.irp.f new file mode 100644 index 00000000..a4407d3b --- /dev/null +++ b/src/mol_properties/ci_energy_no_diag.irp.f @@ -0,0 +1,13 @@ +BEGIN_PROVIDER [double precision, ci_energy_no_diag, (N_states) ] + + implicit none + + BEGIN_DOC + ! CI energy from density matrices and integrals + ! Avoid the rediagonalization for ci_energy + END_DOC + + ci_energy_no_diag = psi_energy + nuclear_repulsion + +END_PROVIDER + diff --git a/src/mol_properties/mo_deriv_1.irp.f b/src/mol_properties/mo_deriv_1.irp.f new file mode 100644 index 00000000..cfe6f789 --- /dev/null +++ b/src/mol_properties/mo_deriv_1.irp.f @@ -0,0 +1,30 @@ + BEGIN_PROVIDER [double precision, mo_deriv_1_x , (mo_num,mo_num)] +&BEGIN_PROVIDER [double precision, mo_deriv_1_y , (mo_num,mo_num)] +&BEGIN_PROVIDER [double precision, mo_deriv_1_z , (mo_num,mo_num)] + BEGIN_DOC + ! array of the integrals of MO_i * d/dx MO_j + ! array of the integrals of MO_i * d/dy MO_j + ! array of the integrals of MO_i * d/dz MO_j + END_DOC + implicit none + + call ao_to_mo( & + ao_deriv_1_x, & + size(ao_deriv_1_x,1), & + mo_deriv_1_x, & + size(mo_deriv_1_x,1) & + ) + call ao_to_mo( & + ao_deriv_1_y, & + size(ao_deriv_1_y,1), & + mo_deriv_1_y, & + size(mo_deriv_1_y,1) & + ) + call ao_to_mo( & + ao_deriv_1_z, & + size(ao_deriv_1_z,1), & + mo_deriv_1_z, & + size(mo_deriv_1_z,1) & + ) + +END_PROVIDER diff --git a/src/mol_properties/multi_s_deriv_1.irp.f b/src/mol_properties/multi_s_deriv_1.irp.f new file mode 100644 index 00000000..84bfecc9 --- /dev/null +++ b/src/mol_properties/multi_s_deriv_1.irp.f @@ -0,0 +1,69 @@ + BEGIN_PROVIDER [double precision, multi_s_deriv_1, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_x_deriv_1, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_y_deriv_1, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_z_deriv_1, (N_states, N_states)] + + implicit none + + BEGIN_DOC + ! Providers for : + ! + ! + ! + ! ||v|| = sqrt(v_x^2 + v_y^2 + v_z^2) + ! v_x = d/dx + ! Cf. multi_s_dipole_moment for the equations + END_DOC + + integer :: istate,jstate ! States + integer :: i,j ! general spatial MOs + double precision :: nuclei_part_x, nuclei_part_y, nuclei_part_z + + multi_s_x_deriv_1 = 0.d0 + multi_s_y_deriv_1 = 0.d0 + multi_s_z_deriv_1 = 0.d0 + + do jstate = 1, N_states + do istate = 1, N_states + + do i = 1, mo_num + do j = 1, mo_num + multi_s_x_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_x(j,i) + multi_s_y_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_y(j,i) + multi_s_z_deriv_1(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_deriv_1_z(j,i) + enddo + enddo + + enddo + enddo + + ! Nuclei part + nuclei_part_x = 0.d0 + nuclei_part_y = 0.d0 + nuclei_part_z = 0.d0 + + do i = 1,nucl_num + nuclei_part_x += nucl_charge(i) * nucl_coord(i,1) + nuclei_part_y += nucl_charge(i) * nucl_coord(i,2) + nuclei_part_z += nucl_charge(i) * nucl_coord(i,3) + enddo + + ! Only if istate = jstate, otherwise 0 by the orthogonality of the states + do istate = 1, N_states + multi_s_x_deriv_1(istate,istate) += nuclei_part_x + multi_s_y_deriv_1(istate,istate) += nuclei_part_y + multi_s_z_deriv_1(istate,istate) += nuclei_part_z + enddo + + ! d = + do jstate = 1, N_states + do istate = 1, N_states + multi_s_deriv_1(istate,jstate) = & + dsqrt(multi_s_x_deriv_1(istate,jstate)**2 & + + multi_s_y_deriv_1(istate,jstate)**2 & + + multi_s_z_deriv_1(istate,jstate)**2) + enddo + enddo + +END_PROVIDER + diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f new file mode 100644 index 00000000..d5e62799 --- /dev/null +++ b/src/mol_properties/multi_s_dipole_moment.irp.f @@ -0,0 +1,93 @@ +! Providers for the dipole moments along x,y,z and the total dipole +! moments. + +! The dipole moment along the x axis is: +! \begin{align*} +! \mu_x = < \Psi_m | \sum_i x_i + \sum_A Z_A R_A | \Psi_n > +! \end{align*} +! where $i$ is used for the electrons and $A$ for the nuclei. +! $Z_A$ the charge of the nucleus $A$ and $R_A$ its position in the +! space. + +! And it can be computed using the (transition, if n /= m) density +! matrix as a expectation value +! \begin{align*} +! <\Psi_n|x| \Psi_m > = \sum_p \gamma_{pp}^{nm} < \phi_p | x | \phi_p > +! + \sum_{pq, p \neq q} \gamma_{pq}^{nm} < \phi_p | x | \phi_q > + < \Psi_m | \sum_A Z_A R_A | \Psi_n > +! \end{align*} + + + +BEGIN_PROVIDER [double precision, multi_s_dipole_moment, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment, (N_states, N_states)] + + implicit none + + BEGIN_DOC + ! Providers for : + ! <\Psi_m|\mu_x|\Psi_n> + ! <\Psi_m|\mu_y|\Psi_n> + ! <\Psi_m|\mu_z|\Psi_n> + ! ||\mu|| = \sqrt{\mu_x^2 + \mu_y^2 + \mu_z^2} + ! + ! <\Psi_n|x| \Psi_m > = \sum_p \gamma_{pp}^{nm} \bra{\phi_p} x \ket{\phi_p} + ! + \sum_{pq, p \neq q} \gamma_{pq}^{nm} \bra{\phi_p} x \ket{\phi_q} + ! \Psi: wf + ! n,m indexes for the states + ! p,q: general spatial MOs + ! gamma^{nm}: density matrix \bra{\Psi^n} a^{\dagger}_a a_i \ket{\Psi^m} + END_DOC + + integer :: istate,jstate ! States + integer :: i,j ! general spatial MOs + double precision :: nuclei_part_x, nuclei_part_y, nuclei_part_z + + multi_s_x_dipole_moment = 0.d0 + multi_s_y_dipole_moment = 0.d0 + multi_s_z_dipole_moment = 0.d0 + + do jstate = 1, N_states + do istate = 1, N_states + + do i = 1, mo_num + do j = 1, mo_num + multi_s_x_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_x(j,i) + multi_s_y_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_y(j,i) + multi_s_z_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_z(j,i) + enddo + enddo + + enddo + enddo + + ! Nuclei part + nuclei_part_x = 0.d0 + nuclei_part_y = 0.d0 + nuclei_part_z = 0.d0 + + do i = 1,nucl_num + nuclei_part_x += nucl_charge(i) * nucl_coord(i,1) + nuclei_part_y += nucl_charge(i) * nucl_coord(i,2) + nuclei_part_z += nucl_charge(i) * nucl_coord(i,3) + enddo + + ! Only if istate = jstate, otherwise 0 by the orthogonality of the states + do istate = 1, N_states + multi_s_x_dipole_moment(istate,istate) += nuclei_part_x + multi_s_y_dipole_moment(istate,istate) += nuclei_part_y + multi_s_z_dipole_moment(istate,istate) += nuclei_part_z + enddo + + ! d = + do jstate = 1, N_states + do istate = 1, N_states + multi_s_dipole_moment(istate,jstate) = & + dsqrt(multi_s_x_dipole_moment(istate,jstate)**2 & + + multi_s_y_dipole_moment(istate,jstate)**2 & + + multi_s_z_dipole_moment(istate,jstate)**2) + enddo + enddo + +END_PROVIDER diff --git a/src/mol_properties/print_mol_properties.irp.f b/src/mol_properties/print_mol_properties.irp.f new file mode 100644 index 00000000..3753a3dd --- /dev/null +++ b/src/mol_properties/print_mol_properties.irp.f @@ -0,0 +1,24 @@ +subroutine print_mol_properties() + + implicit none + + BEGIN_DOC + ! Run the propertie calculations + END_DOC + + ! Electric dipole moment + if (calc_dipole_moment) then + call print_dipole_moment + endif + + ! Transition electric dipole moment + if (calc_tr_dipole_moment .and. N_states > 1) then + call print_transition_dipole_moment + endif + + ! Oscillator strength + if (calc_osc_str .and. N_states > 1) then + call print_oscillator_strength + endif + +end diff --git a/src/mol_properties/print_properties.irp.f b/src/mol_properties/print_properties.irp.f new file mode 100644 index 00000000..4c0a9f38 --- /dev/null +++ b/src/mol_properties/print_properties.irp.f @@ -0,0 +1,194 @@ +! Dipole moments + +! Provided +! | N_states | integer | Number of states | +! | multi_s_x_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along x axis | +! | multi_s_y_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along y axis | +! | multi_s_z_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along z axis | +! | multi_s_dipole_moment(N_states,N_states) | double precision | Total (transition) dipole moments | + + +subroutine print_dipole_moment + + implicit none + + BEGIN_DOC + ! To print the dipole moment ||<\Psi_i|µ|\Psi_i>|| and its x,y,z components + END_DOC + + integer :: istate + double precision, allocatable :: d(:), d_x(:), d_y(:), d_z(:) + + allocate(d(N_states),d_x(N_states),d_y(N_states),d_z(N_states)) + + do istate = 1, N_states + d_x(istate) = multi_s_x_dipole_moment(istate,istate) + d_y(istate) = multi_s_y_dipole_moment(istate,istate) + d_z(istate) = multi_s_z_dipole_moment(istate,istate) + d(istate) = multi_s_dipole_moment(istate,istate) + enddo + + ! Atomic units + print*,'' + print*,'# Dipoles:' + print*,'==============================================' + print*,' Dipole moments (au)' + print*,' State X Y Z ||µ||' + + do istate = 1, N_states + write(*,'(I5,4(F12.6))') (istate-1), d_x(istate), d_y(istate), d_z(istate), d(istate) + enddo + + ! Debye + print*,'' + print*,' Dipole moments (D)' + print*,' State X Y Z ||µ||' + + do istate = 1, N_states + write(*,'(I5,4(F12.6))') (istate-1), d_x(istate)*au_to_D, d_y(istate)*au_to_D, d_z(istate)*au_to_D, d(istate)*au_to_D + enddo + + print*,'==============================================' + print*,'' + + deallocate(d,d_x,d_y,d_z) + + end + +! Transition dipole moments + +! Provided +! | N_states | integer | Number of states | +! | multi_s_x_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along x axis | +! | multi_s_y_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along y axis | +! | multi_s_z_dipole_moment(N_states,N_states) | double precision | (transition) dipole moments along z axis | +! | multi_s_dipole_moment(N_states,N_states) | double precision | Total (transition) dipole moments | + + +subroutine print_transition_dipole_moment + + implicit none + + BEGIN_DOC + ! To print the transition dipole moment ||<\Psi_i|µ|\Psi_j>|| and its components along x, y and z + END_DOC + + integer :: istate,jstate, n_states_print + double precision :: f, d, d_x, d_y, d_z, dip_str + + if (N_states == 1 .or. N_det == 1) then + return + endif + + print*,'' + print*,'# Transition dipoles:' + print*,'==============================================' + print*,' Transition dipole moments (au)' + write(*,'(A89)') ' # Transition X Y Z ||µ|| Dip. str. Osc. str.' + + if (print_all_transitions) then + n_states_print = N_states + else + n_states_print = 1 + endif + + do jstate = 1, n_states_print !N_states + do istate = jstate + 1, N_states + d_x = multi_s_x_dipole_moment(istate,jstate) + d_y = multi_s_y_dipole_moment(istate,jstate) + d_z = multi_s_z_dipole_moment(istate,jstate) + dip_str = d_x**2 + d_y**2 + d_z**2 + d = multi_s_dipole_moment(istate,jstate) + f = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)) + write(*,'(I4,I4,A4,I3,6(F12.6))') (istate-1), (jstate-1), ' ->', (istate-1), d_x, d_y, d_z, d, dip_str, f + enddo + enddo + + print*,'' + print*,' Transition dipole moments (D)' + write(*,'(A89)') ' # Transition X Y Z ||µ|| Dip. str. Osc. str.' + + do jstate = 1, n_states_print !N_states + do istate = jstate + 1, N_states + d_x = multi_s_x_dipole_moment(istate,jstate) * au_to_D + d_y = multi_s_y_dipole_moment(istate,jstate) * au_to_D + d_z = multi_s_z_dipole_moment(istate,jstate) * au_to_D + d = multi_s_dipole_moment(istate,jstate) + dip_str = d_x**2 + d_y**2 + d_z**2 + f = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)) + d = multi_s_dipole_moment(istate,jstate) * au_to_D + write(*,'(I4,I4,A4,I3,6(F12.6))') (istate-1), (jstate-1), ' ->', (istate-1), d_x, d_y, d_z, d, dip_str, f + enddo + enddo + print*,'==============================================' + print*,'' + +end + +! Oscillator strengths + +! Provided +! | N_states | integer | Number of states | +! | multi_s_dipole_moment(N_states,N_states) | double precision | Total (transition) dipole moments | +! | multi_s_deriv1_moment(N_states,N_states) | double precision | Total (transition) ... | +! | ci_energy_no_diag(N_states) | double precision | CI energy of each state | + +! Internal +! | f_l | double precision | Oscillator strength in length gauge | +! | f_v | double precision | Oscillator strength in velocity gauge | +! | f_m | double precision | Oscillator strength in mixed gauge | +! | n_states_print | integer | Number of printed states | + + +subroutine print_oscillator_strength + + implicit none + + BEGIN_DOC + ! https://doi.org/10.1016/j.cplett.2004.03.126 + ! Oscillator strength in: + ! - length gauge, f^l_{ij} = 2/3 (E_i - E_j) <\Psi_i|r|\Psi_j> <\Psi_j|r|\Psi_i> + ! - velocity gauge, f^v_{ij} = 2/3 (E_i - E_j)^(-1) <\Psi_i|v|\Psi_j> <\Psi_j|v|\Psi_i> + ! - mixed gauge, f^m_{ij} = -2i/3 <\Psi_i|r|\Psi_j> <\Psi_j|v|\Psi_i> + END_DOC + + integer :: istate,jstate,k, n_states_print + double precision :: f_l,f_v,f_m,d,v + + if (N_states == 1 .or. N_det == 1) then + return + endif + + print*,'' + print*,'# Oscillator strength:' + print*,'==============================================' + + if (print_all_transitions) then + n_states_print = N_states + else + n_states_print = 1 + endif + + write(*,'(A103)') ' Oscillator strength in length gauge (f_l), velocity gauge (f_v) and mixed length-velocity gauge (f_m)' + do jstate = 1, n_states_print !N_states + do istate = jstate + 1, N_states + d = multi_s_dipole_moment(istate,jstate) + v = multi_s_deriv_1(istate,jstate) + ! Length gauge + f_l = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)) + ! Velocity gauge + f_v = 2d0/3d0 * v * v * 1d0/dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)) + ! Mixed gauge + f_m = 2d0/3d0 * d * v + + write(*,'(A19,I3,A9,F10.6,A5,F7.1,A10,F9.6,A6,F9.6,A6,F9.6,A8,F7.3)') ' # Transition n.', (istate-1), ': Excit.=', dabs((ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)))*ha_to_ev, & + ' eV ( ',dabs((ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)))*Ha_to_nm,' nm), f_l=',f_l, ', f_v=', f_v, ', f_m=', f_m, ', =', s2_values(istate) + !write(*,'(I4,I4,A4,I3,A6,F6.1,A6,F6.1)') (istate-1), (jstate-1), ' ->', (istate-1), ', %T1=', percent_exc(2,istate), ', %T2=',percent_exc(3,istate) + + enddo + enddo + + print*,'==============================================' + print*,'' + +end diff --git a/src/mol_properties/properties.irp.f b/src/mol_properties/properties.irp.f new file mode 100644 index 00000000..7ea6f9c3 --- /dev/null +++ b/src/mol_properties/properties.irp.f @@ -0,0 +1,14 @@ +program mol_properties + + implicit none + + BEGIN_DOC + ! Calculation of the properties + END_DOC + + read_wf = .True. + touch read_wf + + call print_mol_properties() + +end diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 7925fa7c..1fd39f6a 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -2,7 +2,7 @@ ! --- ! TODO : strong optmization : write the loops in a different way -! : for each couple of AO, the gaussian product are done once for all +! : for each couple of AO, the gaussian product are done once for all BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_points_final_grid) ] @@ -20,14 +20,14 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi ! gradu_squared_u_ij_mu = -0.50 x \int r2 \phi_i(2) \phi_j(2) [ v1^2 v2^2 ((grad_1 u12)^2 + (grad_2 u12^2)]) + u12^2 v2^2 (grad_1 v1)^2 + 2 u12 v1 v2^2 (grad_1 u12) . (grad_1 v1) ] ! = -0.25 x v1^2 \int r2 \phi_i(2) \phi_j(2) [1 - erf(mu r12)]^2 v2^2 ! + -0.50 x (grad_1 v1)^2 \int r2 \phi_i(2) \phi_j(2) u12^2 v2^2 - ! + -1.00 x v1 (grad_1 v1) \int r2 \phi_i(2) \phi_j(2) (grad_1 u12) v2^2 + ! + -1.00 x v1 (grad_1 v1) \int r2 \phi_i(2) \phi_j(2) (grad_1 u12) v2^2 ! = v1^2 x int2_grad1u2_grad2u2_j1b2 ! + -0.5 x (grad_1 v1)^2 x int2_u2_j1b2 ! + -1.0 X V1 x (grad_1 v1) \cdot [ int2_u_grad1u_j1b2 x r - int2_u_grad1u_x_j1b ] ! ! END_DOC - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z, r(3), delta, coef @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi call wall_time(time1) print*, ' Wall time for gradu_squared_u_ij_mu = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -151,7 +151,7 @@ END_PROVIDER ! ! deallocate(ac_mat) ! -!END_PROVIDER +!END_PROVIDER ! --- @@ -214,12 +214,12 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_nu call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_loop = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: r(3), delta, coef @@ -267,7 +267,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -297,12 +297,12 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_ call wall_time(time1) print*, ' Wall time for u12sq_j1bsq = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z @@ -347,7 +347,7 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -370,26 +370,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao if(read_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_square_ao(l,k,j,i) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao', action="read") + read(11) tc_grad_square_ao close(11) else allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -401,11 +393,11 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo !$OMP END DO !$OMP END PARALLEL - + tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -417,25 +409,25 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo !$OMP END DO !$OMP END PARALLEL - + tc_grad_square_ao = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao, ao_num*ao_num) deallocate(tmp, b_mat) - + call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) - + !!$OMP PARALLEL & !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, k, l) & + !!$OMP PRIVATE (i, j, k, l) & !!$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num) !!$OMP DO SCHEDULE (static) ! do j = 1, ao_num ! do l = 1, ao_num ! do i = 1, ao_num ! do k = 1, ao_num - ! tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + ! tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) ! enddo ! enddo ! enddo @@ -444,23 +436,17 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao !!$OMP END PARALLEL endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_square_ao(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_square_ao close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_square_ao = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f index cb9e15c4..66f3c693 100644 --- a/src/non_h_ints_mu/grad_squared_manu.irp.f +++ b/src/non_h_ints_mu/grad_squared_manu.irp.f @@ -17,29 +17,21 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu call wall_time(time0) if(read_tc_integ) then - - open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_square_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao_test', action="read") + read(11) tc_grad_square_ao_test close(11) else provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test - + allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -51,11 +43,11 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu enddo !$OMP END DO !$OMP END PARALLEL - + tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -67,23 +59,23 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu enddo !$OMP END DO !$OMP END PARALLEL - + tc_grad_square_ao_test = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao_test, ao_num*ao_num) deallocate(tmp, b_mat) - + call sum_A_At(tc_grad_square_ao_test(1,1,1,1), ao_num*ao_num) !do i = 1, ao_num ! do j = 1, ao_num ! do k = i, ao_num - + ! do l = max(j,k), ao_num ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) ! end do - + ! !if (j.eq.k) then ! ! do l = j+1, ao_num ! ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) @@ -95,14 +87,14 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu ! ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) ! ! enddo ! !endif - + ! enddo ! enddo !enddo !tc_grad_square_ao_test = 2.d0 * tc_grad_square_ao_test ! !$OMP PARALLEL & ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP PRIVATE (i, j, k, l) & ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) ! !$OMP DO SCHEDULE (static) ! integer :: ii @@ -121,10 +113,10 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu ! print *, ' ii =', ii ! !$OMP END DO ! !$OMP END PARALLEL - + ! !$OMP PARALLEL & ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP PRIVATE (i, j, k, l) & ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) ! !$OMP DO SCHEDULE (static) ! do j = 1, ao_num @@ -144,24 +136,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_square_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_square_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -189,7 +175,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -205,7 +191,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -226,7 +212,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & + !$OMP PRIVATE (i, j, k, l) & !$OMP SHARED (ac_mat, tc_grad_square_ao_test_ref, ao_num) !$OMP DO SCHEDULE (static) do j = 1, ao_num @@ -246,7 +232,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_test_ref = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -276,12 +262,12 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_f call wall_time(time1) print*, ' Wall time for u12sq_j1bsq_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z @@ -328,12 +314,12 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: r(3), delta, coef @@ -381,7 +367,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_fi call wall_time(time1) print*, ' Wall time for grad12_j12_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index a15f690a..754e1240 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -36,16 +36,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_ if(read_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="read") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - read(11) int2_grad1_u12_ao(i,j,ipoint,m) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") + read(11) int2_grad1_u12_ao close(11) else @@ -89,18 +81,12 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_ endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="write") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - write(11) int2_grad1_u12_ao(i,j,ipoint,m) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f index 47b05e52..901e3048 100644 --- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f +++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f @@ -3,7 +3,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po BEGIN_DOC ! - ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) ! ! where r1 = r(ipoint) ! @@ -15,9 +15,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po ! if J(r1,r2) = u12 x v1 x v2 ! ! int2_grad1_u12_ao_test(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] - ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] - ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) - ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) + ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] + ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) + ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) ! ! @@ -35,25 +35,18 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po if(read_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="read") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - read(11) int2_grad1_u12_ao_test(i,j,ipoint,m) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao_test', action="read") + read(11) int2_grad1_u12_ao_test close(11) + else - + if(j1b_type .eq. 3) then do ipoint = 1, n_points_final_grid x = final_grid_points(1,ipoint) y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) + z = final_grid_points(3,ipoint) tmp0 = 0.5d0 * v_1b(ipoint) tmp_x = v_1b_grad(1,ipoint) tmp_y = v_1b_grad(2,ipoint) @@ -87,24 +80,18 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="write") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - write(11) int2_grad1_u12_ao_test(i,j,ipoint,m) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for int2_grad1_u12_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -114,9 +101,9 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ ! ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij > ! - ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) ! - ! This is obtained by integration by parts. + ! This is obtained by integration by parts. ! END_DOC @@ -131,40 +118,32 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ call wall_time(time0) if(read_tc_integ) then - - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_and_lapl_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao_test', action="read") + read(11) tc_grad_and_lapl_ao_test close(11) else - provide int2_grad1_u12_ao_test - + provide int2_grad1_u12_ao_test + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num do k = 1, ao_num do ipoint = 1, n_points_final_grid - + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) ao_i_r = aos_in_r_array_transp(ipoint,i) ao_k_r = aos_in_r_array_transp(ipoint,k) - + b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) @@ -173,19 +152,19 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ enddo !$OMP END DO !$OMP END PARALLEL - + ac_mat = 0.d0 do m = 1, 3 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & , 1.d0, ac_mat, ao_num*ao_num) - + enddo deallocate(b_mat) - + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & + !$OMP PRIVATE (i, j, k, l) & !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num) !$OMP DO SCHEDULE (static) do j = 1, ao_num @@ -199,29 +178,23 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ enddo !$OMP END DO !$OMP END PARALLEL - + deallocate(ac_mat) endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_and_lapl_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_and_lapl_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index c1e010c7..4f8dc74d 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -56,6 +56,7 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao do i = 1, ao_num do k = 1, ao_num ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) +! ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j) enddo enddo enddo @@ -67,7 +68,26 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao END_PROVIDER +BEGIN_PROVIDER [double precision, ao_tc_int_chemist_no_cycle, (ao_num, ao_num, ao_num, ao_num)] ! --- + implicit none + integer :: i, j, k, l + double precision :: wall1, wall0 + print *, ' providing ao_tc_int_chemist_no_cycle ...' + call wall_time(wall0) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_tc_int_chemist_no_cycle(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) +! ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j) + enddo + enddo + enddo + enddo + call wall_time(wall1) + print *, ' wall time for ao_tc_int_chemist_no_cycle ', wall1 - wall0 +END_PROVIDER BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)] @@ -83,6 +103,7 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_nu do i = 1, ao_num do k = 1, ao_num ao_tc_int_chemist_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_and_lapl_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j) +! ao_tc_int_chemist_test(k,i,l,j) = ao_two_e_coul(k,i,l,j) enddo enddo enddo diff --git a/src/nuclei/nuclei.irp.f b/src/nuclei/nuclei.irp.f index 3c04316f..fabdc42e 100644 --- a/src/nuclei/nuclei.irp.f +++ b/src/nuclei/nuclei.irp.f @@ -241,13 +241,13 @@ END_PROVIDER enddo character*(80) :: buffer, dummy do - read(iunit,'(A80)',end=10) buffer - read(buffer,*) i ! First read i - read(buffer,*) i, element_name(i), dummy, element_mass(i) - enddo - 10 continue - close(10) - endif + read(iunit,'(A80)',end=10) buffer + read(buffer,*) i ! First read i + read(buffer,*) i, element_name(i), dummy, element_mass(i) + enddo + 10 continue + close(10) + endif IRP_IF MPI_DEBUG print *, irp_here, mpi_rank diff --git a/src/nuclei/write_pt_charges.py b/src/nuclei/write_pt_charges.py index 6dbcd5b8..f5007090 100644 --- a/src/nuclei/write_pt_charges.py +++ b/src/nuclei/write_pt_charges.py @@ -21,7 +21,7 @@ def mv_in_ezfio(ezfio,tmp): os.system(cmdmv) -# Getting the EZFIO + ##Getting the EZFIO EZFIO=sys.argv[1] EZFIO=EZFIO.replace("/", "") print(EZFIO) @@ -66,8 +66,20 @@ zip_in_ezfio(EZFIO,tmp) tmp="pts_charge_coord" fcoord = open(tmp,'w') fcoord.write(" 2\n") -fcoord.write(" "+str(n_charges)+' 3\n') -#fcoord.write(" "+' 3 '+str(n_charges)+' \n') +if(n_charges < 10): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <100): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <1000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <10000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <100000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <1000000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <10000000): + fcoord.write(" "+str(n_charges)+' 3\n') for i in range(n_charges): fcoord.write(' '+coord_x[i]+'\n') for i in range(n_charges): diff --git a/src/scf_utils/NEED b/src/scf_utils/NEED index b89695da..292d343a 100644 --- a/src/scf_utils/NEED +++ b/src/scf_utils/NEED @@ -1,2 +1,3 @@ mo_guess bitmask +json diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 3b9eaeb4..cf006035 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -12,6 +12,7 @@ END_DOC integer :: iteration_SCF,dim_DIIS,index_dim_DIIS + logical :: converged integer :: i,j logical, external :: qp_stop double precision, allocatable :: mo_coef_save(:,:) @@ -50,10 +51,8 @@ END_DOC ! PROVIDE FPS_SPF_matrix_AO Fock_matrix_AO - do while ( & - ( (max_error_DIIS > threshold_DIIS_nonzero) .or. & - (dabs(Delta_energy_SCF) > thresh_SCF) & - ) .and. (iteration_SCF < n_it_SCF_max) ) + converged = .False. + do while ( .not.converged .and. (iteration_SCF < n_it_SCF_max) ) ! Increment cycle number @@ -144,17 +143,49 @@ END_DOC SOFT_TOUCH level_shift energy_SCF_previous = energy_SCF + converged = ( (max_error_DIIS <= threshold_DIIS_nonzero) .and. & + (dabs(Delta_energy_SCF) <= thresh_SCF) ) + ! Print results at the end of each iteration write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)') & iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS +! Write data in JSON file + + call lock_io + if (iteration_SCF == 1) then + write(json_unit, json_dict_uopen_fmt) + else + write(json_unit, json_dict_close_uopen_fmt) + endif + write(json_unit, json_int_fmt) 'iteration', iteration_SCF + write(json_unit, json_real_fmt) 'energy', energy_SCF + write(json_unit, json_real_fmt) 'delta_energy_SCF', Delta_energy_SCF + write(json_unit, json_real_fmt) 'max_error_DIIS', max_error_DIIS + write(json_unit, json_real_fmt) 'level_shift', level_shift + write(json_unit, json_int_fmt) 'dim_DIIS', dim_DIIS + call unlock_io + if (Delta_energy_SCF < 0.d0) then call save_mos + write(json_unit, json_true_fmt) 'saved' + else + write(json_unit, json_false_fmt) 'saved' endif + + call lock_io + if (converged) then + write(json_unit, json_true_fmtx) 'converged' + else + write(json_unit, json_false_fmtx) 'converged' + endif + call unlock_io + if (qp_stop()) exit enddo + write(json_unit, json_dict_close_fmtx) if (iteration_SCF < n_it_SCF_max) then mo_label = 'Canonical' @@ -166,6 +197,10 @@ END_DOC write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)') & '====','================','================','================','================' write(6,*) + if (converged) then + write(6,*) 'SCF converged' + endif + if(.not.frozen_orb_scf)then call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo,size(Fock_matrix_mo,1), & diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f new file mode 100644 index 00000000..3e89bbe2 --- /dev/null +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -0,0 +1,558 @@ + +! --- + +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, n_it_max_dav, converged, hcalc) + + use mmap_module + + BEGIN_DOC + ! Generic modified-Davidson diagonalization + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! u_in : guess coefficients on the various states. Overwritten on exit by right eigenvectors + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > N_st + ! + ! Initial guess vectors are not necessarily orthonormal + ! + ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + ! + ! !!! WARNING !!! IT SEEMS THAT IF THE NUMBER OF MACRO ITERATIONS EXCEEDS n_it_max_dav, + ! + ! THE RECONTRACTION IS WRONG. YOU SHOULD CONSIDER CALLING MULTIPLE TIME THE ROUTINE + ! + ! SEE FOR INSTANCE IN tc_bi_ortho/tc_h_eigvectors.irp.f + END_DOC + + implicit none + + integer, intent(in) :: sze, N_st, N_st_diag_in, n_it_max_dav + double precision, intent(in) :: H_jj(sze) + logical, intent(inout) :: converged + double precision, intent(inout) :: u_in(sze,N_st_diag_in) + double precision, intent(out) :: energies(N_st) + double precision, intent(inout) :: s2_out(N_st) + external hcalc + + character*(16384) :: write_buffer + integer :: iter, N_st_diag + integer :: i, j, k, l, m + integer :: iter2, itertot + logical :: disk_based + integer :: shift, shift2, itermax + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: to_print(3,N_st) + double precision :: r1, r2, alpha + double precision :: cpu, wall + double precision :: cmax + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + double precision, allocatable :: U(:,:) + double precision, allocatable :: y(:,:), h(:,:), lambda(:), h_p(:,:), s2(:) + real, allocatable :: y_s(:,:) + double precision, allocatable :: s_(:,:), s_tmp(:,:) + double precision, allocatable :: residual_norm(:) + + double precision :: lambda_tmp + integer, allocatable :: i_omax(:) + double precision, allocatable :: U_tmp(:), overlap(:), S_d(:,:) + + double precision, allocatable :: W(:,:) + real, pointer :: S(:,:) + + !double precision, pointer :: W(:,:) + double precision, external :: u_dot_v, u_dot_u + + + include 'constants.include.F' + + N_st_diag = N_st_diag_in +! print*,'trial vector' + do i = 1, sze + if(isnan(u_in(i,1)))then + print*,'pb in input vector of davidson_general_ext_rout_nonsym_b1space' + print*,i,u_in(i,1) + stop + else if (dabs(u_in(i,1)).lt.1.d-16)then + u_in(i,1) = 0.d0 + endif + enddo + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, y_s, S_d, h, lambda + + if(N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1 + + provide threshold_nonsym_davidson + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = sze + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag*itermax) &! U + + 1.5d0*dble(sze*m)*(N_st_diag*itermax) &! W, S + + 4.5d0*(N_st_diag*itermax)**2 &! h,y,y_s,s_, s_tmp + + 2.d0*(N_st_diag*itermax) &! s2,lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_S2_u_0_nstates_zmq + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on collector + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on slave + + 0.5d0*maxab &! idx0 in H_S2_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if(nproc_target == 0) then + call check_mem(r1, irp_here) + nproc_target = 1 + exit + endif + + if(r1+rss < qp_max_mem) then + exit + endif + + if(itermax > 4) then + itermax = itermax - 1 +! else if (m==1.and.disk_based_davidson) then +! m = 0 +! disk_based = .True. +! itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + + call write_int(6, N_st, 'Number of states') + call write_int(6, N_st_diag, 'Number of states in diagonalization') + call write_int(6, sze, 'Number of basis functions') + call write_int(6, nproc_target, 'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if(disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + ! --- + + + allocate( W(sze,N_st_diag*itermax), S(sze,N_st_diag*itermax) ) + + allocate( & + ! Large + U(sze,N_st_diag*itermax), & + S_d(sze,N_st_diag), & + + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + h_p(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + lambda(N_st_diag*itermax), & + residual_norm(N_st_diag), & + i_omax(N_st), & + s2(N_st_diag*itermax), & + y_s(N_st_diag*itermax,N_st_diag*itermax) & + ) + + U = 0.d0 + h = 0.d0 + y = 0.d0 + s_ = 0.d0 + s_tmp = 0.d0 + + lambda = 0.d0 + residual_norm = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + + ! Davidson iterations + ! =================== + + converged = .False. + + ! Initialize from N_st to N_st_diag with gaussian random numbers + ! to be sure to have overlap with any eigenvectors + do k = N_st+1, N_st_diag + u_in(k,k) = 10.d0 + do i = 1, sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) + enddo + enddo + ! Normalize all states + do k = 1, N_st_diag + call normalize(u_in(1,k), sze) + enddo + + ! Copy from the guess input "u_in" to the working vectors "U" + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + ! --- + + itertot = 0 + +! do while (.not.converged.or.itertot.le.n_it_max_dav) + integer :: iiii + do iiii = 1, n_it_max_dav + + itertot = itertot + 1 + if(itertot == 8) then + exit + endif + + do iter = 1, itermax-1 + + shift = N_st_diag * (iter-1) + shift2 = N_st_diag * iter + + if( (iter > 1) .or. (itertot == 1) ) then + + ! Gram-Schmidt to orthogonalize all new guess with the previous vectors + call ortho_qr(U, size(U, 1), sze, shift2) + call ortho_qr(U, size(U, 1), sze, shift2) + + ! W = H U +! call hcalc(W(1,shift+1), U(1,shift+1), N_st_diag, sze) + call hcalc(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) + else + + ! Already computed in update below + continue + endif + ! Compute s_kl = = + ! ------------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k) COLLAPSE(2) + do j=1,shift2 + do i=1,shift2 + s_(i,j) = 0.d0 + do k=1,sze + s_(i,j) = s_(i,j) + U(k,i) * dble(S(k,j)) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + ! Compute h_kl = = + ! ------------------------------------------- + call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 & + , U, size(U, 1), W, size(W, 1) & + , 0.d0, h, size(h, 1) ) + ! Penalty method + ! -------------- + +! if (s2_eig) then +! h_p = s_ +! do k=1,shift2 +! h_p(k,k) = h_p(k,k) - expected_s2 +! enddo +! if (only_expected_s2) then +! alpha = 0.1d0 +! h_p = h + alpha*h_p +! else +! alpha = 0.0001d0 +! h_p = h + alpha*h_p +! endif +! else + h_p = h + alpha = 0.d0 +! endif + + + ! Diagonalize h y = lambda y + ! --------------------------- + call diag_nonsym_right(shift2, h_p(1,1), size(h_p, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1)) + + do k = 1, N_st_diag +! print*,'lambda(k) before = ',lambda(k) + lambda(k) = 0.d0 + do l = 1, shift2 + do m = 1, shift2 + lambda(k) += y(m,k) * h(m,l) * y(l,k) + enddo + enddo +! print*,'lambda(k) new = ',lambda(k) + enddo + ! Compute S2 for each eigenvector + ! ------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, s_, size(s_,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, s_, size(s_,1)) + + do k=1,shift2 + s2(k) = s_(k,k) + enddo + + ! Express eigenvectors of h in the determinant basis: + ! --------------------------------------------------- + + ! y(:,k) = rk + ! U(:,k) = Bk + ! U(:,shift2+k) = Rk = Bk x rk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, U(1,shift2+1), size(U, 1) ) + + do k = 1, N_st_diag + call normalize(U(1,shift2+k), sze) + enddo + + ! --- + ! select the max overlap + + ! + ! start test ------------------------------------------------------------------------ + ! + !double precision, allocatable :: Utest(:,:), Otest(:) + !allocate( Utest(sze,shift2), Otest(shift2) ) + + !call dgemm( 'N', 'N', sze, shift2, shift2, 1.d0 & + ! , U, size(U, 1), y, size(y, 1), 0.d0, Utest(1,1), size(Utest, 1) ) + !do k = 1, shift2 + ! call normalize(Utest(1,k), sze) + !enddo + !do j = 1, sze + ! write(455, '(100(1X, F16.10))') (Utest(j,k), k=1,shift2) + !enddo + + !do k = 1, shift2 + ! Otest(k) = 0.d0 + ! do i = 1, sze + ! Otest(k) += Utest(i,k) * u_in(i,1) + ! enddo + ! Otest(k) = dabs(Otest(k)) + ! print *, ' Otest =', k, Otest(k), lambda(k) + !enddo + + !deallocate(Utest, Otest) + ! + ! end test ------------------------------------------------------------------------ + ! + + ! TODO + ! state_following is more efficient + do l = 1, N_st + + allocate( overlap(N_st_diag) ) + + do k = 1, N_st_diag + overlap(k) = 0.d0 + do i = 1, sze + overlap(k) = overlap(k) + U(i,shift2+k) * u_in(i,l) + enddo + overlap(k) = dabs(overlap(k)) + !print *, ' overlap =', k, overlap(k) + enddo + + lambda_tmp = 0.d0 + do k = 1, N_st_diag + if(overlap(k) .gt. lambda_tmp) then + i_omax(l) = k + lambda_tmp = overlap(k) + endif + enddo + + deallocate(overlap) + + if(lambda_tmp .lt. 0.7d0) then + print *, ' very small overlap ...', l, i_omax(l) + print *, ' max overlap = ', lambda_tmp + stop + endif + + if(i_omax(l) .ne. l) then + print *, ' !!! WARNONG !!!' + print *, ' index of state', l, i_omax(l) + endif + enddo + + ! y(:,k) = rk + ! W(:,k) = H x Bk + ! W(:,shift2+k) = H x Bk x rk + ! = Wk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, W(1,shift2+1), size(W, 1) ) + + ! --- + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k = 1, N_st_diag + do i = 1, sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2) + enddo + if(k <= N_st) then + l = k + residual_norm(k) = u_dot_u(U(1,shift2+l), sze) + to_print(1,k) = lambda(l) + to_print(2,k) = s2(l) + to_print(3,k) = residual_norm(l) + endif + enddo + !$OMP END PARALLEL DO + !residual_norm(1) = u_dot_u(U(1,shift2+1), sze) + !to_print(1,1) = lambda(1) + !to_print(2,1) = residual_norm(1) + + + if( (itertot > 1) .and. (iter == 1) ) then + !don't print + continue + else + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:3,1:N_st) + endif + + ! Check convergence + if(iter > 1) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson + endif + + do k = 1, N_st + if(residual_norm(k) > 1.e8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if(converged) then + exit + endif + + logical, external :: qp_stop + if(qp_stop()) then + converged = .True. + exit + endif + + enddo ! loop over iter + + + ! Re-contract U and update W + ! -------------------------------- + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + W(i,k) = u_in(i,k) + enddo + enddo + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + call ortho_qr(U, size(U, 1), sze, N_st_diag) + call ortho_qr(U, size(U, 1), sze, N_st_diag) + do j = 1, N_st_diag + k = 1 + do while( (k < sze) .and. (U(k,j) == 0.d0) ) + k = k+1 + enddo + if(U(k,j) * u_in(k,j) < 0.d0) then + do i = 1, sze + W(i,j) = -W(i,j) + enddo + endif + enddo + if(converged)exit + enddo ! loop over while + + ! --- + + do k = 1, N_st + energies(k) = lambda(k) + s2_out(k) = s2(k) + enddo + write_buffer = '=====' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + deallocate(W) + deallocate(U, h, y, lambda, residual_norm, i_omax) + + FREE nthreads_davidson + +end subroutine davidson_general_ext_rout_nonsym_b1space + +! --- diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f new file mode 100644 index 00000000..b9b85a96 --- /dev/null +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -0,0 +1,769 @@ + +subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $e_0 = \langle l_0 | H | r_0\rangle$. + ! + ! Computes $s_0 = \langle l_0 | S^2 | r_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: l_0(sze,N_st), r_0(sze,N_st) + double precision, intent(out) :: energies(N_st), s2(N_st) + logical :: do_right + integer :: istate + double precision, allocatable :: s_0(:,:), v_0(:,:) + double precision :: u_dot_v, norm + allocate(s_0(sze,N_st), v_0(sze,N_st)) + do_right = .True. + call H_tc_s2_u_0_opt(v_0,s_0,r_0,N_st,sze) + do istate = 1, N_st + norm = u_dot_v(l_0(1,istate),r_0(1,istate),sze) + energies(istate) = u_dot_v(l_0(1,istate),v_0(1,istate),sze)/norm + s2(istate) = u_dot_v(l_0(1,istate),s_0(1,istate),sze)/norm + enddo +end + +subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .True. + call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) +end + +subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .False. + call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) +end + + +subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + ! + ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical, intent(in) :: do_right + integer :: k + double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) + do k=1,N_st + call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) + enddo + v_t = 0.d0 + s_t = 0.d0 + call dtranspose( & + u_0, & + size(u_0, 1), & + u_t, & + size(u_t, 1), & + N_det, N_st) + + call H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1, do_right) + deallocate(u_t) + + call dtranspose( & + v_t, & + size(v_t, 1), & + v_0, & + size(v_0, 1), & + N_st, N_det) + call dtranspose( & + s_t, & + size(s_t, 1), & + s_0, & + size(s_0, 1), & + N_st, N_det) + deallocate(v_t,s_t) + + do k=1,N_st + call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + enddo + +end + + +subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep, do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t\rangle$ + ! + ! Default should be 1,N_det,0,1 + ! + ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + logical, intent(in) :: do_right + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + + PROVIDE ref_bitmask_energy N_int + + select case (N_int) + case (1) + call H_tc_s2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (2) + call H_tc_s2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (3) + call H_tc_s2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (4) + call H_tc_s2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case default + call H_tc_s2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + end select +end +BEGIN_TEMPLATE + +subroutine H_tc_s2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t \\rangle$ and $s_t = S^2 | u_t\\rangle$ + ! + ! Default should be 1,N_det,0,1 + ! + ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + logical, intent(in) :: do_right + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + double precision :: hij, sij + integer :: i,j,k,l,kk + integer :: k_a, k_b, l_a, l_b, m_a, m_b + integer :: istate + integer :: krow, kcol, krow_b, kcol_b + integer :: lrow, lcol + integer :: mrow, mcol + integer(bit_kind) :: spindet($N_int) + integer(bit_kind) :: tmp_det($N_int,2) + integer(bit_kind) :: tmp_det2($N_int,2) + integer(bit_kind) :: tmp_det3($N_int,2) + integer(bit_kind), allocatable :: buffer(:,:) + integer :: n_doubles + integer, allocatable :: doubles(:) + integer, allocatable :: singles_a(:) + integer, allocatable :: singles_b(:) + integer, allocatable :: idx(:), idx0(:) + integer :: maxab, n_singles_a, n_singles_b, kcol_prev + integer*8 :: k8 + logical :: compute_singles + integer*8 :: last_found, left, right, right_max + double precision :: rss, mem, ratio + double precision, allocatable :: utl(:,:) + integer, parameter :: block_size=128 + logical :: u_is_sparse + +! call resident_memory(rss) +! mem = dble(singles_beta_csc_size) / 1024.d0**3 +! +! compute_singles = (mem+rss > qp_max_mem) +! +! if (.not.compute_singles) then +! provide singles_beta_csc +! endif +compute_singles=.True. + + + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + allocate(idx0(maxab)) + + do i=1,maxab + idx0(i) = i + enddo + + ! Prepare the array of all alpha single excitations + ! ------------------------------------------------- + + PROVIDE N_int nthreads_davidson + !$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson) & + !$OMP SHARED(psi_bilinear_matrix_rows, N_det, & + !$OMP psi_bilinear_matrix_columns, & + !$OMP psi_det_alpha_unique, psi_det_beta_unique, & + !$OMP n_det_alpha_unique, n_det_beta_unique, N_int, & + !$OMP psi_bilinear_matrix_transp_rows, & + !$OMP psi_bilinear_matrix_transp_columns, & + !$OMP psi_bilinear_matrix_transp_order, N_st, & + !$OMP psi_bilinear_matrix_order_transp_reverse, & + !$OMP psi_bilinear_matrix_columns_loc, & + !$OMP psi_bilinear_matrix_transp_rows_loc, & + !$OMP istart, iend, istep, irp_here, v_t, s_t, & + !$OMP ishift, idx0, u_t, maxab, compute_singles, & + !$OMP singles_alpha_csc,singles_alpha_csc_idx, & + !$OMP singles_beta_csc,singles_beta_csc_idx) & + !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & + !$OMP lcol, lrow, l_a, l_b, utl, kk, u_is_sparse, & + !$OMP buffer, doubles, n_doubles, umax, & + !$OMP tmp_det2, hij, sij, idx, l, kcol_prev,hmono, htwoe, hthree, & + !$OMP singles_a, n_singles_a, singles_b, ratio, & + !$OMP n_singles_b, k8, last_found,left,right,right_max) + + ! Alpha/Beta double excitations + ! ============================= + + allocate( buffer($N_int,maxab), & + singles_a(maxab), & + singles_b(maxab), & + doubles(maxab), & + idx(maxab), utl(N_st,block_size)) + + kcol_prev=-1 + + ! Check if u has multiple zeros + kk=1 ! Avoid division by zero + !$OMP DO + do k=1,N_det + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k))) + enddo + if (umax < 1.d-20) then + !$OMP ATOMIC + kk = kk+1 + endif + enddo + !$OMP END DO + u_is_sparse = N_det / kk < 20 ! 5% + + ASSERT (iend <= N_det) + ASSERT (istart > 0) + ASSERT (istep > 0) + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep ! Loop over all determinants (/!\ not in psidet order) + + krow = psi_bilinear_matrix_rows(k_a) ! Index of alpha part of determinant k_a + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) ! Index of beta part of determinant k_a + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + + if (kcol /= kcol_prev) then + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + if (compute_singles) then + call get_all_spin_singles_$N_int( & + psi_det_beta_unique, idx0, & + tmp_det(1,2), N_det_beta_unique, & + singles_b, n_singles_b) + else + n_singles_b = 0 + !DIR$ LOOP COUNT avg(1000) + do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1 + n_singles_b = n_singles_b+1 + singles_b(n_singles_b) = singles_beta_csc(k8) + enddo + endif + endif + kcol_prev = kcol + + ! -> Here, tmp_det is determinant k_a + + ! Loop over singly excited beta columns + ! ------------------------------------- + + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b + lcol = singles_b(i) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol) + + ! tmp_det2 is a single excitation of tmp_det in the beta spin + ! the alpha part is not defined yet + +!--- +! if (compute_singles) then + + l_a = psi_bilinear_matrix_columns_loc(lcol) + ASSERT (l_a <= N_det) + ! rows : | 1 2 3 4 | 1 3 4 6 | .... | 1 2 4 5 | + ! cols : | 1 1 1 1 | 2 2 2 2 | .... | 8 8 8 8 | + ! index : | 1 2 3 4 | 5 6 7 8 | .... | 58 59 60 61 | + ! ^ ^ + ! | | + ! l_a N_det + ! l_a is the index in the big vector os size Ndet of the position of the first element of column lcol + + ! Below we identify all the determinants with the same beta part + + !DIR$ UNROLL(8) + !DIR$ LOOP COUNT avg(50000) + do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow) ! hot spot + + ASSERT (l_a <= N_det) + idx(j) = l_a + l_a = l_a+1 + enddo + j = j-1 + + ! Get all single excitations from tmp_det(1,1) to buffer(1,?) + + call get_all_spin_singles_$N_int( & + buffer, idx, tmp_det(1,1), j, & + singles_a, n_singles_a ) + + ! Loop over alpha singles + ! ----------------------- + + double precision :: umax + + !DIR$ LOOP COUNT avg(1000) + do k = 1,n_singles_a,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! double alpha-beta + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + call get_s2(tmp_det,tmp_det2,$N_int,sij) + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1) + enddo + enddo + enddo + + enddo + + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep + + + ! Single and double alpha excitations + ! =================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + ! Initial determinant is at k_b in beta-major representation + ! ---------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + spindet(1:$N_int) = tmp_det(1:$N_int,1) + + ! Loop inside the beta column to gather all the connected alphas + lcol = psi_bilinear_matrix_columns(k_a) + l_a = psi_bilinear_matrix_columns_loc(lcol) + + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_alpha_unique + if (l_a > N_det) exit + lcol = psi_bilinear_matrix_columns(l_a) + if (lcol /= kcol) exit + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot + idx(i) = l_a + l_a = l_a+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_a, doubles, n_singles_a, n_doubles ) + + ! Compute Hij for all alpha singles + ! ---------------------------------- + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_a,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) +! call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 1, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Compute Hij for all alpha doubles + ! ---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) +! call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Single and double beta excitations + ! ================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + kcol = psi_bilinear_matrix_columns(k_a) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + spindet(1:$N_int) = tmp_det(1:$N_int,2) + + ! Initial determinant is at k_b in beta-major representation + ! ----------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + ! Loop inside the alpha row to gather all the connected betas + lrow = psi_bilinear_matrix_transp_rows(k_b) + l_b = psi_bilinear_matrix_transp_rows_loc(lrow) + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_beta_unique + if (l_b > N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l_b) + if (lrow /= krow) exit + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol) + idx(i) = l_b + l_b = l_b+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_b, doubles, n_singles_b, n_doubles ) + + ! Compute Hij for all beta singles + ! ---------------------------------- + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) +! call i_H_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + ! Compute Hij for all beta doubles + ! ---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) +! call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Diagonal contribution + ! ===================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + if (u_is_sparse) then + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k_a))) + enddo + else + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + double precision, external :: diag_H_mat_elem + double precision :: hmono, htwoe, hthree + +! hij = diag_H_mat_elem(tmp_det,$N_int) + call diag_htilde_mu_mat_fock_bi_ortho ($N_int, tmp_det, hmono, htwoe, hthree, hij) + call get_s2(tmp_det,tmp_det,$N_int,sij) + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) + s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) + enddo + + end do + !$OMP END DO + deallocate(buffer, singles_a, singles_b, doubles, idx, utl) + !$OMP END PARALLEL + +end + +SUBST [ N_int ] + +1;; +2;; +3;; +4;; +N_int;; + +END_TEMPLATE + + diff --git a/src/tc_bi_ortho/u0_h_u0.irp.f b/src/tc_bi_ortho/h_tc_u0.irp.f similarity index 99% rename from src/tc_bi_ortho/u0_h_u0.irp.f rename to src/tc_bi_ortho/h_tc_u0.irp.f index afbe15a7..5e6150ea 100644 --- a/src/tc_bi_ortho/u0_h_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_u0.irp.f @@ -93,9 +93,6 @@ subroutine H_tc_u_0_nstates_openmp(v_0,u_0,N_st,sze, do_right) double precision, allocatable :: u_t(:,:), v_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t allocate(u_t(N_st,N_det),v_t(N_st,N_det)) - provide mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e - provide ref_tc_energy_tot fock_op_2_e_tc_closed_shell - provide eff_2_e_from_3_e_ab eff_2_e_from_3_e_aa eff_2_e_from_3_e_bb do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) enddo diff --git a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f index 212c8588..5dad91ca 100644 --- a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f +++ b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f @@ -11,7 +11,7 @@ BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_tc, (psi_det_size) ] psi_average_norm_contrib_tc(:) = 0.d0 do k=1,N_states do i=1,N_det - psi_average_norm_contrib_tc(i) = psi_average_norm_contrib_tc(i) + & + psi_average_norm_contrib_tc(i) = & dabs(psi_l_coef_bi_ortho(i,k)*psi_r_coef_bi_ortho(i,k))*state_average_weight(k) enddo enddo @@ -26,33 +26,39 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc, (psi_det_size,N_states) ] &BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_sorted_tc, (psi_det_size) ] &BEGIN_PROVIDER [ integer, psi_det_sorted_tc_order, (psi_det_size) ] +&BEGIN_PROVIDER [double precision, psi_r_coef_sorted_bi_ortho, (psi_det_size, N_states)] +&BEGIN_PROVIDER [double precision, psi_l_coef_sorted_bi_ortho, (psi_det_size, N_states)] implicit none BEGIN_DOC ! Wave function sorted by determinants contribution to the norm (state-averaged) ! ! psi_det_sorted_tc_order(i) -> k : index in psi_det + ! + ! psi_r_coef_sorted_bi_ortho : right coefficients corresponding to psi_det_sorted_tc + ! + ! psi_l_coef_sorted_bi_ortho : left coefficients corresponding to psi_det_sorted_tc END_DOC integer :: i,j,k integer, allocatable :: iorder(:) allocate ( iorder(N_det) ) do i=1,N_det - psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_tc(i) iorder(i) = i + psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_tc(i) enddo call dsort(psi_average_norm_contrib_sorted_tc,iorder,N_det) do i=1,N_det + psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_sorted_tc(i) do j=1,N_int psi_det_sorted_tc(j,1,i) = psi_det(j,1,iorder(i)) psi_det_sorted_tc(j,2,i) = psi_det(j,2,iorder(i)) enddo - psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_sorted_tc(i) - psi_det_sorted_tc_order(iorder(i)) = i + psi_det_sorted_tc_order(iorder(i)) = i enddo double precision :: accu do k=1,N_states accu = 0.d0 do i=1,N_det - psi_coef_sorted_tc(i,k) = dsqrt(dabs(psi_l_coef_bi_ortho(iorder(i),k)*psi_r_coef_bi_ortho(iorder(i),k))) + psi_coef_sorted_tc(i,k) = dsqrt(psi_average_norm_contrib_sorted_tc(i)) accu += psi_coef_sorted_tc(i,k)**2 enddo accu = 1.d0/dsqrt(accu) @@ -66,28 +72,16 @@ END_PROVIDER psi_average_norm_contrib_sorted_tc(N_det+1:psi_det_size) = 0.d0 psi_det_sorted_tc_order(N_det+1:psi_det_size) = 0 - deallocate(iorder) - -END_PROVIDER - - BEGIN_PROVIDER [double precision, psi_r_coef_sorted_bi_ortho, (psi_det_size, N_states)] -&BEGIN_PROVIDER [double precision, psi_l_coef_sorted_bi_ortho, (psi_det_size, N_states)] - BEGIN_DOC - ! psi_r_coef_sorted_bi_ortho : right coefficients corresponding to psi_det_sorted_tc - ! psi_l_coef_sorted_bi_ortho : left coefficients corresponding to psi_det_sorted_tc - END_DOC - implicit none - integer :: i, j, k psi_r_coef_sorted_bi_ortho = 0.d0 psi_l_coef_sorted_bi_ortho = 0.d0 do i = 1, N_det - psi_r_coef_sorted_bi_ortho(i,1) = psi_r_coef_bi_ortho(psi_det_sorted_tc_order(i),1) - psi_l_coef_sorted_bi_ortho(i,1) = psi_l_coef_bi_ortho(psi_det_sorted_tc_order(i),1) + psi_r_coef_sorted_bi_ortho(i,1:N_states) = psi_r_coef_bi_ortho(iorder(i),1:N_states) + psi_l_coef_sorted_bi_ortho(i,1:N_states) = psi_l_coef_bi_ortho(iorder(i),1:N_states) enddo + deallocate(iorder) END_PROVIDER - BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc_bit, (N_int,2,psi_det_size) ] &BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc_bit, (psi_det_size,N_states) ] implicit none diff --git a/src/tc_bi_ortho/psi_r_l_prov.irp.f b/src/tc_bi_ortho/psi_r_l_prov.irp.f index ac9b0e74..521acff5 100644 --- a/src/tc_bi_ortho/psi_r_l_prov.irp.f +++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f @@ -136,15 +136,15 @@ BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states) END_PROVIDER -subroutine save_tc_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psilcoef,psircoef) +subroutine save_tc_wavefunction_general(ndet,nstates,psidet,sze,dim_psicoef,psilcoef,psircoef) implicit none BEGIN_DOC ! Save the wave function into the |EZFIO| file END_DOC use bitmasks include 'constants.include.F' - integer, intent(in) :: ndet,nstates,dim_psicoef - integer(bit_kind), intent(in) :: psidet(N_int,2,ndet) + integer, intent(in) :: ndet,nstates,dim_psicoef,sze + integer(bit_kind), intent(in) :: psidet(N_int,2,sze) double precision, intent(in) :: psilcoef(dim_psicoef,nstates) double precision, intent(in) :: psircoef(dim_psicoef,nstates) integer*8, allocatable :: psi_det_save(:,:,:) @@ -188,23 +188,17 @@ subroutine save_tc_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psilcoef call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(psir_coef_save) deallocate (psil_coef_save,psir_coef_save) -! allocate (psi_coef_save(ndet_qp_edit,nstates)) -! do k=1,nstates -! do i=1,ndet_qp_edit -! psi_coef_save(i,k) = psicoef(i,k) -! enddo -! enddo -! -! call ezfio_set_determinants_psi_coef_qp_edit(psi_coef_save) -! deallocate (psi_coef_save) - call write_int(6,ndet,'Saved determinantsi and psi_r/psi_l coef') endif end subroutine save_tc_bi_ortho_wavefunction implicit none - call save_tc_wavefunction_general(N_det,N_states,psi_det,size(psi_l_coef_bi_ortho, 1),psi_l_coef_bi_ortho,psi_r_coef_bi_ortho) + if(save_sorted_tc_wf)then + call save_tc_wavefunction_general(N_det,N_states,psi_det_sorted_tc,size(psi_det_sorted_tc, 3),size(psi_l_coef_sorted_bi_ortho, 1),psi_l_coef_sorted_bi_ortho,psi_r_coef_sorted_bi_ortho) + else + call save_tc_wavefunction_general(N_det,N_states,psi_det,size(psi_det, 3), size(psi_l_coef_bi_ortho, 1),psi_l_coef_bi_ortho,psi_r_coef_bi_ortho) + endif call routine_save_right_bi_ortho end @@ -214,9 +208,9 @@ subroutine routine_save_right_bi_ortho integer :: i allocate(coef_tmp(N_det, N_states)) do i = 1, N_det - coef_tmp(i,1:N_states) = psi_r_coef_bi_ortho(i,1:N_states) + coef_tmp(i,1:N_states) = psi_r_coef_sorted_bi_ortho(i,1:N_states) enddo - call save_wavefunction_general_unormalized(N_det,N_states,psi_det,size(coef_tmp,1),coef_tmp(1,1)) + call save_wavefunction_general_unormalized(N_det,N_states,psi_det_sorted_tc,size(coef_tmp,1),coef_tmp(1,1)) end subroutine routine_save_left_right_bi_ortho diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f index 9740ee2f..7b73d5f2 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e.irp.f @@ -232,6 +232,7 @@ subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) other_spin(1) = 2 other_spin(2) = 1 + call get_excitation_degree(key_i, key_j, degree, Nint) hthree = 0.d0 diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index d094d76e..baca498c 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -94,6 +94,7 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) integer :: ipart, ihole double precision :: direct_int, exchange_int + nexc(1) = 0 nexc(2) = 0 !! Get all the holes and particles of key_i with respect to the ROHF determinant diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index cfa24f3b..bd0b1ef5 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -3,7 +3,6 @@ program tc_bi_ortho BEGIN_DOC ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. END_DOC - print *, 'Hello world' my_grid_becke = .True. my_n_pt_r_grid = 30 my_n_pt_a_grid = 50 @@ -11,7 +10,7 @@ program tc_bi_ortho touch read_wf touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid call routine_diag -! call test + call save_tc_bi_ortho_wavefunction end subroutine test @@ -19,18 +18,19 @@ subroutine test integer :: i,j double precision :: hmono,htwoe,hthree,htot use bitmasks - - print*,'test' -! call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,2), N_int, hmono, htwoe, hthree, htot) - call double_htilde_mu_mat_bi_ortho(N_int,psi_det(1,1,1), psi_det(1,1,2), hmono, htwoe, htot) - print*,hmono, htwoe, htot + print*,'reading the wave function ' + do i = 1, N_det + call debug_det(psi_det(1,1,i),N_int) + print*,i,psi_l_coef_bi_ortho(i,1)*psi_r_coef_bi_ortho(i,1) + print*,i,psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1) + enddo end subroutine routine_diag implicit none ! provide eigval_right_tc_bi_orth - provide overlap_bi_ortho +! provide overlap_bi_ortho ! provide htilde_matrix_elmt_bi_ortho integer ::i,j print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) @@ -46,16 +46,7 @@ subroutine routine_diag print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth print*,'Left/right eigenvectors' do i = 1,N_det - write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1) + write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) enddo - do j=1,N_states - do i=1,N_det - psi_l_coef_bi_ortho(i,j) = leigvec_tc_bi_orth(i,j) - psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j) - enddo - enddo - SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho - call save_tc_bi_ortho_wavefunction -! call routine_save_left_right_bi_ortho end diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index d39b7a29..69302da2 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -12,12 +12,30 @@ enddo END_PROVIDER +subroutine diagonalize_CI_tc + implicit none + BEGIN_DOC +! Replace the coefficients of the |CI| states by the coefficients of the +! eigenstates of the |CI| matrix. + END_DOC + integer :: i,j + do j=1,N_states + do i=1,N_det + psi_l_coef_bi_ortho(i,j) = leigvec_tc_bi_orth(i,j) + psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j) + enddo + enddo + + SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho +end + BEGIN_PROVIDER [double precision, eigval_right_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, eigval_left_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, reigvec_tc_bi_orth, (N_det,N_states)] &BEGIN_PROVIDER [double precision, leigvec_tc_bi_orth, (N_det,N_states)] +&BEGIN_PROVIDER [double precision, s2_eigvec_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, norm_ground_left_right_bi_orth ] BEGIN_DOC @@ -29,64 +47,153 @@ logical :: converged, dagger integer :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) + double precision, allocatable :: s2_values_tmp(:), H_prime(:,:), expect_e(:) + double precision, parameter :: alpha = 0.1d0 + integer :: i_good_state,i_other_state, i_state + integer, allocatable :: index_good_state_array(:) + logical, allocatable :: good_state_array(:) + double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) + integer, allocatable :: iorder(:) PROVIDE N_det N_int if(n_det.le.N_det_max_full)then - allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det)) - call non_hrmt_real_diag(N_det,htilde_matrix_elmt_bi_ortho,& + allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det),expect_e(N_det)) + allocate (H_prime(N_det,N_det),s2_values_tmp(N_det)) + H_prime(1:N_det,1:N_det) = htilde_matrix_elmt_bi_ortho(1:N_det,1:N_det) + if(s2_eig)then + H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det) + do j=1,N_det + H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 + enddo + endif + call non_hrmt_real_diag(N_det,H_prime,& leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& n_real_tc_bi_orth_eigval_right,eigval_right_tmp) - double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) - integer, allocatable :: iorder(:) - allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) - do i = 1,N_det - iorder(i) = i - coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_r,iorder,N_det) - igood_r = iorder(1) - print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) - do i = 1,N_det - iorder(i) = i - coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_l,iorder,N_det) - igood_l = iorder(1) - print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) - - if(igood_r.ne.igood_l.and.igood_r.ne.1)then - print *,'' - print *,'Warning, the left and right eigenvectors are "not the same" ' - print *,'Warning, the ground state is not dominated by HF...' - print *,'State with largest RIGHT coefficient of HF ',igood_r - print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) - print *,'State with largest LEFT coefficient of HF ',igood_l - print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) - endif - if(state_following_tc)then - print *,'Following the states with the largest coef on HF' - print *,'igood_r,igood_l',igood_r,igood_l - i= igood_r - eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) - do j = 1, N_det - reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) -! print*,reigvec_tc_bi_orth(j,1) - enddo - i= igood_l - eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) - do j = 1, N_det - leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) - enddo - else - do i = 1, N_states - eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) - eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) - do j = 1, N_det - reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) - leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) +! do i = 1, N_det +! call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp(1,i),reigvec_tc_bi_orth_tmp(1,i),1,N_det,expect_e(i), s2_values_tmp(i)) +! enddo + call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,N_det,N_det,expect_e, s2_values_tmp) + allocate(index_good_state_array(N_det),good_state_array(N_det)) + i_state = 0 + good_state_array = .False. + if(s2_eig)then + if (only_expected_s2) then + do j=1,N_det + ! Select at least n_states states with S^2 values closed to "expected_s2" +! print*,'s2_values_tmp(j) = ',s2_values_tmp(j),eigval_right_tmp(j),expect_e(j) + if(dabs(s2_values_tmp(j)-expected_s2).le.0.5d0)then + i_state +=1 + index_good_state_array(i_state) = j + good_state_array(j) = .True. + endif + if(i_state.eq.N_states) then + exit + endif enddo - enddo + else + do j=1,N_det + index_good_state_array(j) = j + good_state_array(j) = .True. + enddo + endif + if(i_state .ne.0)then + ! Fill the first "i_state" states that have a correct S^2 value + do j = 1, i_state + do i=1,N_det + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + enddo + eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + eigval_left_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(index_good_state_array(j)) + enddo + i_other_state = 0 + do j = 1, N_det + if(good_state_array(j))cycle + i_other_state +=1 + if(i_state+i_other_state.gt.n_states)then + exit + endif + do i=1,N_det + reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j) + leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(i_state+i_other_state) = s2_values_tmp(i_state+i_other_state) + enddo + else ! istate == 0 + print*,'' + print*,'!!!!!!!! WARNING !!!!!!!!!' + print*,' Within the ',N_det,'determinants selected' + print*,' and the ',N_states_diag,'states requested' + print*,' We did not find only states with S^2 values close to ',expected_s2 + print*,' We will then set the first N_states eigenvectors of the H matrix' + print*,' as the CI_eigenvectors' + print*,' You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space' + print*,'' + do j=1,min(N_states_diag,N_det) + do i=1,N_det + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j) + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(j) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (j) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(j) + enddo + endif ! istate .ne. 0 + + else ! s2_eig + allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) + do i = 1,N_det + iorder(i) = i + coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + enddo + call dsort(coef_hf_r,iorder,N_det) + igood_r = iorder(1) + print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) + do i = 1,N_det + iorder(i) = i + coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + enddo + call dsort(coef_hf_l,iorder,N_det) + igood_l = iorder(1) + print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) + + if(igood_r.ne.igood_l.and.igood_r.ne.1)then + print *,'' + print *,'Warning, the left and right eigenvectors are "not the same" ' + print *,'Warning, the ground state is not dominated by HF...' + print *,'State with largest RIGHT coefficient of HF ',igood_r + print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) + print *,'State with largest LEFT coefficient of HF ',igood_l + print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) + endif + if(state_following_tc)then + print *,'Following the states with the largest coef on HF' + print *,'igood_r,igood_l',igood_r,igood_l + i= igood_r + eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) + do j = 1, N_det + reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) +! print*,reigvec_tc_bi_orth(j,1) + enddo + i= igood_l + eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) + do j = 1, N_det + leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) + enddo + else + do i = 1, N_states + eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) + eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) + do j = 1, N_det + reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) + leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) + enddo + enddo + endif endif else double precision, allocatable :: H_jj(:),vec_tmp(:,:) @@ -94,12 +201,18 @@ external htcdag_bi_ortho_calc_tdav external H_tc_u_0_opt external H_tc_dagger_u_0_opt + external H_tc_s2_dagger_u_0_opt + external H_tc_s2_u_0_opt allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo !!!! Preparing the left-eigenvector + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the left-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' vec_tmp = 0.d0 do istate = 1, N_states vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) @@ -108,12 +221,25 @@ vec_tmp(istate,istate) = 1.d0 enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) - call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) +! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) + integer :: n_it_max,i_it + n_it_max = 1 + converged = .False. + i_it = 0 + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + i_it += 1 + if(i_it .gt. 5)exit + enddo do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the right-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' !!!! Preparing the right-eigenvector vec_tmp = 0.d0 do istate = 1, N_states @@ -123,7 +249,14 @@ vec_tmp(istate,istate) = 1.d0 enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) - call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) +! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) + converged = .False. + i_it = 0 + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + i_it += 1 + if(i_it .gt. 5)exit + enddo do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo @@ -137,6 +270,7 @@ norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1) enddo print*,'norm l/r = ',norm_ground_left_right_bi_orth + print*,' = ',s2_eigvec_tc_bi_orth(1) END_PROVIDER diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f new file mode 100644 index 00000000..4debe2e2 --- /dev/null +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -0,0 +1,162 @@ +program test_tc + implicit none + read_wf = .True. + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. + touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + call routine_test_s2 + call routine_test_s2_davidson +end + +subroutine routine_test_s2 + implicit none + logical :: do_right + integer :: sze ,i, N_st, j + double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 + double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + sze = N_det + N_st = 1 + allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) + print*,'Checking first the Left ' + do_right = .False. + do i = 1, sze + u_0(i,1) = psi_l_coef_bi_ortho(i,1) + enddo + call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) + s_0_ref = 0.d0 + do i = 1, sze + do j = 1, sze + call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) + s_0_ref(i,1) += u_0(j,1) * sij + enddo + enddo + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + accu_e = 0.d0 + accu_s = 0.d0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) + accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + print*,'accu_e_0 = ',accu_e_0 + print*,'accu_s_0 = ',accu_s_0 + + print*,'Checking then the right ' + do_right = .True. + do i = 1, sze + u_0(i,1) = psi_r_coef_bi_ortho(i,1) + enddo + call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) + s_0_ref = 0.d0 + do i = 1, sze + do j = 1, sze + call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) + s_0_ref(i,1) += u_0(j,1) * sij + enddo + enddo + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + accu_e = 0.d0 + accu_s = 0.d0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) + accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + print*,'accu_e_0 = ',accu_e_0 + print*,'accu_s_0 = ',accu_s_0 + + +end + +subroutine routine_test_s2_davidson + implicit none + double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) + integer :: i,istate + logical :: converged + external H_tc_s2_dagger_u_0_opt + external H_tc_s2_u_0_opt + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) + do i = 1, N_det + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + enddo + ! Preparing the left-eigenvector + print*,'Computing the left-eigenvector ' + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + do istate = 1, N_states + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + enddo + integer :: n_it_max + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + integer :: sze,N_st + logical :: do_right + sze = N_det + N_st = 1 + do_right = .False. + allocate(s_0_new(N_det,1),v_0_new(N_det,1)) + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) + double precision :: accu_e_0, accu_s_0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) + accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) + enddo + print*,'energies = ',energies + print*,'s2 = ',s2 + print*,'accu_e_0',accu_e_0 + print*,'accu_s_0',accu_s_0 + + ! Preparing the right-eigenvector + print*,'Computing the right-eigenvector ' + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + do istate = 1, N_states + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + enddo + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) + sze = N_det + N_st = 1 + do_right = .True. + v_0_new = 0.d0 + s_0_new = 0.d0 + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) + accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) + enddo + print*,'energies = ',energies + print*,'s2 = ',s2 + print*,'accu_e_0',accu_e_0 + print*,'accu_s_0',accu_s_0 + +end diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 8765cd6e..85c8dac3 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -6,7 +6,7 @@ default: False [comp_left_eigv] type: logical -doc: If |true|, computes also the left-eigenvector +doc: If |true|, computes also the left-eigenvector interface: ezfio,provider,ocaml default: False @@ -14,7 +14,7 @@ default: False type: logical doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml -default: True +default: True [pure_three_body_h_tc] type: logical @@ -30,13 +30,13 @@ default: False [core_tc_op] type: logical -doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) +doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) interface: ezfio,provider,ocaml default: False [full_tc_h_solver] type: logical -doc: If |true|, you diagonalize the full TC H matrix +doc: If |true|, you diagonalize the full TC H matrix interface: ezfio,provider,ocaml default: False @@ -46,6 +46,12 @@ doc: Thresholds on the energy for iterative Davidson used in TC interface: ezfio,provider,ocaml default: 1.e-5 +[thrsh_cycle_tc] +type: Threshold +doc: Thresholds to cycle the integrals with the envelop +interface: ezfio,provider,ocaml +default: 1.e-10 + [max_it_dav] type: integer doc: nb max of iteration in Davidson used in TC @@ -60,11 +66,11 @@ default: 0.000005 [thresh_psi_r_norm] type: logical -doc: If |true|, you prune the WF to compute the PT1 coef based on the norm. If False, the pruning is done through the amplitude on the right-coefficient. +doc: If |true|, you prune the WF to compute the PT1 coef based on the norm. If False, the pruning is done through the amplitude on the right-coefficient. interface: ezfio,provider,ocaml default: False -[state_following_tc] +[state_following_tc] type: logical doc: If |true|, the states are re-ordered to match the input states default: False @@ -78,7 +84,7 @@ default: True [symetric_fock_tc] type: logical -doc: If |true|, using F+F^t as Fock TC +doc: If |true|, using F+F^t as Fock TC interface: ezfio,provider,ocaml default: False @@ -126,7 +132,7 @@ default: 1.e-6 [maxovl_tc] type: logical -doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors +doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors interface: ezfio,provider,ocaml default: False @@ -152,7 +158,7 @@ default: 0. type: character*(32) doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS] interface: ezfio,provider,ocaml -default: Simple +default: DIIS [im_thresh_tcscf] type: Threshold @@ -180,21 +186,15 @@ default: 1.e-6 [var_tc] type: logical -doc: If |true|, use VAR-TC +doc: If |true|, use VAR-TC interface: ezfio,provider,ocaml default: False -[read_tc_integ] -type: logical -doc: If |true|, read integrals: int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao +[io_tc_integ] +type: Disk_access +doc: Read/Write integrals int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml -default: False - -[write_tc_integ] -type: logical -doc: If |true|, write integrals: int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao -interface: ezfio,provider,ocaml -default: False +default: None [debug_tc_pt2] type: integer @@ -207,3 +207,11 @@ type: logical doc: If |true|, only the right part of WF is used to compute spin dens interface: ezfio,provider,ocaml default: False + +[save_sorted_tc_wf] +type: logical +doc: If |true|, save the bi-ortho wave functions in a sorted way +interface: ezfio,provider,ocaml +default: True + + diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 187750ff..85389f30 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -8,7 +8,7 @@ program tc_scf implicit none - print *, 'starting ...' + print *, ' starting ...' my_grid_becke = .True. my_n_pt_r_grid = 30 @@ -27,17 +27,37 @@ program tc_scf !call orthonormalize_mos() PROVIDE tcscf_algorithm - if(tcscf_algorithm == 'DIIS') then - call rh_tcscf_diis() - elseif(tcscf_algorithm == 'Simple') then - call rh_tcscf_simple() + PROVIDE var_tc + + if(var_tc) then + + print *, ' VAR-TC' + + if(tcscf_algorithm == 'DIIS') then + print*, ' NOT implemented yet' + elseif(tcscf_algorithm == 'Simple') then + call rh_vartcscf_simple() + else + print *, ' not implemented yet', tcscf_algorithm + stop + endif + else - print *, ' not implemented yet', tcscf_algorithm - stop + + if(tcscf_algorithm == 'DIIS') then + call rh_tcscf_diis() + elseif(tcscf_algorithm == 'Simple') then + call rh_tcscf_simple() + else + print *, ' not implemented yet', tcscf_algorithm + stop + endif + + call minimize_tc_orb_angles() + call print_energy_and_mos() + endif - call minimize_tc_orb_angles() - call print_energy_and_mos() end diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index 0866cdaf..b9287d58 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -21,25 +21,22 @@ program test_ints touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid !! OK -!call routine_int2_u_grad1u_j1b2 -!! OK -!call routine_v_ij_erf_rk_cst_mu_j1b -!! OK +! call routine_int2_u_grad1u_j1b2 +! OK +! call routine_v_ij_erf_rk_cst_mu_j1b +! OK ! call routine_x_v_ij_erf_rk_cst_mu_j1b -!! OK -! call routine_v_ij_u_cst_mu_j1b - -!! OK -!call routine_int2_u2_j1b2 - -!! OK -!call routine_int2_u_grad1u_x_j1b2 - -!! OK +! OK +! call routine_int2_u2_j1b2 +! OK +! call routine_int2_u_grad1u_x_j1b2 +! OK ! call routine_int2_grad1u2_grad2u2_j1b2 ! call routine_int2_u_grad1u_j1b2 ! call test_total_grad_lapl ! call test_total_grad_square +! call test_int2_grad1_u12_ao_test +! call routine_v_ij_u_cst_mu_j1b_test ! call test_ao_tc_int_chemist ! call test_grid_points_ao ! call test_tc_scf @@ -53,12 +50,12 @@ program test_ints !call test_two_e_tc_non_hermit_integral() - call test_tc_grad_square_ao_test() - - PROVIDE TC_HF_energy VARTC_HF_energy - print *, ' TC_HF_energy = ', TC_HF_energy - print *, ' VARTC_HF_energy = ', VARTC_HF_energy +! call test_tc_grad_square_ao_test() +!!PROVIDE TC_HF_energy VARTC_HF_energy +!!print *, ' TC_HF_energy = ', TC_HF_energy +!!print *, ' VARTC_HF_energy = ', VARTC_HF_energy + call test_old_ints end ! --- @@ -157,6 +154,9 @@ subroutine routine_int2_u_grad1u_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u_grad1u_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -169,20 +169,6 @@ subroutine routine_v_ij_erf_rk_cst_mu_j1b integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -215,6 +201,9 @@ subroutine routine_v_ij_erf_rk_cst_mu_j1b enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_erf_rk_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -228,20 +217,6 @@ subroutine routine_x_v_ij_erf_rk_cst_mu_j1b integer :: i,j,ipoint,k,l,m double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -276,6 +251,10 @@ subroutine routine_x_v_ij_erf_rk_cst_mu_j1b enddo enddo enddo + + print*,'******' + print*,'******' + print*,'routine_x_v_ij_erf_rk_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -290,20 +269,6 @@ subroutine routine_v_ij_u_cst_mu_j1b_test integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -336,6 +301,9 @@ subroutine routine_v_ij_u_cst_mu_j1b_test enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_u_cst_mu_j1b_test' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -427,19 +395,6 @@ subroutine routine_int2_u2_j1b2 integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 @@ -473,6 +428,9 @@ subroutine routine_int2_u2_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u2_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -486,19 +444,6 @@ subroutine routine_int2_u_grad1u_x_j1b2 integer :: i,j,ipoint,k,l,m double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 @@ -534,6 +479,9 @@ subroutine routine_int2_u_grad1u_x_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u_grad1u_x_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -579,6 +527,9 @@ subroutine routine_v_ij_u_cst_mu_j1b enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_u_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -742,6 +693,9 @@ subroutine test_total_grad_lapl enddo enddo enddo + print*,'******' + print*,'******' + print*,' test_total_grad_lapl' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -767,6 +721,9 @@ subroutine test_total_grad_square enddo enddo enddo + print*,'******' + print*,'******' + print*,'test_total_grad_square' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -1057,3 +1014,86 @@ end ! --- + +subroutine test_old_ints + implicit none + integer :: i,j,k,l + double precision :: old, new, contrib, get_ao_tc_sym_two_e_pot + double precision :: integral_sym , integral_nsym,accu + PROVIDE ao_tc_sym_two_e_pot_in_map + accu = 0.d0 + do j = 1, ao_num + do l= 1, ao_num + do i = 1, ao_num + do k = 1, ao_num +! integral_sym = get_ao_tc_sym_two_e_pot(i, j, k, l, ao_tc_sym_two_e_pot_map) + ! ao_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the AO basis +! integral_nsym = ao_non_hermit_term_chemist(k,i,l,j) +! old = integral_sym + integral_nsym +! old = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + new = ao_tc_int_chemist_test(k,i,l,j) + old = ao_tc_int_chemist_no_cycle(k,i,l,j) + contrib = dabs(old - new) + if(contrib.gt.1.d-6)then + print*,'problem !!' + print*,i,j,k,l + print*,old, new, contrib + endif + accu += contrib + enddo + enddo + enddo + enddo + print*,'******' + print*,'******' + print*,'in test_old_ints' + print*,'accu = ',accu/dble(ao_num**4) + +end + +subroutine test_int2_grad1_u12_ao_test + implicit none + integer :: i,j,ipoint,m,k,l + double precision :: weight,accu_relat, accu_abs, contrib + double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) + allocate(array(ao_num, ao_num, ao_num, ao_num)) + array = 0.d0 + allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) + array_ref = 0.d0 + do m = 1, 3 + do ipoint = 1, n_points_final_grid + weight = final_weight_at_r_vector(ipoint) + do k = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do j = 1, ao_num + array(j,i,l,k) += int2_grad1_u12_ao_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + array_ref(j,i,l,k) += int2_grad1_u12_ao(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + enddo + enddo + enddo + enddo + enddo + enddo + + accu_relat = 0.d0 + accu_abs = 0.d0 + do k = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do j = 1, ao_num + contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) + accu_abs += contrib + if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then + accu_relat += contrib/dabs(array_ref(j,i,l,k)) + endif + enddo + enddo + enddo + enddo + print*,'******' + print*,'******' + print*,'test_int2_grad1_u12_ao_test' + print*,'accu_abs = ',accu_abs/dble(ao_num)**4 + print*,'accu_relat = ',accu_relat/dble(ao_num)**4 +end diff --git a/src/tools/print_e_conv.irp.f b/src/tools/print_e_conv.irp.f deleted file mode 100644 index e358ebc1..00000000 --- a/src/tools/print_e_conv.irp.f +++ /dev/null @@ -1,80 +0,0 @@ -program print_e_conv - implicit none - BEGIN_DOC -! program that prints in a human readable format the convergence of the CIPSI algorithm. -! -! for all istate, this program produces -! -! * a file "EZFIO.istate.conv" containing the variational and var+PT2 energies as a function of N_det -! -! * for istate > 1, a file EZFIO.istate.delta_e.conv containing the energy difference (both var and var+PT2) with the ground state as a function of N_det - END_DOC - - provide ezfio_filename - call routine_e_conv - end - -subroutine routine_e_conv - implicit none - BEGIN_DOC -! routine called by :c:func:`print_e_conv` - END_DOC - integer :: N_iter_tmp - integer :: i,istate - character*(128) :: output - integer :: i_unit_output,getUnitAndOpen - character*(128) :: filename - - integer, allocatable :: n_det_tmp(:) - call ezfio_get_iterations_N_iter(N_iter_tmp) - print*,'N_iter_tmp = ',N_iter_tmp - double precision, allocatable :: e(:,:),pt2(:,:) - allocate(e(N_states, 100),pt2(N_states, 100),n_det_tmp(100)) - call ezfio_get_iterations_energy_iterations(e) - call ezfio_get_iterations_pt2_iterations(pt2) - call ezfio_get_iterations_n_det_iterations(n_det_tmp) - - - do istate = 1, N_states - if (istate.lt.10)then - write (filename, "(I1)")istate - else - write (filename, "(I2)")istate - endif - print*,filename - output=trim(ezfio_filename)//'.'//trim(filename)//'.conv' - output=trim(output) - print*,'output = ',trim(output) - i_unit_output = getUnitAndOpen(output,'w') - write(i_unit_output,*)'# N_det E_var E_var + PT2' - do i = 1, N_iter_tmp - write(i_unit_output,'(I9,X,3(F16.10,X))')n_det_tmp(i),e(istate,i),e(istate,i) + pt2(istate,i) - enddo - enddo - - if(N_states.gt.1)then - double precision, allocatable :: deltae(:,:),deltae_pt2(:,:) - allocate(deltae(N_states,100),deltae_pt2(N_states,100)) - do i = 1, N_iter_tmp - do istate = 1, N_states - deltae(istate,i) = e(istate,i) - e(1,i) - deltae_pt2(istate,i) = e(istate,i) + pt2(istate,i) - (e(1,i) + pt2(1,i)) - enddo - enddo - do istate = 2, N_states - if (istate.lt.10)then - write (filename, "(I1)")istate - else - write (filename, "(I2)")istate - endif - output=trim(ezfio_filename)//'.'//trim(filename)//'.delta_e.conv' - print*,'output = ',trim(output) - i_unit_output = getUnitAndOpen(output,'w') - write(i_unit_output,*)'# N_det Delta E_var Delta (E_var + PT2)' - do i = 1, N_iter_tmp - write(i_unit_output,'(I9,X,100(F16.10,X))')n_det_tmp(i),deltae(istate,i),deltae_pt2(istate,i) - enddo - enddo - endif - -end diff --git a/src/tools/print_sorted_wf_coef.irp.f b/src/tools/print_sorted_wf_coef.irp.f new file mode 100644 index 00000000..fa0f1eab --- /dev/null +++ b/src/tools/print_sorted_wf_coef.irp.f @@ -0,0 +1,19 @@ +program sort_wf + implicit none + read_wf =.True. + call routine + +end + +subroutine routine + implicit none + integer :: i + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.wf_sorted' + i_unit_output = getUnitAndOpen(output,'w') + do i= 1, N_det + write(i_unit_output,*)i,dabs(psi_coef_sorted(i,1)) + enddo + +end diff --git a/src/tools/save_natorb.irp.f b/src/tools/save_natorb.irp.f index f6331d13..b4e9f5dc 100644 --- a/src/tools/save_natorb.irp.f +++ b/src/tools/save_natorb.irp.f @@ -12,6 +12,7 @@ program save_natorb ! matrices of each state with the corresponding ! :option:`determinants state_average_weight` END_DOC + PROVIDE nucl_coord read_wf = .True. touch read_wf call save_natural_mos diff --git a/src/tools/sort_wf.irp.f b/src/tools/sort_wf.irp.f new file mode 100644 index 00000000..95b1a964 --- /dev/null +++ b/src/tools/sort_wf.irp.f @@ -0,0 +1,20 @@ +program sort_wf + implicit none + read_wf = .true. + touch read_wf + call routine + +end + +subroutine routine + implicit none + integer :: i + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.wf_sorted' + i_unit_output = getUnitAndOpen(output,'w') + do i = 1, N_det + write(i_unit_output, *)i,dabs(psi_coef_sorted(i,1))/dabs(psi_coef_sorted(1,1)) + enddo + +end diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg new file mode 100644 index 00000000..8606e908 --- /dev/null +++ b/src/trexio/EZFIO.cfg @@ -0,0 +1,54 @@ +[backend] +type: integer +doc: Back-end used in TREXIO. 0: HDF5, 1:Text +interface: ezfio, ocaml, provider +default: 0 + +[trexio_file] +type: character*(256) +doc: Name of the exported TREXIO file +interface: ezfio, ocaml, provider +default: None + +[export_rdm] +type: logical +doc: If True, export two-body reduced density matrix +interface: ezfio, ocaml, provider +default: False + +[export_ao_one_e_ints] +type: logical +doc: If True, export one-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_one_e_ints] +type: logical +doc: If True, export one-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + +[export_ao_two_e_ints] +type: logical +doc: If True, export two-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_ao_two_e_ints_cholesky] +type: logical +doc: If True, export Cholesky-decomposed two-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_two_e_ints] +type: logical +doc: If True, export two-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_two_e_ints_cholesky] +type: logical +doc: If True, export Cholesky-decomposed two-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + diff --git a/src/trexio/README.rst b/src/trexio/README.rst new file mode 100644 index 00000000..7d7304c6 --- /dev/null +++ b/src/trexio/README.rst @@ -0,0 +1,6 @@ +====== +trexio +====== + +Module for handling TREXIO files. +See https://github.com/trex-coe/trexio diff --git a/src/trexio/export_trexio.irp.f b/src/trexio/export_trexio.irp.f new file mode 100644 index 00000000..3ae0dcb4 --- /dev/null +++ b/src/trexio/export_trexio.irp.f @@ -0,0 +1,7 @@ +program export_trexio_prog + implicit none + read_wf = .True. + SOFT_TOUCH read_wf + call export_trexio +end + diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f new file mode 100644 index 00000000..c55ddc5e --- /dev/null +++ b/src/trexio/export_trexio_routines.irp.f @@ -0,0 +1,623 @@ +subroutine export_trexio + use trexio + implicit none + BEGIN_DOC + ! Exports the wave function in TREXIO format + END_DOC + + integer(trexio_t) :: f(N_states) ! TREXIO file handle + integer(trexio_exit_code) :: rc + integer :: k + double precision, allocatable :: factor(:) + character*(256) :: filenames(N_states) + + filenames(1) = trexio_filename + do k=2,N_states + write(filenames(k),'(A,I3.3)') trim(trexio_filename)//'.', k-1 + enddo + + do k=1,N_states + print *, 'TREXIO file : ', trim(filenames(k)) + call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + enddo + print *, '' + + do k=1,N_states + if (backend == 0) then + f(k) = trexio_open(filenames(k), 'u', TREXIO_HDF5, rc) + else if (backend == 1) then + f(k) = trexio_open(filenames(k), 'u', TREXIO_TEXT, rc) + endif + if (f(k) == 0_8) then + print *, 'Unable to open TREXIO file for writing' + print *, 'rc = ', rc + stop -1 + endif + enddo + call ezfio_set_trexio_trexio_file(trexio_filename) + +! ------------------------------------------------------------------------------ + +! Electrons +! --------- + + print *, 'Electrons' + + rc = trexio_write_electron_up_num(f(1), elec_alpha_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_electron_dn_num(f(1), elec_beta_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Nuclei +! ------ + + print *, 'Nuclei' + + rc = trexio_write_nucleus_num(f(1), nucl_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_charge(f(1), nucl_charge) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_coord(f(1), nucl_coord_transp) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_label(f(1), nucl_label, 32) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_repulsion(f(1), nuclear_repulsion) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Pseudo-potentials +! ----------------- + + if (do_pseudo) then + + print *, 'ECP' + integer :: num + + num = 0 + do k=1,pseudo_klocmax + do i=1,nucl_num + if (pseudo_dz_k(i,k) /= 0.d0) then + num = num+1 + end if + end do + end do + + do l=0,pseudo_lmax + do k=1,pseudo_kmax + do i=1,nucl_num + if (pseudo_dz_kl(i,k,l) /= 0.d0) then + num = num+1 + end if + end do + end do + end do + + integer, allocatable :: ang_mom(:), nucleus_index(:), power(:), lmax(:) + double precision, allocatable :: exponent(:), coefficient(:) + + allocate(ang_mom(num), nucleus_index(num), exponent(num), coefficient(num), power(num), & + lmax(nucl_num) ) + + do i=1,nucl_num + lmax(i) = -1 + do l=0,pseudo_lmax + do k=1,pseudo_kmax + if (pseudo_dz_kl_transp(k,l,i) /= 0.d0) then + lmax(i) = max(lmax(i), l) + end if + end do + end do + end do + + j = 0 + do i=1,nucl_num + do k=1,pseudo_klocmax + if (pseudo_dz_k_transp(k,i) /= 0.d0) then + j = j+1 + ang_mom(j) = lmax(i)+1 + nucleus_index(j) = i + exponent(j) = pseudo_dz_k_transp(k,i) + coefficient(j) = pseudo_v_k_transp(k,i) + power(j) = pseudo_n_k_transp(k,i) + end if + end do + + do l=0,lmax(i) + do k=1,pseudo_kmax + if (pseudo_dz_kl_transp(k,l,i) /= 0.d0) then + j = j+1 + ang_mom(j) = l + nucleus_index(j) = i + exponent(j) = pseudo_dz_kl_transp(k,l,i) + coefficient(j) = pseudo_v_kl_transp(k,l,i) + power(j) = pseudo_n_kl_transp(k,l,i) + end if + end do + end do + end do + + + lmax(:) = lmax(:)+1 + rc = trexio_write_ecp_max_ang_mom_plus_1(f(1), lmax) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_z_core(f(1), int(nucl_charge_remove)) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_num(f(1), num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_ang_mom(f(1), ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_nucleus_index(f(1), nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_exponent(f(1), exponent) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_coefficient(f(1), coefficient) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_power(f(1), power) + call trexio_assert(rc, TREXIO_SUCCESS) + + endif + + +! Basis +! ----- + + print *, 'Basis' + + + rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_prim_num(f(1), prim_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_shell_num(f(1), shell_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) + + allocate(factor(shell_num)) + if (ao_normalized) then + factor(1:shell_num) = shell_normalization_factor(1:shell_num) + else + factor(1:shell_num) = 1.d0 + endif + rc = trexio_write_basis_shell_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + + deallocate(factor) + + rc = trexio_write_basis_shell_index(f(1), shell_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_exponent(f(1), prim_expo) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_coefficient(f(1), prim_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + allocate(factor(prim_num)) + if (primitives_normalized) then + factor(1:prim_num) = prim_normalization_factor(1:prim_num) + else + factor(1:prim_num) = 1.d0 + endif + rc = trexio_write_basis_prim_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) + + +! Atomic orbitals +! --------------- + + print *, 'AOs' + + rc = trexio_write_ao_num(f(1), ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_cartesian(f(1), 1) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_shell(f(1), ao_shell) + call trexio_assert(rc, TREXIO_SUCCESS) + + integer :: i, pow0(3), powA(3), j, l, nz + double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c + nz=100 + + C_A(1) = 0.d0 + C_A(2) = 0.d0 + C_A(3) = 0.d0 + + allocate(factor(ao_num)) + if (ao_normalized) then + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + else + factor(:) = 1.d0 + endif + rc = trexio_write_ao_normalization(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) + +! One-e AO integrals +! ------------------ + + if (export_ao_one_e_ints) then + print *, 'AO one-e integrals' + + rc = trexio_write_ao_1e_int_overlap(f(1),ao_overlap) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_kinetic(f(1),ao_kinetic_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_potential_n_e(f(1),ao_integrals_n_e) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (do_pseudo) then + rc = trexio_write_ao_1e_int_ecp(f(1), ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_ao_1e_int_core_hamiltonian(f(1),ao_one_e_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + +! Two-e AO integrals +! ------------------ + + if (export_ao_two_e_ints) then + print *, 'AO two-e integrals' + PROVIDE ao_two_e_integrals_in_map + + integer(8), parameter :: BUFSIZE=100000_8 + double precision :: eri_buffer(BUFSIZE), integral + integer(4) :: eri_index(4,BUFSIZE) + integer(8) :: icount, offset + + double precision, external :: get_ao_two_e_integral + + + icount = 0_8 + offset = 0_8 + do l=1,ao_num + do k=1,ao_num + do j=l,ao_num + do i=k,ao_num + if (i==j .and. k= 0_8) then + rc = trexio_write_ao_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + +! Two-e AO integrals - Cholesky +! ----------------------------- + + integer(4) :: chol_index(3,BUFSIZE) + double precision :: chol_buffer(BUFSIZE) + + if (export_ao_two_e_ints_cholesky) then + print *, 'AO two-e integrals Cholesky' + + rc = trexio_write_ao_2e_int_eri_cholesky_num(f(1), cholesky_ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + icount = 0_8 + offset = 0_8 + do k=1,cholesky_ao_num + do j=1,ao_num + do i=1,ao_num + integral = cholesky_ao(i,j,k) + if (integral == 0.d0) cycle + icount += 1_8 + chol_buffer(icount) = integral + chol_index(1,icount) = i + chol_index(2,icount) = j + chol_index(3,icount) = k + if (icount == BUFSIZE) then + rc = trexio_write_ao_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + + if (icount > 0_8) then + rc = trexio_write_ao_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + + +! Molecular orbitals +! ------------------ + + print *, 'MOs' + + rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) + call trexio_assert(rc, TREXIO_SUCCESS) + + do k=1,N_states + rc = trexio_write_mo_num(f(k), mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo + + rc = trexio_write_mo_coefficient(f(1), mo_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + if ( (trim(mo_label) == 'Canonical').and. & + (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then + rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) + call trexio_assert(rc, TREXIO_SUCCESS) + +! One-e MO integrals +! ------------------ + + if (export_mo_one_e_ints) then + print *, 'MO one-e integrals' + + rc = trexio_write_mo_1e_int_kinetic(f(1),mo_kinetic_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_mo_1e_int_potential_n_e(f(1),mo_integrals_n_e) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (do_pseudo) then + rc = trexio_write_mo_1e_int_ecp(f(1),mo_pseudo_integrals_local) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_1e_int_core_hamiltonian(f(1),mo_one_e_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + +! Two-e MO integrals +! ------------------ + + if (export_mo_two_e_ints) then + print *, 'MO two-e integrals' + PROVIDE mo_two_e_integrals_in_map + + double precision, external :: mo_two_e_integral + + + icount = 0_8 + offset = 0_8 + do l=1,mo_num + do k=1,mo_num + do j=l,mo_num + do i=k,mo_num + if (i==j .and. k 0_8) then + rc = trexio_write_mo_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + +! Two-e MO integrals - Cholesky +! ----------------------------- + + if (export_mo_two_e_ints_cholesky) then + print *, 'MO two-e integrals Cholesky' + + rc = trexio_write_mo_2e_int_eri_cholesky_num(f(1), cholesky_ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + icount = 0_8 + offset = 0_8 + do k=1,cholesky_ao_num + do j=1,mo_num + do i=1,mo_num + integral = cholesky_mo(i,j,k) + if (integral == 0.d0) cycle + icount += 1_8 + chol_buffer(icount) = integral + chol_index(1,icount) = i + chol_index(2,icount) = j + chol_index(3,icount) = k + if (icount == BUFSIZE) then + rc = trexio_write_mo_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + + if (icount > 0_8) then + rc = trexio_write_mo_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + +! One-e RDM +! --------- + + do k=1,N_states + rc = trexio_write_rdm_1e(f(k),one_e_dm_mo_alpha(:,:,k) + one_e_dm_mo_beta(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_rdm_1e_up(f(k),one_e_dm_mo_alpha(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_rdm_1e_dn(f(k),one_e_dm_mo_beta(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo + + +! Two-e RDM +! --------- + + if (export_rdm) then + PROVIDE two_e_dm_mo + print *, 'Two-e RDM' + + icount = 0_8 + offset = 0_8 + do l=1,mo_num + do k=1,mo_num + do j=1,mo_num + do i=1,mo_num + integral = two_e_dm_mo(i,j,k,l) + if (integral == 0.d0) cycle + icount += 1_8 + eri_buffer(icount) = integral + eri_index(1,icount) = i + eri_index(2,icount) = j + eri_index(3,icount) = k + eri_index(4,icount) = l + if (icount == BUFSIZE) then + rc = trexio_write_rdm_2e(f(1), offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + end do + + if (icount >= 0_8) then + rc = trexio_write_rdm_2e(f(1), offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + +! ------------------------------------------------------------------------------ + + ! Determinants + ! ------------ + + integer*8, allocatable :: det_buffer(:,:,:) + double precision, allocatable :: coef_buffer(:,:) + integer :: nint + + rc = trexio_get_int64_num(f(1), nint) + call trexio_assert(rc, TREXIO_SUCCESS) +! nint = N_int + if (nint /= N_int) then + stop 'Problem with N_int' + endif + allocate ( det_buffer(nint, 2, BUFSIZE), coef_buffer(BUFSIZE, n_states) ) + + do k=1, N_states + icount = 0_8 + offset = 0_8 + rc = trexio_write_state_num(f(k), n_states) + call trexio_assert(rc, TREXIO_SUCCESS) + +! Will need to be updated with TREXIO 2.4 +! rc = trexio_write_state_id(f(k), k-1) + rc = trexio_write_state_id(f(k), k) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_state_file_name(f(k), filenames, len(filenames(1))) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo + + do k=1,n_det + icount += 1_8 + det_buffer(1:nint, 1:2, icount) = psi_det(1:N_int, 1:2, k) + coef_buffer(icount,1:N_states) = psi_coef(k,1:N_states) + if (icount == BUFSIZE) then + do i=1,N_states + rc = trexio_write_determinant_list(f(i), offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f(i), offset, icount, coef_buffer(1,i)) + call trexio_assert(rc, TREXIO_SUCCESS) + end do + offset += icount + icount = 0_8 + end if + end do + + if (icount >= 0_8) then + do i=1,N_states + rc = trexio_write_determinant_list(f(i), offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f(i), offset, icount, coef_buffer(1,i)) + call trexio_assert(rc, TREXIO_SUCCESS) + end do + end if + + deallocate ( det_buffer, coef_buffer ) + + do k=1,N_states + rc = trexio_close(f(k)) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo + +end + + +! -*- mode: f90 -*- diff --git a/src/trexio/import_trexio_determinants.irp.f b/src/trexio/import_trexio_determinants.irp.f new file mode 100644 index 00000000..1759bb94 --- /dev/null +++ b/src/trexio/import_trexio_determinants.irp.f @@ -0,0 +1,79 @@ +program import_determinants_ao + call run +end + +subroutine run + use trexio + use map_module + implicit none + BEGIN_DOC +! Program to import determinants from TREXIO + END_DOC + + integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + + integer :: m + + double precision, allocatable :: coef_buffer(:,:) + integer*8 , allocatable :: det_buffer(:,:,:) + + f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) + if (f == 0_8) then + print *, 'Unable to open TREXIO file for reading' + print *, 'rc = ', rc + stop -1 + endif + + + + ! Determinants + ! ------------ + + integer :: nint, nstates + integer :: bufsize + + rc = trexio_read_state_num(f, nstates) + call trexio_assert(rc, TREXIO_SUCCESS) + +! rc = trexio_read_determinant_int64_num(f, nint) +! call trexio_assert(rc, TREXIO_SUCCESS) + nint = N_int + if (nint /= N_int) then + stop 'Problem with N_int' + endif + + integer*8 :: offset, icount + + rc = trexio_read_determinant_num(f, bufsize) + call trexio_assert(rc, TREXIO_SUCCESS) + print *, 'N_det = ', bufsize + + allocate ( det_buffer(nint, 2, bufsize), coef_buffer(bufsize, n_states) ) + + + offset = 0_8 + icount = bufsize + + rc = trexio_read_determinant_list(f, offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + if (icount /= bufsize) then + print *, 'error: bufsize /= N_det: ', bufsize, icount + stop -1 + endif + + do m=1,nstates + rc = trexio_set_state(f, m-1) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_read_determinant_coefficient(f, offset, icount, coef_buffer(1,m)) + call trexio_assert(rc, TREXIO_SUCCESS) + if (icount /= bufsize) then + print *, 'error: bufsize /= N_det for state', m, ':', icount, bufsize + stop -1 + endif + enddo + + call save_wavefunction_general(bufsize,nstates,det_buffer,size(coef_buffer,1),coef_buffer) + + +end diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f new file mode 100644 index 00000000..9f9ad9d6 --- /dev/null +++ b/src/trexio/import_trexio_integrals.irp.f @@ -0,0 +1,146 @@ +program import_integrals_ao + use trexio + implicit none + integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + + f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) + if (f == 0_8) then + print *, 'Unable to open TREXIO file for reading' + print *, 'rc = ', rc + stop -1 + endif + + call run(f) + rc = trexio_close(f) + call trexio_assert(rc, TREXIO_SUCCESS) +end + +subroutine run(f) + use trexio + use map_module + implicit none + BEGIN_DOC +! Program to import integrals from TREXIO + END_DOC + + integer(trexio_t), intent(in) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + + integer ::i,j,k,l + integer(8) :: m, n_integrals + double precision :: integral + + integer(key_kind), allocatable :: buffer_i(:) + real(integral_kind), allocatable :: buffer_values(:) + + + double precision, allocatable :: A(:,:) + double precision, allocatable :: V(:) + integer , allocatable :: Vi(:,:) + double precision :: s + + if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then + rc = trexio_read_nucleus_repulsion(f, s) + call trexio_assert(rc, TREXIO_SUCCESS) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here, rc + print *, 'Error reading nuclear repulsion' + stop -1 + endif + call ezfio_set_nuclei_nuclear_repulsion(s) + call ezfio_set_nuclei_io_nuclear_repulsion('Read') + endif + + ! AO integrals + ! ------------ + + allocate(A(ao_num, ao_num)) + + + if (trexio_has_ao_1e_int_overlap(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_overlap(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO overlap' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_overlap('Read') + endif + + if (trexio_has_ao_1e_int_kinetic(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_kinetic(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO kinetic integrals' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_kinetic('Read') + endif + +! if (trexio_has_ao_1e_int_ecp(f) == TREXIO_SUCCESS) then +! rc = trexio_read_ao_1e_int_ecp(f, A) +! if (rc /= TREXIO_SUCCESS) then +! print *, irp_here +! print *, 'Error reading AO ECP local integrals' +! stop -1 +! endif +! call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A) +! call ezfio_set_ao_one_e_ints_io_ao_integrals_pseudo('Read') +! endif + + if (trexio_has_ao_1e_int_potential_n_e(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_potential_n_e(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO potential N-e integrals' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_n_e('Read') + endif + + deallocate(A) + + ! AO 2e integrals + ! --------------- + PROVIDE ao_integrals_map + + integer*4 :: BUFSIZE + BUFSIZE=ao_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + + integer*8 :: offset, icount + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + +end diff --git a/src/trexio/qp_import_trexio.py b/src/trexio/qp_import_trexio.py new file mode 100755 index 00000000..de8d1269 --- /dev/null +++ b/src/trexio/qp_import_trexio.py @@ -0,0 +1,415 @@ +#!/usr/bin/env python3 +""" +convert TREXIO file to EZFIO + +Usage: + qp_import_trexio [-o EZFIO_DIR] FILE + +Options: + -o --output=EZFIO_DIR Produced directory + by default is FILE.ezfio + +""" + +import sys +import os +import trexio +import numpy as np +from functools import reduce +from ezfio import ezfio +from docopt import docopt + + +try: + QP_ROOT = os.environ["QP_ROOT"] + QP_EZFIO = os.environ["QP_EZFIO"] +except KeyError: + print("Error: QP_ROOT environment variable not found.") + sys.exit(1) +else: + sys.path = [QP_EZFIO + "/Python", + QP_ROOT + "/install/resultsFile", + QP_ROOT + "/install", + QP_ROOT + "/scripts"] + sys.path + + +def generate_xyz(l): + + def create_z(x,y,z): + return (x, y, l-(x+y)) + + def create_y(accu,x,y,z): + if y == 0: + result = [create_z(x,y,z)] + accu + else: + result = create_y([create_z(x,y,z)] + accu , x, y-1, z) + return result + + def create_x(accu,x,y,z): + if x == 0: + result = create_y([], x,y,z) + accu + else: + xnew = x-1 + ynew = l-xnew + result = create_x(create_y([],x,y,z) + accu , xnew, ynew, z) + return result + + result = create_x([], l, 0, 0) + result.reverse() + return result + + + +def write_ezfio(trexio_filename, filename): + + try: + trexio_file = trexio.File(trexio_filename,mode='r',back_end=trexio.TREXIO_TEXT) + except: + trexio_file = trexio.File(trexio_filename,mode='r',back_end=trexio.TREXIO_HDF5) + + ezfio.set_file(filename) + ezfio.set_trexio_trexio_file(trexio_filename) + + print("Nuclei\t\t...\t", end=' ') + + charge = [0.] + if trexio.has_nucleus(trexio_file): + charge = trexio.read_nucleus_charge(trexio_file) + ezfio.set_nuclei_nucl_num(len(charge)) + ezfio.set_nuclei_nucl_charge(charge) + + coord = trexio.read_nucleus_coord(trexio_file) + coord = np.transpose(coord) + ezfio.set_nuclei_nucl_coord(coord) + + label = trexio.read_nucleus_label(trexio_file) + nucl_num = trexio.read_nucleus_num(trexio_file) + + # Transformt H1 into H + import re + p = re.compile(r'(\d*)$') + label = [p.sub("", x).capitalize() for x in label] + ezfio.set_nuclei_nucl_label(label) + + else: + ezfio.set_nuclei_nucl_num(1) + ezfio.set_nuclei_nucl_charge([0.]) + ezfio.set_nuclei_nucl_coord([0.,0.,0.]) + ezfio.set_nuclei_nucl_label(["X"]) + + print("OK") + + + print("Electrons\t...\t", end=' ') + + try: + num_beta = trexio.read_electron_dn_num(trexio_file) + except: + num_beta = sum(charge)//2 + + try: + num_alpha = trexio.read_electron_up_num(trexio_file) + except: + num_alpha = sum(charge) - num_beta + + if num_alpha == 0: + print("\n\nError: There are zero electrons in the TREXIO file.\n\n") + sys.exit(1) + ezfio.set_electrons_elec_alpha_num(num_alpha) + ezfio.set_electrons_elec_beta_num(num_beta) + + print("OK") + + print("Basis\t\t...\t", end=' ') + + shell_num = 0 + try: + basis_type = trexio.read_basis_type(trexio_file) + + if basis_type.lower() not in ["gaussian", "slater"]: + raise TypeError + + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = trexio.read_basis_prim_num(trexio_file) + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = trexio.read_basis_exponent(trexio_file) + coefficient = trexio.read_basis_coefficient(trexio_file) + shell_index = trexio.read_basis_shell_index(trexio_file) + ao_shell = trexio.read_ao_shell(trexio_file) + + ezfio.set_basis_basis("Read from TREXIO") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) + + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) + + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) + + assert (len(shell_prim_num) == shell_num) + + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + + + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = trexio.read_basis_prim_factor(trexio_file) + + print("OK") + except: + print("None") + ezfio.set_ao_basis_ao_cartesian(True) + + print("AOS\t\t...\t", end=' ') + + try: + cartesian = trexio.read_ao_cartesian(trexio_file) + except: + cartesian = True + + if not cartesian: + raise TypeError('Only cartesian TREXIO files can be converted') + + ao_num = trexio.read_ao_num(trexio_file) + ezfio.set_ao_basis_ao_num(ao_num) + + if shell_num > 0: + ao_shell = trexio.read_ao_shell(trexio_file) + at = [ nucl_index[i]+1 for i in ao_shell ] + ezfio.set_ao_basis_ao_nucl(at) + + num_prim0 = [ 0 for i in range(shell_num) ] + for i in shell_index: + num_prim0[i] += 1 + + coef = {} + expo = {} + for i,c in enumerate(coefficient): + idx = shell_index[i] + if idx in coef: + coef[idx].append(c) + expo[idx].append(exponent[i]) + else: + coef[idx] = [c] + expo[idx] = [exponent[i]] + + coefficient = [] + exponent = [] + power_x = [] + power_y = [] + power_z = [] + num_prim = [] + + for i in range(shell_num): + for x,y,z in generate_xyz(ang_mom[i]): + power_x.append(x) + power_y.append(y) + power_z.append(z) + coefficient.append(coef[i]) + exponent.append(expo[i]) + num_prim.append(num_prim0[i]) + + assert (len(coefficient) == ao_num) + ezfio.set_ao_basis_ao_power(power_x + power_y + power_z) + ezfio.set_ao_basis_ao_prim_num(num_prim) + + prim_num_max = max( [ len(x) for x in coefficient ] ) + + for i in range(ao_num): + coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)] + exponent [i] += [0. for j in range(len(exponent[i]), prim_num_max)] + + coefficient = reduce(lambda x, y: x + y, coefficient, []) + exponent = reduce(lambda x, y: x + y, exponent , []) + + coef = [] + expo = [] + for i in range(prim_num_max): + for j in range(i, len(coefficient), prim_num_max): + coef.append(coefficient[j]) + expo.append(exponent[j]) + +# ezfio.set_ao_basis_ao_prim_num_max(prim_num_max) + ezfio.set_ao_basis_ao_coef(coef) + ezfio.set_ao_basis_ao_expo(expo) + ezfio.set_ao_basis_ao_basis("Read from TREXIO") + + print("OK") + + + # _ + # |\/| _ _ |_) _. _ o _ + # | | (_) _> |_) (_| _> | _> + # + + print("MOS\t\t...\t", end=' ') + + labels = { "Canonical" : "Canonical", + "RHF" : "Canonical", + "BOYS" : "Localized", + "ROHF" : "Canonical", + "UHF" : "Canonical", + "Natural": "Natural" } + try: + label = labels[trexio.read_mo_type(trexio_file)] + except: + label = "None" + ezfio.set_mo_basis_mo_label(label) + + try: + clss = trexio.read_mo_class(trexio_file) + core = [ i for i in clss if i.lower() == "core" ] + inactive = [ i for i in clss if i.lower() == "inactive" ] + active = [ i for i in clss if i.lower() == "active" ] + virtual = [ i for i in clss if i.lower() == "virtual" ] + deleted = [ i for i in clss if i.lower() == "deleted" ] + except trexio.Error: + pass + + try: + mo_num = trexio.read_mo_num(trexio_file) + ezfio.set_mo_basis_mo_num(mo_num) + + MoMatrix = trexio.read_mo_coefficient(trexio_file) + ezfio.set_mo_basis_mo_coef(MoMatrix) + + mo_occ = [ 0. for i in range(mo_num) ] + for i in range(num_alpha): + mo_occ[i] += 1. + for i in range(num_beta): + mo_occ[i] += 1. + ezfio.set_mo_basis_mo_occ(mo_occ) + except: + pass + + print("OK") + + + print("Pseudos\t\t...\t", end=' ') + + ezfio.set_pseudo_do_pseudo(False) + + if trexio.has_ecp_ang_mom(trexio_file): + ezfio.set_pseudo_do_pseudo(True) + max_ang_mom_plus_1 = trexio.read_ecp_max_ang_mom_plus_1(trexio_file) + z_core = trexio.read_ecp_z_core(trexio_file) + ang_mom = trexio.read_ecp_ang_mom(trexio_file) + nucleus_index = trexio.read_ecp_nucleus_index(trexio_file) + exponent = trexio.read_ecp_exponent(trexio_file) + coefficient = trexio.read_ecp_coefficient(trexio_file) + power = trexio.read_ecp_power(trexio_file) + + lmax = max( max_ang_mom_plus_1 ) - 1 + ezfio.set_pseudo_pseudo_lmax(lmax) + ezfio.set_pseudo_nucl_charge_remove(z_core) + + prev_center = None + ecp = {} + for i in range(len(ang_mom)): + center = nucleus_index[i] + if center != prev_center: + ecp[center] = { "lmax": max_ang_mom_plus_1[center], + "zcore": z_core[center], + "contr": {} } + for j in range(max_ang_mom_plus_1[center]+1): + ecp[center]["contr"][j] = [] + + ecp[center]["contr"][ang_mom[i]].append( (coefficient[i], power[i], exponent[i]) ) + prev_center = center + + ecp_loc = {} + ecp_nl = {} + kmax = 0 + klocmax = 0 + for center in ecp: + ecp_nl [center] = {} + for k in ecp[center]["contr"]: + if k == ecp[center]["lmax"]: + ecp_loc[center] = ecp[center]["contr"][k] + klocmax = max(len(ecp_loc[center]), klocmax) + else: + ecp_nl [center][k] = ecp[center]["contr"][k] + kmax = max(len(ecp_nl [center][k]), kmax) + + ezfio.set_pseudo_pseudo_klocmax(klocmax) + ezfio.set_pseudo_pseudo_kmax(kmax) + + pseudo_n_k = [[0 for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_v_k = [[0. for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_dz_k = [[0. for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_n_kl = [[[0 for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + pseudo_v_kl = [[[0. for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + pseudo_dz_kl = [[[0. for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + for center in ecp_loc: + for k in range( len(ecp_loc[center]) ): + v, n, dz = ecp_loc[center][k] + pseudo_n_k[k][center] = n + pseudo_v_k[k][center] = v + pseudo_dz_k[k][center] = dz + + ezfio.set_pseudo_pseudo_n_k(pseudo_n_k) + ezfio.set_pseudo_pseudo_v_k(pseudo_v_k) + ezfio.set_pseudo_pseudo_dz_k(pseudo_dz_k) + + for center in ecp_nl: + for l in range( len(ecp_nl[center]) ): + for k in range( len(ecp_nl[center][l]) ): + v, n, dz = ecp_nl[center][l][k] + pseudo_n_kl[l][k][center] = n + pseudo_v_kl[l][k][center] = v + pseudo_dz_kl[l][k][center] = dz + + ezfio.set_pseudo_pseudo_n_kl(pseudo_n_kl) + ezfio.set_pseudo_pseudo_v_kl(pseudo_v_kl) + ezfio.set_pseudo_pseudo_dz_kl(pseudo_dz_kl) + + + print("OK") + + + + +def get_full_path(file_path): + file_path = os.path.expanduser(file_path) + file_path = os.path.expandvars(file_path) + return file_path + + +if __name__ == '__main__': + ARGUMENTS = docopt(__doc__) + + FILE = get_full_path(ARGUMENTS['FILE']) + trexio_filename = FILE + + if ARGUMENTS["--output"]: + EZFIO_FILE = get_full_path(ARGUMENTS["--output"]) + else: + EZFIO_FILE = "{0}.ezfio".format(FILE) + + write_ezfio(trexio_filename, EZFIO_FILE) + sys.stdout.flush() + diff --git a/src/trexio/trexio_file.irp.f b/src/trexio/trexio_file.irp.f new file mode 100644 index 00000000..c9897748 --- /dev/null +++ b/src/trexio/trexio_file.irp.f @@ -0,0 +1,20 @@ +BEGIN_PROVIDER [ character*(1024), trexio_filename ] + implicit none + BEGIN_DOC + ! Name of the TREXIO file + END_DOC + character*(1024) :: prefix + + trexio_filename = trexio_file + + if (trexio_file == 'None') then + prefix = trim(ezfio_work_dir)//trim(ezfio_filename) + if (backend == 0) then + trexio_filename = trim(prefix)//'.h5' + else if (backend == 1) then + trexio_filename = trim(prefix) + endif + endif +END_PROVIDER + + diff --git a/src/trexio/trexio_module.F90 b/src/trexio/trexio_module.F90 new file mode 100644 index 00000000..acd08492 --- /dev/null +++ b/src/trexio/trexio_module.F90 @@ -0,0 +1 @@ +#include "trexio_f.f90" diff --git a/src/two_body_rdm/io_two_rdm.irp.f b/src/two_body_rdm/io_two_rdm.irp.f index f7008ca9..bdd8a4f9 100644 --- a/src/two_body_rdm/io_two_rdm.irp.f +++ b/src/two_body_rdm/io_two_rdm.irp.f @@ -1,15 +1,17 @@ subroutine write_array_two_rdm(n_orb,nstates,array_tmp,name_file) implicit none integer, intent(in) :: n_orb,nstates - character*(128), intent(in) :: name_file + character*(128), intent(in) :: name_file double precision, intent(in) :: array_tmp(n_orb,n_orb,n_orb,n_orb,nstates) character*(128) :: output integer :: i_unit_output,getUnitAndOpen - PROVIDE ezfio_filename + PROVIDE ezfio_filename output=trim(ezfio_filename)//'/work/'//trim(name_file) i_unit_output = getUnitAndOpen(output,'W') + call lock_io() write(i_unit_output)array_tmp + call unlock_io() close(unit=i_unit_output) end @@ -18,12 +20,14 @@ subroutine read_array_two_rdm(n_orb,nstates,array_tmp,name_file) character*(128) :: output integer :: i_unit_output,getUnitAndOpen integer, intent(in) :: n_orb,nstates - character*(128), intent(in) :: name_file + character*(128), intent(in) :: name_file double precision, intent(out) :: array_tmp(n_orb,n_orb,n_orb,n_orb,N_states) - PROVIDE ezfio_filename + PROVIDE ezfio_filename output=trim(ezfio_filename)//'/work/'//trim(name_file) i_unit_output = getUnitAndOpen(output,'R') + call lock_io() read(i_unit_output)array_tmp + call unlock_io() close(unit=i_unit_output) end diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f index 7e35fc7b..99be1f54 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -16,13 +16,13 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] two_e_dm_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb,istate - do l=1,mo_num + do l=1,n_core_inact_act_orb lorb = list_core_inact_act(l) - do k=1,mo_num + do k=1,n_core_inact_act_orb korb = list_core_inact_act(k) - do j=1,mo_num + do j=1,n_core_inact_act_orb jorb = list_core_inact_act(j) - do i=1,mo_num + do i=1,n_core_inact_act_orb iorb = list_core_inact_act(i) two_e_dm_mo(iorb,jorb,korb,lorb) = state_av_full_occ_2_rdm_spin_trace_mo(i,j,k,l) enddo @@ -30,7 +30,6 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] enddo enddo two_e_dm_mo(:,:,:,:) = two_e_dm_mo(:,:,:,:) -! * 2.d0 END_PROVIDER diff --git a/src/utils/format_w_error.irp.f b/src/utils/format_w_error.irp.f index 1378d367..7f7458b6 100644 --- a/src/utils/format_w_error.irp.f +++ b/src/utils/format_w_error.irp.f @@ -1,7 +1,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_error) implicit none - + BEGIN_DOC ! Format for double precision, value(error) END_DOC @@ -14,7 +14,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err ! out ! | format_value | character | string FX.Y for the format | - ! | str_error | character | string of the error | + ! | str_error | character | string of the error | ! internal ! | str_size | character | size in string format | @@ -33,6 +33,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err character(len=20) :: str_size, str_nb_digits, str_exp integer :: nb_digits + call lock_io() ! max_nb_digit: Y max ! size_nb = Size of the double: X (FX.Y) write(str_size,'(I3)') size_nb @@ -40,17 +41,17 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err ! Error write(str_exp,'(1pE20.0)') error str_error = trim(adjustl(str_exp)) - + ! Number of digit: Y (FX.Y) from the exponent str_nb_digits = str_exp(19:20) read(str_nb_digits,*) nb_digits - + ! If the error is 0d0 - if (error <= 1d-16) then + if (error <= 1d-16) then write(str_nb_digits,*) max_nb_digits endif - ! If the error is too small + ! If the error is too small if (nb_digits > max_nb_digits) then write(str_nb_digits,*) max_nb_digits str_error(1:1) = '0' @@ -65,7 +66,8 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err ! FX.Y,A1,A1,A1 for value(str_error) !string = 'F'//trim(adjustl(str_size))//'.'//trim(adjustl(str_nb_digits))//',A1,A1,A1' - ! FX.Y just for the value + ! FX.Y just for the value format_value = 'F'//trim(adjustl(str_size))//'.'//trim(adjustl(str_nb_digits)) + call unlock_io() end diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index c02560e3..3b43d607 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1854,7 +1854,7 @@ do k = 1, N end do ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer ! to do the swapping in-place -U = 0.00D+0 +U(:,:) = 0.00D+0 do k = 1, N l = piv(k) U(l, :) = A(1:rank, k) diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index d5a066a1..115b2cbe 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -8,7 +8,9 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] qp_max_mem = 2000 call getenv('QP_MAXMEM',env) if (trim(env) /= '') then + call lock_io() read(env,*) qp_max_mem + call unlock_io() endif call write_int(6,qp_max_mem,'Target maximum memory (GB)') @@ -25,7 +27,7 @@ subroutine resident_memory(value) character*(32) :: key double precision, intent(out) :: value - call omp_set_lock(file_lock) + call lock_io() call usleep(10) value = 0.d0 @@ -40,7 +42,7 @@ subroutine resident_memory(value) 20 continue close(iunit) value = value / (1024.d0*1024.d0) - call omp_unset_lock(file_lock) + call unlock_io() end function subroutine total_memory(value) @@ -53,6 +55,7 @@ subroutine total_memory(value) character*(32) :: key double precision, intent(out) :: value + call lock_io() iunit = getUnitAndOpen('/proc/self/status','r') do read(iunit,*,err=10,end=20) key, value @@ -64,6 +67,7 @@ subroutine total_memory(value) 20 continue close(iunit) value = value / (1024.d0*1024.d0) + call unlock_io() end function double precision function memory_of_double(n) diff --git a/src/utils/units.irp.f b/src/utils/units.irp.f index 1850b28b..51dcec82 100644 --- a/src/utils/units.irp.f +++ b/src/utils/units.irp.f @@ -1,22 +1,32 @@ BEGIN_PROVIDER [double precision, ha_to_ev] +&BEGIN_PROVIDER [double precision, au_to_D] +&BEGIN_PROVIDER [double precision, planck_cte] +&BEGIN_PROVIDER [double precision, light_speed] +&BEGIN_PROVIDER [double precision, Ha_to_J] +&BEGIN_PROVIDER [double precision, Ha_to_nm] implicit none + BEGIN_DOC - ! Converstion from Hartree to eV + ! Some conversion between different units END_DOC - ha_to_ev = 27.211396641308d0 - -END_PROVIDER - -BEGIN_PROVIDER [double precision, au_to_D] - - implicit none - BEGIN_DOC - ! Converstion from au to Debye - END_DOC + ! Hartree to eV + Ha_to_eV = 27.211396641308d0 + ! au to Debye au_to_D = 2.5415802529d0 -END_PROVIDER + ! Planck's constant in SI units + planck_cte = 6.62606957d-34 + ! Light speed in SI units + light_speed = 2.99792458d10 + + ! Hartree to Joule + Ha_to_J = 4.35974434d-18 + + ! Hartree to nm + Ha_to_nm = 1d9 * (planck_cte * light_speed) / Ha_to_J + +END_PROVIDER diff --git a/src/utils_cc/EZFIO.cfg b/src/utils_cc/EZFIO.cfg new file mode 100644 index 00000000..71ee87e3 --- /dev/null +++ b/src/utils_cc/EZFIO.cfg @@ -0,0 +1,77 @@ +[cc_thresh_conv] +type: double precision +doc: Threshold for the convergence of the residual equations. +interface: ezfio,ocaml,provider +default: 1e-6 + +[cc_max_iter] +type: integer +doc: Maximum number of iterations. +interface: ezfio,ocaml,provider +default: 100 + +[cc_diis_depth] +type: integer +doc: Maximum depth of the DIIS, i.e., maximum number of iterations that the DIIS keeps in memory. Warning, we allocate matrices with the diis depth at the beginning without update. If you don't have enough memory it should crash in memory. +interface: ezfio,ocaml,provider +default: 8 + +[cc_level_shift] +type: double precision +doc: Level shift for the CC +interface: ezfio,ocaml,provider +default: 0.0 + +[cc_level_shift_guess] +type: double precision +doc: Level shift for the guess of the CC amplitudes +interface: ezfio,ocaml,provider +default: 0.0 + +[cc_update_method] +type: character*(32) +doc: Method used to update the CC amplitudes. none -> normal, diis -> with diis. +interface: ezfio,ocaml,provider +default: diis + +[cc_guess_t1] +type: character*(32) +doc: Guess used to initialize the T1 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. +interface: ezfio,ocaml,provider +default: MP + +[cc_guess_t2] +type: character*(32) +doc: Guess used to initialize the T2 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. +interface: ezfio,ocaml,provider +default: MP + +[cc_write_t1] +type: logical +doc: If true, it will write on disk the T1 amplitudes at the end of the calculation. +interface: ezfio,ocaml,provider +default: False + +[cc_write_t2] +type: logical +doc: If true, it will write on disk the T2 amplitudes at the end of the calculation. +interface: ezfio,ocaml,provider +default: False + +[cc_par_t] +type: logical +doc: If true, the CCSD(T) will be computed. +interface: ezfio,ocaml,provider +default: False + +[cc_dev] +type: logical +doc: Only for dev purposes. +interface: ezfio,ocaml,provider +default: False + +[cc_ref] +type: integer +doc: Index of the reference determinant in psi_det for CC calculation. +interface: ezfio,ocaml,provider +default: 1 diff --git a/src/utils_cc/NEED b/src/utils_cc/NEED new file mode 100644 index 00000000..bd5a151f --- /dev/null +++ b/src/utils_cc/NEED @@ -0,0 +1,4 @@ +hartree_fock +two_body_rdm +bitmask +determinants diff --git a/src/utils_cc/README.md b/src/utils_cc/README.md new file mode 100644 index 00000000..87cde388 --- /dev/null +++ b/src/utils_cc/README.md @@ -0,0 +1,34 @@ +# Utils for CC + +Utils for the CC modules. + +## Contents +- Providers related to reference occupancy +- Integrals related to the reference +- Diis for CC (but can be used for something else if you provide your own error vector) +- Guess for CC amplitudes +- Routines to update the CC amplitudes +- Phase between to arbitrary determinants +- print of the qp edit wf + +## Keywords +- cc_thresh_conv: Threshold for the convergence of the residual equations. Default: 1e-6. +- cc_max_iter: Maximum number of iterations. Default: 100. +- cc_diis_depth: Diis depth. Default: 8. +- cc_level_shift: Level shift for the CC. Default: 0.0. +- cc_level_shift_guess: Level shift for the MP guess of the amplitudes. Default: 0.0. +- cc_update_method: Method used to update the CC amplitudes. none -> normal, diis -> with diis. Default: diis. +- cc_guess_t1: Guess used to initialize the T1 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. Default: MP. +- cc_guess_t2: Guess used to initialize the T2 amplitudes. none -> 0, MP -> perturbation theory, read -> read from disk. Default: MP. +- cc_write_t1: If true, it will write on disk the T1 amplitudes at the end of the calculation. Default: False. +- cc_write_t2: If true, it will write on disk the T2 amplitudes at the end of the calculation. Default: False. +- cc_par_t: If true, the CCSD(T) will be computed. +- cc_ref: Index of the reference determinant in psi_det for CC calculation. Default: 1. + +## Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh and +mv *.irp.f ../. +``` diff --git a/src/utils_cc/diis.irp.f b/src/utils_cc/diis.irp.f new file mode 100644 index 00000000..fe771373 --- /dev/null +++ b/src/utils_cc/diis.irp.f @@ -0,0 +1,529 @@ +! Code + +subroutine diis_cc(all_err,all_t,sze,m,iter,t) + + implicit none + + BEGIN_DOC + ! DIIS. Take the error vectors and the amplitudes of the previous + ! iterations to compute the new amplitudes + END_DOC + + ! {err_i}_{i=1}^{m_it} -> B -> c + ! {t_i}_{i=1}^{m_it}, c, {err_i}_{i=1}^{m_it} -> t_{m_it+1} + + integer, intent(in) :: m,iter,sze + double precision, intent(in) :: all_err(sze,m) + double precision, intent(in) :: all_t(sze,m) + + double precision, intent(out) :: t(sze) + + double precision, allocatable :: B(:,:), c(:), zero(:) + integer :: m_iter + integer :: i,j,k + integer :: info + integer, allocatable :: ipiv(:) + double precision :: accu + + m_iter = min(m,iter) + !print*,'m_iter',m_iter + allocate(B(m_iter+1,m_iter+1), c(m_iter), zero(m_iter+1)) + allocate(ipiv(m+1)) + + ! B(i,j) = < err(iter-m_iter+j),err(iter-m_iter+i) > ! iter-m_iter will be zero for us + B = 0d0 + !$OMP PARALLEL & + !$OMP SHARED(B,m,m_iter,sze,all_err) & + !$OMP PRIVATE(i,j,k,accu) & + !$OMP DEFAULT(NONE) + do j = 1, m_iter + do i = 1, m_iter + accu = 0d0 + !$OMP DO + do k = 1, sze + ! the errors of the ith iteration are in all_err(:,m+1-i) + accu = accu + all_err(k,m+1-i) * all_err(k,m+1-j) + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + B(i,j) = B(i,j) + accu + !$OMP END CRITICAL + enddo + enddo + !$OMP END PARALLEL + + do i = 1, m_iter + B(i,m_iter+1) = -1 + enddo + do j = 1, m_iter + B(m_iter+1,j) = -1 + enddo + ! Debug + !print*,'B' + !do i = 1, m_iter+1 + ! write(*,'(100(F10.6))') B(i,:) + !enddo + + ! (0 0 .... 0 -1) + zero = 0d0 + zero(m_iter+1) = -1d0 + + ! Solve B.c = zero + call dgesv(m_iter+1, 1, B, size(B,1), ipiv, zero, size(zero,1), info) + if (info /= 0) then + print*,'DIIS error in dgesv:', info + call abort + endif + ! c corresponds to the m_iter first solutions + c = zero(1:m_iter) + ! Debug + !print*,'c',c + !print*,'all_t' + !do i = 1, m + ! write(*,'(100(F10.6))') all_t(:,i) + !enddo + !print*,'all_err' + !do i = 1, m + ! write(*,'(100(F10.6))') all_err(:,i) + !enddo + + ! update T + !$OMP PARALLEL & + !$OMP SHARED(t,c,m,all_err,all_t,sze,m_iter) & + !$OMP PRIVATE(i,j,accu) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, sze + t(i) = 0d0 + enddo + !$OMP END DO + do i = 1, m_iter + !$OMP DO + do j = 1, sze + t(j) = t(j) + c(i) * (all_t(j,m+1-i) + all_err(j,m+1-i)) + enddo + !$OMP END DO + enddo + !$OMP END PARALLEL + + !print*,'new t',t + + deallocate(ipiv,B,c,zero) + +end + +! Update all err + +subroutine update_all_err(err,all_err,sze,m,iter) + + implicit none + + BEGIN_DOC + ! Shift all the err vectors of the previous iterations to add the new one + ! The last err vector is placed in the last position and all the others are + ! moved toward the first one. + END_DOC + + integer, intent(in) :: m, iter, sze + double precision, intent(in) :: err(sze) + double precision, intent(inout) :: all_err(sze,m) + integer :: i,j + integer :: m_iter + + m_iter = min(m,iter) + + ! Shift + !$OMP PARALLEL & + !$OMP SHARED(m,all_err,err,sze) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do i = 1, m-1 + !$OMP DO + do j = 1, sze + all_err(j,i) = all_err(j,i+1) + enddo + !$OMP END DO + enddo + + ! Debug + !print*,'shift err' + !do i = 1, m + ! print*,i, all_err(:,i) + !enddo + + ! New + !$OMP DO + do i = 1, sze + all_err(i,m) = err(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Debug + !print*,'Updated err' + !do i = 1, m + ! print*,i, all_err(:,i) + !enddo + +end + +! Update all t + +subroutine update_all_t(t,all_t,sze,m,iter) + + implicit none + + BEGIN_DOC + ! Shift all the t vectors of the previous iterations to add the new one + ! The last t vector is placed in the last position and all the others are + ! moved toward the first one. + END_DOC + + integer, intent(in) :: m, iter, sze + double precision, intent(in) :: t(sze) + double precision, intent(inout) :: all_t(sze,m) + integer :: i,j + integer :: m_iter + + m_iter = min(m,iter) + + ! Shift + !$OMP PARALLEL & + !$OMP SHARED(m,all_t,t,sze) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do i = 1, m-1 + !$OMP DO + do j = 1, sze + all_t(j,i) = all_t(j,i+1) + enddo + !$OMP END DO + enddo + + ! New + !$OMP DO + do i = 1, sze + all_t(i,m) = t(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Debug + !print*,'Updated t' + !do i = 1, m + ! print*,i, all_t(:,i) + !enddo + +end + +! Err1 + +subroutine compute_err1(nO,nV,f_o,f_v,r1,err1) + + implicit none + + BEGIN_DOC + ! Compute the error vector for the t1 + END_DOC + + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r1(nO,nV) + + double precision, intent(out) :: err1(nO,nV) + + integer :: i,a + + !$OMP PARALLEL & + !$OMP SHARED(err1,r1,f_o,f_v,nO,nV,cc_level_shift) & + !$OMP PRIVATE(i,a) & + !$OMP DEFAULT(NONE) + !$OMP DO + do a = 1, nV + do i = 1, nO + err1(i,a) = - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! Err2 + +subroutine compute_err2(nO,nV,f_o,f_v,r2,err2) + + implicit none + + BEGIN_DOC + ! Compute the error vector for the t2 + END_DOC + + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r2(nO,nO,nV,nV) + + double precision, intent(out) :: err2(nO,nO,nV,nV) + + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(err2,r2,f_o,f_v,nO,nV,cc_level_shift) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + err2(i,j,a,b) = - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! Update t + +subroutine update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: err1(:,:), err2(:,:,:,:) + double precision, allocatable :: tmp_err1(:), tmp_err2(:) + double precision, allocatable :: tmp_t1(:), tmp_t2(:) + + if (cc_update_method == 'diis') then + + allocate(err1(nO,nV), err2(nO,nO,nV,nV)) + allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV)) + allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV)) + + ! DIIS T1, it is not always good since the t1 can be small + ! That's why there is a call to update the t1 in the standard way + ! T1 error tensor + !call compute_err1(nO,nV,f_o,f_v,r1,err1) + ! Transfo errors and parameters in vectors + !tmp_err1 = reshape(err1,(/nO*nV/)) + !tmp_t1 = reshape(t1 ,(/nO*nV/)) + ! Add the error and parameter vectors with those of the previous iterations + !call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1) + !call update_all_t (tmp_t1 ,all_t1 ,nO*nV,cc_diis_depth,nb_iter+1) + ! Diis and reshape T as a tensor + !call diis_cc(all_err1,all_t1,nO*nV,cc_diis_depth,nb_iter+1,tmp_t1) + !t1 = reshape(tmp_t1 ,(/nO,nV/)) + call update_t1(nO,nV,f_o,f_v,r1,t1) + + ! DIIS T2 + ! T2 error tensor + call compute_err2(nO,nV,f_o,f_v,r2,err2) + ! Transfo errors and parameters in vectors + tmp_err2 = reshape(err2,(/nO*nO*nV*nV/)) + tmp_t2 = reshape(t2 ,(/nO*nO*nV*nV/)) + ! Add the error and parameter vectors with those of the previous iterations + call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t2 ,all_t2 ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + ! Diis and reshape T as a tensor + call diis_cc(all_err2,all_t2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t2) + t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/)) + + deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + + call update_t1(nO,nV,f_o,f_v,r1,t1) + call update_t2(nO,nV,f_o,f_v,r2,t2) + + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + +end + +! Update t v2 + +subroutine update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: all_t(:,:), all_err(:,:), tmp_t(:) + double precision, allocatable :: err1(:,:), err2(:,:,:,:) + double precision, allocatable :: tmp_err1(:), tmp_err2(:) + double precision, allocatable :: tmp_t1(:), tmp_t2(:) + + integer :: i,j + + ! Allocate + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + allocate(tmp_t(nO*nV+nO*nO*nV*nV)) + allocate(err1(nO,nV), err2(nO,nO,nV,nV)) + allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV)) + allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV)) + + ! Compute the errors and reshape them as vector + call compute_err1(nO,nV,f_o,f_v,r1,err1) + call compute_err2(nO,nV,f_o,f_v,r2,err2) + tmp_err1 = reshape(err1,(/nO*nV/)) + tmp_err2 = reshape(err2,(/nO*nO*nV*nV/)) + tmp_t1 = reshape(t1 ,(/nO*nV/)) + tmp_t2 = reshape(t2 ,(/nO*nO*nV*nV/)) + + ! Update the errors and parameters for the diis + call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t1 ,all_t1 ,nO*nV,cc_diis_depth,nb_iter+1) + call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t2 ,all_t2 ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + ! Gather the different parameters and errors + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,all_err,all_err1,all_err2,cc_diis_depth,& + !$OMP all_t,all_t1,all_t2) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nV + all_err(i,j) = all_err1(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nO*nV*nV + all_err(i+nO*nV,j) = all_err2(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nV + all_t(i,j) = all_t1(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nO*nV*nV + all_t(i+nO*nV,j) = all_t2(i,j) + enddo + !$OMP END DO + enddo + !$OMP END PARALLEL + + ! Diis + call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t) + + ! Split the resulting vector + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp_t,tmp_t1,tmp_t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + tmp_t1(i) = tmp_t(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + tmp_t2(i) = tmp_t(i+nO*nV) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Reshape as tensors + t1 = reshape(tmp_t1 ,(/nO,nV/)) + t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/)) + + ! Deallocate + deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2,all_t,all_err) + +end + +! Update t v3 + +subroutine update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO*nV), t2(nO*nO*nV*nV) + double precision, intent(inout) :: all_err(nO*nV+nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t(nO*nV+nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: tmp(:) + + integer :: i,j + + ! Allocate + allocate(tmp(nO*nV+nO*nO*nV*nV)) + + ! Compute the errors + call compute_err1(nO,nV,f_o,f_v,r1,tmp(1:nO*nV)) + call compute_err2(nO,nV,f_o,f_v,r2,tmp(nO*nV+1:nO*nV+nO*nO*nV*nV)) + + ! Update the errors and parameters for the diis + call update_all_err(tmp,all_err,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp,t1,t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + tmp(i) = t1(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + tmp(i+nO*nV) = t2(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + call update_all_t(tmp,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + ! Diis + call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp) + + ! Split the resulting vector + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp,t1,t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + t1(i) = tmp(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + t2(i) = tmp(i+nO*nV) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Deallocate + deallocate(tmp) + +end diff --git a/src/utils_cc/energy.irp.f b/src/utils_cc/energy.irp.f new file mode 100644 index 00000000..33e0cbae --- /dev/null +++ b/src/utils_cc/energy.irp.f @@ -0,0 +1,13 @@ +subroutine det_energy(det,energy) + + implicit none + + integer(bit_kind), intent(in) :: det + + double precision, intent(out) :: energy + + call i_H_j(det,det,N_int,energy) + + energy = energy + nuclear_repulsion + +end diff --git a/src/utils_cc/guess_t.irp.f b/src/utils_cc/guess_t.irp.f new file mode 100644 index 00000000..42acdf78 --- /dev/null +++ b/src/utils_cc/guess_t.irp.f @@ -0,0 +1,213 @@ +! T1 + +subroutine guess_t1(nO,nV,f_o,f_v,f_ov,t1) + + implicit none + + BEGIN_DOC + ! Update the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + + ! inout + double precision, intent(out) :: t1(nO, nV) + + ! internal + integer :: i,a + + if (trim(cc_guess_t1) == 'none') then + t1 = 0d0 + else if (trim(cc_guess_t1) == 'MP') then + do a = 1, nV + do i = 1, nO + t1(i,a) = f_ov(i,a) / (f_o(i) - f_v(a) - cc_level_shift_guess) + enddo + enddo + else if (trim(cc_guess_t1) == 'read') then + call read_t1(nO,nV,t1) + else + print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t1) + call abort + endif + +end + +! T2 + +subroutine guess_t2(nO,nV,f_o,f_v,v_oovv,t2) + + implicit none + + BEGIN_DOC + ! Update the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), v_oovv(nO, nO, nV, nV) + + ! inout + double precision, intent(out) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b + + if (trim(cc_guess_t2) == 'none') then + t2 = 0d0 + else if (trim(cc_guess_t2) == 'MP') then + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + t2(i,j,a,b) = v_oovv(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift_guess) + enddo + enddo + enddo + enddo + else if (trim(cc_guess_t2) == 'read') then + call read_t2(nO,nV,t2) + else + print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t2) + call abort + endif + +end + +! T1 + +subroutine write_t1(nO,nV,t1) + + implicit none + + BEGIN_DOC + ! Write the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO, nV) + + ! internal + integer :: i,a + + if (cc_write_t1) then + open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1') + do a = 1, nV + do i = 1, nO + write(11,'(F20.12)') t1(i,a) + enddo + enddo + close(11) + endif + +end + +! T2 + +subroutine write_t2(nO,nV,t2) + + implicit none + + BEGIN_DOC + ! Write the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b + + if (cc_write_t2) then + open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2') + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + write(11,'(F20.12)') t2(i,j,a,b) + enddo + enddo + enddo + enddo + close(11) + endif + +end + +! T1 + +subroutine read_t1(nO,nV,t1) + + implicit none + + BEGIN_DOC + ! Read the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(out) :: t1(nO, nV) + + ! internal + integer :: i,a + logical :: ok + + inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok) + if (.not. ok) then + print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1' + print*, 'Do a first calculation with cc_write_t1 = True' + print*, 'and cc_guess_t1 /= read before setting cc_guess_t1 = read' + call abort + endif + open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1') + do a = 1, nV + do i = 1, nO + read(11,'(F20.12)') t1(i,a) + enddo + enddo + close(11) + +end + +! T2 + +subroutine read_t2(nO,nV,t2) + + implicit none + + BEGIN_DOC + ! Read the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(out) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b + logical :: ok + + inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok) + if (.not. ok) then + print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1' + print*, 'Do a first calculation with cc_write_t2 = True' + print*, 'and cc_guess_t2 /= read before setting cc_guess_t2 = read' + call abort + endif + open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2') + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + read(11,'(F20.12)') t2(i,j,a,b) + enddo + enddo + enddo + enddo + close(11) + +end diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f new file mode 100644 index 00000000..9e244d82 --- /dev/null +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -0,0 +1,1256 @@ +! F + +subroutine gen_f_space(det,n1,n2,list1,list2,f) + + implicit none + + integer, intent(in) :: n1,n2 + integer, intent(in) :: list1(n1),list2(n2) + integer(bit_kind), intent(in) :: det(N_int,2) + double precision, intent(out) :: f(n1,n2) + + double precision, allocatable :: tmp_F(:,:) + integer :: i1,i2,idx1,idx2 + + allocate(tmp_F(mo_num,mo_num)) + + call get_fock_matrix_spin(det,1,tmp_F) + + !$OMP PARALLEL & + !$OMP SHARED(tmp_F,f,n1,n2,list1,list2) & + !$OMP PRIVATE(idx1,idx2,i1,i2)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do i2 = 1, n2 + do i1 = 1, n1 + idx2 = list2(i2) + idx1 = list1(i1) + f(i1,i2) = tmp_F(idx1,idx2) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_F) + +end + +! V + +subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) + + implicit none + + integer, intent(in) :: n1,n2,n3,n4 + integer, intent(in) :: list1(n1),list2(n2),list3(n3),list4(n4) + double precision, intent(out) :: v(n1,n2,n3,n4) + + integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4 + double precision :: get_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL & + !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) & + !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + idx4 = list4(i4) + idx3 = list3(i3) + idx2 = list2(i2) + idx1 = list1(i1) + v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! full + +BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] + + implicit none + + integer :: i,j,k,l + double precision :: get_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL & + !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) & + !$OMP PRIVATE(i,j,k,l) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER + +! oooo + +BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + +END_PROVIDER + +! vooo + +BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + +END_PROVIDER + +! ovoo + +BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + +END_PROVIDER + +! oovo + +BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + +END_PROVIDER + +! ooov + +BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + +END_PROVIDER + +! vvoo + +BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + +END_PROVIDER + +! vovo + +BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + +END_PROVIDER + +! voov + +BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + +END_PROVIDER + +! ovvo + +BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + +END_PROVIDER + +! ovov + +BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + +END_PROVIDER + +! oovv + +BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + +END_PROVIDER + +! vvvo + +BEGIN_PROVIDER [double precision, cc_space_v_vvvo, (cc_nVa, cc_nVa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_vvvo) + +END_PROVIDER + +! vvov + +BEGIN_PROVIDER [double precision, cc_space_v_vvov, (cc_nVa, cc_nVa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nVa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_vvov) + +END_PROVIDER + +! vovv + +BEGIN_PROVIDER [double precision, cc_space_v_vovv, (cc_nVa, cc_nOa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_vovv) + +END_PROVIDER + +! ovvv + +BEGIN_PROVIDER [double precision, cc_space_v_ovvv, (cc_nOa, cc_nVa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_vir, cc_space_v_ovvv) + +END_PROVIDER + +! vvvv + +BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,cc_nVa, cc_list_vir,cc_list_vir,cc_list_vir,cc_list_vir, cc_space_v_vvvv) + +END_PROVIDER + +! ppqq + +BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] + + implicit none + + BEGIN_DOC + ! integrals for general MOs (excepted core and deleted ones) + END_DOC + + integer :: p,q + double precision, allocatable :: tmp_v(:,:,:,:) + + allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo)) + + call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v) + + do q = 1, cc_n_mo + do p = 1, cc_n_mo + cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q) + enddo + enddo + + deallocate(tmp_v) + +END_PROVIDER + +! aaii + +BEGIN_PROVIDER [double precision, cc_space_v_aaii, (cc_nVa,cc_nOa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: occupied MO + END_DOC + + integer :: a,i + + do i = 1, cc_nOa + do a = 1, cc_nVa + cc_space_v_aaii(a,i) = cc_space_v_vvoo(a,a,i,i) + enddo + enddo + + FREE cc_space_v_vvoo + +END_PROVIDER + +! iiaa + +BEGIN_PROVIDER [double precision, cc_space_v_iiaa, (cc_nOa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: occupied MO + END_DOC + + integer :: a,i + + do a = 1, cc_nVa + do i = 1, cc_nOa + cc_space_v_iiaa(i,a) = cc_space_v_oovv(i,i,a,a) + enddo + enddo + + FREE cc_space_v_oovv + +END_PROVIDER + +! iijj + +BEGIN_PROVIDER [double precision, cc_space_v_iijj, (cc_nOa,cc_nOa)] + + implicit none + + BEGIN_DOC + ! integrals + ! i,j: occupied MO + END_DOC + + integer :: i,j + + do j = 1, cc_nOa + do i = 1, cc_nOa + cc_space_v_iijj(i,j) = cc_space_v_oooo(i,i,j,j) + enddo + enddo + + FREE cc_space_v_oooo + +END_PROVIDER + +! aabb + +BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a,b: virtual MO + END_DOC + + integer :: a,b + + do b = 1, cc_nVa + do a = 1, cc_nVa + cc_space_v_aabb(a,b) = cc_space_v_vvvv(a,a,b,b) + enddo + enddo + + FREE cc_space_v_vvvv + +END_PROVIDER + +! iaia + +BEGIN_PROVIDER [double precision, cc_space_v_iaia, (cc_nOa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: occupied MO + END_DOC + + integer :: a,i + + do a = 1, cc_nVa + do i = 1, cc_nOa + cc_space_v_iaia(i,a) = cc_space_v_ovov(i,a,i,a) + enddo + enddo + + FREE cc_space_v_ovov + +END_PROVIDER + +! iaai + +BEGIN_PROVIDER [double precision, cc_space_v_iaai, (cc_nOa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: inactive MO + END_DOC + + integer :: a,i + + do a = 1, cc_nVa + do i = 1, cc_nOa + cc_space_v_iaai(i,a) = cc_space_v_ovvo(i,a,a,i) + enddo + enddo + + FREE cc_space_v_ovvo + +END_PROVIDER + +! aiia + +BEGIN_PROVIDER [double precision, cc_space_v_aiia, (cc_nVa,cc_nOa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: inactive MO + END_DOC + + integer :: a,i + + do i = 1, cc_nOa + do a = 1, cc_nVa + cc_space_v_aiia(a,i) = cc_space_v_voov(a,i,i,a) + enddo + enddo + + FREE cc_space_v_voov + +END_PROVIDER + +! oovv + +BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)] + + implicit none + + double precision, allocatable :: tmp_v(:,:,:,:) + integer :: i,j,a,b + + allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa)) + + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v) + + !$OMP PARALLEL & + !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_oovv) & + !$OMP PRIVATE(i,j,a,b)& + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, cc_nVa + do a = 1, cc_nVa + do j = 1, cc_nOa + do i = 1, cc_nOa + cc_space_w_oovv(i,j,a,b) = 2d0 * tmp_v(i,j,a,b) - tmp_v(j,i,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_v) + +END_PROVIDER + +! vvoo + +BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)] + + implicit none + + double precision, allocatable :: tmp_v(:,:,:,:) + integer :: i,j,a,b + + allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa)) + + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v) + + !$OMP PARALLEL & + !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_vvoo) & + !$OMP PRIVATE(i,j,a,b)& + !$OMP DEFAULT(NONE) + !$OMP DO + do j = 1, cc_nOa + do i = 1, cc_nOa + do b = 1, cc_nVa + do a = 1, cc_nVa + cc_space_w_vvoo(a,b,i,j) = 2d0 * tmp_v(a,b,i,j) - tmp_v(b,a,i,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_v) + +END_PROVIDER + +! F_oo + +BEGIN_PROVIDER [double precision, cc_space_f_oo, (cc_nOa, cc_nOa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nOa, cc_list_occ,cc_list_occ, cc_space_f_oo) + +END_PROVIDER + +! F_ov + +BEGIN_PROVIDER [double precision, cc_space_f_ov, (cc_nOa, cc_nVa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nVa, cc_list_occ,cc_list_vir, cc_space_f_ov) + +END_PROVIDER + +! F_vo + +BEGIN_PROVIDER [double precision, cc_space_f_vo, (cc_nVa, cc_nOa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nOa, cc_list_vir,cc_list_occ, cc_space_f_vo) + +END_PROVIDER + +! F_vv + +BEGIN_PROVIDER [double precision, cc_space_f_vv, (cc_nVa, cc_nVa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nVa, cc_list_vir,cc_list_vir, cc_space_f_vv) + +END_PROVIDER + +! F_o + +BEGIN_PROVIDER [double precision, cc_space_f_o, (cc_nOa)] + + implicit none + + integer :: i + + do i = 1, cc_nOa + cc_space_f_o(i) = cc_space_f_oo(i,i) + enddo + +END_PROVIDER + +! F_v + +BEGIN_PROVIDER [double precision, cc_space_f_v, (cc_nVa)] + + implicit none + + integer :: i + + do i = 1, cc_nVa + cc_space_f_v(i) = cc_space_f_vv(i,i) + enddo + +END_PROVIDER + +! Shift + +subroutine shift_idx_spin(s,n_S,shift) + + implicit none + + BEGIN_DOC + ! Shift for the partitionning alpha/beta of the spin orbitals + ! n_S(1): number of spin alpha in the correspondong list + ! n_S(2): number of spin beta in the correspondong list + END_DOC + + integer, intent(in) :: s, n_S(2) + integer, intent(out) :: shift + + if (s == 1) then + shift = 0 + else + shift = n_S(1) + endif + +end + +! F + +subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) + + implicit none + + BEGIN_DOC + ! Compute the Fock matrix corresponding to two lists of spin orbitals. + ! Ex: occ/occ, occ/vir,... + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + integer, intent(in) :: n1,n2, n1_S(2), n2_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2) + integer, intent(in) :: dim1, dim2 + + double precision, intent(out) :: f(dim1, dim2) + + double precision, allocatable :: tmp_F(:,:) + integer :: i,j, idx_i,idx_j,i_shift,j_shift + integer :: tmp_i,tmp_j + integer :: si,sj,s + + allocate(tmp_F(mo_num,mo_num)) + + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + s = si + sj + + if (s == 2 .or. s == 4) then + call get_fock_matrix_spin(det,sj,tmp_F) + else + do j = 1, mo_num + do i = 1, mo_num + tmp_F(i,j) = 0d0 + enddo + enddo + endif + + do tmp_j = 1, n2_S(sj) + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + f(idx_i,idx_j) = tmp_F(i,j) + enddo + enddo + + enddo + enddo + + deallocate(tmp_F) + +end + +! Get F + +subroutine get_fock_matrix_spin(det,s,f) + + implicit none + + BEGIN_DOC + ! Fock matrix alpha or beta of an arbitrary det + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + integer, intent(in) :: s + + double precision, intent(out) :: f(mo_num,mo_num) + + integer :: p,q,i,s1,s2 + integer(bit_kind) :: res(N_int,2) + logical :: ok + double precision :: mo_two_e_integral + + if (s == 1) then + s1 = 1 + s2 = 2 + else + s1 = 2 + s2 = 1 + endif + + !$OMP PARALLEL & + !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) & + !$OMP PRIVATE(p,q,ok,i,res)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do q = 1, mo_num + do p = 1, mo_num + f(p,q) = mo_one_e_integrals(p,q) + do i = 1, mo_num + call apply_hole(det, s1, i, res, ok, N_int) + if (ok) then + f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) + endif + enddo + do i = 1, mo_num + call apply_hole(det, s2, i, res, ok, N_int) + if (ok) then + f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) + endif + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! V + +subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3,dim4, v) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... + END_DOC + + integer, intent(in) :: n1,n2,n3,n4,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3, dim4 + double precision, intent(out) :: v(dim1,dim2,dim3,dim4) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_k,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + !$OMP PARALLEL & + !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & + !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & + !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& + !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v(idx_i,idx_j,idx_k,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + +end + +! V_3idx + +subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_l) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... + END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_l,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_l(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_k + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_l <= n4_S(1)) then + sl = 1 + else + sl = 2 + endif + call shift_idx_spin(sl,n4_S,l_shift) + tmp_l = idx_l - l_shift + l = list4(tmp_l,sl) + + !$OMP PARALLEL & + !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & + !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & + !$OMP i,j,k,idx_i,idx_j,idx_k,& + !$OMP tmp_i,tmp_j,tmp_k)& + !$OMP DEFAULT(NONE) + + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end + +! V_3idx_ij_l + +subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_k) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... + END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_k,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_k(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_k <= n3_S(1)) then + sk = 1 + else + sk = 2 + endif + call shift_idx_spin(sk,n3_S,k_shift) + tmp_k = idx_k - k_shift + k = list3(tmp_k,sk) + + !$OMP PARALLEL & + !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & + !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & + !$OMP i,j,l,idx_i,idx_j,idx_l,& + !$OMP tmp_i,tmp_j,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end + +! V_3idx_i_kl + +subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_j) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... + END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_j,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_j(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_k,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_j <= n2_S(1)) then + sj = 1 + else + sj = 2 + endif + call shift_idx_spin(sj,n2_S,j_shift) + tmp_j = idx_j - j_shift + j = list2(tmp_j,sj) + + !$OMP PARALLEL & + !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & + !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & + !$OMP i,k,l,idx_i,idx_k,idx_l,& + !$OMP tmp_i,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_j(idx_i,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end diff --git a/src/utils_cc/occupancy.irp.f b/src/utils_cc/occupancy.irp.f new file mode 100644 index 00000000..c6139bb3 --- /dev/null +++ b/src/utils_cc/occupancy.irp.f @@ -0,0 +1,328 @@ +! N spin orb + +subroutine extract_n_spin(det,n) + + implicit none + + BEGIN_DOC + ! Returns the number of occupied alpha, occupied beta, virtual alpha, virtual beta spin orbitals + ! in det without counting the core and deleted orbitals in the format n(nOa,nOb,nVa,nVb) + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: n(4) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si + logical :: ok, is_core, is_del + + ! Init + n = 0 + + ! Loop over the spin + do si = 1, 2 + do i = 1, mo_num + call apply_hole(det, si, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + n(si) = n(si) + 1 + else + ! hole + n(si+2) = n(si+2) + 1 + endif + enddo + enddo + + !print*,n(1),n(2),n(3),n(4) + +end + +! Spin + +subroutine extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) + + implicit none + + BEGIN_DOC + ! Returns the the list of occupied alpha/beta, virtual alpha/beta spin orbitals + ! size(nO_m,1) must be max(nOa,nOb) and size(nV_m,1) must be max(nVa,nVb) + END_DOC + + integer, intent(in) :: nO_m, nV_m + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: list_occ(nO_m,2), list_vir(nV_m,2) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si, idx_o, idx_v, idx_i, idx_b + logical :: ok, is_core, is_del + + list_occ = 0 + list_vir = 0 + + ! List of occ/vir alpha/beta + + ! occ alpha -> list_occ(:,1) + ! occ beta -> list_occ(:,2) + ! vir alpha -> list_vir(:,1) + ! vir beta -> list_vir(:,2) + + ! Loop over the spin + do si = 1, 2 + ! tmp idx + idx_o = 1 + idx_v = 1 + do i = 1, mo_num + call apply_hole(det, si, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + list_occ(idx_o,si) = i + idx_o = idx_o + 1 + else + ! hole + list_vir(idx_v,si) = i + idx_v = idx_v + 1 + endif + enddo + enddo + +end + +! Space + +subroutine extract_list_orb_space(det,nO,nV,list_occ,list_vir) + + implicit none + + BEGIN_DOC + ! Returns the the list of occupied and virtual alpha spin orbitals + END_DOC + + integer, intent(in) :: nO, nV + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: list_occ(nO), list_vir(nV) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si, idx_o, idx_v, idx_i, idx_b + logical :: ok, is_core, is_del + + if (elec_alpha_num /= elec_beta_num) then + print*,'Error elec_alpha_num /= elec_beta_num, impossible to create cc_list_occ and cc_list_vir, abort' + call abort + endif + + list_occ = 0 + list_vir = 0 + + ! List of occ/vir alpha + + ! occ alpha -> list_occ(:,1) + ! vir alpha -> list_vir(:,1) + + ! tmp idx + idx_o = 1 + idx_v = 1 + do i = 1, mo_num + call apply_hole(det, 1, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + list_occ(idx_o) = i + idx_o = idx_o + 1 + else + ! hole + list_vir(idx_v) = i + idx_v = idx_v + 1 + endif + enddo + +end + +! is_core + +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +! is_del + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +! N orb + +BEGIN_PROVIDER [integer, cc_nO_m] +&BEGIN_PROVIDER [integer, cc_nOa] +&BEGIN_PROVIDER [integer, cc_nOb] +&BEGIN_PROVIDER [integer, cc_nOab] +&BEGIN_PROVIDER [integer, cc_nV_m] +&BEGIN_PROVIDER [integer, cc_nVa] +&BEGIN_PROVIDER [integer, cc_nVb] +&BEGIN_PROVIDER [integer, cc_nVab] +&BEGIN_PROVIDER [integer, cc_n_mo] +&BEGIN_PROVIDER [integer, cc_nO_S, (2)] +&BEGIN_PROVIDER [integer, cc_nV_S, (2)] + + implicit none + + BEGIN_DOC + ! Number of orbitals without core and deleted ones of the cc_ref det in psi_det + ! a: alpha, b: beta + ! nO_m: max(a,b) occupied + ! nOa: nb a occupied + ! nOb: nb b occupied + ! nOab: nb a+b occupied + ! nV_m: max(a,b) virtual + ! nVa: nb a virtual + ! nVb: nb b virtual + ! nVab: nb a+b virtual + END_DOC + + integer :: n_spin(4) + + ! Extract number of occ/vir alpha/beta spin orbitals + call extract_n_spin(psi_det(1,1,cc_ref),n_spin) + + cc_nOa = n_spin(1) + cc_nOb = n_spin(2) + cc_nOab = cc_nOa + cc_nOb !n_spin(1) + n_spin(2) + cc_nO_m = max(cc_nOa,cc_nOb) !max(n_spin(1), n_spin(2)) + cc_nVa = n_spin(3) + cc_nVb = n_spin(4) + cc_nVab = cc_nVa + cc_nVb !n_spin(3) + n_spin(4) + cc_nV_m = max(cc_nVa,cc_nVb) !max(n_spin(3), n_spin(4)) + cc_n_mo = cc_nVa + cc_nVb !n_spin(1) + n_spin(3) + cc_nO_S = (/cc_nOa,cc_nOb/) + cc_nV_S = (/cc_nVa,cc_nVb/) + +END_PROVIDER + +! General + +BEGIN_PROVIDER [integer, cc_list_gen, (cc_n_mo)] + + implicit none + + BEGIN_DOC + ! List of general orbitals without core and deleted ones + END_DOC + + integer :: i,j + logical :: is_core, is_del + + j = 1 + do i = 1, mo_num + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + cc_list_gen(j) = i + j = j+1 + enddo + +END_PROVIDER + +! Space + +BEGIN_PROVIDER [integer, cc_list_occ, (cc_nOa)] +&BEGIN_PROVIDER [integer, cc_list_vir, (cc_nVa)] + + implicit none + + BEGIN_DOC + ! List of occupied and virtual spatial orbitals without core and deleted ones + END_DOC + + call extract_list_orb_space(psi_det(1,1,cc_ref),cc_nOa,cc_nVa,cc_list_occ,cc_list_vir) + +END_PROVIDER + +! Spin + +BEGIN_PROVIDER [integer, cc_list_occ_spin, (cc_nO_m,2)] +&BEGIN_PROVIDER [integer, cc_list_vir_spin, (cc_nV_m,2)] +&BEGIN_PROVIDER [logical, cc_ref_is_open_shell] + + implicit none + + BEGIN_DOC + ! List of occupied and virtual spin orbitals without core and deleted ones + END_DOC + + integer :: i + + call extract_list_orb_spin(psi_det(1,1,cc_ref),cc_nO_m,cc_nV_m,cc_list_occ_spin,cc_list_vir_spin) + + cc_ref_is_open_shell = .False. + do i = 1, cc_nO_m + if (cc_list_occ_spin(i,1) /= cc_list_occ_spin(i,2)) then + cc_ref_is_open_shell = .True. + endif + enddo + + +END_PROVIDER diff --git a/src/utils_cc/org/TANGLE_org_mode.sh b/src/utils_cc/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/utils_cc/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/utils_cc/org/diis.org b/src/utils_cc/org/diis.org new file mode 100644 index 00000000..c48b917e --- /dev/null +++ b/src/utils_cc/org/diis.org @@ -0,0 +1,574 @@ +* DIIS +https://hal.archives-ouvertes.fr/hal-02492983/document +Maxime Chupin, Mi-Song Dupuy, Guillaume Legendre, Eric Séré. Convergence analysis of adaptive +DIIS algorithms witerh application to electronic ground state calculations. +ESAIM: Mathematical Modelling and Numerical Analysis, EDP Sciences, 2021, 55 (6), pp.2785 - 2825. 10.1051/m2an/2021069ff.ffhal-02492983v5 + +t_{k+1} = g(t_k) +err_k = f(t_k) = t_{k+1} - t_k + +m_k = min(m,k) +m maximal depth +t_{k+1} = \sum_{i=0}^{m_k} c_i^k g(t_{k-m_k+i}) +\sum_{i=0}^{m_k} c_i^k = 1 + +b_{ij}^k = < err^{k-m_k+j}, err^{k-m_k+i} > + +(b -1) ( c^k ) = ( 0 ) +(-1 0) ( \lambda) ( -1 ) + +lambda is used to put the constraint \sum_{i=0}^{m_k} c_i^k = 1 + +In: t_0, err_0, m +err_0 = g(t_0) +k = 0 +m_k = 0 +while ||err_k|| > CC + A.x=b + t_{k+1} = \sum_{i=0}^{m_k} c_i^k g(t_{k-m_k+i}) + err_{k+1} = f(t_{k+1}) + m_{k+1} = min(m_k+1,m) + k = k +1 +end + +* Code +#+begin_src f90 :comments org :tangle diis.irp.f +subroutine diis_cc(all_err,all_t,sze,m,iter,t) + + implicit none + + BEGIN_DOC + ! DIIS. Take the error vectors and the amplitudes of the previous + ! iterations to compute the new amplitudes + END_DOC + + ! {err_i}_{i=1}^{m_it} -> B -> c + ! {t_i}_{i=1}^{m_it}, c, {err_i}_{i=1}^{m_it} -> t_{m_it+1} + + integer, intent(in) :: m,iter,sze + double precision, intent(in) :: all_err(sze,m) + double precision, intent(in) :: all_t(sze,m) + + double precision, intent(out) :: t(sze) + + double precision, allocatable :: B(:,:), c(:), zero(:) + integer :: m_iter + integer :: i,j,k + integer :: info + integer, allocatable :: ipiv(:) + double precision :: accu + + m_iter = min(m,iter) + !print*,'m_iter',m_iter + allocate(B(m_iter+1,m_iter+1), c(m_iter), zero(m_iter+1)) + allocate(ipiv(m+1)) + + ! B(i,j) = < err(iter-m_iter+j),err(iter-m_iter+i) > ! iter-m_iter will be zero for us + B = 0d0 + !$OMP PARALLEL & + !$OMP SHARED(B,m,m_iter,sze,all_err) & + !$OMP PRIVATE(i,j,k,accu) & + !$OMP DEFAULT(NONE) + do j = 1, m_iter + do i = 1, m_iter + accu = 0d0 + !$OMP DO + do k = 1, sze + ! the errors of the ith iteration are in all_err(:,m+1-i) + accu = accu + all_err(k,m+1-i) * all_err(k,m+1-j) + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + B(i,j) = B(i,j) + accu + !$OMP END CRITICAL + enddo + enddo + !$OMP END PARALLEL + + do i = 1, m_iter + B(i,m_iter+1) = -1 + enddo + do j = 1, m_iter + B(m_iter+1,j) = -1 + enddo + ! Debug + !print*,'B' + !do i = 1, m_iter+1 + ! write(*,'(100(F10.6))') B(i,:) + !enddo + + ! (0 0 .... 0 -1) + zero = 0d0 + zero(m_iter+1) = -1d0 + + ! Solve B.c = zero + call dgesv(m_iter+1, 1, B, size(B,1), ipiv, zero, size(zero,1), info) + if (info /= 0) then + print*,'DIIS error in dgesv:', info + call abort + endif + ! c corresponds to the m_iter first solutions + c = zero(1:m_iter) + ! Debug + !print*,'c',c + !print*,'all_t' + !do i = 1, m + ! write(*,'(100(F10.6))') all_t(:,i) + !enddo + !print*,'all_err' + !do i = 1, m + ! write(*,'(100(F10.6))') all_err(:,i) + !enddo + + ! update T + !$OMP PARALLEL & + !$OMP SHARED(t,c,m,all_err,all_t,sze,m_iter) & + !$OMP PRIVATE(i,j,accu) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, sze + t(i) = 0d0 + enddo + !$OMP END DO + do i = 1, m_iter + !$OMP DO + do j = 1, sze + t(j) = t(j) + c(i) * (all_t(j,m+1-i) + all_err(j,m+1-i)) + enddo + !$OMP END DO + enddo + !$OMP END PARALLEL + + !print*,'new t',t + + deallocate(ipiv,B,c,zero) + +end +#+end_src + +** Update all err +#+begin_src f90 :comments org :tangle diis.irp.f +subroutine update_all_err(err,all_err,sze,m,iter) + + implicit none + + BEGIN_DOC + ! Shift all the err vectors of the previous iterations to add the new one + ! The last err vector is placed in the last position and all the others are + ! moved toward the first one. + END_DOC + + integer, intent(in) :: m, iter, sze + double precision, intent(in) :: err(sze) + double precision, intent(inout) :: all_err(sze,m) + integer :: i,j + integer :: m_iter + + m_iter = min(m,iter) + + ! Shift + !$OMP PARALLEL & + !$OMP SHARED(m,all_err,err,sze) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do i = 1, m-1 + !$OMP DO + do j = 1, sze + all_err(j,i) = all_err(j,i+1) + enddo + !$OMP END DO + enddo + + ! Debug + !print*,'shift err' + !do i = 1, m + ! print*,i, all_err(:,i) + !enddo + + ! New + !$OMP DO + do i = 1, sze + all_err(i,m) = err(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Debug + !print*,'Updated err' + !do i = 1, m + ! print*,i, all_err(:,i) + !enddo + +end +#+end_src + +** Update all t +#+begin_src f90 :comments org :tangle diis.irp.f +subroutine update_all_t(t,all_t,sze,m,iter) + + implicit none + + BEGIN_DOC + ! Shift all the t vectors of the previous iterations to add the new one + ! The last t vector is placed in the last position and all the others are + ! moved toward the first one. + END_DOC + + integer, intent(in) :: m, iter, sze + double precision, intent(in) :: t(sze) + double precision, intent(inout) :: all_t(sze,m) + integer :: i,j + integer :: m_iter + + m_iter = min(m,iter) + + ! Shift + !$OMP PARALLEL & + !$OMP SHARED(m,all_t,t,sze) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do i = 1, m-1 + !$OMP DO + do j = 1, sze + all_t(j,i) = all_t(j,i+1) + enddo + !$OMP END DO + enddo + + ! New + !$OMP DO + do i = 1, sze + all_t(i,m) = t(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Debug + !print*,'Updated t' + !do i = 1, m + ! print*,i, all_t(:,i) + !enddo + +end +#+end_src + +** Err +*** Err1 +#+begin_src f90 :comments org :tangle diis.irp.f +subroutine compute_err1(nO,nV,f_o,f_v,r1,err1) + + implicit none + + BEGIN_DOC + ! Compute the error vector for the t1 + END_DOC + + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r1(nO,nV) + + double precision, intent(out) :: err1(nO,nV) + + integer :: i,a + + !$OMP PARALLEL & + !$OMP SHARED(err1,r1,f_o,f_v,nO,nV,cc_level_shift) & + !$OMP PRIVATE(i,a) & + !$OMP DEFAULT(NONE) + !$OMP DO + do a = 1, nV + do i = 1, nO + err1(i,a) = - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +*** Err2 +#+begin_src f90 :comments org :tangle diis.irp.f +subroutine compute_err2(nO,nV,f_o,f_v,r2,err2) + + implicit none + + BEGIN_DOC + ! Compute the error vector for the t2 + END_DOC + + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r2(nO,nO,nV,nV) + + double precision, intent(out) :: err2(nO,nO,nV,nV) + + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(err2,r2,f_o,f_v,nO,nV,cc_level_shift) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + err2(i,j,a,b) = - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +* Gather call diis +** Update t +#+begin_src f90 :comments org :tangle diis.irp.f +subroutine update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: err1(:,:), err2(:,:,:,:) + double precision, allocatable :: tmp_err1(:), tmp_err2(:) + double precision, allocatable :: tmp_t1(:), tmp_t2(:) + + if (cc_update_method == 'diis') then + + allocate(err1(nO,nV), err2(nO,nO,nV,nV)) + allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV)) + allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV)) + + ! DIIS T1, it is not always good since the t1 can be small + ! That's why there is a call to update the t1 in the standard way + ! T1 error tensor + !call compute_err1(nO,nV,f_o,f_v,r1,err1) + ! Transfo errors and parameters in vectors + !tmp_err1 = reshape(err1,(/nO*nV/)) + !tmp_t1 = reshape(t1 ,(/nO*nV/)) + ! Add the error and parameter vectors with those of the previous iterations + !call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1) + !call update_all_t (tmp_t1 ,all_t1 ,nO*nV,cc_diis_depth,nb_iter+1) + ! Diis and reshape T as a tensor + !call diis_cc(all_err1,all_t1,nO*nV,cc_diis_depth,nb_iter+1,tmp_t1) + !t1 = reshape(tmp_t1 ,(/nO,nV/)) + call update_t1(nO,nV,f_o,f_v,r1,t1) + + ! DIIS T2 + ! T2 error tensor + call compute_err2(nO,nV,f_o,f_v,r2,err2) + ! Transfo errors and parameters in vectors + tmp_err2 = reshape(err2,(/nO*nO*nV*nV/)) + tmp_t2 = reshape(t2 ,(/nO*nO*nV*nV/)) + ! Add the error and parameter vectors with those of the previous iterations + call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t2 ,all_t2 ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + ! Diis and reshape T as a tensor + call diis_cc(all_err2,all_t2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t2) + t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/)) + + deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2) + + ! Standard update as T = T - Delta + elseif (cc_update_method == 'none') then + + call update_t1(nO,nV,f_o,f_v,r1,t1) + call update_t2(nO,nV,f_o,f_v,r2,t2) + + else + print*,'Unkonw cc_method_method: '//cc_update_method + endif + +end + #+end_src + +** Update t v2 +#+begin_src f90 :comments org :tangle diis.irp.f +subroutine update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(inout) :: all_err1(nO*nV, cc_diis_depth), all_err2(nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t1(nO*nV, cc_diis_depth), all_t2(nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: all_t(:,:), all_err(:,:), tmp_t(:) + double precision, allocatable :: err1(:,:), err2(:,:,:,:) + double precision, allocatable :: tmp_err1(:), tmp_err2(:) + double precision, allocatable :: tmp_t1(:), tmp_t2(:) + + integer :: i,j + + ! Allocate + allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + allocate(tmp_t(nO*nV+nO*nO*nV*nV)) + allocate(err1(nO,nV), err2(nO,nO,nV,nV)) + allocate(tmp_err1(nO*nV), tmp_err2(nO*nO*nV*nV)) + allocate(tmp_t1(nO*nV), tmp_t2(nO*nO*nV*nV)) + + ! Compute the errors and reshape them as vector + call compute_err1(nO,nV,f_o,f_v,r1,err1) + call compute_err2(nO,nV,f_o,f_v,r2,err2) + tmp_err1 = reshape(err1,(/nO*nV/)) + tmp_err2 = reshape(err2,(/nO*nO*nV*nV/)) + tmp_t1 = reshape(t1 ,(/nO*nV/)) + tmp_t2 = reshape(t2 ,(/nO*nO*nV*nV/)) + + ! Update the errors and parameters for the diis + call update_all_err(tmp_err1,all_err1,nO*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t1 ,all_t1 ,nO*nV,cc_diis_depth,nb_iter+1) + call update_all_err(tmp_err2,all_err2,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + call update_all_t (tmp_t2 ,all_t2 ,nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + ! Gather the different parameters and errors + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,all_err,all_err1,all_err2,cc_diis_depth,& + !$OMP all_t,all_t1,all_t2) & + !$OMP PRIVATE(i,j) & + !$OMP DEFAULT(NONE) + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nV + all_err(i,j) = all_err1(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nO*nV*nV + all_err(i+nO*nV,j) = all_err2(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nV + all_t(i,j) = all_t1(i,j) + enddo + !$OMP END DO NOWAIT + enddo + do j = 1, cc_diis_depth + !$OMP DO + do i = 1, nO*nO*nV*nV + all_t(i+nO*nV,j) = all_t2(i,j) + enddo + !$OMP END DO + enddo + !$OMP END PARALLEL + + ! Diis + call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp_t) + + ! Split the resulting vector + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp_t,tmp_t1,tmp_t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + tmp_t1(i) = tmp_t(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + tmp_t2(i) = tmp_t(i+nO*nV) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Reshape as tensors + t1 = reshape(tmp_t1 ,(/nO,nV/)) + t2 = reshape(tmp_t2 ,(/nO,nO,nV,nV/)) + + ! Deallocate + deallocate(tmp_t1,tmp_t2,tmp_err1,tmp_err2,err1,err2,all_t,all_err) + +end + #+end_src + + +** Update t v3 +#+begin_src f90 :comments org :tangle diis.irp.f +subroutine update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + + implicit none + + integer, intent(in) :: nO,nV,nb_iter + double precision, intent(in) :: f_o(nO), f_v(nV) + double precision, intent(in) :: r1(nO,nV), r2(nO,nO,nV,nV) + + double precision, intent(inout) :: t1(nO*nV), t2(nO*nO*nV*nV) + double precision, intent(inout) :: all_err(nO*nV+nO*nO*nV*nV, cc_diis_depth) + double precision, intent(inout) :: all_t(nO*nV+nO*nO*nV*nV, cc_diis_depth) + + double precision, allocatable :: tmp(:) + + integer :: i,j + + ! Allocate + allocate(tmp(nO*nV+nO*nO*nV*nV)) + + ! Compute the errors + call compute_err1(nO,nV,f_o,f_v,r1,tmp(1:nO*nV)) + call compute_err2(nO,nV,f_o,f_v,r2,tmp(nO*nV+1:nO*nV+nO*nO*nV*nV)) + + ! Update the errors and parameters for the diis + call update_all_err(tmp,all_err,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp,t1,t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + tmp(i) = t1(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + tmp(i+nO*nV) = t2(i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + call update_all_t(tmp,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1) + + ! Diis + call diis_cc(all_err,all_t,nO*nV+nO*nO*nV*nV,cc_diis_depth,nb_iter+1,tmp) + + ! Split the resulting vector + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tmp,t1,t2) & + !$OMP PRIVATE(i) & + !$OMP DEFAULT(NONE) + !$OMP DO + do i = 1, nO*nV + t1(i) = tmp(i) + enddo + !$OMP END DO NOWAIT + !$OMP DO + do i = 1, nO*nO*nV*nV + t2(i) = tmp(i+nO*nV) + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! Deallocate + deallocate(tmp) + +end + #+end_src + diff --git a/src/utils_cc/org/energy.org b/src/utils_cc/org/energy.org new file mode 100644 index 00000000..2ec5c8ef --- /dev/null +++ b/src/utils_cc/org/energy.org @@ -0,0 +1,15 @@ +#+begin_src f90 :comments org :tangle energy.irp.f +subroutine det_energy(det,energy) + + implicit none + + integer(bit_kind), intent(in) :: det + + double precision, intent(out) :: energy + + call i_H_j(det,det,N_int,energy) + + energy = energy + nuclear_repulsion + +end +#+end_src diff --git a/src/utils_cc/org/guess_t.org b/src/utils_cc/org/guess_t.org new file mode 100644 index 00000000..9e162242 --- /dev/null +++ b/src/utils_cc/org/guess_t.org @@ -0,0 +1,222 @@ +* Guess +** T1 +#+begin_src f90 :comments org :tangle guess_t.irp.f +subroutine guess_t1(nO,nV,f_o,f_v,f_ov,t1) + + implicit none + + BEGIN_DOC + ! Update the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), f_ov(nO,nV) + + ! inout + double precision, intent(out) :: t1(nO, nV) + + ! internal + integer :: i,a + + if (trim(cc_guess_t1) == 'none') then + t1 = 0d0 + else if (trim(cc_guess_t1) == 'MP') then + do a = 1, nV + do i = 1, nO + t1(i,a) = f_ov(i,a) / (f_o(i) - f_v(a) - cc_level_shift_guess) + enddo + enddo + else if (trim(cc_guess_t1) == 'read') then + call read_t1(nO,nV,t1) + else + print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t1) + call abort + endif + +end +#+end_src + +** T2 +#+begin_src f90 :comments org :tangle guess_t.irp.f +subroutine guess_t2(nO,nV,f_o,f_v,v_oovv,t2) + + implicit none + + BEGIN_DOC + ! Update the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), v_oovv(nO, nO, nV, nV) + + ! inout + double precision, intent(out) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b + + if (trim(cc_guess_t2) == 'none') then + t2 = 0d0 + else if (trim(cc_guess_t2) == 'MP') then + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + t2(i,j,a,b) = v_oovv(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift_guess) + enddo + enddo + enddo + enddo + else if (trim(cc_guess_t2) == 'read') then + call read_t2(nO,nV,t2) + else + print*, 'Unknown cc_guess_t1 type: '//trim(cc_guess_t2) + call abort + endif + +end +#+end_src + +* Write +** T1 +#+begin_src f90 :comments org :tangle guess_t.irp.f +subroutine write_t1(nO,nV,t1) + + implicit none + + BEGIN_DOC + ! Write the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO, nV) + + ! internal + integer :: i,a + + if (cc_write_t1) then + open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1') + do a = 1, nV + do i = 1, nO + write(11,'(F20.12)') t1(i,a) + enddo + enddo + close(11) + endif + +end +#+end_src + +** T2 +#+begin_src f90 :comments org :tangle guess_t.irp.f +subroutine write_t2(nO,nV,t2) + + implicit none + + BEGIN_DOC + ! Write the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b + + if (cc_write_t2) then + open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2') + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + write(11,'(F20.12)') t2(i,j,a,b) + enddo + enddo + enddo + enddo + close(11) + endif + +end +#+end_src + +* Read +** T1 +#+begin_src f90 :comments org :tangle guess_t.irp.f +subroutine read_t1(nO,nV,t1) + + implicit none + + BEGIN_DOC + ! Read the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(out) :: t1(nO, nV) + + ! internal + integer :: i,a + logical :: ok + + inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok) + if (.not. ok) then + print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1' + print*, 'Do a first calculation with cc_write_t1 = True' + print*, 'and cc_guess_t1 /= read before setting cc_guess_t1 = read' + call abort + endif + open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1') + do a = 1, nV + do i = 1, nO + read(11,'(F20.12)') t1(i,a) + enddo + enddo + close(11) + +end +#+end_src + +** T2 +#+begin_src f90 :comments org :tangle guess_t.irp.f +subroutine read_t2(nO,nV,t2) + + implicit none + + BEGIN_DOC + ! Read the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(out) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b + logical :: ok + + inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok) + if (.not. ok) then + print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1' + print*, 'Do a first calculation with cc_write_t2 = True' + print*, 'and cc_guess_t2 /= read before setting cc_guess_t2 = read' + call abort + endif + open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2') + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + read(11,'(F20.12)') t2(i,j,a,b) + enddo + enddo + enddo + enddo + close(11) + +end +#+end_src diff --git a/src/utils_cc/org/mo_integrals_cc.org b/src/utils_cc/org/mo_integrals_cc.org new file mode 100644 index 00000000..ff3d229c --- /dev/null +++ b/src/utils_cc/org/mo_integrals_cc.org @@ -0,0 +1,1305 @@ +* mo two e integrals +** Space +*** F +#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_f_space(det,n1,n2,list1,list2,f) + + implicit none + + integer, intent(in) :: n1,n2 + integer, intent(in) :: list1(n1),list2(n2) + integer(bit_kind), intent(in) :: det(N_int,2) + double precision, intent(out) :: f(n1,n2) + + double precision, allocatable :: tmp_F(:,:) + integer :: i1,i2,idx1,idx2 + + allocate(tmp_F(mo_num,mo_num)) + + call get_fock_matrix_spin(det,1,tmp_F) + + !$OMP PARALLEL & + !$OMP SHARED(tmp_F,f,n1,n2,list1,list2) & + !$OMP PRIVATE(idx1,idx2,i1,i2)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do i2 = 1, n2 + do i1 = 1, n1 + idx2 = list2(i2) + idx1 = list1(i1) + f(i1,i2) = tmp_F(idx1,idx2) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_F) + +end +#+end_src + +*** V +#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) + + implicit none + + integer, intent(in) :: n1,n2,n3,n4 + integer, intent(in) :: list1(n1),list2(n2),list3(n3),list4(n4) + double precision, intent(out) :: v(n1,n2,n3,n4) + + integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4 + double precision :: get_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL & + !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) & + !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + idx4 = list4(i4) + idx3 = list3(i3) + idx2 = list2(i2) + idx1 = list1(i1) + v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +** Provider space +*** V +**** full +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] + + implicit none + + integer :: i,j,k,l + double precision :: get_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL & + !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) & + !$OMP PRIVATE(i,j,k,l) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER +#+end_src +**** oooo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + +END_PROVIDER +#+end_src + +**** vooo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + +END_PROVIDER +#+end_src + +**** ovoo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + +END_PROVIDER +#+end_src + +**** oovo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + +END_PROVIDER +#+end_src + +**** ooov +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + +END_PROVIDER +#+end_src + +**** vvoo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + +END_PROVIDER +#+end_src + +**** vovo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + +END_PROVIDER +#+end_src + +**** voov +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + +END_PROVIDER +#+end_src + +**** ovvo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + +END_PROVIDER +#+end_src + +**** ovov +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + +END_PROVIDER +#+end_src + +**** oovv +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + +END_PROVIDER +#+end_src + +**** vvvo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_vvvo, (cc_nVa, cc_nVa, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_vvvo) + +END_PROVIDER +#+end_src + +**** vvov +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_vvov, (cc_nVa, cc_nVa, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nVa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_vvov) + +END_PROVIDER +#+end_src + +**** vovv +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_vovv, (cc_nVa, cc_nOa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_vovv) + +END_PROVIDER +#+end_src + +**** ovvv +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_ovvv, (cc_nOa, cc_nVa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_vir, cc_space_v_ovvv) + +END_PROVIDER +#+end_src + +**** vvvv +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,cc_nVa, cc_list_vir,cc_list_vir,cc_list_vir,cc_list_vir, cc_space_v_vvvv) + +END_PROVIDER +#+end_src + +**** ppqq +#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] + + implicit none + + BEGIN_DOC + ! integrals for general MOs (excepted core and deleted ones) + END_DOC + + integer :: p,q + double precision, allocatable :: tmp_v(:,:,:,:) + + allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo)) + + call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v) + + do q = 1, cc_n_mo + do p = 1, cc_n_mo + cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q) + enddo + enddo + + deallocate(tmp_v) + +END_PROVIDER +#+END_SRC + +**** aaii +#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_aaii, (cc_nVa,cc_nOa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: occupied MO + END_DOC + + integer :: a,i + + do i = 1, cc_nOa + do a = 1, cc_nVa + cc_space_v_aaii(a,i) = cc_space_v_vvoo(a,a,i,i) + enddo + enddo + + FREE cc_space_v_vvoo + +END_PROVIDER +#+END_SRC + +**** iiaa +#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_iiaa, (cc_nOa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: occupied MO + END_DOC + + integer :: a,i + + do a = 1, cc_nVa + do i = 1, cc_nOa + cc_space_v_iiaa(i,a) = cc_space_v_oovv(i,i,a,a) + enddo + enddo + + FREE cc_space_v_oovv + +END_PROVIDER +#+END_SRC + +**** iijj +#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_iijj, (cc_nOa,cc_nOa)] + + implicit none + + BEGIN_DOC + ! integrals + ! i,j: occupied MO + END_DOC + + integer :: i,j + + do j = 1, cc_nOa + do i = 1, cc_nOa + cc_space_v_iijj(i,j) = cc_space_v_oooo(i,i,j,j) + enddo + enddo + + FREE cc_space_v_oooo + +END_PROVIDER +#+END_SRC + +**** aabb +#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a,b: virtual MO + END_DOC + + integer :: a,b + + do b = 1, cc_nVa + do a = 1, cc_nVa + cc_space_v_aabb(a,b) = cc_space_v_vvvv(a,a,b,b) + enddo + enddo + + FREE cc_space_v_vvvv + +END_PROVIDER +#+END_SRC + +**** iaia +#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_iaia, (cc_nOa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: occupied MO + END_DOC + + integer :: a,i + + do a = 1, cc_nVa + do i = 1, cc_nOa + cc_space_v_iaia(i,a) = cc_space_v_ovov(i,a,i,a) + enddo + enddo + + FREE cc_space_v_ovov + +END_PROVIDER +#+END_SRC + +**** iaai +#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_iaai, (cc_nOa,cc_nVa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: inactive MO + END_DOC + + integer :: a,i + + do a = 1, cc_nVa + do i = 1, cc_nOa + cc_space_v_iaai(i,a) = cc_space_v_ovvo(i,a,a,i) + enddo + enddo + + FREE cc_space_v_ovvo + +END_PROVIDER +#+END_SRC + +**** aiia +#+BEGIN_SRC f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_v_aiia, (cc_nVa,cc_nOa)] + + implicit none + + BEGIN_DOC + ! integrals + ! a: virtual MO + ! i: inactive MO + END_DOC + + integer :: a,i + + do i = 1, cc_nOa + do a = 1, cc_nVa + cc_space_v_aiia(a,i) = cc_space_v_voov(a,i,i,a) + enddo + enddo + + FREE cc_space_v_voov + +END_PROVIDER +#+END_SRC + +*** W +**** oovv +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)] + + implicit none + + double precision, allocatable :: tmp_v(:,:,:,:) + integer :: i,j,a,b + + allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa)) + + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v) + + !$OMP PARALLEL & + !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_oovv) & + !$OMP PRIVATE(i,j,a,b)& + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, cc_nVa + do a = 1, cc_nVa + do j = 1, cc_nOa + do i = 1, cc_nOa + cc_space_w_oovv(i,j,a,b) = 2d0 * tmp_v(i,j,a,b) - tmp_v(j,i,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_v) + +END_PROVIDER +#+end_src + +**** vvoo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)] + + implicit none + + double precision, allocatable :: tmp_v(:,:,:,:) + integer :: i,j,a,b + + allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa)) + + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v) + + !$OMP PARALLEL & + !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_vvoo) & + !$OMP PRIVATE(i,j,a,b)& + !$OMP DEFAULT(NONE) + !$OMP DO + do j = 1, cc_nOa + do i = 1, cc_nOa + do b = 1, cc_nVa + do a = 1, cc_nVa + cc_space_w_vvoo(a,b,i,j) = 2d0 * tmp_v(a,b,i,j) - tmp_v(b,a,i,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_v) + +END_PROVIDER +#+end_src + +*** F +**** F_oo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_oo, (cc_nOa, cc_nOa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nOa, cc_list_occ,cc_list_occ, cc_space_f_oo) + +END_PROVIDER +#+end_src + +**** F_ov +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_ov, (cc_nOa, cc_nVa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nVa, cc_list_occ,cc_list_vir, cc_space_f_ov) + +END_PROVIDER +#+end_src + +**** F_vo +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_vo, (cc_nVa, cc_nOa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nOa, cc_list_vir,cc_list_occ, cc_space_f_vo) + +END_PROVIDER +#+end_src + +**** F_vv +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_vv, (cc_nVa, cc_nVa)] + + implicit none + + call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nVa, cc_list_vir,cc_list_vir, cc_space_f_vv) + +END_PROVIDER +#+end_src + +**** F_o +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_o, (cc_nOa)] + + implicit none + + integer :: i + + do i = 1, cc_nOa + cc_space_f_o(i) = cc_space_f_oo(i,i) + enddo + +END_PROVIDER +#+end_src + +**** F_v +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +BEGIN_PROVIDER [double precision, cc_space_f_v, (cc_nVa)] + + implicit none + + integer :: i + + do i = 1, cc_nVa + cc_space_f_v(i) = cc_space_f_vv(i,i) + enddo + +END_PROVIDER +#+end_src + +** Spin +*** Shift +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine shift_idx_spin(s,n_S,shift) + + implicit none + + BEGIN_DOC + ! Shift for the partitionning alpha/beta of the spin orbitals + ! n_S(1): number of spin alpha in the correspondong list + ! n_S(2): number of spin beta in the correspondong list + END_DOC + + integer, intent(in) :: s, n_S(2) + integer, intent(out) :: shift + + if (s == 1) then + shift = 0 + else + shift = n_S(1) + endif + +end +#+end_src + +*** F +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) + + implicit none + + BEGIN_DOC + ! Compute the Fock matrix corresponding to two lists of spin orbitals. + ! Ex: occ/occ, occ/vir,... + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + integer, intent(in) :: n1,n2, n1_S(2), n2_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2) + integer, intent(in) :: dim1, dim2 + + double precision, intent(out) :: f(dim1, dim2) + + double precision, allocatable :: tmp_F(:,:) + integer :: i,j, idx_i,idx_j,i_shift,j_shift + integer :: tmp_i,tmp_j + integer :: si,sj,s + + allocate(tmp_F(mo_num,mo_num)) + + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + s = si + sj + + if (s == 2 .or. s == 4) then + call get_fock_matrix_spin(det,sj,tmp_F) + else + do j = 1, mo_num + do i = 1, mo_num + tmp_F(i,j) = 0d0 + enddo + enddo + endif + + do tmp_j = 1, n2_S(sj) + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + f(idx_i,idx_j) = tmp_F(i,j) + enddo + enddo + + enddo + enddo + + deallocate(tmp_F) + +end +#+end_src + +*** Get F +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine get_fock_matrix_spin(det,s,f) + + implicit none + + BEGIN_DOC + ! Fock matrix alpha or beta of an arbitrary det + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + integer, intent(in) :: s + + double precision, intent(out) :: f(mo_num,mo_num) + + integer :: p,q,i,s1,s2 + integer(bit_kind) :: res(N_int,2) + logical :: ok + double precision :: mo_two_e_integral + + if (s == 1) then + s1 = 1 + s2 = 2 + else + s1 = 2 + s2 = 1 + endif + + !$OMP PARALLEL & + !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) & + !$OMP PRIVATE(p,q,ok,i,res)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do q = 1, mo_num + do p = 1, mo_num + f(p,q) = mo_one_e_integrals(p,q) + do i = 1, mo_num + call apply_hole(det, s1, i, res, ok, N_int) + if (ok) then + f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) + endif + enddo + do i = 1, mo_num + call apply_hole(det, s2, i, res, ok, N_int) + if (ok) then + f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) + endif + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +*** V +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3,dim4, v) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... + END_DOC + + integer, intent(in) :: n1,n2,n3,n4,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3, dim4 + double precision, intent(out) :: v(dim1,dim2,dim3,dim4) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_k,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + !$OMP PARALLEL & + !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & + !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & + !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& + !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v(idx_i,idx_j,idx_k,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + +end +#+end_src + +*** V_3idx +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_l) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... + END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_l,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_l(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_k + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_l <= n4_S(1)) then + sl = 1 + else + sl = 2 + endif + call shift_idx_spin(sl,n4_S,l_shift) + tmp_l = idx_l - l_shift + l = list4(tmp_l,sl) + + !$OMP PARALLEL & + !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & + !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & + !$OMP i,j,k,idx_i,idx_j,idx_k,& + !$OMP tmp_i,tmp_j,tmp_k)& + !$OMP DEFAULT(NONE) + + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end +#+end_src + +*** V_3idx_ij_l +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_k) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... + END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_k,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_k(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_j,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_k <= n3_S(1)) then + sk = 1 + else + sk = 2 + endif + call shift_idx_spin(sk,n3_S,k_shift) + tmp_k = idx_k - k_shift + k = list3(tmp_k,sk) + + !$OMP PARALLEL & + !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & + !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & + !$OMP i,j,l,idx_i,idx_j,idx_l,& + !$OMP tmp_i,tmp_j,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + j = list2(tmp_j,sj) + idx_j = tmp_j + j_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end +#+end_src + +*** V_3idx_i_kl +#+begin_src f90 :comments org :tangle mo_integrals_cc.irp.f +subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_j) + + implicit none + + BEGIN_DOC + ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... + END_DOC + + integer, intent(in) :: n1,n2,n3,n4,idx_j,n1_S(2),n2_S(2),n3_S(2),n4_S(2) + integer, intent(in) :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2) + integer, intent(in) :: dim1, dim2, dim3 + double precision, intent(out) :: v_j(dim1,dim2,dim3) + + double precision :: mo_two_e_integral + integer :: i,j,k,l,idx_i,idx_k,idx_l + integer :: i_shift,j_shift,k_shift,l_shift + integer :: tmp_i,tmp_j,tmp_k,tmp_l + integer :: si,sj,sk,sl,s + + PROVIDE cc_space_v + + if (idx_j <= n2_S(1)) then + sj = 1 + else + sj = 2 + endif + call shift_idx_spin(sj,n2_S,j_shift) + tmp_j = idx_j - j_shift + j = list2(tmp_j,sj) + + !$OMP PARALLEL & + !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & + !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & + !$OMP i,k,l,idx_i,idx_k,idx_l,& + !$OMP tmp_i,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) + enddo + enddo + enddo + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l) + enddo + enddo + enddo + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_i = 1, n1_S(si) + l = list4(tmp_l,sl) + idx_l = tmp_l + l_shift + k = list3(tmp_k,sk) + idx_k = tmp_k + k_shift + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_j(idx_i,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + + enddo + enddo + enddo + !$OMP END PARALLEL + +end +#+end_src + diff --git a/src/utils_cc/org/occupancy.org b/src/utils_cc/org/occupancy.org new file mode 100644 index 00000000..246bbd5b --- /dev/null +++ b/src/utils_cc/org/occupancy.org @@ -0,0 +1,341 @@ +* N spin orb +#+begin_src f90 :comments org :tangle occupancy.irp.f +subroutine extract_n_spin(det,n) + + implicit none + + BEGIN_DOC + ! Returns the number of occupied alpha, occupied beta, virtual alpha, virtual beta spin orbitals + ! in det without counting the core and deleted orbitals in the format n(nOa,nOb,nVa,nVb) + END_DOC + + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: n(4) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si + logical :: ok, is_core, is_del + + ! Init + n = 0 + + ! Loop over the spin + do si = 1, 2 + do i = 1, mo_num + call apply_hole(det, si, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + n(si) = n(si) + 1 + else + ! hole + n(si+2) = n(si+2) + 1 + endif + enddo + enddo + + !print*,n(1),n(2),n(3),n(4) + +end +#+end_src + +* List_orb +** Spin +#+begin_src f90 :comments org :tangle occupancy.irp.f +subroutine extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) + + implicit none + + BEGIN_DOC + ! Returns the the list of occupied alpha/beta, virtual alpha/beta spin orbitals + ! size(nO_m,1) must be max(nOa,nOb) and size(nV_m,1) must be max(nVa,nVb) + END_DOC + + integer, intent(in) :: nO_m, nV_m + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: list_occ(nO_m,2), list_vir(nV_m,2) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si, idx_o, idx_v, idx_i, idx_b + logical :: ok, is_core, is_del + + list_occ = 0 + list_vir = 0 + + ! List of occ/vir alpha/beta + + ! occ alpha -> list_occ(:,1) + ! occ beta -> list_occ(:,2) + ! vir alpha -> list_vir(:,1) + ! vir beta -> list_vir(:,2) + + ! Loop over the spin + do si = 1, 2 + ! tmp idx + idx_o = 1 + idx_v = 1 + do i = 1, mo_num + call apply_hole(det, si, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + list_occ(idx_o,si) = i + idx_o = idx_o + 1 + else + ! hole + list_vir(idx_v,si) = i + idx_v = idx_v + 1 + endif + enddo + enddo + +end +#+end_src + +** Space +#+begin_src f90 :comments org :tangle occupancy.irp.f +subroutine extract_list_orb_space(det,nO,nV,list_occ,list_vir) + + implicit none + + BEGIN_DOC + ! Returns the the list of occupied and virtual alpha spin orbitals + END_DOC + + integer, intent(in) :: nO, nV + integer(bit_kind), intent(in) :: det(N_int,2) + + integer, intent(out) :: list_occ(nO), list_vir(nV) + + integer(bit_kind) :: res(N_int,2) + integer :: i, si, idx_o, idx_v, idx_i, idx_b + logical :: ok, is_core, is_del + + if (elec_alpha_num /= elec_beta_num) then + print*,'Error elec_alpha_num /= elec_beta_num, impossible to create cc_list_occ and cc_list_vir, abort' + call abort + endif + + list_occ = 0 + list_vir = 0 + + ! List of occ/vir alpha + + ! occ alpha -> list_occ(:,1) + ! vir alpha -> list_vir(:,1) + + ! tmp idx + idx_o = 1 + idx_v = 1 + do i = 1, mo_num + call apply_hole(det, 1, i, res, ok, N_int) + + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + + if (ok) then + ! particle + list_occ(idx_o) = i + idx_o = idx_o + 1 + else + ! hole + list_vir(idx_v) = i + idx_v = idx_v + 1 + endif + enddo + +end +#+end_src + +** is_core +#+begin_src f90 :comments org :tangle occupancy.irp.f +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end +#+end_src + +** is_del +#+begin_src f90 :comments org :tangle occupancy.irp.f +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_del = .True. + exit + endif + enddo + +end +#+end_src + +* Providers +** N orb +#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f + BEGIN_PROVIDER [integer, cc_nO_m] +&BEGIN_PROVIDER [integer, cc_nOa] +&BEGIN_PROVIDER [integer, cc_nOb] +&BEGIN_PROVIDER [integer, cc_nOab] +&BEGIN_PROVIDER [integer, cc_nV_m] +&BEGIN_PROVIDER [integer, cc_nVa] +&BEGIN_PROVIDER [integer, cc_nVb] +&BEGIN_PROVIDER [integer, cc_nVab] +&BEGIN_PROVIDER [integer, cc_n_mo] +&BEGIN_PROVIDER [integer, cc_nO_S, (2)] +&BEGIN_PROVIDER [integer, cc_nV_S, (2)] + + implicit none + + BEGIN_DOC + ! Number of orbitals without core and deleted ones of the cc_ref det in psi_det + ! a: alpha, b: beta + ! nO_m: max(a,b) occupied + ! nOa: nb a occupied + ! nOb: nb b occupied + ! nOab: nb a+b occupied + ! nV_m: max(a,b) virtual + ! nVa: nb a virtual + ! nVb: nb b virtual + ! nVab: nb a+b virtual + END_DOC + + integer :: n_spin(4) + + ! Extract number of occ/vir alpha/beta spin orbitals + call extract_n_spin(psi_det(1,1,cc_ref),n_spin) + + cc_nOa = n_spin(1) + cc_nOb = n_spin(2) + cc_nOab = cc_nOa + cc_nOb !n_spin(1) + n_spin(2) + cc_nO_m = max(cc_nOa,cc_nOb) !max(n_spin(1), n_spin(2)) + cc_nVa = n_spin(3) + cc_nVb = n_spin(4) + cc_nVab = cc_nVa + cc_nVb !n_spin(3) + n_spin(4) + cc_nV_m = max(cc_nVa,cc_nVb) !max(n_spin(3), n_spin(4)) + cc_n_mo = cc_nVa + cc_nVb !n_spin(1) + n_spin(3) + cc_nO_S = (/cc_nOa,cc_nOb/) + cc_nV_S = (/cc_nVa,cc_nVb/) + +END_PROVIDER +#+end_src + +** List orb + +*** General +#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f + BEGIN_PROVIDER [integer, cc_list_gen, (cc_n_mo)] + + implicit none + + BEGIN_DOC + ! List of general orbitals without core and deleted ones + END_DOC + + integer :: i,j + logical :: is_core, is_del + + j = 1 + do i = 1, mo_num + ! in core ? + if (is_core(i)) cycle + ! in del ? + if (is_del(i)) cycle + cc_list_gen(j) = i + j = j+1 + enddo + +END_PROVIDER +#+end_src + +*** Space +#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f + BEGIN_PROVIDER [integer, cc_list_occ, (cc_nOa)] +&BEGIN_PROVIDER [integer, cc_list_vir, (cc_nVa)] + + implicit none + + BEGIN_DOC + ! List of occupied and virtual spatial orbitals without core and deleted ones + END_DOC + + call extract_list_orb_space(psi_det(1,1,cc_ref),cc_nOa,cc_nVa,cc_list_occ,cc_list_vir) + +END_PROVIDER +#+end_src + +*** Spin +#+BEGIN_SRC f90 :comments org :tangle occupancy.irp.f + BEGIN_PROVIDER [integer, cc_list_occ_spin, (cc_nO_m,2)] +&BEGIN_PROVIDER [integer, cc_list_vir_spin, (cc_nV_m,2)] +&BEGIN_PROVIDER [logical, cc_ref_is_open_shell] + + implicit none + + BEGIN_DOC + ! List of occupied and virtual spin orbitals without core and deleted ones + END_DOC + + integer :: i + + call extract_list_orb_spin(psi_det(1,1,cc_ref),cc_nO_m,cc_nV_m,cc_list_occ_spin,cc_list_vir_spin) + + cc_ref_is_open_shell = .False. + do i = 1, cc_nO_m + if (cc_list_occ_spin(i,1) /= cc_list_occ_spin(i,2)) then + cc_ref_is_open_shell = .True. + endif + enddo + + +END_PROVIDER +#+end_src diff --git a/src/utils_cc/org/phase.org b/src/utils_cc/org/phase.org new file mode 100644 index 00000000..5f67859c --- /dev/null +++ b/src/utils_cc/org/phase.org @@ -0,0 +1,178 @@ +#+begin_src f90 :comments org :notangle phase.irp.f +program run + implicit none + + integer :: n(2), degree1, degree2, exc(0:2,2,2) + integer, allocatable :: list_anni(:,:), list_crea(:,:) + double precision :: phase1, phase2 + integer :: h1,h2,p1,p2,s1,s2,i,j + + allocate(list_anni(N_int*bit_kind_size,2)) + allocate(list_crea(N_int*bit_kind_size,2)) + + do i = 1, N_det-1 + do j = i+1, N_det + !call print_det(psi_det(1,1,j),N_int) + call get_excitation(psi_det(1,1,i),psi_det(1,1,j),exc,degree1,phase1,N_int) + call decode_exc(exc,degree1,h1,p1,h2,p2,s1,s2) + !print*,'old',degree1,phase1 + !print*,'h1:',h1,'h2:',h2,'s1:',s1,'s2:',s2 + !print*,'p1:',p1,'p2:',p2 + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree1,N_int) + call get_excitation_general(psi_det(1,1,i),psi_det(1,1,j),degree2,n,list_anni,list_crea,phase2,N_int) + !print*,'new',degree2,phase2 + !print*,'ha:',list_anni(1:n(1),1),'hb',list_anni(1:n(2),2) + !print*,'pa:',list_crea(1:n(1),1),'pb',list_crea(1:n(2),2) + !print*,'' + if (degree1 /= degree2) then + print*,'Error degree:',degree1,degree2 + call abort + endif + if (degree1 <= 2 .and. phase1 /= phase2) then + print*,'Error phase',phase1,phase2 + call abort + endif + enddo + enddo + +end +#+end_src + +** phase +#+begin_src f90 :comments org :tangle phase.irp.f +subroutine get_phase_general(det1,det2,phase,degree,Nint) + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2) + double precision, intent(out) :: phase + integer, intent(out) :: degree + integer :: n(2) + integer, allocatable :: list_anni(:,:), list_crea(:,:) + + allocate(list_anni(N_int*bit_kind_size,2)) + allocate(list_crea(N_int*bit_kind_size,2)) + + call get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint) +end +#+end_src + +** Get excitation general +#+begin_src f90 :comments org :tangle phase.irp.f +subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint) + + use bitmasks + + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2) + double precision, intent(out) :: phase + integer, intent(out) :: list_crea(Nint*bit_kind_size,2) + integer, intent(out) :: list_anni(Nint*bit_kind_size,2) + integer, intent(out) :: degree, n(2) + + integer, allocatable :: l1(:,:), l2(:,:) + integer(bit_kind), allocatable :: det_crea(:,:), det_anni(:,:) + integer, allocatable :: pos_anni(:,:), pos_crea(:,:) + + integer :: n1(2),n2(2),n_crea(2),n_anni(2),i,j,k,d + + allocate(l1(Nint*bit_kind_size,2)) + allocate(l2(Nint*bit_kind_size,2)) + allocate(det_crea(Nint,2),det_anni(Nint,2)) + + ! 1 111010 + ! 2 110101 + ! + !not 1-> 000101 + ! 2 110101 + !and 000101 -> crea + ! + ! 1 111010 + !not 2-> 001010 + ! 001010 -> anni + + do j = 1, 2 + do i = 1, Nint + det_crea(i,j) = iand(not(det1(i,j)),det2(i,j)) + enddo + enddo + + do j = 1, 2 + do i = 1, Nint + det_anni(i,j) = iand(det1(i,j),not(det2(i,j))) + enddo + enddo + + call bitstring_to_list_ab(det1,l1,n1,Nint) + call bitstring_to_list_ab(det2,l2,n2,Nint) + call bitstring_to_list_ab(det_crea,list_crea,n_crea,Nint) + call bitstring_to_list_ab(det_anni,list_anni,n_anni,Nint) + + do i = 1, 2 + if (n_crea(i) /= n_anni(i)) then + print*,'Well, it seems we have a problem here...' + call abort + endif + enddo + + !1 11110011001 1 2 3 4 7 8 11 + !pos 1 2 3 4 5 6 7 + !2 11100101011 1 2 3 6 8 10 11 + !anni 00010010000 4 7 + !pos 4 5 + !crea 00000100010 6 10 + !pos 4 6 + !4 -> 6 pos(4 -> 4) + !7 -> 10 pos(5 -> 6) + + n = n_anni + degree = n_anni(1) + n_anni(2) + + allocate(pos_anni(max(n(1),n(2)),2)) + allocate(pos_crea(max(n(1),n(2)),2)) + + ! Search pos anni + do j = 1, 2 + k = 1 + do i = 1, n1(j) + if (l1(i,j) /= list_anni(k,j)) cycle + pos_anni(k,j) = i + k = k + 1 + enddo + enddo + + ! Search pos crea + do j = 1, 2 + k = 1 + do i = 1, n2(j) + if (l2(i,j) /= list_crea(k,j)) cycle + pos_crea(k,j) = i + k = k + 1 + enddo + enddo + + ! Distance between the ith anni and the ith crea op + ! By doing so there is no crossing between the different pairs of anni/crea + ! and the phase is determined by the sum of the distances + ! -> (-1)^{sum of the distances} + d = 0 + do j = 1, 2 + do i = 1, n(j) + d = d + abs(pos_anni(i,j) - pos_crea(i,j)) + enddo + enddo + + phase = dble((-1)**d) + + ! Debug + !print*,l2(1:n2(1),1) + !print*,l2(1:n2(2),2) + !!call print_det(det1,Nint) + !!call print_det(det2,Nint) + !print*,phase + !print*,'' +end +#+end_src + diff --git a/src/utils_cc/org/print_wf_qp_edit.org b/src/utils_cc/org/print_wf_qp_edit.org new file mode 100644 index 00000000..0f19ac76 --- /dev/null +++ b/src/utils_cc/org/print_wf_qp_edit.org @@ -0,0 +1,33 @@ +#+begin_src f90 :comments org :tangle print_wf_qp_edit.irp.f +program run + + implicit none + + read_wf = .true. + touch read_wf + + call print_wf_qp_edit() + +end +#+end_src + +#+begin_src f90 :comments org :tangle print_wf_qp_edit.irp.f +subroutine print_wf_qp_edit() + + implicit none + + BEGIN_DOC + ! Print the psi_det wave function up to n_det_qp_edit + END_DOC + + integer :: i + + do i = 1, n_det_qp_edit + print*,i + write(*,'(100(1pE12.4))') psi_coef(i,:) + call print_det(psi_det(1,1,i),N_int) + print*,'' + enddo + +end +#+end_src diff --git a/src/utils_cc/org/update_t.org b/src/utils_cc/org/update_t.org new file mode 100644 index 00000000..c0207b22 --- /dev/null +++ b/src/utils_cc/org/update_t.org @@ -0,0 +1,76 @@ +* T1 +#+begin_src f90 :comments org :tangle update_t.irp.f +subroutine update_t1(nO,nV,f_o,f_v,r1,t1) + + implicit none + + BEGIN_DOC + ! Update the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r1(nO, nV) + + ! inout + double precision, intent(inout) :: t1(nO, nV) + + ! internal + integer :: i,a + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) & + !$OMP PRIVATE(i,a) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do a = 1, nV + do i = 1, nO + t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + +* T2 +#+begin_src f90 :comments org :tangle update_t.irp.f +subroutine update_t2(nO,nV,f_o,f_v,r2,t2) + + implicit none + + BEGIN_DOC + ! Update the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r2(nO, nO, nV, nV) + + ! inout + double precision, intent(inout) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + t2(i,j,a,b) = t2(i,j,a,b) - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end +#+end_src + diff --git a/src/utils_cc/phase.irp.f b/src/utils_cc/phase.irp.f new file mode 100644 index 00000000..01b41f49 --- /dev/null +++ b/src/utils_cc/phase.irp.f @@ -0,0 +1,135 @@ +! phase + +subroutine get_phase_general(det1,det2,phase,degree,Nint) + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2) + double precision, intent(out) :: phase + integer, intent(out) :: degree + integer :: n(2) + integer, allocatable :: list_anni(:,:), list_crea(:,:) + + allocate(list_anni(N_int*bit_kind_size,2)) + allocate(list_crea(N_int*bit_kind_size,2)) + + call get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint) +end + +! Get excitation general + +subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,Nint) + + use bitmasks + + implicit none + + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2), det2(Nint,2) + double precision, intent(out) :: phase + integer, intent(out) :: list_crea(Nint*bit_kind_size,2) + integer, intent(out) :: list_anni(Nint*bit_kind_size,2) + integer, intent(out) :: degree, n(2) + + integer, allocatable :: l1(:,:), l2(:,:) + integer(bit_kind), allocatable :: det_crea(:,:), det_anni(:,:) + integer, allocatable :: pos_anni(:,:), pos_crea(:,:) + + integer :: n1(2),n2(2),n_crea(2),n_anni(2),i,j,k,d + + allocate(l1(Nint*bit_kind_size,2)) + allocate(l2(Nint*bit_kind_size,2)) + allocate(det_crea(Nint,2),det_anni(Nint,2)) + + ! 1 111010 + ! 2 110101 + ! + !not 1-> 000101 + ! 2 110101 + !and 000101 -> crea + ! + ! 1 111010 + !not 2-> 001010 + ! 001010 -> anni + + do j = 1, 2 + do i = 1, Nint + det_crea(i,j) = iand(not(det1(i,j)),det2(i,j)) + enddo + enddo + + do j = 1, 2 + do i = 1, Nint + det_anni(i,j) = iand(det1(i,j),not(det2(i,j))) + enddo + enddo + + call bitstring_to_list_ab(det1,l1,n1,Nint) + call bitstring_to_list_ab(det2,l2,n2,Nint) + call bitstring_to_list_ab(det_crea,list_crea,n_crea,Nint) + call bitstring_to_list_ab(det_anni,list_anni,n_anni,Nint) + + do i = 1, 2 + if (n_crea(i) /= n_anni(i)) then + print*,'Well, it seems we have a problem here...' + call abort + endif + enddo + + !1 11110011001 1 2 3 4 7 8 11 + !pos 1 2 3 4 5 6 7 + !2 11100101011 1 2 3 6 8 10 11 + !anni 00010010000 4 7 + !pos 4 5 + !crea 00000100010 6 10 + !pos 4 6 + !4 -> 6 pos(4 -> 4) + !7 -> 10 pos(5 -> 6) + + n = n_anni + degree = n_anni(1) + n_anni(2) + + allocate(pos_anni(max(n(1),n(2)),2)) + allocate(pos_crea(max(n(1),n(2)),2)) + + ! Search pos anni + do j = 1, 2 + k = 1 + do i = 1, n1(j) + if (l1(i,j) /= list_anni(k,j)) cycle + pos_anni(k,j) = i + k = k + 1 + enddo + enddo + + ! Search pos crea + do j = 1, 2 + k = 1 + do i = 1, n2(j) + if (l2(i,j) /= list_crea(k,j)) cycle + pos_crea(k,j) = i + k = k + 1 + enddo + enddo + + ! Distance between the ith anni and the ith crea op + ! By doing so there is no crossing between the different pairs of anni/crea + ! and the phase is determined by the sum of the distances + ! -> (-1)^{sum of the distances} + d = 0 + do j = 1, 2 + do i = 1, n(j) + d = d + abs(pos_anni(i,j) - pos_crea(i,j)) + enddo + enddo + + phase = dble((-1)**d) + + ! Debug + !print*,l2(1:n2(1),1) + !print*,l2(1:n2(2),2) + !!call print_det(det1,Nint) + !!call print_det(det2,Nint) + !print*,phase + !print*,'' +end diff --git a/src/utils_cc/print_wf_qp_edit.irp.f b/src/utils_cc/print_wf_qp_edit.irp.f new file mode 100644 index 00000000..1337621d --- /dev/null +++ b/src/utils_cc/print_wf_qp_edit.irp.f @@ -0,0 +1,29 @@ +program run + + implicit none + + read_wf = .true. + touch read_wf + + call print_wf_qp_edit() + +end + +subroutine print_wf_qp_edit() + + implicit none + + BEGIN_DOC + ! Print the psi_det wave function up to n_det_qp_edit + END_DOC + + integer :: i + + do i = 1, n_det_qp_edit + print*,i + write(*,'(100(1pE12.4))') psi_coef(i,:) + call print_det(psi_det(1,1,i),N_int) + print*,'' + enddo + +end diff --git a/src/utils_cc/update_t.irp.f b/src/utils_cc/update_t.irp.f new file mode 100644 index 00000000..dbd4f4bd --- /dev/null +++ b/src/utils_cc/update_t.irp.f @@ -0,0 +1,73 @@ +! T1 + +subroutine update_t1(nO,nV,f_o,f_v,r1,t1) + + implicit none + + BEGIN_DOC + ! Update the T1 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r1(nO, nV) + + ! inout + double precision, intent(inout) :: t1(nO, nV) + + ! internal + integer :: i,a + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) & + !$OMP PRIVATE(i,a) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(1) + do a = 1, nV + do i = 1, nO + t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! T2 + +subroutine update_t2(nO,nV,f_o,f_v,r2,t2) + + implicit none + + BEGIN_DOC + ! Update the T2 amplitudes for CC + END_DOC + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: f_o(nO), f_v(nV), r2(nO, nO, nV, nV) + + ! inout + double precision, intent(inout) :: t2(nO, nO, nV, nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + t2(i,j,a,b) = t2(i,j,a,b) - r2(i,j,a,b) / (f_o(i) + f_o(j) - f_v(a) - f_v(b) - cc_level_shift) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end diff --git a/src/utils_trust_region/README.md b/src/utils_trust_region/README.md new file mode 100644 index 00000000..72bfefef --- /dev/null +++ b/src/utils_trust_region/README.md @@ -0,0 +1,11 @@ +# Utils trust region + +The documentation can be found in the org files. + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` diff --git a/src/utils_trust_region/README.rst b/src/utils_trust_region/README.rst deleted file mode 100644 index 6a0689b6..00000000 --- a/src/utils_trust_region/README.rst +++ /dev/null @@ -1,5 +0,0 @@ -============ -trust_region -============ - -The documentation can be found in the org files. diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f index eac17275..933d8eff 100644 --- a/src/utils_trust_region/algo_trust.irp.f +++ b/src/utils_trust_region/algo_trust.irp.f @@ -133,19 +133,19 @@ ! | must_exit | logical | If the program must exit the loop | -subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) +subroutine trust_region_step_w_expected_e(n,n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the step and the expected criterion/energy after the step - END_DOC + !END_DOC implicit none ! in - integer, intent(in) :: n, nb_iter - double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) + integer, intent(in) :: n,n2, nb_iter + double precision, intent(in) :: H(n,n2), W(n,n2), v_grad(n) double precision, intent(in) :: rho, prev_criterion ! inout @@ -160,9 +160,9 @@ subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho, must_exit = .False. - call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) + call trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,W,x,delta) - call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) + call trust_region_expected_e(n,n2,v_grad,H,x,prev_criterion,criterion_model) ! exit if DABS(prev_criterion - criterion_model) < 1d-12 if (DABS(prev_criterion - criterion_model) < thresh_model) then @@ -210,9 +210,9 @@ subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, cri include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute if the step should be cancelled - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f index e274ec11..a313769d 100644 --- a/src/utils_trust_region/apply_mo_rotation.irp.f +++ b/src/utils_trust_region/apply_mo_rotation.irp.f @@ -25,9 +25,9 @@ subroutine apply_mo_rotation(R,prev_mos) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the new MOs knowing the rotation matrix - END_DOC + !END_DOC implicit none @@ -60,12 +60,12 @@ subroutine apply_mo_rotation(R,prev_mos) prev_mos = mo_coef mo_coef = new_mos - !if (debug) then - ! print*,'New mo_coef : ' - ! do i = 1, mo_num - ! write(*,'(100(F10.5))') mo_coef(i,:) - ! enddo - !endif + if (debug) then + print*,'New mo_coef : ' + do i = 1, mo_num + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif ! Save the new MOs and change the label mo_label = 'MCSCF' diff --git a/src/utils_trust_region/org/TANGLE_org_mode.sh b/src/utils_trust_region/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/utils_trust_region/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/org/algo_trust.org similarity index 96% rename from src/utils_trust_region/algo_trust.org rename to src/utils_trust_region/org/algo_trust.org index aa836f98..01e99c29 100644 --- a/src/utils_trust_region/algo_trust.org +++ b/src/utils_trust_region/org/algo_trust.org @@ -132,19 +132,19 @@ Output: | must_exit | logical | If the program must exit the loop | #+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) +subroutine trust_region_step_w_expected_e(n,n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the step and the expected criterion/energy after the step - END_DOC + !END_DOC implicit none ! in - integer, intent(in) :: n, nb_iter - double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) + integer, intent(in) :: n,n2, nb_iter + double precision, intent(in) :: H(n,n2), W(n,n2), v_grad(n) double precision, intent(in) :: rho, prev_criterion ! inout @@ -159,9 +159,9 @@ subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho, must_exit = .False. - call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) + call trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,W,x,delta) - call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) + call trust_region_expected_e(n,n2,v_grad,H,x,prev_criterion,criterion_model) ! exit if DABS(prev_criterion - criterion_model) < 1d-12 if (DABS(prev_criterion - criterion_model) < thresh_model) then @@ -208,9 +208,9 @@ subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, cri include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute if the step should be cancelled - END_DOC + !END_DOC implicit none @@ -310,7 +310,7 @@ subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) print*,'-----------------------------' ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + call trust_region_step_w_expected_e(tmp_n,tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) if (must_exit) then @@ -489,7 +489,7 @@ subroutine algo_trust_cartesian_template(tmp_n) print*,'-----------------------------' ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + call trust_region_step_w_expected_e(tmp_n,tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) if (must_exit) then diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/org/apply_mo_rotation.org similarity index 92% rename from src/utils_trust_region/apply_mo_rotation.org rename to src/utils_trust_region/org/apply_mo_rotation.org index 918581b7..955997e9 100644 --- a/src/utils_trust_region/apply_mo_rotation.org +++ b/src/utils_trust_region/org/apply_mo_rotation.org @@ -25,9 +25,9 @@ subroutine apply_mo_rotation(R,prev_mos) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the new MOs knowing the rotation matrix - END_DOC + !END_DOC implicit none @@ -60,12 +60,12 @@ subroutine apply_mo_rotation(R,prev_mos) prev_mos = mo_coef mo_coef = new_mos - !if (debug) then - ! print*,'New mo_coef : ' - ! do i = 1, mo_num - ! write(*,'(100(F10.5))') mo_coef(i,:) - ! enddo - !endif + if (debug) then + print*,'New mo_coef : ' + do i = 1, mo_num + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif ! Save the new MOs and change the label mo_label = 'MCSCF' diff --git a/src/utils_trust_region/mat_to_vec_index.org b/src/utils_trust_region/org/mat_to_vec_index.org similarity index 100% rename from src/utils_trust_region/mat_to_vec_index.org rename to src/utils_trust_region/org/mat_to_vec_index.org diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/org/rotation_matrix.org similarity index 97% rename from src/utils_trust_region/rotation_matrix.org rename to src/utils_trust_region/org/rotation_matrix.org index 73ba0298..3b2ff437 100644 --- a/src/utils_trust_region/rotation_matrix.org +++ b/src/utils_trust_region/org/rotation_matrix.org @@ -61,10 +61,10 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) implicit none - BEGIN_DOC + !BEGIN_DOC ! Rotation matrix to rotate the molecular orbitals. ! If the rotation is too large the transformation is not unitary and must be cancelled. - END_DOC + !END_DOC include 'pi.h' @@ -188,7 +188,7 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) endif enddo enddo - print*,'max element in A', max_elem_A + !print*,'max element in A', max_elem_A if (ABS(max_elem_A) > 2 * pi) then print*,'' @@ -220,18 +220,16 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) lwork = 3*n-1 allocate(work(lwork,n)) - print*,'Starting diagonalization ...' + !print*,'Starting diagonalization ...' call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) deallocate(work) - if (info2 == 0) then - print*, 'Diagonalization : Done' - elseif (info2 < 0) then + if (info2 < 0) then print*, 'WARNING: error in the diagonalization' print*, 'Illegal value of the ', info2,'-th parameter' - else + elseif (info2 >0) then print*, "WARNING: Diagonalization failed to converge" endif #+END_SRC @@ -308,7 +306,7 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) max_elem = tau_m1(i,i) endif enddo - print*,'max elem tau^-1:', max_elem + !print*,'max elem tau^-1:', max_elem ! Debug !print*,'eigenvalues:' @@ -380,7 +378,7 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) norm = dnrm2(n*n,RR_t,1) - print*, 'Rotation matrix check, norm R.R^T = ', norm + !print*, 'Rotation matrix check, norm R.R^T = ', norm ! Debug !if (debug) then @@ -404,9 +402,9 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) enddo print*, 'Max error in R.R^T:', max_elem - print*, 'e_val(1):', e_val(1) - print*, 'e_val(n):', e_val(n) - print*, 'max elem in A:', max_elem_A + !print*, 'e_val(1):', e_val(1) + !print*, 'e_val(n):', e_val(n) + !print*, 'max elem in A:', max_elem_A if (ABS(max_elem) > 1d-12) then print*, 'WARNING: max error in R.R^T > 1d-12' diff --git a/src/utils_trust_region/org/rotation_matrix_iterative.org b/src/utils_trust_region/org/rotation_matrix_iterative.org new file mode 100644 index 00000000..f6cc9909 --- /dev/null +++ b/src/utils_trust_region/org/rotation_matrix_iterative.org @@ -0,0 +1,136 @@ +* Rotation matrix with the iterative method + +\begin{align*} +\textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k +\end{align*} + +!!! Doesn't work !!! + +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f +subroutine rotation_matrix_iterative(m,X,R) + + implicit none + + ! in + integer, intent(in) :: m + double precision, intent(in) :: X(m,m) + + ! out + double precision, intent(out) :: R(m,m) + + ! internal + double precision :: max_elem, pre_factor + double precision :: t1,t2,t3 + integer :: k,l,i,j + logical :: not_converged + double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) + + ! Functions + integer :: factorial + + print*,'---rotation_matrix_iterative---' + call wall_time(t1) + + allocate(RRT(m,m),A(m,m),B(m,m)) + + ! k = 0 + R = 0d0 + do i = 1, m + R(i,i) = 1d0 + enddo + + ! k = 1 + R = R + X + + k = 2 + + not_converged = .True. + + do while (not_converged) + + pre_factor = 1d0/DBLE(factorial(k)) + if (pre_factor < 1d-15) then + print*,'pre factor=', pre_factor,'< 1d-15, exit' + exit + endif + + A = X + B = 0d0 + do l = 1, k-1 + call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) + A = B + enddo + + !print*,'B' + !do i = 1, m + ! print*,B(i,:) * 1d0/DBLE(factorial(k)) + !enddo + + R = R + pre_factor * B + + k = k + 1 + call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) + + !print*,'R' + !do i = 1, m + ! write(*,'(10(E12.5))') R(i,:) + !enddo + + do i = 1, m + RRT(i,i) = RRT(i,i) - 1d0 + enddo + + !print*,'RRT' + !do i = 1, m + ! write(*,'(10(E12.5))') RRT(i,:) + !enddo + + max_elem = 0d0 + do j = 1, m + do i = 1, m + if (dabs(RRT(i,j)) > max_elem) then + max_elem = dabs(RRT(i,j)) + endif + enddo + enddo + + print*, 'Iteration:', k + print*, 'Max error in R:', max_elem + + if (max_elem < 1d-12) then + not_converged = .False. + endif + + enddo + + deallocate(RRT,A,B) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rotation matrix iterative:', t3 + print*,'---End roration_matrix_iterative---' + + +print*,'Does not work yet, abort' +call abort + +end +#+END_SRC + +** Factorial +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f +function factorial(n) + + implicit none + + integer, intent(in) :: n + integer :: factorial, k + + factorial = 1 + + do k = 1, n + factorial = factorial * k + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/org/sub_to_full_rotation_matrix.org similarity index 98% rename from src/utils_trust_region/sub_to_full_rotation_matrix.org rename to src/utils_trust_region/org/sub_to_full_rotation_matrix.org index 16434dc8..f0cf0bfc 100644 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.org +++ b/src/utils_trust_region/org/sub_to_full_rotation_matrix.org @@ -32,9 +32,9 @@ Internal: #+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - BEGIN_DOC + !BEGIN_DOC ! Compute the full rotation matrix from a smaller one - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/org/trust_region_expected_e.org similarity index 66% rename from src/utils_trust_region/trust_region_expected_e.org rename to src/utils_trust_region/org/trust_region_expected_e.org index 58c8f804..9d2868fa 100644 --- a/src/utils_trust_region/trust_region_expected_e.org +++ b/src/utils_trust_region/org/trust_region_expected_e.org @@ -10,11 +10,12 @@ E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \t \end{align*} Input: -| n | integer | m*(m-1)/2 | -| v_grad(n) | double precision | gradient | -| H(n,n) | double precision | hessian | -| x(n) | double precision | Step in the trust region | -| prev_energy | double precision | previous energy | +| n | integer | m*(m-1)/2 | +| n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| v_grad(n) | double precision | gradient | +| H(n,n) | double precision | hessian | +| x(n) | double precision | Step in the trust region | +| prev_energy | double precision | previous energy | Output: | e_model | double precision | predicted energy after the rotation of the MOs | @@ -29,21 +30,21 @@ Function: | ddot | double precision | dot product (Lapack) | #+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f -subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) +subroutine trust_region_expected_e(n,n2,v_grad,H,x,prev_energy,e_model) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the expected criterion/energy after the application of the step x - END_DOC + !END_DOC implicit none ! Variables ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n),H(n,n),x(n) + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n),H(n,n2),x(n) double precision, intent(in) :: prev_energy ! out @@ -80,27 +81,34 @@ TODO: remove the dot products part_1 = ddot(n,v_grad,1,x,1) !if (debug) then - print*,'g.x : ', part_1 - !endif - + ! print*,'g.x : ', part_1 + !endif + ! Product H.x - call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + if (n == n2) then + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + else + ! If the hessian is diagonal + do i = 1, n + part_2a(i) = H(i,1) * x(i) + enddo + endif ! Product 1/2 . x^T.H.x part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) !if (debug) then - print*,'1/2*x^T.H.x : ', part_2 + ! print*,'1/2*x^T.H.x : ', part_2 !endif - print*,'prev_energy', prev_energy ! Sum e_model = prev_energy + part_1 + part_2 ! Writing the predicted energy - print*, 'Predicted energy after the rotation : ', e_model - print*, 'Previous energy - predicted energy:', prev_energy - e_model + print*, 'prev_energy: ', prev_energy + print*, 'Predicted energy after the rotation:', e_model + print*, 'Previous energy - predicted energy: ', prev_energy - e_model ! Can be deleted, already in another subroutine if (DABS(prev_energy - e_model) < 1d-12 ) then @@ -115,7 +123,6 @@ TODO: remove the dot products print*,'Time in trust e model:', t3 print*,'---End trust_e_model---' - print*,'' end subroutine #+END_SRC diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/org/trust_region_optimal_lambda.org similarity index 94% rename from src/utils_trust_region/trust_region_optimal_lambda.org rename to src/utils_trust_region/org/trust_region_optimal_lambda.org index b39c9a10..ff454cb6 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.org +++ b/src/utils_trust_region/org/trust_region_optimal_lambda.org @@ -153,9 +153,9 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Research the optimal lambda to constrain the step size in the trust region - END_DOC + !END_DOC implicit none @@ -195,18 +195,17 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) print*,'' print*,'---Trust_newton---' - print*,'' call wall_time(t1) ! version_lambda_search ! 1 -> ||x||^2 - delta^2 = 0, ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) - if (version_lambda_search == 1) then - print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' - else - print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' - endif + !if (version_lambda_search == 1) then + ! print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + !else + ! print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + !endif ! Version 2 is normally better #+END_SRC @@ -215,21 +214,21 @@ Resolution with the Newton method: #+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f ! Initialization epsilon = 1d-4 - lambda =MAX(0d0, -e_val(1)) + lambda = max(0d0, -e_val(1)) ! Pre research of lambda to start near the optimal lambda ! by adding a constant epsilon and changing the constant to ! have ||x(lambda + epsilon)|| ~ delta, before setting ! lambda = lambda + epsilon - print*, 'Pre research of lambda:' - print*,'Initial lambda =', lambda + !print*, 'Pre research of lambda:' + !print*,'Initial lambda =', lambda f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + !print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta i = 1 ! To increase lambda if (f_N > delta**2) then - print*,'Increasing lambda...' + !print*,'Increasing lambda...' do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) ! Update the previous norm @@ -239,7 +238,7 @@ Resolution with the Newton method: ! New norm f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta ! Security if (prev_f_N < f_N) then @@ -253,7 +252,7 @@ Resolution with the Newton method: ! To reduce lambda else - print*,'Reducing lambda...' + !print*,'Reducing lambda...' do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) ! Update the previous norm @@ -263,7 +262,7 @@ Resolution with the Newton method: ! New norm f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta ! Security if (prev_f_N > f_N) then @@ -276,27 +275,25 @@ Resolution with the Newton method: enddo endif - print*,'End of the pre research of lambda' + !print*,'End of the pre research of lambda' ! New value of lambda lambda = lambda + epsilon - print*, 'e_val(1):', e_val(1) - print*, 'Staring point, lambda =', lambda + !print*, 'e_val(1):', e_val(1) + !print*, 'Staring point, lambda =', lambda ! thresh_cc, threshold for the research of the optimal lambda ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc ! thresh_rho_2, threshold to cancel the step in the research ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 - print*,'Threshold for the CC:', thresh_cc - print*,'Threshold for rho_2:', thresh_rho_2 - - print*, 'w_1^T . g =', tmp_wtg(1) + + !print*,'Threshold for the CC:', thresh_cc + !print*,'Threshold for rho_2:', thresh_rho_2 + !print*, 'w_1^T . g =', tmp_wtg(1) ! Debug - !if (debug) then - ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - !endif + !print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' ! Initialization i = 1 @@ -323,9 +320,9 @@ Resolution with the Newton method: ! Newton's method do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) - print*,'--------------------------------------' - print*,'Research of lambda, iteration:', i - print*,'--------------------------------------' + !print*,'--------------------------------------' + !print*,'Research of lambda, iteration:', i + !print*,'--------------------------------------' ! Update of f_N, f_R and the derivatives prev_f_N = f_N @@ -338,7 +335,7 @@ Resolution with the Newton method: d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 endif - write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 ! Newton's step y = -(1d0/DABS(d_2))*d_1 @@ -347,7 +344,7 @@ Resolution with the Newton method: if (DABS(y) > alpha) then y = alpha * (y/DABS(y)) ! preservation of the sign of y endif - write(*,'(a,E12.5)') ' Step length: ', y + !write(*,'(a,E12.5)') ' Step length: ', y ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 @@ -355,8 +352,8 @@ Resolution with the Newton method: ! Updates lambda prev_lambda = lambda lambda = prev_lambda + y - print*,'prev lambda:', prev_lambda - print*,'new lambda:', lambda + !print*,'prev lambda:', prev_lambda + !print*,'new lambda:', lambda ! Checks if lambda is in (-h_1, \infty) if (lambda > MAX(0d0, -e_val(1))) then @@ -370,18 +367,18 @@ Resolution with the Newton method: f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 endif - if (version_lambda_search == 1) then - print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R - print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R - print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model - else - print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R - print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R - print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model - endif + !if (version_lambda_search == 1) then + ! print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + ! print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + ! print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + !else + ! print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + ! print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + ! print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + !endif - print*,'previous - actual:', prev_f_R - f_R - print*,'previous - model:', prev_f_R - model + !print*,'previous - actual:', prev_f_R - f_R + !print*,'previous - model:', prev_f_R - model ! Check the gain if (DABS(prev_f_R - model) < thresh_model_2) then @@ -400,10 +397,10 @@ Resolution with the Newton method: ! Computes rho_2 rho_2 = (prev_f_R - f_R)/(prev_f_R - model) - print*,'rho_2:', rho_2 + !print*,'rho_2:', rho_2 else rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) - print*,'lambda < -e_val(1) ===> rho_2 = 0' + !print*,'lambda < -e_val(1) ===> rho_2 = 0' endif ! Evolution of the trust length, alpha @@ -416,20 +413,20 @@ Resolution with the Newton method: else alpha = 0.25d0 * alpha endif - write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha ! cancellaion of the step if rho < 0.1 if (rho_2 < thresh_rho_2) then !0.1d0) then lambda = prev_lambda f_N = prev_f_N - print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + !print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' endif - print*,'' - print*,'lambda, ||x||, delta:' - print*, lambda, dsqrt(f_N), delta - print*,'CC:', DABS(1d0 - f_N/delta**2) - print*,'' + !print*,'' + !print*,'lambda, ||x||, delta:' + !print*, lambda, dsqrt(f_N), delta + !print*,'CC:', DABS(1d0 - f_N/delta**2) + !print*,'' i = i + 1 enddo @@ -444,20 +441,19 @@ Resolution with the Newton method: print*,'' endif - print*,'Number of iterations :', i - print*,'Value of lambda :', lambda - print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 - print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 - print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + print*,'Number of iterations:', i + print*,'Value of lambda:', lambda + !print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Convergence criterion:', 1d0-f_N/delta**2 + !print*,'Error on the trust region (||x||^2 - delta^2)^2):', (f_N - delta**2)**2 + !print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 ! Time call wall_time(t2) t3 = t2 - t1 print*,'Time in trust_newton:', t3 - print*,'' print*,'---End trust_newton---' - print*,'' end subroutine #+END_SRC @@ -508,9 +504,9 @@ function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -645,9 +641,9 @@ function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -794,9 +790,9 @@ function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute ||x(lambda)||^2 - END_DOC + !END_DOC implicit none @@ -906,9 +902,9 @@ function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1004,9 +1000,9 @@ function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1107,9 +1103,9 @@ function f_norm_trust_region(n,e_val,tmp_wtg,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute ||x(lambda)||^2 - END_DOC + !END_DOC implicit none @@ -1190,9 +1186,9 @@ function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1347,9 +1343,9 @@ function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1506,9 +1502,9 @@ function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1601,9 +1597,9 @@ function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/org/trust_region_rho.org similarity index 85% rename from src/utils_trust_region/trust_region_rho.org rename to src/utils_trust_region/org/trust_region_rho.org index 9b25ee29..b669da8c 100644 --- a/src/utils_trust_region/trust_region_rho.org +++ b/src/utils_trust_region/org/trust_region_rho.org @@ -47,9 +47,9 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute rho, the agreement between the predicted criterion/energy and the real one - END_DOC + !END_DOC implicit none @@ -69,7 +69,7 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) print*,'' print*,'---Rho_model---' - call wall_time(t1) + !call wall_time(t1) #+END_SRC ** Rho @@ -93,31 +93,30 @@ If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) rho = (prev_energy - energy) / (prev_energy - e_model) - print*, 'previous energy, prev_energy :', prev_energy - print*, 'predicted energy, e_model :', e_model - print*, 'real energy, energy :', energy - print*, 'prev_energy - energy :', prev_energy - energy - print*, 'prev_energy - e_model :', prev_energy - e_model - print*, 'Rho :', rho - print*, 'Threshold for rho:', thresh_rho + !print*, 'previous energy, prev_energy:', prev_energy + !print*, 'predicted energy, e_model:', e_model + !print*, 'real energy, energy:', energy + !print*, 'prev_energy - energy:', prev_energy - energy + !print*, 'prev_energy - e_model:', prev_energy - e_model + print*, 'Rho:', rho + !print*, 'Threshold for rho:', thresh_rho ! Modification of prev_energy in function of rho if (rho < thresh_rho) then !0.1) then ! the step is cancelled print*, 'Rho <', thresh_rho,', the previous energy does not changed' - print*, 'prev_energy :', prev_energy + !print*, 'prev_energy :', prev_energy else ! the step is accepted prev_energy = energy - print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy:', energy endif - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rho model:', t3 + !call wall_time(t2) + !t3 = t2 - t1 + !print*,'Time in rho model:', t3 print*,'---End rho_model---' - print*,'' end subroutine #+END_SRC diff --git a/src/utils_trust_region/trust_region_step.org b/src/utils_trust_region/org/trust_region_step.org similarity index 89% rename from src/utils_trust_region/trust_region_step.org rename to src/utils_trust_region/org/trust_region_step.org index 331453a3..0e5f090f 100644 --- a/src/utils_trust_region/trust_region_step.org +++ b/src/utils_trust_region/org/trust_region_step.org @@ -341,9 +341,10 @@ Provided: Cf. qp_edit in orbital optimization section, for some constants/thresholds Input: -| m | integer | number of MOs | +| m | integer | number of MOs | | n | integer | m*(m-1)/2 | -| H(n, n) | double precision | hessian | +| n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| H(n,n2) | double precision | hessian | | v_grad(n) | double precision | gradient | | e_val(n) | double precision | eigenvalues of the hessian | | W(n, n) | double precision | eigenvectors of the hessian | @@ -371,23 +372,23 @@ Function: | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) +subroutine trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,w,x,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compuet the step in the trust region - END_DOC + !END_DOC implicit none ! Variables ! in - integer, intent(in) :: n + integer, intent(in) :: n,n2 double precision, intent(in) :: v_grad(n), rho integer, intent(inout) :: nb_iter - double precision, intent(in) :: e_val(n), w(n,n) + double precision, intent(in) :: e_val(n), w(n,n2) ! inout double precision, intent(inout) :: delta @@ -434,11 +435,19 @@ avoid_saddle is actually a test to avoid saddle points ! List of w^T.g, to avoid the recomputation tmp_wtg = 0d0 - do j = 1, n - do i = 1, n - tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + if (n == n2) then + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo enddo - enddo + else + ! For the diagonal case + do j = 1, n + k = int(w(j,1)+1d-15) + tmp_wtg(j) = v_grad(k) + enddo + endif ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue ! in the case of avoid_saddle @@ -465,18 +474,18 @@ avoid_saddle is actually a test to avoid saddle points tmp_wtg(1) = 0d0 endif - endif + endif ! Norm^2 of x, ||x||^2 norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm ! Anyway if the step is too big it will be reduced - print*,'||x||^2 :', norm2_x + !print*,'||x||^2 :', norm2_x ! Norm^2 of the gradient, ||v_grad||^2 norm2_g = (dnrm2(n,v_grad,1))**2 - print*,'||grad||^2 :', norm2_g + !print*,'||grad||^2 :', norm2_g #+END_SRC *** Trust radius initialization @@ -530,7 +539,7 @@ To avoid too big trust region we put a maximum size. delta = 1d10 endif - print*, 'Delta :', delta + !print*, 'Delta :', delta #+END_SRC *** Calculation of the optimal lambda @@ -550,26 +559,26 @@ You will find more details at the beginning ! Research of lambda to solve ||x(lambda)|| = Delta ! Display - print*, 'e_val(1) = ', e_val(1) - print*, 'w_1^T.g =', tmp_wtg(1) + !print*, 'e_val(1) = ', e_val(1) + !print*, 'w_1^T.g =', tmp_wtg(1) ! H positive definite if (e_val(1) > - thresh_eig) then norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - print*, '||x(0)||=', dsqrt(norm2_x) - print*, 'Delta=', delta + !print*, '||x(0)||=', dsqrt(norm2_x) + !print*, 'Delta=', delta ! H positive definite, ||x(lambda = 0)|| <= Delta if (dsqrt(norm2_x) <= delta) then - print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' - print*, 'lambda = 0, no lambda optimization' + !print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + !print*, 'lambda = 0, no lambda optimization' lambda = 0d0 ! H positive definite, ||x(lambda = 0)|| > Delta else ! Constraint solution - print*, 'H positive definite, ||x(lambda = 0)|| > Delta' - print*,'Computation of the optimal lambda...' + !print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + !print*,'Computation of the optimal lambda...' call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) endif @@ -577,14 +586,14 @@ You will find more details at the beginning else if (DABS(tmp_wtg(1)) < thresh_wtg) then norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) - print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + !print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) endif ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then ! Add e_val(1) in order to have (H - e_val(1) I) positive definite - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' - print*, 'lambda = -e_val(1), no lambda optimization' + !print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + !print*, 'lambda = -e_val(1), no lambda optimization' lambda = - e_val(1) ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta @@ -592,12 +601,12 @@ You will find more details at the beginning ! H indefinite, w_1^T.g =/= 0 else ! Constraint solution/ add lambda - if (DABS(tmp_wtg(1)) < thresh_wtg) then - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' - else - print*, 'H indefinite, w_1^T.g =/= 0' - endif - print*, 'Computation of the optimal lambda...' + !if (DABS(tmp_wtg(1)) < thresh_wtg) then + ! print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + !else + ! print*, 'H indefinite, w_1^T.g =/= 0' + !endif + !print*, 'Computation of the optimal lambda...' call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) endif @@ -627,28 +636,53 @@ We compute x in function of lambda using its formula : ! Calculation of the step x - ! Normal version - if (.not. absolute_eig) then + if (n == n2) then + ! Normal version + if (.not. absolute_eig) then - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) - enddo - endif - enddo + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo - ! Version to use the absolute value of the eigenvalues + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif else + ! If the hessian is diagonal + ! Normal version + if (.not. absolute_eig) then - do i = 1, n - if (DABS(e_val(i)) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) - enddo - endif - enddo + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (e_val(i) + lambda) + endif + enddo + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (DABS(e_val(i)) + lambda) + endif + enddo + + endif endif double precision :: beta, norm_x @@ -719,7 +753,6 @@ antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". print*,'======================' print*,'---End trust_region---' print*,'======================' - print*,'' end #+END_SRC diff --git a/src/utils_trust_region/vec_to_mat_index.org b/src/utils_trust_region/org/vec_to_mat_index.org similarity index 98% rename from src/utils_trust_region/vec_to_mat_index.org rename to src/utils_trust_region/org/vec_to_mat_index.org index 0a09fa86..13b1b5ee 100644 --- a/src/utils_trust_region/vec_to_mat_index.org +++ b/src/utils_trust_region/org/vec_to_mat_index.org @@ -36,10 +36,10 @@ subroutine vec_to_mat_index(i,p,q) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing ! its index i a vector - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/org/vec_to_mat_v2.org similarity index 96% rename from src/utils_trust_region/vec_to_mat_v2.org rename to src/utils_trust_region/org/vec_to_mat_v2.org index 4e358a88..4ce5f5e1 100644 --- a/src/utils_trust_region/vec_to_mat_v2.org +++ b/src/utils_trust_region/org/vec_to_mat_v2.org @@ -8,9 +8,9 @@ Can be done in OMP (for the first part and with omp critical for the second) #+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f subroutine vec_to_mat_v2(n,m,v_x,m_x) - BEGIN_DOC + !BEGIN_DOC ! Vector to antisymmetric matrix - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h index bbfabfec..2c36a9f0 100644 --- a/src/utils_trust_region/pi.h +++ b/src/utils_trust_region/pi.h @@ -1,2 +1,2 @@ - !logical, parameter :: debug=.False. + logical, parameter :: debug=.False. double precision, parameter :: pi = 3.1415926535897932d0 diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f index 4738fd67..827af8c0 100644 --- a/src/utils_trust_region/rotation_matrix.irp.f +++ b/src/utils_trust_region/rotation_matrix.irp.f @@ -61,10 +61,10 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) implicit none - BEGIN_DOC + !BEGIN_DOC ! Rotation matrix to rotate the molecular orbitals. ! If the rotation is too large the transformation is not unitary and must be cancelled. - END_DOC + !END_DOC include 'pi.h' @@ -187,7 +187,7 @@ do j = 1, n endif enddo enddo -print*,'max element in A', max_elem_A +!print*,'max element in A', max_elem_A if (ABS(max_elem_A) > 2 * pi) then print*,'' @@ -215,18 +215,16 @@ W=B lwork = 3*n-1 allocate(work(lwork,n)) -print*,'Starting diagonalization ...' +!print*,'Starting diagonalization ...' call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) deallocate(work) -if (info2 == 0) then - print*, 'Diagonalization : Done' -elseif (info2 < 0) then +if (info2 < 0) then print*, 'WARNING: error in the diagonalization' print*, 'Illegal value of the ', info2,'-th parameter' -else +elseif (info2 >0) then print*, "WARNING: Diagonalization failed to converge" endif @@ -302,7 +300,7 @@ do i = 1, n max_elem = tau_m1(i,i) endif enddo -print*,'max elem tau^-1:', max_elem +!print*,'max elem tau^-1:', max_elem ! Debug !print*,'eigenvalues:' @@ -373,7 +371,7 @@ enddo call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) norm = dnrm2(n*n,RR_t,1) -print*, 'Rotation matrix check, norm R.R^T = ', norm +!print*, 'Rotation matrix check, norm R.R^T = ', norm ! Debug !if (debug) then @@ -396,9 +394,9 @@ do j = 1, n enddo print*, 'Max error in R.R^T:', max_elem -print*, 'e_val(1):', e_val(1) -print*, 'e_val(n):', e_val(n) -print*, 'max elem in A:', max_elem_A +!print*, 'e_val(1):', e_val(1) +!print*, 'e_val(n):', e_val(n) +!print*, 'max elem in A:', max_elem_A if (ABS(max_elem) > 1d-12) then print*, 'WARNING: max error in R.R^T > 1d-12' diff --git a/src/utils_trust_region/rotation_matrix_iterative.irp.f b/src/utils_trust_region/rotation_matrix_iterative.irp.f new file mode 100644 index 00000000..f268df04 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix_iterative.irp.f @@ -0,0 +1,134 @@ +! Rotation matrix with the iterative method + +! \begin{align*} +! \textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k +! \end{align*} + +! !!! Doesn't work !!! + + +subroutine rotation_matrix_iterative(m,X,R) + + implicit none + + ! in + integer, intent(in) :: m + double precision, intent(in) :: X(m,m) + + ! out + double precision, intent(out) :: R(m,m) + + ! internal + double precision :: max_elem, pre_factor + double precision :: t1,t2,t3 + integer :: k,l,i,j + logical :: not_converged + double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) + + ! Functions + integer :: factorial + + print*,'---rotation_matrix_iterative---' + call wall_time(t1) + + allocate(RRT(m,m),A(m,m),B(m,m)) + + ! k = 0 + R = 0d0 + do i = 1, m + R(i,i) = 1d0 + enddo + + ! k = 1 + R = R + X + + k = 2 + + not_converged = .True. + + do while (not_converged) + + pre_factor = 1d0/DBLE(factorial(k)) + if (pre_factor < 1d-15) then + print*,'pre factor=', pre_factor,'< 1d-15, exit' + exit + endif + + A = X + B = 0d0 + do l = 1, k-1 + call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) + A = B + enddo + + !print*,'B' + !do i = 1, m + ! print*,B(i,:) * 1d0/DBLE(factorial(k)) + !enddo + + R = R + pre_factor * B + + k = k + 1 + call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) + + !print*,'R' + !do i = 1, m + ! write(*,'(10(E12.5))') R(i,:) + !enddo + + do i = 1, m + RRT(i,i) = RRT(i,i) - 1d0 + enddo + + !print*,'RRT' + !do i = 1, m + ! write(*,'(10(E12.5))') RRT(i,:) + !enddo + + max_elem = 0d0 + do j = 1, m + do i = 1, m + if (dabs(RRT(i,j)) > max_elem) then + max_elem = dabs(RRT(i,j)) + endif + enddo + enddo + + print*, 'Iteration:', k + print*, 'Max error in R:', max_elem + + if (max_elem < 1d-12) then + not_converged = .False. + endif + + enddo + + deallocate(RRT,A,B) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rotation matrix iterative:', t3 + print*,'---End roration_matrix_iterative---' + + +print*,'Does not work yet, abort' +call abort + +end + +! Factorial + +function factorial(n) + + implicit none + + integer, intent(in) :: n + integer :: factorial, k + + factorial = 1 + + do k = 1, n + factorial = factorial * k + enddo + +end diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f index bdd1f6ba..75d04352 100644 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f @@ -32,9 +32,9 @@ subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - BEGIN_DOC + !BEGIN_DOC ! Compute the full rotation matrix from a smaller one - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f index b7d849d1..ad5ad2f9 100644 --- a/src/utils_trust_region/trust_region_expected_e.irp.f +++ b/src/utils_trust_region/trust_region_expected_e.irp.f @@ -10,11 +10,12 @@ ! \end{align*} ! Input: -! | n | integer | m*(m-1)/2 | -! | v_grad(n) | double precision | gradient | -! | H(n,n) | double precision | hessian | -! | x(n) | double precision | Step in the trust region | -! | prev_energy | double precision | previous energy | +! | n | integer | m*(m-1)/2 | +! | n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | v_grad(n) | double precision | gradient | +! | H(n,n) | double precision | hessian | +! | x(n) | double precision | Step in the trust region | +! | prev_energy | double precision | previous energy | ! Output: ! | e_model | double precision | predicted energy after the rotation of the MOs | @@ -29,21 +30,21 @@ ! | ddot | double precision | dot product (Lapack) | -subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) +subroutine trust_region_expected_e(n,n2,v_grad,H,x,prev_energy,e_model) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the expected criterion/energy after the application of the step x - END_DOC + !END_DOC implicit none ! Variables ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n),H(n,n),x(n) + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n),H(n,n2),x(n) double precision, intent(in) :: prev_energy ! out @@ -79,27 +80,34 @@ subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) part_1 = ddot(n,v_grad,1,x,1) !if (debug) then - print*,'g.x : ', part_1 - !endif - + ! print*,'g.x : ', part_1 + !endif + ! Product H.x - call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + if (n == n2) then + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + else + ! If the hessian is diagonal + do i = 1, n + part_2a(i) = H(i,1) * x(i) + enddo + endif ! Product 1/2 . x^T.H.x part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) !if (debug) then - print*,'1/2*x^T.H.x : ', part_2 + ! print*,'1/2*x^T.H.x : ', part_2 !endif - print*,'prev_energy', prev_energy ! Sum e_model = prev_energy + part_1 + part_2 ! Writing the predicted energy - print*, 'Predicted energy after the rotation : ', e_model - print*, 'Previous energy - predicted energy:', prev_energy - e_model + print*, 'prev_energy: ', prev_energy + print*, 'Predicted energy after the rotation:', e_model + print*, 'Previous energy - predicted energy: ', prev_energy - e_model ! Can be deleted, already in another subroutine if (DABS(prev_energy - e_model) < 1d-12 ) then @@ -114,6 +122,5 @@ subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) print*,'Time in trust e model:', t3 print*,'---End trust_e_model---' - print*,'' end subroutine diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f index f71bb405..b7dcf875 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -153,9 +153,9 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Research the optimal lambda to constrain the step size in the trust region - END_DOC + !END_DOC implicit none @@ -195,18 +195,17 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) print*,'' print*,'---Trust_newton---' - print*,'' call wall_time(t1) ! version_lambda_search ! 1 -> ||x||^2 - delta^2 = 0, ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) - if (version_lambda_search == 1) then - print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' - else - print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' - endif + !if (version_lambda_search == 1) then + ! print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + !else + ! print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + !endif ! Version 2 is normally better @@ -216,21 +215,21 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! Initialization epsilon = 1d-4 - lambda =MAX(0d0, -e_val(1)) + lambda = max(0d0, -e_val(1)) ! Pre research of lambda to start near the optimal lambda ! by adding a constant epsilon and changing the constant to ! have ||x(lambda + epsilon)|| ~ delta, before setting ! lambda = lambda + epsilon - print*, 'Pre research of lambda:' - print*,'Initial lambda =', lambda + !print*, 'Pre research of lambda:' + !print*,'Initial lambda =', lambda f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + !print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta i = 1 ! To increase lambda if (f_N > delta**2) then - print*,'Increasing lambda...' + !print*,'Increasing lambda...' do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) ! Update the previous norm @@ -240,7 +239,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! New norm f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta ! Security if (prev_f_N < f_N) then @@ -254,7 +253,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! To reduce lambda else - print*,'Reducing lambda...' + !print*,'Reducing lambda...' do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) ! Update the previous norm @@ -264,7 +263,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! New norm f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta ! Security if (prev_f_N > f_N) then @@ -277,27 +276,25 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) enddo endif - print*,'End of the pre research of lambda' + !print*,'End of the pre research of lambda' ! New value of lambda lambda = lambda + epsilon - print*, 'e_val(1):', e_val(1) - print*, 'Staring point, lambda =', lambda + !print*, 'e_val(1):', e_val(1) + !print*, 'Staring point, lambda =', lambda ! thresh_cc, threshold for the research of the optimal lambda ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc ! thresh_rho_2, threshold to cancel the step in the research ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 - print*,'Threshold for the CC:', thresh_cc - print*,'Threshold for rho_2:', thresh_rho_2 - - print*, 'w_1^T . g =', tmp_wtg(1) + + !print*,'Threshold for the CC:', thresh_cc + !print*,'Threshold for rho_2:', thresh_rho_2 + !print*, 'w_1^T . g =', tmp_wtg(1) ! Debug - !if (debug) then - ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - !endif + !print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' ! Initialization i = 1 @@ -324,9 +321,9 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! Newton's method do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) - print*,'--------------------------------------' - print*,'Research of lambda, iteration:', i - print*,'--------------------------------------' + !print*,'--------------------------------------' + !print*,'Research of lambda, iteration:', i + !print*,'--------------------------------------' ! Update of f_N, f_R and the derivatives prev_f_N = f_N @@ -339,7 +336,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 endif - write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 ! Newton's step y = -(1d0/DABS(d_2))*d_1 @@ -348,7 +345,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) if (DABS(y) > alpha) then y = alpha * (y/DABS(y)) ! preservation of the sign of y endif - write(*,'(a,E12.5)') ' Step length: ', y + !write(*,'(a,E12.5)') ' Step length: ', y ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 @@ -356,8 +353,8 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! Updates lambda prev_lambda = lambda lambda = prev_lambda + y - print*,'prev lambda:', prev_lambda - print*,'new lambda:', lambda + !print*,'prev lambda:', prev_lambda + !print*,'new lambda:', lambda ! Checks if lambda is in (-h_1, \infty) if (lambda > MAX(0d0, -e_val(1))) then @@ -371,18 +368,18 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 endif - if (version_lambda_search == 1) then - print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R - print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R - print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model - else - print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R - print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R - print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model - endif + !if (version_lambda_search == 1) then + ! print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + ! print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + ! print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + !else + ! print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + ! print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + ! print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + !endif - print*,'previous - actual:', prev_f_R - f_R - print*,'previous - model:', prev_f_R - model + !print*,'previous - actual:', prev_f_R - f_R + !print*,'previous - model:', prev_f_R - model ! Check the gain if (DABS(prev_f_R - model) < thresh_model_2) then @@ -401,10 +398,10 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! Computes rho_2 rho_2 = (prev_f_R - f_R)/(prev_f_R - model) - print*,'rho_2:', rho_2 + !print*,'rho_2:', rho_2 else rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) - print*,'lambda < -e_val(1) ===> rho_2 = 0' + !print*,'lambda < -e_val(1) ===> rho_2 = 0' endif ! Evolution of the trust length, alpha @@ -417,20 +414,20 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) else alpha = 0.25d0 * alpha endif - write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha ! cancellaion of the step if rho < 0.1 if (rho_2 < thresh_rho_2) then !0.1d0) then lambda = prev_lambda f_N = prev_f_N - print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + !print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' endif - print*,'' - print*,'lambda, ||x||, delta:' - print*, lambda, dsqrt(f_N), delta - print*,'CC:', DABS(1d0 - f_N/delta**2) - print*,'' + !print*,'' + !print*,'lambda, ||x||, delta:' + !print*, lambda, dsqrt(f_N), delta + !print*,'CC:', DABS(1d0 - f_N/delta**2) + !print*,'' i = i + 1 enddo @@ -445,20 +442,19 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) print*,'' endif - print*,'Number of iterations :', i - print*,'Value of lambda :', lambda - print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 - print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 - print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + print*,'Number of iterations:', i + print*,'Value of lambda:', lambda + !print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Convergence criterion:', 1d0-f_N/delta**2 + !print*,'Error on the trust region (||x||^2 - delta^2)^2):', (f_N - delta**2)**2 + !print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 ! Time call wall_time(t2) t3 = t2 - t1 print*,'Time in trust_newton:', t3 - print*,'' print*,'---End trust_newton---' - print*,'' end subroutine @@ -508,9 +504,9 @@ function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -644,9 +640,9 @@ function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -792,9 +788,9 @@ function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute ||x(lambda)||^2 - END_DOC + !END_DOC implicit none @@ -903,9 +899,9 @@ function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1000,9 +996,9 @@ function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1102,9 +1098,9 @@ function f_norm_trust_region(n,e_val,tmp_wtg,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute ||x(lambda)||^2 - END_DOC + !END_DOC implicit none @@ -1184,9 +1180,9 @@ function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1340,9 +1336,9 @@ function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1498,9 +1494,9 @@ function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1592,9 +1588,9 @@ function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f index 45738736..11ab11e9 100644 --- a/src/utils_trust_region/trust_region_rho.irp.f +++ b/src/utils_trust_region/trust_region_rho.irp.f @@ -47,9 +47,9 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute rho, the agreement between the predicted criterion/energy and the real one - END_DOC + !END_DOC implicit none @@ -69,7 +69,7 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) print*,'' print*,'---Rho_model---' - call wall_time(t1) + !call wall_time(t1) ! Rho ! \begin{equation} @@ -92,30 +92,29 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) rho = (prev_energy - energy) / (prev_energy - e_model) - print*, 'previous energy, prev_energy :', prev_energy - print*, 'predicted energy, e_model :', e_model - print*, 'real energy, energy :', energy - print*, 'prev_energy - energy :', prev_energy - energy - print*, 'prev_energy - e_model :', prev_energy - e_model - print*, 'Rho :', rho - print*, 'Threshold for rho:', thresh_rho + !print*, 'previous energy, prev_energy:', prev_energy + !print*, 'predicted energy, e_model:', e_model + !print*, 'real energy, energy:', energy + !print*, 'prev_energy - energy:', prev_energy - energy + !print*, 'prev_energy - e_model:', prev_energy - e_model + print*, 'Rho:', rho + !print*, 'Threshold for rho:', thresh_rho ! Modification of prev_energy in function of rho if (rho < thresh_rho) then !0.1) then ! the step is cancelled print*, 'Rho <', thresh_rho,', the previous energy does not changed' - print*, 'prev_energy :', prev_energy + !print*, 'prev_energy :', prev_energy else ! the step is accepted prev_energy = energy - print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy:', energy endif - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rho model:', t3 + !call wall_time(t2) + !t3 = t2 - t1 + !print*,'Time in rho model:', t3 print*,'---End rho_model---' - print*,'' end subroutine diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f index 42aa6ed4..54161a1c 100644 --- a/src/utils_trust_region/trust_region_step.irp.f +++ b/src/utils_trust_region/trust_region_step.irp.f @@ -341,9 +341,10 @@ ! Cf. qp_edit in orbital optimization section, for some constants/thresholds ! Input: -! | m | integer | number of MOs | +! | m | integer | number of MOs | ! | n | integer | m*(m-1)/2 | -! | H(n, n) | double precision | hessian | +! | n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | H(n,n2) | double precision | hessian | ! | v_grad(n) | double precision | gradient | ! | e_val(n) | double precision | eigenvalues of the hessian | ! | W(n, n) | double precision | eigenvectors of the hessian | @@ -371,23 +372,23 @@ ! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | -subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) +subroutine trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,w,x,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compuet the step in the trust region - END_DOC + !END_DOC implicit none ! Variables ! in - integer, intent(in) :: n + integer, intent(in) :: n,n2 double precision, intent(in) :: v_grad(n), rho integer, intent(inout) :: nb_iter - double precision, intent(in) :: e_val(n), w(n,n) + double precision, intent(in) :: e_val(n), w(n,n2) ! inout double precision, intent(inout) :: delta @@ -432,11 +433,19 @@ lambda = 0d0 ! List of w^T.g, to avoid the recomputation tmp_wtg = 0d0 -do j = 1, n - do i = 1, n - tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) +if (n == n2) then + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo enddo -enddo +else + ! For the diagonal case + do j = 1, n + k = int(w(j,1)+1d-15) + tmp_wtg(j) = v_grad(k) + enddo +endif ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue ! in the case of avoid_saddle @@ -463,18 +472,18 @@ if (avoid_saddle .and. e_val(1) < - thresh_eig) then tmp_wtg(1) = 0d0 endif -endif +endif ! Norm^2 of x, ||x||^2 norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm ! Anyway if the step is too big it will be reduced -print*,'||x||^2 :', norm2_x +!print*,'||x||^2 :', norm2_x ! Norm^2 of the gradient, ||v_grad||^2 norm2_g = (dnrm2(n,v_grad,1))**2 -print*,'||grad||^2 :', norm2_g +!print*,'||grad||^2 :', norm2_g ! Trust radius initialization @@ -526,7 +535,7 @@ if (delta > 1d10) then delta = 1d10 endif -print*, 'Delta :', delta +!print*, 'Delta :', delta ! Calculation of the optimal lambda @@ -545,26 +554,26 @@ print*, 'Delta :', delta ! Research of lambda to solve ||x(lambda)|| = Delta ! Display -print*, 'e_val(1) = ', e_val(1) -print*, 'w_1^T.g =', tmp_wtg(1) +!print*, 'e_val(1) = ', e_val(1) +!print*, 'w_1^T.g =', tmp_wtg(1) ! H positive definite if (e_val(1) > - thresh_eig) then norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - print*, '||x(0)||=', dsqrt(norm2_x) - print*, 'Delta=', delta + !print*, '||x(0)||=', dsqrt(norm2_x) + !print*, 'Delta=', delta ! H positive definite, ||x(lambda = 0)|| <= Delta if (dsqrt(norm2_x) <= delta) then - print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' - print*, 'lambda = 0, no lambda optimization' + !print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + !print*, 'lambda = 0, no lambda optimization' lambda = 0d0 ! H positive definite, ||x(lambda = 0)|| > Delta else ! Constraint solution - print*, 'H positive definite, ||x(lambda = 0)|| > Delta' - print*,'Computation of the optimal lambda...' + !print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + !print*,'Computation of the optimal lambda...' call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) endif @@ -572,14 +581,14 @@ if (e_val(1) > - thresh_eig) then else if (DABS(tmp_wtg(1)) < thresh_wtg) then norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) - print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + !print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) endif ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then ! Add e_val(1) in order to have (H - e_val(1) I) positive definite - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' - print*, 'lambda = -e_val(1), no lambda optimization' + !print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + !print*, 'lambda = -e_val(1), no lambda optimization' lambda = - e_val(1) ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta @@ -587,12 +596,12 @@ else ! H indefinite, w_1^T.g =/= 0 else ! Constraint solution/ add lambda - if (DABS(tmp_wtg(1)) < thresh_wtg) then - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' - else - print*, 'H indefinite, w_1^T.g =/= 0' - endif - print*, 'Computation of the optimal lambda...' + !if (DABS(tmp_wtg(1)) < thresh_wtg) then + ! print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + !else + ! print*, 'H indefinite, w_1^T.g =/= 0' + !endif + !print*, 'Computation of the optimal lambda...' call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) endif @@ -621,28 +630,53 @@ x = 0d0 ! Calculation of the step x -! Normal version -if (.not. absolute_eig) then +if (n == n2) then + ! Normal version + if (.not. absolute_eig) then - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) - enddo - endif - enddo + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo -! Version to use the absolute value of the eigenvalues + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif else + ! If the hessian is diagonal + ! Normal version + if (.not. absolute_eig) then - do i = 1, n - if (DABS(e_val(i)) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) - enddo - endif - enddo + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (e_val(i) + lambda) + endif + enddo + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (DABS(e_val(i)) + lambda) + endif + enddo + + endif endif double precision :: beta, norm_x @@ -711,6 +745,5 @@ deallocate(tmp_wtg) print*,'======================' print*,'---End trust_region---' print*,'======================' - print*,'' end diff --git a/src/utils_trust_region/vec_to_mat_index.irp.f b/src/utils_trust_region/vec_to_mat_index.irp.f index 5d68748b..a6d381f7 100644 --- a/src/utils_trust_region/vec_to_mat_index.irp.f +++ b/src/utils_trust_region/vec_to_mat_index.irp.f @@ -36,10 +36,10 @@ subroutine vec_to_mat_index(i,p,q) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing ! its index i a vector - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f index 9140b8d3..e184d3ba 100644 --- a/src/utils_trust_region/vec_to_mat_v2.irp.f +++ b/src/utils_trust_region/vec_to_mat_v2.irp.f @@ -8,9 +8,9 @@ subroutine vec_to_mat_v2(n,m,v_x,m_x) - BEGIN_DOC + !BEGIN_DOC ! Vector to antisymmetric matrix - END_DOC + !END_DOC implicit none