diff --git a/configure b/configure index 66bc9419..48e6fd12 100755 --- a/configure +++ b/configure @@ -215,7 +215,6 @@ EOF cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} --without-hdf5 make -j 8 && make -j 8 check && make -j 8 install - cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz mv ninja "\${QP_ROOT}"/bin/ EOF @@ -229,7 +228,6 @@ EOF cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} make -j 8 && make -j 8 check && make -j 8 install - cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files EOF diff --git a/ocaml/Input_ao_basis.ml b/ocaml/Input_ao_basis.ml index 841089ea..506cf069 100644 --- a/ocaml/Input_ao_basis.ml +++ b/ocaml/Input_ao_basis.ml @@ -44,8 +44,12 @@ end = struct let get_default = Qpackage.get_ezfio_default "ao_basis";; let read_ao_basis () = - Ezfio.get_ao_basis_ao_basis () - |> AO_basis_name.of_string + let result = + Ezfio.get_ao_basis_ao_basis () + in + if result <> "None" then + AO_basis_name.of_string result + else failwith "No basis" ;; let read_ao_num () = @@ -192,7 +196,7 @@ end = struct ao_expo ; ao_cartesian ; ao_normalized ; - primitives_normalized ; + primitives_normalized ; } = b in write_md5 b ; @@ -207,7 +211,7 @@ end = struct Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; - let ao_nucl = + let ao_nucl = Array.to_list ao_nucl |> list_map Nucl_number.to_int in @@ -215,7 +219,7 @@ end = struct ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; let ao_power = - let l = Array.to_list ao_power in + let l = Array.to_list ao_power in List.concat [ (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.x) l) ; (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.y) l) ; @@ -227,7 +231,7 @@ end = struct Ezfio.set_ao_basis_ao_cartesian(ao_cartesian); Ezfio.set_ao_basis_ao_normalized(ao_normalized); Ezfio.set_ao_basis_primitives_normalized(primitives_normalized); - + let ao_coef = Array.to_list ao_coef |> list_map AO_coef.to_float @@ -267,7 +271,10 @@ end = struct |> Ezfio.set_ao_basis_ao_md5 ; Some result with - | _ -> (Ezfio.set_ao_basis_ao_md5 "None" ; None) + | _ -> ( "None" + |> Digest.string + |> Digest.to_hex + |> Ezfio.set_ao_basis_ao_md5 ; None) ;; @@ -276,7 +283,7 @@ end = struct to_basis b |> Long_basis.of_basis |> Array.of_list - and unordered_basis = + and unordered_basis = to_long_basis b |> Array.of_list in @@ -289,15 +296,15 @@ end = struct (a.(i) <- None ; i) else find x a (i+1) - and find2 (s,g,n) a i = + and find2 (s,g,n) a i = if i = Array.length a then -1 else - match a.(i) with + match a.(i) with | None -> find2 (s,g,n) a (i+1) | Some (s', g', n') -> if s <> s' || n <> n' then find2 (s,g,n) a (i+1) else - let lc = list_map (fun (prim, _) -> prim) g.Gto.lc + let lc = list_map (fun (prim, _) -> prim) g.Gto.lc and lc' = list_map (fun (prim, _) -> prim) g'.Gto.lc in if lc <> lc' then find2 (s,g,n) a (i+1) else (a.(i) <- None ; i) @@ -313,13 +320,13 @@ end = struct let ao_num = List.length long_basis |> AO_number.of_int in let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc - |> AO_prim_number.of_int ) long_basis + |> AO_prim_number.of_int ) long_basis |> Array.of_list and ao_nucl = - list_map (fun (_,_,n) -> n) long_basis + list_map (fun (_,_,n) -> n) long_basis |> Array.of_list and ao_power = - list_map (fun (x,_,_) -> x) long_basis + list_map (fun (x,_,_) -> x) long_basis |> Array.of_list in let ao_prim_num_max = Array.fold_left (fun s x -> @@ -329,16 +336,16 @@ end = struct in let gtos = - list_map (fun (_,x,_) -> x) long_basis + list_map (fun (_,x,_) -> x) long_basis in let create_expo_coef ec = let coefs = begin match ec with | `Coefs -> list_map (fun x-> - list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos + list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos | `Expos -> list_map (fun x-> list_map (fun (prim,_) -> AO_expo.to_float - prim.GaussianPrimitive.expo) x.Gto.lc ) gtos + prim.GaussianPrimitive.expo) x.Gto.lc ) gtos end in let rec get_n n accu = function @@ -360,7 +367,7 @@ end = struct let ao_coef = create_expo_coef `Coefs |> Array.of_list |> Array.map AO_coef.of_float - and ao_expo = create_expo_coef `Expos + and ao_expo = create_expo_coef `Expos |> Array.of_list |> Array.map AO_expo.of_float in @@ -372,7 +379,7 @@ end = struct } ;; - let reorder b = + let reorder b = let order = ordering b in let f a = Array.init (Array.length a) (fun i -> a.(order.(i))) in let ao_prim_num_max = AO_prim_number.to_int b.ao_prim_num_max @@ -464,7 +471,7 @@ Basis set (read-only) :: | line :: tail -> let line = String.trim line in if line = "Basis set (read-only) ::" then - String.concat "\n" tail + String.concat "\n" tail else extract_basis tail in diff --git a/ocaml/Input_mo_basis.ml b/ocaml/Input_mo_basis.ml index a4e6176a..832b464e 100644 --- a/ocaml/Input_mo_basis.ml +++ b/ocaml/Input_mo_basis.ml @@ -56,7 +56,10 @@ end = struct let read_ao_md5 () = let ao_md5 = match (Input_ao_basis.Ao_basis.read ()) with - | None -> failwith "Unable to read AO basis" + | None -> ("None" + |> Digest.string + |> Digest.to_hex + |> MD5.of_string) | Some result -> Input_ao_basis.Ao_basis.to_md5 result in let result = diff --git a/ocaml/qp_run.ml b/ocaml/qp_run.ml index b9d14efe..0cb862ae 100644 --- a/ocaml/qp_run.ml +++ b/ocaml/qp_run.ml @@ -38,7 +38,8 @@ let run slave ?prefix exe ezfio_file = | Unix.Unix_error _ -> try_new_port (port_number+100) in let result = - try_new_port 41279 + let port = 10*(Unix.getpid () mod 2823) + 32_769 in + try_new_port port in Zmq.Socket.close dummy_socket; Zmq.Context.terminate zmq_context; diff --git a/scripts/compilation/cache_compile.py b/scripts/compilation/cache_compile.py index 440f6498..473976e7 100755 --- a/scripts/compilation/cache_compile.py +++ b/scripts/compilation/cache_compile.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ Save the .o from a .f90 -and is the .o is asked a second time, retur it +and is the .o is asked a second time, return it Take in argv command like: ifort -g -openmp -I IRPF90_temp/Ezfio_files/ -c IRPF90_temp/Integrals_Monoelec/kin_ao_ints.irp.module.F90 -o IRPF90_temp/Integrals_Monoelec/kin_ao_ints.irp.module.o """ diff --git a/src/trexio/qp_import_trexio.py b/scripts/qp_import_trexio.py similarity index 67% rename from src/trexio/qp_import_trexio.py rename to scripts/qp_import_trexio.py index de8d1269..2c829f5c 100755 --- a/src/trexio/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -13,11 +13,17 @@ Options: import sys import os -import trexio import numpy as np from functools import reduce from ezfio import ezfio from docopt import docopt +import qp_bitmasks + +try: + import trexio +except ImportError: + print("Error: trexio python module is not found. Try python3 -m pip install trexio") + sys.exit(1) try: @@ -90,14 +96,15 @@ def write_ezfio(trexio_filename, filename): p = re.compile(r'(\d*)$') label = [p.sub("", x).capitalize() for x in label] ezfio.set_nuclei_nucl_label(label) + print("OK") else: ezfio.set_nuclei_nucl_num(1) ezfio.set_nuclei_nucl_charge([0.]) ezfio.set_nuclei_nucl_coord([0.,0.,0.]) ezfio.set_nuclei_nucl_label(["X"]) + print("None") - print("OK") print("Electrons\t...\t", end=' ') @@ -105,12 +112,12 @@ def write_ezfio(trexio_filename, filename): try: num_beta = trexio.read_electron_dn_num(trexio_file) except: - num_beta = sum(charge)//2 + num_beta = int(sum(charge))//2 try: num_alpha = trexio.read_electron_up_num(trexio_file) except: - num_alpha = sum(charge) - num_beta + num_alpha = int(sum(charge)) - num_beta if num_alpha == 0: print("\n\nError: There are zero electrons in the TREXIO file.\n\n") @@ -118,7 +125,7 @@ def write_ezfio(trexio_filename, filename): ezfio.set_electrons_elec_alpha_num(num_alpha) ezfio.set_electrons_elec_beta_num(num_beta) - print("OK") + print(f"{num_alpha} {num_beta}") print("Basis\t\t...\t", end=' ') @@ -126,60 +133,113 @@ def write_ezfio(trexio_filename, filename): try: basis_type = trexio.read_basis_type(trexio_file) - if basis_type.lower() not in ["gaussian", "slater"]: - raise TypeError + if basis_type.lower() in ["gaussian", "slater"]: + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = trexio.read_basis_prim_num(trexio_file) + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = trexio.read_basis_exponent(trexio_file) + coefficient = trexio.read_basis_coefficient(trexio_file) + shell_index = trexio.read_basis_shell_index(trexio_file) + ao_shell = trexio.read_ao_shell(trexio_file) - shell_num = trexio.read_basis_shell_num(trexio_file) - prim_num = trexio.read_basis_prim_num(trexio_file) - ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) - nucl_index = trexio.read_basis_nucleus_index(trexio_file) - exponent = trexio.read_basis_exponent(trexio_file) - coefficient = trexio.read_basis_coefficient(trexio_file) - shell_index = trexio.read_basis_shell_index(trexio_file) - ao_shell = trexio.read_ao_shell(trexio_file) + ezfio.set_basis_basis("Read from TREXIO") + ezfio.set_ao_basis_ao_basis("Read from TREXIO") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) - ezfio.set_basis_basis("Read from TREXIO") - ezfio.set_basis_shell_num(shell_num) - ezfio.set_basis_prim_num(prim_num) - ezfio.set_basis_shell_ang_mom(ang_mom) - ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) - ezfio.set_basis_prim_expo(exponent) - ezfio.set_basis_prim_coef(coefficient) + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) - nucl_shell_num = [] - prev = None - m = 0 - for i in ao_shell: - if i != prev: - m += 1 - if prev is None or nucl_index[i] != nucl_index[prev]: - nucl_shell_num.append(m) - m = 0 - prev = i - assert (len(nucl_shell_num) == nucl_num) + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) - shell_prim_num = [] - prev = shell_index[0] - count = 0 - for i in shell_index: - if i != prev: - shell_prim_num.append(count) - count = 0 - count += 1 - prev = i - shell_prim_num.append(count) + assert (len(shell_prim_num) == shell_num) - assert (len(shell_prim_num) == shell_num) - - ezfio.set_basis_shell_prim_num(shell_prim_num) - ezfio.set_basis_shell_index([x+1 for x in shell_index]) - ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) - shell_factor = trexio.read_basis_shell_factor(trexio_file) - prim_factor = trexio.read_basis_prim_factor(trexio_file) + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = trexio.read_basis_prim_factor(trexio_file) - print("OK") + elif basis_type.lower() == "numerical": + + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = shell_num + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = [1.]*prim_num + coefficient = [1.]*prim_num + shell_index = [i for i in range(shell_num)] + ao_shell = trexio.read_ao_shell(trexio_file) + + ezfio.set_basis_basis("None") + ezfio.set_ao_basis_ao_basis("None") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) + + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) + + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) + + assert (len(shell_prim_num) == shell_num) + + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = [1.]*prim_num + else: + raise TypeError + + print(basis_type) except: print("None") ezfio.set_ao_basis_ao_cartesian(True) @@ -256,9 +316,11 @@ def write_ezfio(trexio_filename, filename): # ezfio.set_ao_basis_ao_prim_num_max(prim_num_max) ezfio.set_ao_basis_ao_coef(coef) ezfio.set_ao_basis_ao_expo(expo) - ezfio.set_ao_basis_ao_basis("Read from TREXIO") - print("OK") + print("OK") + + else: + print("None") # _ @@ -279,6 +341,7 @@ def write_ezfio(trexio_filename, filename): except: label = "None" ezfio.set_mo_basis_mo_label(label) + ezfio.set_determinants_mo_label(label) try: clss = trexio.read_mo_class(trexio_file) @@ -303,10 +366,10 @@ def write_ezfio(trexio_filename, filename): for i in range(num_beta): mo_occ[i] += 1. ezfio.set_mo_basis_mo_occ(mo_occ) + print("OK") except: - pass + print("None") - print("OK") print("Pseudos\t\t...\t", end=' ') @@ -386,8 +449,23 @@ def write_ezfio(trexio_filename, filename): ezfio.set_pseudo_pseudo_n_kl(pseudo_n_kl) ezfio.set_pseudo_pseudo_v_kl(pseudo_v_kl) ezfio.set_pseudo_pseudo_dz_kl(pseudo_dz_kl) + print("OK") + else: + print("None") + print("Determinant\t\t...\t", end=' ') + alpha = [ i for i in range(num_alpha) ] + beta = [ i for i in range(num_beta) ] + if trexio.has_mo_spin(trexio_file): + spin = trexio.read_mo_spin(trexio_file) + beta = [ i for i in range(mo_num) if spin[i] == 1 ] + beta = [ beta[i] for i in range(num_beta) ] + + alpha = qp_bitmasks.BitMask(alpha) + beta = qp_bitmasks.BitMask(beta ) + print(alpha) + print(beta) print("OK") diff --git a/scripts/utility/qp_bitmasks.py b/scripts/utility/qp_bitmasks.py index 38aa48d7..11965b72 100644 --- a/scripts/utility/qp_bitmasks.py +++ b/scripts/utility/qp_bitmasks.py @@ -22,7 +22,7 @@ def int_to_string(s): assert s>=0 AssertionError """ - assert type(s) in (int, long) + assert type(s) == int assert s>=0 return '{s:0b}'.format(s=s) @@ -62,7 +62,7 @@ def int_to_bitmask(s,bit_kind_size=BIT_KIND_SIZE): ['1111111111111111111111111111111111111111111111111111111111110110'] >>> """ - assert type(s) in (int, long) + assert type(s) == int if s < 0: s = s + (1 << bit_kind_size) return ['{s:0{width}b}'.format(s=s,width=bit_kind_size)] @@ -104,7 +104,7 @@ class BitMask(object): return self._data_int[i] def __setitem__(self,i,value): - if type(value) in (int,long): + if type(value) == int : self._data_int[i] = value elif type(value) == str: s = string_to_bitmask(value,bit_kind_size=self.bit_kind_size)[0] diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg index 51d726da..6ad9b998 100644 --- a/src/ao_basis/EZFIO.cfg +++ b/src/ao_basis/EZFIO.cfg @@ -67,3 +67,15 @@ doc: Use normalized primitive functions interface: ezfio, provider default: true +[ao_expoim_cosgtos] +type: double precision +doc: imag part for Exponents for each primitive of each cosGTOs |AO| +size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) +interface: ezfio, provider + +[use_cosgtos] +type: logical +doc: If true, use cosgtos for AO integrals +interface: ezfio +default: False + diff --git a/src/ao_basis/cosgtos.irp.f b/src/ao_basis/cosgtos.irp.f new file mode 100644 index 00000000..dfa7d6b9 --- /dev/null +++ b/src/ao_basis/cosgtos.irp.f @@ -0,0 +1,34 @@ +BEGIN_PROVIDER [ logical, use_cosgtos ] + implicit none + BEGIN_DOC +! If true, use cosgtos for AO integrals + END_DOC + + logical :: has + PROVIDE ezfio_filename + use_cosgtos = .False. + if (mpi_master) then + call ezfio_has_ao_basis_use_cosgtos(has) + if (has) then +! write(6,'(A)') '.. >>>>> [ IO READ: use_cosgtos ] <<<<< ..' + call ezfio_get_ao_basis_use_cosgtos(use_cosgtos) + else + call ezfio_set_ao_basis_use_cosgtos(use_cosgtos) + endif + endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( use_cosgtos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read use_cosgtos with MPI' + endif + IRP_ENDIF + +! call write_time(6) + +END_PROVIDER diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 02963605..33ca8085 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -62,6 +62,7 @@ END_PROVIDER double precision :: tmp_cent_x, tmp_cent_y, tmp_cent_z provide j1b_pen + provide j1b_pen_coef List_all_comb_b2_coef = 0.d0 List_all_comb_b2_expo = 0.d0 @@ -127,8 +128,8 @@ END_PROVIDER List_all_comb_b2_expo( 1) = 0.d0 List_all_comb_b2_cent(1:3,1) = 0.d0 do i = 1, nucl_num - List_all_comb_b2_coef( i+1) = -1.d0 - List_all_comb_b2_expo( i+1) = j1b_pen( i) + List_all_comb_b2_coef( i+1) = -1.d0 * j1b_pen_coef(i) + List_all_comb_b2_expo( i+1) = j1b_pen(i) List_all_comb_b2_cent(1,i+1) = nucl_coord(i,1) List_all_comb_b2_cent(2,i+1) = nucl_coord(i,2) List_all_comb_b2_cent(3,i+1) = nucl_coord(i,3) @@ -225,6 +226,7 @@ END_PROVIDER double precision :: dx, dy, dz, r2 provide j1b_pen + provide j1b_pen_coef List_all_comb_b3_coef = 0.d0 List_all_comb_b3_expo = 0.d0 @@ -296,8 +298,8 @@ END_PROVIDER do i = 1, nucl_num ii = ii + 1 - List_all_comb_b3_coef( ii) = -2.d0 - List_all_comb_b3_expo( ii) = j1b_pen( i) + List_all_comb_b3_coef( ii) = -2.d0 * j1b_pen_coef(i) + List_all_comb_b3_expo( ii) = j1b_pen(i) List_all_comb_b3_cent(1,ii) = nucl_coord(i,1) List_all_comb_b3_cent(2,ii) = nucl_coord(i,2) List_all_comb_b3_cent(3,ii) = nucl_coord(i,3) @@ -305,7 +307,7 @@ END_PROVIDER do i = 1, nucl_num ii = ii + 1 - List_all_comb_b3_coef( ii) = 1.d0 + List_all_comb_b3_coef( ii) = 1.d0 * j1b_pen_coef(i) * j1b_pen_coef(i) List_all_comb_b3_expo( ii) = 2.d0 * j1b_pen(i) List_all_comb_b3_cent(1,ii) = nucl_coord(i,1) List_all_comb_b3_cent(2,ii) = nucl_coord(i,2) @@ -337,7 +339,7 @@ END_PROVIDER ii = ii + 1 ! x 2 to avoid doing integrals twice - List_all_comb_b3_coef( ii) = 2.d0 * dexp(-tmp1*tmp2*tmp4*r2) + List_all_comb_b3_coef( ii) = 2.d0 * dexp(-tmp1*tmp2*tmp4*r2) * j1b_pen_coef(i) * j1b_pen_coef(j) List_all_comb_b3_expo( ii) = tmp3 List_all_comb_b3_cent(1,ii) = tmp4 * (tmp1 * xi + tmp2 * xj) List_all_comb_b3_cent(2,ii) = tmp4 * (tmp1 * yi + tmp2 * yj) diff --git a/src/ao_one_e_ints/NEED b/src/ao_one_e_ints/NEED index b9caaf5d..61d23b1e 100644 --- a/src/ao_one_e_ints/NEED +++ b/src/ao_one_e_ints/NEED @@ -1,3 +1,2 @@ ao_basis pseudo -cosgtos_ao_int diff --git a/src/cosgtos_ao_int/aos_cosgtos.irp.f b/src/ao_one_e_ints/aos_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/aos_cosgtos.irp.f rename to src/ao_one_e_ints/aos_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f b/src/ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/one_e_Coul_integrals.irp.f rename to src/ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/one_e_kin_integrals.irp.f b/src/ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/one_e_kin_integrals.irp.f rename to src/ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index 928053ad..4f9ae76d 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -104,6 +104,9 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] IF(do_pseudo) THEN ao_integrals_n_e += ao_pseudo_integrals ENDIF + IF(point_charges) THEN + ao_integrals_n_e += ao_integrals_pt_chrg + ENDIF endif @@ -455,10 +458,12 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(c) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny=0 call I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,Y,ny,n_pt_in) - call multiply_poly(Y,ny,R1x,2,d,nd) +! call multiply_poly(Y,ny,R1x,2,d,nd) + call multiply_poly_c2(Y,ny,R1x,d,nd) else do ix=0,n_pt_in X(ix) = 0.d0 @@ -469,7 +474,8 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(a-1) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) nx = nd do ix=0,n_pt_in @@ -479,10 +485,12 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(c) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny=0 call I_x1_pol_mult_one_e(a-1,c,R1x,R1xp,R2x,Y,ny,n_pt_in) - call multiply_poly(Y,ny,R1x,2,d,nd) +! call multiply_poly(Y,ny,R1x,2,d,nd) + call multiply_poly_c2(Y,ny,R1x,d,nd) endif end @@ -519,7 +527,8 @@ recursive subroutine I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,d,nd,dim) do ix=0,nx X(ix) *= dble(c-1) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny = 0 do ix=0,dim Y(ix) = 0.d0 @@ -527,7 +536,8 @@ recursive subroutine I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,d,nd,dim) call I_x1_pol_mult_one_e(0,c-1,R1x,R1xp,R2x,Y,ny,dim) if(ny.ge.0)then - call multiply_poly(Y,ny,R1xp,2,d,nd) +! call multiply_poly(Y,ny,R1xp,2,d,nd) + call multiply_poly_c2(Y,ny,R1xp,d,nd) endif endif end diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index d4c995e6..9f523fca 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -4,6 +4,19 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[ao_integrals_threshold] +type: Threshold +doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero +interface: ezfio,provider,ocaml +default: 1.e-15 +ezfio_name: threshold_ao + +[ao_cholesky_threshold] +type: Threshold +doc: If | (ii|jj) | < `ao_cholesky_threshold` then (ii|jj) is zero +interface: ezfio,provider,ocaml +default: 1.e-12 + [do_direct_integrals] type: logical doc: Compute integrals on the fly (very slow, only for debugging) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d4c201aa..77eb6ddc 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -4,29 +4,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] ! Number of Cholesky vectors in AO basis END_DOC - integer :: i,j,k,l - double precision :: xnorm0, x, integral - double precision, external :: ao_two_e_integral - - cholesky_ao_num_guess = 0 - xnorm0 = 0.d0 - x = 0.d0 - do j=1,ao_num - do i=1,ao_num - integral = ao_two_e_integral(i,i,j,j) - if (integral > ao_integrals_threshold) then - cholesky_ao_num_guess += 1 - else - x += integral - endif - enddo - enddo - print *, 'Cholesky decomposition of AO integrals' - print *, '--------------------------------------' - print *, '' - print *, 'Estimated Error: ', x - print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)' - + cholesky_ao_num_guess = ao_num*ao_num END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -39,7 +17,7 @@ END_PROVIDER END_DOC type(c_ptr) :: ptr - integer :: fd, i,j,k,l, rank + integer :: fd, i,j,k,l,m,rank double precision, pointer :: ao_integrals(:,:,:,:) double precision, external :: ao_two_e_integral @@ -49,28 +27,83 @@ END_PROVIDER 8, fd, .False., ptr) call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) - double precision :: integral + print*, 'Providing the AO integrals (Cholesky)' + call wall_time(wall_1) + call cpu_time(cpu_1) + + ao_integrals = 0.d0 + + double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 logical, external :: ao_two_e_integral_zero - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic) - do l=1,ao_num - do j=1,l - do k=1,ao_num - do i=1,k - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - enddo + double precision, external :: get_ao_two_e_integral + + if (read_ao_two_e_integrals) then + PROVIDE ao_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,ao_num + do k=1,ao_num + do i=1,ao_num + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) + ao_integrals(i,k,j,l) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP MASTER + call wall_time(wall_2) + print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL + + else + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP MASTER + call wall_time(wall_2) + print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + call wall_time(wall_2) + call cpu_time(cpu_2) + print*, 'AO integrals provided:' + print*, ' cpu time :',cpu_2 - cpu_1, 's' + print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' + + endif ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess - call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao) + call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' ! Remove mmap diff --git a/src/cosgtos_ao_int/gauss_legendre.irp.f b/src/ao_two_e_ints/gauss_legendre.irp.f similarity index 100% rename from src/cosgtos_ao_int/gauss_legendre.irp.f rename to src/ao_two_e_ints/gauss_legendre.irp.f diff --git a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f similarity index 97% rename from src/cosgtos_ao_int/two_e_Coul_integrals.irp.f rename to src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f index 527a98d5..ea9ff009 100644 --- a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f @@ -29,14 +29,14 @@ double precision function ao_two_e_integral_cosgtos(i, j, k, l) complex*16 :: integral5, integral6, integral7, integral8 complex*16 :: integral_tot - double precision :: ao_two_e_integral_cosgtos_schwartz_accel + double precision :: ao_2e_cosgtos_schwartz_accel complex*16 :: ERI_cosgtos complex*16 :: general_primitive_integral_cosgtos if(ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024) then !print *, ' with shwartz acc ' - ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + ao_two_e_integral_cosgtos = ao_2e_cosgtos_schwartz_accel(i, j, k, l) else !print *, ' without shwartz acc ' @@ -294,7 +294,7 @@ end function ao_two_e_integral_cosgtos ! --- -double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) +double precision function ao_2e_cosgtos_schwartz_accel(i, j, k, l) BEGIN_DOC ! integral of the AO basis or (ij|kl) @@ -329,7 +329,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) complex*16 :: ERI_cosgtos complex*16 :: general_primitive_integral_cosgtos - ao_two_e_integral_cosgtos_schwartz_accel = 0.d0 + ao_2e_cosgtos_schwartz_accel = 0.d0 dim1 = n_pt_max_integrals @@ -519,8 +519,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 - ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & - + coef4 * 2.d0 * real(integral_tot) + ao_2e_cosgtos_schwartz_accel = ao_2e_cosgtos_schwartz_accel + coef4 * 2.d0 * real(integral_tot) enddo ! s enddo ! r enddo ! q @@ -698,8 +697,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 - ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & - + coef4 * 2.d0 * real(integral_tot) + ao_2e_cosgtos_schwartz_accel = ao_2e_cosgtos_schwartz_accel + coef4 * 2.d0 * real(integral_tot) enddo ! s enddo ! r enddo ! q @@ -709,11 +707,11 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) deallocate(schwartz_kl) -end function ao_two_e_integral_cosgtos_schwartz_accel +end function ao_2e_cosgtos_schwartz_accel ! --- -BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_2e_cosgtos_schwartz, (ao_num,ao_num)] BEGIN_DOC ! Needed to compute Schwartz inequalities @@ -723,16 +721,16 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,a integer :: i, k double precision :: ao_two_e_integral_cosgtos - ao_two_e_integral_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) + ao_2e_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) - !$OMP PARALLEL DO PRIVATE(i,k) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(ao_num, ao_two_e_integral_cosgtos_schwartz) & + !$OMP PARALLEL DO PRIVATE(i,k) & + !$OMP DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_2e_cosgtos_schwartz) & !$OMP SCHEDULE(dynamic) do i = 1, ao_num do k = 1, i - ao_two_e_integral_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) - ao_two_e_integral_cosgtos_schwartz(k,i) = ao_two_e_integral_cosgtos_schwartz(i,k) + ao_2e_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) + ao_2e_cosgtos_schwartz(k,i) = ao_2e_cosgtos_schwartz(i,k) enddo enddo !$OMP END PARALLEL DO diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 82ffbc90..85ff5bcf 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -590,8 +590,20 @@ double precision function general_primitive_integral(dim, & d_poly(i)=0.d0 enddo - !DIR$ FORCEINLINE - call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) +! call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) + integer :: ib, ic + if (ior(n_Ix,n_Iy) >= 0) then + do ib=0,n_Ix + do ic = 0,n_Iy + d_poly(ib+ic) = d_poly(ib+ic) + Iy_pol(ic) * Ix_pol(ib) + enddo + enddo + + do n_pt_tmp = n_Ix+n_Iy, 0, -1 + if (d_poly(n_pt_tmp) /= 0.d0) exit + enddo + endif + if (n_pt_tmp == -1) then return endif @@ -600,8 +612,21 @@ double precision function general_primitive_integral(dim, & d1(i)=0.d0 enddo - !DIR$ FORCEINLINE - call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) +! call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) + if (ior(n_pt_tmp,n_Iz) >= 0) then + ! Bottleneck here + do ib=0,n_pt_tmp + do ic = 0,n_Iz + d1(ib+ic) = d1(ib+ic) + Iz_pol(ic) * d_poly(ib) + enddo + enddo + + do n_pt_out = n_pt_tmp+n_Iz, 0, -1 + if (d1(n_pt_out) /= 0.d0) exit + enddo + endif + + double precision :: rint_sum accu = accu + rint_sum(n_pt_out,const,d1) @@ -948,8 +973,9 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= dble(a-1) enddo - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_10,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_10,2,d,nd) + call multiply_poly_c2(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -970,8 +996,9 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= c enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + call multiply_poly_c2(X,nx,B_00,d,nd) endif ny=0 @@ -988,9 +1015,9 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt call I_x1_pol_mult_recurs(a-1,c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) endif - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) - +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + call multiply_poly_c2(Y,ny,C_00,d,nd) end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1028,8 +1055,9 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + call multiply_poly_c2(X,nx,B_00,d,nd) ny=0 @@ -1039,8 +1067,9 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + call multiply_poly_c2(Y,ny,C_00,d,nd) end @@ -1067,8 +1096,9 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) nx = 0 call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_10,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_10,2,d,nd) + call multiply_poly_c2(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -1086,8 +1116,9 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + call multiply_poly_c2(X,nx,B_00,d,nd) ny=0 !DIR$ LOOP COUNT(8) @@ -1097,9 +1128,9 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) !DIR$ FORCEINLINE call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) - +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + call multiply_poly_c2(Y,ny,C_00,d,nd) end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1146,8 +1177,10 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(1) = D_00(1) Y(2) = D_00(2) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,D_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,D_00,2,d,nd) + call multiply_poly_c2(Y,ny,D_00,d,nd) + return case default @@ -1164,8 +1197,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) X(ix) *= dble(c-1) enddo - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_01,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_01,2,d,nd) + call multiply_poly_c2(X,nx,B_01,d,nd) ny = 0 !DIR$ LOOP COUNT(6) @@ -1174,8 +1208,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,D_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,D_00,2,d,nd) + call multiply_poly_c2(Y,ny,D_00,d,nd) end select end @@ -1233,3 +1268,34 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) enddo end + + +subroutine multiply_poly_local(b,nb,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb, nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:nc) + double precision, intent(inout) :: d(0:nb+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 + + do ib=0,nb + do ic = 0,nc + d(ib+ic) = d(ib+ic) + c(ic) * b(ib) + enddo + enddo + + do nd = nb+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index fd185641..0386f3c6 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -1,10 +1,13 @@ BEGIN_PROVIDER [integer, n_points_final_grid] - implicit none + BEGIN_DOC ! Number of points which are non zero END_DOC - integer :: i,j,k,l + + implicit none + integer :: i, j, k, l + n_points_final_grid = 0 do j = 1, nucl_num do i = 1, n_points_radial_grid -1 @@ -16,9 +19,11 @@ BEGIN_PROVIDER [integer, n_points_final_grid] enddo enddo enddo - print*,'n_points_final_grid = ',n_points_final_grid - print*,'n max point = ',n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) + + print*,' n_points_final_grid = ', n_points_final_grid + print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) + END_PROVIDER ! --- @@ -41,6 +46,10 @@ END_PROVIDER implicit none integer :: i, j, k, l, i_count double precision :: r(3) + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing final_grid_points ...' i_count = 0 do j = 1, nucl_num @@ -62,20 +71,34 @@ END_PROVIDER enddo enddo + FREE grid_points_per_atom + FREE final_weight_at_r + + call wall_time(wall1) + print *, ' wall time for final_grid_points,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER ! --- BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] - implicit none + BEGIN_DOC -! Transposed final_grid_points + ! Transposed final_grid_points END_DOC + implicit none integer :: i,j - do j=1,3 - do i=1,n_points_final_grid + + do j = 1, 3 + do i = 1, n_points_final_grid final_grid_points_transp(i,j) = final_grid_points(j,i) enddo enddo + END_PROVIDER + +! --- + + diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index ca50dd56..2ff96326 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -1,13 +1,37 @@ +! --- + program bi_ort_ints - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + my_grid_becke = .True. - my_n_pt_r_grid = 10 - my_n_pt_a_grid = 14 - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call test_3e + !my_n_pt_r_grid = 10 + !my_n_pt_a_grid = 14 + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + +! call test_3e +! call test_5idx +! call test_5idx2 + call test_4idx() + call test_4idx_n4() + !call test_4idx2() + !call test_5idx2 + !call test_5idx +end + +subroutine test_5idx2 + PROVIDE three_e_5_idx_cycle_2_bi_ort +end + +subroutine test_4idx2() + !PROVIDE three_e_4_idx_direct_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort end subroutine test_3e @@ -16,11 +40,12 @@ subroutine test_3e double precision :: accu, contrib,new,ref i = 1 k = 1 + n = 0 accu = 0.d0 do i = 1, mo_num - do k = 1, mo_num + do k = 1, mo_num do j = 1, mo_num - do l = 1, mo_num + do l = 1, mo_num do m = 1, mo_num do n = 1, mo_num call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) @@ -31,6 +56,7 @@ subroutine test_3e print*,'pb !!' print*,i,k,j,l,m,n print*,ref,new,contrib + stop endif enddo enddo @@ -42,3 +68,394 @@ subroutine test_3e end + +subroutine test_5idx + implicit none + integer :: i,k,j,l,m,n,ipoint + double precision :: accu, contrib,new,ref + double precision, external :: three_e_5_idx_exch12_bi_ort + i = 1 + k = 1 + n = 0 + accu = 0.d0 + PROVIDE three_e_5_idx_direct_bi_ort_old + + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num +! if (dabs(three_e_5_idx_direct_bi_ort(m,l,j,k,i) - three_e_5_idx_exch12_bi_ort(m,l,i,k,j)) > 1.d-10) then +! stop +! endif + new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'direct' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif +! +! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch12' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! +! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle1' + +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle2' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch23' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch13' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle1' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle2' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch23' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch13' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! + enddo + enddo + enddo + enddo + enddo + print*,'accu = ',accu/dble(mo_num)**5 + + +end + +! --- + +subroutine test_4idx_n4() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + thr = 1d-10 + + PROVIDE three_e_4_idx_direct_bi_ort_old + PROVIDE three_e_4_idx_direct_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_direct_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_direct_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_direct_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch13_bi_ort_old + PROVIDE three_e_4_idx_exch13_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch13_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch13_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch13_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_cycle_1_bi_ort_old + PROVIDE three_e_4_idx_cycle_1_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_cycle_1_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_cycle_1_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_cycle_1_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch23_bi_ort_old + PROVIDE three_e_4_idx_exch23_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch23_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch23_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch23_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + return +end + +! --- + +subroutine test_4idx() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + thr = 1d-10 + + PROVIDE three_e_4_idx_direct_bi_ort_old + PROVIDE three_e_4_idx_direct_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_direct_bi_ort (l,k,j,i) + ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_direct_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_direct_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch13_bi_ort_old + PROVIDE three_e_4_idx_exch13_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch13_bi_ort (l,k,j,i) + ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch13_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch13_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_cycle_1_bi_ort_old + PROVIDE three_e_4_idx_cycle_1_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_cycle_1_bi_ort (l,k,j,i) + ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_cycle_1_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_cycle_1_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch23_bi_ort_old + PROVIDE three_e_4_idx_exch23_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch23_bi_ort (l,k,j,i) + ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch23_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch23_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + return +end diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 0d727785..355fa38f 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -54,7 +54,7 @@ BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_poi enddo enddo -! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu + !FREE mo_v_ki_bi_ortho_erf_rk_cst_mu END_PROVIDER @@ -124,6 +124,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo + FREE int2_grad1_u12_ao_test + else PROVIDE int2_grad1_u12_ao @@ -138,10 +140,13 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo + FREE int2_grad1_u12_ao + endif call wall_time(wall1) print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -150,7 +155,7 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] implicit none - integer :: ipoint + integer :: ipoint double precision :: wall0, wall1 PROVIDE mo_l_coef mo_r_coef @@ -177,6 +182,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !call wall_time(wall1) !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + !call print_memory_usage() END_PROVIDER @@ -185,7 +191,11 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] implicit none - integer :: i, j, ipoint + integer :: i, j, ipoint + double precision :: wall0, wall1 + + !call wall_time(wall0) + !print *, ' Providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -200,6 +210,12 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, enddo enddo + FREE int2_grad1_u12_bimo_transp + + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + !call print_memory_usage() + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f index 4d21cb93..ae100fb5 100644 --- a/src/bi_ort_ints/three_body_ijm.irp.f +++ b/src/bi_ort_ints/three_body_ijm.irp.f @@ -23,11 +23,11 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, provide mos_r_in_r_array_transp mos_l_in_r_array_transp - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,integral) & - !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,integral) & + !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort) + !$OMP DO SCHEDULE (dynamic) do i = 1, mo_num do j = 1, mo_num do m = j, mo_num @@ -36,8 +36,8 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do i = 1, mo_num do j = 1, mo_num @@ -49,6 +49,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_direct_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -102,6 +103,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_3_idx_cycle_1_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -155,6 +157,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_3_idx_cycle_2_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -208,6 +211,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch23_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -261,6 +265,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch13_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -306,6 +311,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch12_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -359,6 +365,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort_new, (mo_num, mo_ call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch12_bi_ort_new', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 5afd49ab..669861b7 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -1,282 +1,230 @@ ! --- -BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_4_idx_direct_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_direct_bi_ort (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_direct_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_direct_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral) - three_e_4_idx_direct_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_direct_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm ! END_DOC implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 + integer :: ipoint, i, j, k, m, n + double precision :: wall1, wall0 + double precision :: tmp_loc_1, tmp_loc_2 + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:,:) + double precision, allocatable :: tmp_2d(:,:) + double precision, allocatable :: tmp_aux_1(:,:,:), tmp_aux_2(:,:) - three_e_4_idx_cycle_1_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_cycle_1_bi_ort ...' + print *, ' Providing the three_e_4_idx_bi_ort ...' call wall_time(wall0) provide mos_r_in_r_array_transp mos_l_in_r_array_transp - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral) - three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_cycle_1_bi_ort', wall1 - wall0 + ! to reduce the number of operations + allocate(tmp_aux_1(n_points_final_grid,4,mo_num)) + allocate(tmp_aux_2(n_points_final_grid,mo_num)) -END_PROVIDER + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_aux_1, tmp_aux_2) + !$OMP DO + do n = 1, mo_num + do ipoint = 1, n_points_final_grid -! -- + tmp_aux_1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * final_weight_at_r_vector(ipoint) + tmp_aux_1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * final_weight_at_r_vector(ipoint) + tmp_aux_1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * final_weight_at_r_vector(ipoint) + tmp_aux_1(ipoint,4,n) = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,n) * final_weight_at_r_vector(ipoint) -BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_cycle_2_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral) - three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_cycle_2_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch23_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch23_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral) - three_e_4_idx_exch23_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch13_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch13_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch13_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral) - three_e_4_idx_exch13_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch13_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch12_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch12_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch12_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral) - three_e_4_idx_exch12_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo + tmp_aux_2(ipoint,n) = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,n) enddo enddo !$OMP END DO !$OMP END PARALLEL + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,4,mo_num)) + allocate(tmp2(n_points_final_grid,4,mo_num)) + + ! loops approach to break the O(N^4) scaling in memory + do k = 1, mo_num + do i = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & + !$OMP SHARED (mo_num, n_points_final_grid, i, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_aux_2, tmp1) + !$OMP DO + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) + tmp_loc_2 = tmp_aux_2(ipoint,n) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,i) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,i) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,i) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,n) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,n,n) * int2_grad1_u12_bimo_t(ipoint,3,k,i) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp_aux_1(1,1,1), 4*n_points_final_grid, tmp1(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(j,m) + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_2d(m,j) + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & + !$OMP SHARED (mo_num, n_points_final_grid, i, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2) + !$OMP DO + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) + tmp_loc_2 = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,i) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,n) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,n) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,n) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * int2_grad1_u12_bimo_t(ipoint,1,k,n) & + + int2_grad1_u12_bimo_t(ipoint,2,n,i) * int2_grad1_u12_bimo_t(ipoint,2,k,n) & + + int2_grad1_u12_bimo_t(ipoint,3,n,i) * int2_grad1_u12_bimo_t(ipoint,3,k,n) + + tmp2(ipoint,1,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,n) + tmp2(ipoint,2,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,n) + tmp2(ipoint,3,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,n) + tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,n) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp_aux_1(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(j,m) + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_2d(m,j) + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(j,m) + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,i,k,j) = -tmp_2d(m,j) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! i + + do j = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & + !$OMP SHARED (mo_num, n_points_final_grid, j, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2) + !$OMP DO + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,n) + tmp_loc_2 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,j) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,j,n) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,j,n) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,j,n) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,j) * int2_grad1_u12_bimo_t(ipoint,1,j,n) & + + int2_grad1_u12_bimo_t(ipoint,2,n,j) * int2_grad1_u12_bimo_t(ipoint,2,j,n) & + + int2_grad1_u12_bimo_t(ipoint,3,n,j) * int2_grad1_u12_bimo_t(ipoint,3,j,n) + + tmp2(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,k,n) + tmp2(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,k,n) + tmp2(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,k,n) + tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(i,m) + do i = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch23_bi_ort(m,j,k,i) = -tmp_2d(m,i) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! j + enddo !k + + deallocate(tmp_2d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp_aux_1) + deallocate(tmp_aux_2) + + call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch12_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_4_idx_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmk_n4.irp.f b/src/bi_ort_ints/three_body_ijmk_n4.irp.f new file mode 100644 index 00000000..e3faeff0 --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmk_n4.irp.f @@ -0,0 +1,486 @@ + +! --- + + BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_direct_bi_ort_n4 (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch13_bi_ort_n4 (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch12_bi_ort_n4 (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) + ! three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_cycle_2_bi_ort_n4(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) + ! + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort_n4 can be directly used to compute Slater rules with a + sign + ! + ! three_e_4_idx_direct_bi_ort_n4 (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki + ! three_e_4_idx_exch13_bi_ort_n4 (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm + ! + END_DOC + + implicit none + integer :: ipoint, i, j, k, l, m + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) + double precision, allocatable :: tmp_4d(:,:,:,:) + double precision, allocatable :: tmp4(:,:,:) + double precision, allocatable :: tmp5(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + + print *, ' Providing the O(N^4) three_e_4_idx_bi_ort ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2, tmp3) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + + tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) + + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1,1), 3*n_points_final_grid, tmp1(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = -tmp_4d(m,i,j,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + + deallocate(tmp2) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) - tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, tmp3(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp3) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = three_e_4_idx_direct_bi_ort_n4(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp5(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, & + !$OMP tmp5) + !$OMP DO + do i = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + allocate(tmp4(n_points_final_grid,mo_num,mo_num)) + + do m = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & + , tmp5(1,1), n_points_final_grid, tmp4(1,1,1), n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP mos_l_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do k = 1, mo_num + do j = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp4(1,1,1), n_points_final_grid, mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo + + deallocate(tmp5) + deallocate(tmp_3d) + + + + do i = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (m, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do j = 1, mo_num + do m = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & + , tmp4(1,1,1), n_points_final_grid, mos_l_in_r_array_transp(1,1), n_points_final_grid & + , 1.d0, three_e_4_idx_cycle_1_bi_ort_n4(1,1,1,i), mo_num*mo_num) + + enddo + + deallocate(tmp4) + + +! !$OMP PARALLEL DO PRIVATE(i,j,k,m) +! do i = 1, mo_num +! do k = 1, mo_num +! do j = 1, mo_num +! do m = 1, mo_num +! three_e_4_idx_exch12_bi_ort_n4 (m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) +! three_e_4_idx_cycle_2_bi_ort_n4(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) +! enddo +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO + + + call wall_time(wall1) + print *, ' wall time for O(N^4) three_e_4_idx_bi_ort', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort_n4 (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort_n4 can be directly used to compute Slater rules with a + sign + ! + ! three_e_4_idx_exch23_bi_ort_n4 (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki + ! + END_DOC + + implicit none + integer :: i, j, k, l, m, ipoint + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:) + double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:) + + print *, ' Providing the O(N^4) three_e_4_idx_exch23_bi_ort_n4 ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + + allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp6(n_points_final_grid,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5, tmp6) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) & + + int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) & + + int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l) + + tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 & + , tmp5(1,1,1), n_points_final_grid, tmp6(1,1,1), n_points_final_grid & + , 0.d0, three_e_4_idx_exch23_bi_ort_n4(1,1,1,1), mo_num*mo_num) + + deallocate(tmp5) + deallocate(tmp6) + + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch23_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch23_bi_ort_n4(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + call wall_time(wall1) + print *, ' wall time for O(N^4) three_e_4_idx_exch23_bi_ort_n4', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/bi_ort_ints/three_body_ijmk_old.irp.f b/src/bi_ort_ints/three_body_ijmk_old.irp.f new file mode 100644 index 00000000..1a67f35b --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmk_old.irp.f @@ -0,0 +1,290 @@ + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_direct_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_direct_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_direct_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral) + three_e_4_idx_direct_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_direct_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_cycle_1_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_cycle_1_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_cycle_1_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral) + three_e_4_idx_cycle_1_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_cycle_1_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! -- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_cycle_2_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_cycle_2_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_cycle_2_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral) + three_e_4_idx_cycle_2_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_cycle_2_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch23_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch23_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral) + three_e_4_idx_exch23_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch23_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch13_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch13_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch13_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral) + three_e_4_idx_exch13_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch13_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch12_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral) + three_e_4_idx_exch12_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch12_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index ae4c9bd5..c2583c30 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -1,296 +1,245 @@ - ! --- +double precision function three_e_5_idx_exch12_bi_ort(m,l,i,k,j) result(integral) + implicit none + integer, intent(in) :: m,l,j,k,i + integral = three_e_5_idx_direct_bi_ort(m,l,j,k,i) +end -BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = :: : notice that i is the RIGHT MO and k is the LEFT MO ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! END_DOC implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - three_e_5_idx_direct_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_direct_bi_ort ...' - call wall_time(wall0) + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision, allocatable :: grad_mli(:,:), orb_mat(:,:,:) + double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:) + double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:) + double precision, allocatable :: tmp_mat(:,:,:) provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + call print_memory_usage + print *, ' Providing the three_e_5_idx_bi_ort ...' + call wall_time(wall0) + + three_e_5_idx_direct_bi_ort (:,:,:,:,:) = 0.d0 + three_e_5_idx_cycle_1_bi_ort(:,:,:,:,:) = 0.d0 + three_e_5_idx_cycle_2_bi_ort(:,:,:,:,:) = 0.d0 + three_e_5_idx_exch23_bi_ort (:,:,:,:,:) = 0.d0 + three_e_5_idx_exch13_bi_ort (:,:,:,:,:) = 0.d0 + + call print_memory_usage + + allocate(tmp_mat(mo_num,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + !$OMP PARALLEL DO PRIVATE (i,l,ipoint) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + orb_mat(ipoint,l,i) = final_weight_at_r_vector(ipoint) & + * mos_l_in_r_array_transp(ipoint,l) & + * mos_r_in_r_array_transp(ipoint,i) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 + tmp_mat = 0.d0 + call print_memory_usage -END_PROVIDER + do m = 1, mo_num -! --- + allocate(grad_mli(n_points_final_grid,mo_num)) -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + do i=1,mo_num + !$OMP PARALLEL DO PRIVATE (l,ipoint) + do l=1,mo_num + do ipoint=1, n_points_final_grid - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC + grad_mli(ipoint,l) = & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) +& + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) +& + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_cycle_1_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo enddo enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 + call dgemm('T','N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0,& + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_cycle_2_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num + !$OMP PARALLEL PRIVATE(j,k,l) + !$OMP DO + do k = 1, mo_num + do j = 1, mo_num do l = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k) enddo enddo enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_exch23_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num + !$OMP END DO + !$OMP DO do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral + do k = 1, mo_num + three_e_5_idx_direct_bi_ort(m,k,i,l,j) = three_e_5_idx_direct_bi_ort(m,k,i,l,j) - tmp_mat(l,j,k) enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 + deallocate(grad_mli) -END_PROVIDER + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) -! --- + !$OMP PARALLEL DO PRIVATE (i,l,ipoint) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_exch13_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_exch13_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 + allocate(rm_grad_ik(n_points_final_grid,3,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num)) -END_PROVIDER + do i=1,mo_num + !$OMP PARALLEL DO PRIVATE (l,ipoint) + do l=1,mo_num + do ipoint=1, n_points_final_grid -! --- + rm_grad_ik(ipoint,1,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + rk_grad_im(ipoint,1,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC + enddo + enddo + !$OMP END PARALLEL DO - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lm_grad_ik, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - three_e_5_idx_exch12_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_exch12_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = -1.d0 * integral + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k) enddo enddo enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) = three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) - tmp_mat(l,k,j) + three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) = three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) - tmp_mat(k,j,l) + three_e_5_idx_exch23_bi_ort (m,i,j,k,l) = three_e_5_idx_exch23_bi_ort (m,i,j,k,l) - tmp_mat(k,l,j) + three_e_5_idx_exch13_bi_ort (m,l,j,i,k) = three_e_5_idx_exch13_bi_ort (m,l,j,i,k) - tmp_mat(l,j,k) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lk_grad_mi, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l) + three_e_5_idx_cycle_2_bi_ort(m,l,i,k,j) = three_e_5_idx_cycle_2_bi_ort(m,l,i,k,j) - tmp_mat(l,j,k) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k) + three_e_5_idx_exch13_bi_ort (m,l,i,k,j) = three_e_5_idx_exch13_bi_ort (m,l,i,k,j) - tmp_mat(k,j,l) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lk_grad_mi, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) = three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) - tmp_mat(l,j,k) + three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) = three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) - tmp_mat(k,l,j) + three_e_5_idx_exch23_bi_ort (m,i,j,k,l) = three_e_5_idx_exch23_bi_ort (m,i,j,k,l) - tmp_mat(k,j,l) + three_e_5_idx_exch13_bi_ort (m,l,j,i,k) = three_e_5_idx_exch13_bi_ort (m,l,j,i,k) - tmp_mat(l,k,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + enddo + + deallocate(rm_grad_ik) + deallocate(rk_grad_im) + deallocate(lk_grad_mi) + deallocate(lm_grad_ik) + enddo - !$OMP END DO - !$OMP END PARALLEL + + deallocate(tmp_mat) + + deallocate(orb_mat) call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0 + call print_memory_usage() -END_PROVIDER - -! --- +END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmkl_old.irp.f b/src/bi_ort_ints/three_body_ijmkl_old.irp.f new file mode 100644 index 00000000..105cd179 --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmkl_old.irp.f @@ -0,0 +1,295 @@ + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_direct_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_direct_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) + three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_direct_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_cycle_1_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_cycle_1_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) + three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_1_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_cycle_2_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_cycle_2_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + do l = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) + three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_2_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_exch23_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch23_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) + three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch23_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_exch13_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch13_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) + three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch13_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + three_e_5_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) + three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch12_bi_ort_old', wall1 - wall0 + +END_PROVIDER + diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index e8b56307..d8145c3e 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -4,7 +4,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC -! matrix element of the -L three-body operator +! matrix element of the -L three-body operator ! ! notice the -1 sign: in this way three_body_ints_bi_ort can be directly used to compute Slater rules :) END_DOC @@ -12,7 +12,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n implicit none integer :: i, j, k, l, m, n double precision :: integral, wall1, wall0 - character*(128) :: name_file + character*(128) :: name_file three_body_ints_bi_ort = 0.d0 print *, ' Providing the three_body_ints_bi_ort ...' @@ -27,12 +27,12 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n ! call read_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file) ! else - !provide x_W_ki_bi_ortho_erf_rk + !provide x_W_ki_bi_ortho_erf_rk provide mos_r_in_r_array_transp mos_l_in_r_array_transp !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,n,integral) & + !$OMP PRIVATE (i,j,k,l,m,n,integral) & !$OMP SHARED (mo_num,three_body_ints_bi_ort) !$OMP DO SCHEDULE (dynamic) do i = 1, mo_num @@ -43,7 +43,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n do n = 1, mo_num call give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) - three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral + three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral enddo enddo enddo @@ -57,13 +57,14 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n call wall_time(wall1) print *, ' wall time for three_body_ints_bi_ort', wall1 - wall0 + call print_memory_usage() ! if(write_three_body_ints_bi_ort)then ! print*,'Writing three_body_ints_bi_ort on disk ...' ! call write_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file) ! call ezfio_set_three_body_ints_bi_ort_io_three_body_ints_bi_ort("Read") ! endif -END_PROVIDER +END_PROVIDER ! --- @@ -71,7 +72,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS ! END_DOC @@ -79,28 +80,31 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integer, intent(in) :: n, l, k, m, j, i double precision, intent(out) :: integral integer :: ipoint - double precision :: weight + double precision :: weight, tmp PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_t integral = 0.d0 + ! (n, l, k, m, j, i) do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + tmp = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) & + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) ) - integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + + tmp = tmp + mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) - integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & + + tmp = tmp + mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) + integral = integral + tmp * final_weight_at_r_vector(ipoint) enddo end subroutine give_integrals_3_body_bi_ort @@ -111,7 +115,7 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS ! END_DOC @@ -123,13 +127,13 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) integral = 0.d0 do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) + weight = final_weight_at_r_vector(ipoint) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & +! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & ! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & +! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & ! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i) ) @@ -138,11 +142,11 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & +! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & ! * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,l,j,ipoint) & ! + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,l,j,ipoint) & ! + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,l,j,ipoint) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & +! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & ! * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint) & ! + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint) & ! + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint) ) @@ -151,13 +155,13 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) ! + int2_grad1_u12_bimo(2,l,j,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint) & ! + int2_grad1_u12_bimo(3,l,j,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint) ) -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(l,j,1,ipoint) & + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(l,j,2,ipoint) & + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(l,j,3,ipoint) ) - integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(k,i,1,ipoint) & + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(k,i,2,ipoint) & + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(k,i,3,ipoint) ) @@ -176,7 +180,7 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS ! END_DOC @@ -188,13 +192,13 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral) integral = 0.d0 do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) + weight = final_weight_at_r_vector(ipoint) - integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & + integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j) & + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j) & + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) ) - integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & + integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i) & + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i) & + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) ) diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f index 42130575..25572854 100644 --- a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f +++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f @@ -46,7 +46,7 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, mos_r_in_r_array_transp(i,j) = mos_r_in_r_array(j,i) enddo enddo - + END_PROVIDER ! --- @@ -116,7 +116,7 @@ end subroutine give_all_mos_l_at_r ! --- -BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo_num)] +BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,mo_num)] BEGIN_DOC ! mos_l_in_r_array_transp(i,j) = value of the jth mo on the ith grid point @@ -130,7 +130,7 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo mos_l_in_r_array_transp(i,j) = mos_l_in_r_array(j,i) enddo enddo - + END_PROVIDER ! --- diff --git a/src/ccsd/EZFIO.cfg b/src/ccsd/EZFIO.cfg new file mode 100644 index 00000000..328cd981 --- /dev/null +++ b/src/ccsd/EZFIO.cfg @@ -0,0 +1,11 @@ +[energy] +type: double precision +doc: CCSD energy +interface: ezfio + +[energy_t] +type: double precision +doc: CCSD(T) energy +interface: ezfio + + diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index b63375cf..40c57188 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1,5 +1,5 @@ subroutine run_ccsd_space_orb - + implicit none integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d @@ -12,34 +12,30 @@ subroutine run_ccsd_space_orb double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) double precision, allocatable :: t1(:,:), r1(:,:) double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) - + double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) integer(bit_kind) :: det(N_int,2) - integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) - - PROVIDE mo_two_e_integrals_in_map + integer :: nO, nV, nOa, nVa + +! PROVIDE mo_two_e_integrals_in_map det = psi_det(:,:,cc_ref) print*,'Reference determinant:' call print_det(det,N_int) - ! Extract number of occ/vir alpha/beta spin orbitals - !call extract_n_spin(det,n_spin) - nOa = cc_nOa !n_spin(1) - nOb = cc_nOb !n_spin(2) - nVa = cc_nVa !n_spin(3) - nVb = cc_nVb !n_spin(4) + nOa = cc_nOa + nVa = cc_nVa ! Check that the reference is a closed shell determinant if (cc_ref_is_open_shell) then call abort endif - + ! Number of occ/vir spatial orb nO = nOa nV = nVa - + allocate(list_occ(nO),list_vir(nV)) list_occ = cc_list_occ list_vir = cc_list_vir @@ -47,7 +43,7 @@ subroutine run_ccsd_space_orb !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) !print*,'occ',list_occ !print*,'vir',list_vir - + allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) allocate(tau(nO,nO,nV,nV)) allocate(t1(nO,nV), r1(nO,nV)) @@ -76,7 +72,7 @@ subroutine run_ccsd_space_orb print*,'Det energy', uncorr_energy call ccsd_energy_space(nO,nV,tau,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy - + nb_iter = 0 not_converged = .True. max_r1 = 0d0 @@ -86,9 +82,9 @@ subroutine run_ccsd_space_orb write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' write(*,'(A77)') ' -----------------------------------------------------------------------------' call wall_time(ta) - + do while (not_converged) - + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) call compute_H_vv(nO,nV,t1,t2,tau,H_vv) call compute_H_vo(nO,nV,t1,t2,H_vo) @@ -97,7 +93,7 @@ subroutine run_ccsd_space_orb call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) max_r = max(max_r1,max_r2) - + ! Update if (cc_update_method == 'diis') then !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) @@ -109,7 +105,7 @@ subroutine run_ccsd_space_orb call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) else - print*,'Unkonw cc_method_method: '//cc_update_method + print*,'Unkown cc_method_method: '//cc_update_method endif call update_tau_space(nO,nV,t1,t2,tau) @@ -122,7 +118,7 @@ subroutine run_ccsd_space_orb if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then not_converged = .False. endif - + enddo write(*,'(A77)') ' -----------------------------------------------------------------------------' call wall_time(tb) @@ -139,20 +135,24 @@ subroutine run_ccsd_space_orb write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r print*,'' - call write_t1(nO,nV,t1) - call write_t2(nO,nV,t2) - + if (write_amplitudes) then + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + call ezfio_set_utils_cc_io_amplitudes('Read') + endif + ! Deallocation if (cc_update_method == 'diis') then deallocate(all_err,all_t) endif deallocate(H_vv,H_oo,H_vo,r1,r2,tau) - + ! CCSD(T) double precision :: e_t + e_t = 0.d0 - if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then + if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then ! Dumb way !call wall_time(ta) @@ -169,8 +169,13 @@ subroutine run_ccsd_space_orb ! New print*,'Computing (T) correction...' call wall_time(ta) - call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & +! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & +! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + + e_t = uncorr_energy + energy ! For print in next call + call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call wall_time(tb) print*,'Time: ',tb-ta, ' s' @@ -180,9 +185,8 @@ subroutine run_ccsd_space_orb write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' print*,'' endif - - print*,'Reference determinant:' - call print_det(det,N_int) + + call save_energy(uncorr_energy + energy, e_t) deallocate(t1,t2) @@ -211,8 +215,8 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) !$omp default(none) e = 0d0 !$omp do - do i = 1, nO - do a = 1, nV + do a = 1, nV + do i = 1, nO e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) enddo enddo @@ -232,7 +236,7 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) energy = energy + e !$omp end critical !$omp end parallel - + end ! Tau @@ -250,12 +254,12 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) ! internal integer :: i,j,a,b - + !$OMP PARALLEL & !$OMP SHARED(nO,nV,tau,t2,t1) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO @@ -267,7 +271,7 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) enddo !$OMP END DO !$OMP END PARALLEL - + end ! R1 @@ -283,7 +287,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! out double precision, intent(out) :: r1(nO,nV), max_r1 - + ! internal integer :: u,i,j,beta,a,b @@ -304,7 +308,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO) ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV) ! cc_space_f_vo(a,i) * t1(u,a) -> X1(nO,nO), O(nO*nO*nV) - ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) + ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO @@ -324,7 +328,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call dgemm('T','N', nO, nV, nO, & 1d0, X_oo, size(X_oo,2), & t1 , size(t1,1), & - 1d0, r1 , size(r1,1)) + 1d0, r1 , size(r1,1)) deallocate(X_oo) ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) @@ -373,7 +377,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,X_voov,t2,t1) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do i = 1, nO @@ -385,16 +389,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do !$omp end parallel - + call dgemv('T', nV*nO, nO*nV, & 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & H_vo , 1, & 1d0, r1 , 1) - + deallocate(X_voov) ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) - ! <=> + ! <=> ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta) !do beta = 1, nV ! do u = 1, nO @@ -412,7 +416,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nv @@ -429,17 +433,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & t1 , 1, & 1d0, r1 , 1) - + deallocate(X_ovov) - ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) - ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO ! do a = 1, nV ! do b = 1, nV - ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) ! enddo ! enddo ! enddo @@ -452,24 +456,24 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) enddo enddo enddo enddo !$omp end do nowait - !$omp do collapse(3) - do i = 1, nO - do b = 1, nV - do a = 1, nV - do u = 1, nO - T_vvoo(a,b,i,u) = tau(i,u,a,b) + !$omp do + do u = 1, nO + do i = 1, nO + do b = 1, nV + do a = 1, nV + T_vvoo(a,b,i,u) = tau(i,u,a,b) enddo enddo enddo @@ -481,17 +485,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & 1d0, r1 , size(r1,1)) - + deallocate(W_vvov,T_vvoo) - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) - ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO ! do j = 1, nO ! do a = 1, nV - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) ! enddo ! enddo ! enddo @@ -504,8 +508,8 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & !$omp private(u,a,i,j) & !$omp default(none) - !$omp do collapse(3) do u = 1, nO + !$omp do do a = 1, nV do j = 1, nO do i = 1, nO @@ -513,23 +517,21 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('T','N', nO, nV, nO*nO*nV, & -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & tau , size(tau,1) * size(tau,2) * size(tau,3), & 1d0, r1 , size(r1,1)) - + deallocate(W_oovo) max_r1 = 0d0 do a = 1, nV do i = 1, nO - if (dabs(r1(i,a)) > max_r1) then - max_r1 = dabs(r1(i,a)) - endif + max_r1 = max(dabs(r1(i,a)), max_r1) enddo enddo @@ -538,7 +540,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,r1) & !$omp private(a,i) & !$omp default(none) - !$omp do + !$omp do do a = 1, nV do i = 1, nO r1(i,a) = -r1(i,a) @@ -546,7 +548,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do !$omp end parallel - + end ! H_oo @@ -578,7 +580,7 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) ! enddo ! enddo ! enddo - ! + ! ! enddo !enddo @@ -601,8 +603,8 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) call dgemm('N','T', nO, nO, nO*nV*nV, & 1d0, tau , size(tau,1), & cc_space_w_oovv, size(cc_space_w_oovv,1), & - 1d0, H_oo , size(H_oo,1)) - + 1d0, H_oo , size(H_oo,1)) + end ! H_vv @@ -633,7 +635,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! enddo ! enddo ! enddo - ! + ! ! enddo !enddo @@ -656,13 +658,13 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) - - !$omp do collapse(3) + + !$omp do do beta = 1, nV do j = 1, nO do i = 1, nO do b = 1, nV - tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) enddo enddo enddo @@ -676,7 +678,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) 1d0, H_vv , size(H_vv,1)) deallocate(tmp_tau) - + end ! H_vo @@ -704,7 +706,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! enddo ! enddo - ! + ! ! enddo !enddo @@ -727,7 +729,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -746,7 +748,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) 1d0, H_vo, 1) deallocate(w) - + end ! R2 @@ -765,13 +767,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! internal double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) - double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:), B1_gam(:,:,:) integer :: u,v,i,j,beta,gam,a,b allocate(g_occ(nO,nO), g_vir(nV,nV)) allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) allocate(A1(nO,nO,nO,nO)) - + call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) call compute_A1(nO,nV,t1,t2,tau,A1) @@ -787,7 +789,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,cc_space_v_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -835,13 +837,18 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! enddo !enddo - allocate(B1(nV,nV,nV,nV)) - call compute_B1(nO,nV,t1,t2,B1) - call dgemm('N','N',nO*nO,nV*nV,nV*nV, & - 1d0, tau, size(tau,1) * size(tau,2), & - B1 , size(B1,1) * size(B1,2), & - 1d0, r2, size(r2,1) * size(r2,2)) - deallocate(B1) +! allocate(B1(nV,nV,nV,nV)) +! call compute_B1(nO,nV,t1,t2,B1) + allocate(B1_gam(nV,nV,nV)) + do gam=1,nV + call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam) + call dgemm('N','N',nO*nO,nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1_gam , size(B1_gam,1) * size(B1_gam,2), & + 1d0, r2(1,1,1,gam), size(r2,1) * size(r2,2)) + enddo + deallocate(B1_gam) + !do gam = 1, nV ! do beta = 1, nV @@ -863,7 +870,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,t2,X_oovv) & !$omp private(u,v,gam,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do a = 1, nV do gam = 1, nV do v = 1, nO @@ -875,7 +882,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nO*nV,nV,nV, & 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & g_vir, size(g_vir,1), & @@ -885,7 +892,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -921,7 +928,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -957,7 +964,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & !$omp private(u,a,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do u = 1, nO @@ -979,7 +986,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -991,7 +998,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1009,13 +1016,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !enddo double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) - + !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & !$omp private(u,v,gam,i) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do u = 1, nO do a = 1, nV @@ -1023,8 +1030,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','N',nV*nO*nV,nV,nO, & @@ -1036,12 +1043,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, t1, size(t1,1), & Y_vovv, size(Y_vovv,1), & 0d0, X_oovv, size(X_oovv,1)) - + !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1055,7 +1062,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end parallel deallocate(X_vovo,Y_vovv) - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1079,7 +1086,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1092,7 +1099,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1111,13 +1118,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, allocatable :: Y_oovo(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) - + !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & !$omp private(a,v,gam,i) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1125,8 +1132,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','N',nO,nO*nV*nO,nV, & @@ -1138,12 +1145,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & t1 , size(t1,1), & 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) - + !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1155,7 +1162,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + deallocate(X_vovo,Y_oovo) !do gam = 1, nV @@ -1182,19 +1189,19 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do a = 1, nV do beta = 1, nV do u = 1, nO - X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) + X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) + !$omp do do gam = 1, nV do v = 1, nO do i = 1, nO @@ -1206,17 +1213,17 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_voov, size(Y_voov,1) * size(Y_voov,2), & 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1228,9 +1235,9 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + deallocate(X_ovvo,Y_voov) - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1252,7 +1259,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nV @@ -1264,7 +1271,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1281,12 +1288,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1298,7 +1305,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1319,7 +1326,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do a = 1, nV do i = 1, nO do gam = 1, nV @@ -1331,7 +1338,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do beta = 1, nV do v = 1, nO do a = 1, nV @@ -1343,17 +1350,17 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1367,13 +1374,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end parallel deallocate(X_ovov,Y_ovov,Z_ovov) - + ! Change the sign for consistency with the code in spin orbitals !$omp parallel & !$omp shared(nO,nV,r2) & !$omp private(i,j,a,b) & !$omp default(none) - !$omp do collapse(3) + !$omp do do b = 1, nV do a = 1, nV do j = 1, nO @@ -1385,22 +1392,20 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + max_r2 = 0d0 do b = 1, nV do a = 1, nV do j = 1, nO do i = 1, nO - if (dabs(r2(i,j,a,b)) > max_r2) then - max_r2 = dabs(r2(i,j,a,b)) - endif + max_r2 = max(r2(i,j,a,b), max_r2) enddo enddo enddo enddo deallocate(g_occ,g_vir,J1,K1,A1) - + end ! A1 @@ -1429,12 +1434,12 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) ! A1(u,v,i,j) = A1(u,v,i,j) & ! + cc_space_v_ovoo(u,a,i,j) * t1(v,a) & ! + cc_space_v_vooo(a,v,i,j) * t1(u,a) - ! + ! ! do b = 1, nV ! A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) - ! enddo + ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo @@ -1442,13 +1447,13 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) - + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) !$omp parallel & !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & !$omp private(u,v,i,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO @@ -1462,7 +1467,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do u = 1, nO @@ -1484,7 +1489,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) !$omp shared(nO,nV,A1,Y_oooo) & !$omp private(u,v,i,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO @@ -1496,7 +1501,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) enddo !$omp end do !$omp end parallel - + deallocate(X_vooo,Y_oooo) ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) @@ -1510,11 +1515,95 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) 1d0, tau , size(tau,1) * size(tau,2), & cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & 1d0, A1 , size(A1,1) * size(A1,2)) - + end ! B1 +subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) + + implicit none + + integer, intent(in) :: nO,nV,gam + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: B1(nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + +! do beta = 1, nV +! do b = 1, nV +! do a = 1, nV +! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) +! +! do i = 1, nO +! B1(a,b,beta) = B1(a,b,beta) & +! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & +! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) +! enddo +! +! enddo +! enddo +! enddo + + double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) + allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) +! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + !$omp parallel & + !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & + !$omp private(a,b,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) + enddo + enddo + enddo + !$omp end do nowait + do i = 1, nO + !$omp do + do b = 1, nV + do a = 1, nV + X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + +! ! B1(a,b,beta) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + call dgemm('N','N', nV*nV*nV, 1, nO, & + -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & + t1(1,gam), size(t1,1), & + 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) + + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + call dgemm('N','N', nV*nV, nV, nO, & + -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2), & + t1 , size(t1,1), & + 0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2)) + + !$omp parallel & + !$omp shared(nV,B1,Y_vvvv,gam) & + !$omp private(a,b,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta) = B1(a,b,beta) + Y_vvvv(a,b,beta) + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vvvo,Y_vvvv) + +end + subroutine compute_B1(nO,nV,t1,t2,B1) implicit none @@ -1532,28 +1621,28 @@ subroutine compute_B1(nO,nV,t1,t2,B1) ! do beta = 1, nV ! do b = 1, nV ! do a = 1, nV - ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) ! do i = 1, nO ! B1(a,b,beta,gam) = B1(a,b,beta,gam) & ! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & ! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) ! enddo - ! + ! ! enddo ! enddo ! enddo !enddo - + double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:) allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV)) - ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1564,8 +1653,8 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo enddo !$omp end do nowait - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do b = 1, nV do a = 1, nV @@ -1573,17 +1662,17 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel - + ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & call dgemm('N','N', nV*nV*nV, nV, nO, & -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & t1 , size(t1,1), & 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) - + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) call dgemm('N','N', nV*nV*nV, nV, nO, & -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), & @@ -1594,7 +1683,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp shared(nV,B1,Y_vvvv) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1606,9 +1695,9 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo !$omp end do !$omp end parallel - + deallocate(X_vvvo,Y_vvvv) - + end ! g_occ @@ -1629,14 +1718,14 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) !do i = 1, nO ! do u = 1, nO ! g_occ(u,i) = H_oo(u,i) - ! + ! ! do a = 1, nV ! g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a) - ! + ! ! do j = 1, nO ! g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) ! enddo - ! + ! ! enddo ! enddo !enddo @@ -1657,8 +1746,8 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo enddo !$omp end do - - !$omp do collapse(1) + + !$omp do do i = 1, nO do j = 1, nO do a = 1, nV @@ -1670,7 +1759,7 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo !$omp end do !$omp end parallel - + end ! g_vir @@ -1691,23 +1780,23 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) !do beta = 1, nV ! do a = 1, nV ! g_vir(a,beta) = H_vv(a,beta) - ! + ! ! do i = 1, nO ! g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta) - ! + ! ! do b = 1, nV ! g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) ! enddo - ! + ! ! enddo ! enddo !enddo - + call dgemm('N','N',nV,nV,nO, & -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & t1 , size(t1,1), & 0d0, g_vir, size(g_vir,1)) - + !$omp parallel & !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & !$omp private(i,b,a,beta) & @@ -1720,7 +1809,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do - !$omp do collapse(1) + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV @@ -1732,7 +1821,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do !$omp end parallel - + end ! J1 @@ -1765,7 +1854,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) ! do b = 1, nV ! J1(u,a,beta,i) = J1(u,a,beta,i) & - ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) + ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) ! enddo ! do j = 1, nO @@ -1775,7 +1864,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) ! + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo @@ -1783,13 +1872,13 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) - + !$omp parallel & !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & !$omp private(i,j,a,u,beta) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1797,10 +1886,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do a = 1, nV @@ -1812,7 +1901,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nV*nO,nV,nO, & -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & t1 , size(t1,1), & @@ -1822,8 +1911,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Y_ovov) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1831,8 +1920,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel deallocate(X_ovoo) @@ -1849,7 +1938,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & !$omp private(i,beta,a,u,b,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do beta = 1, nV @@ -1861,7 +1950,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -1886,8 +1975,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1895,12 +1984,12 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do i = 1, nO do a = 1, nV @@ -1908,11 +1997,11 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - - !$omp do collapse(3) + do j = 1, nO + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -1920,10 +2009,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel - + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & @@ -1933,8 +2022,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Z_ovvo) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1942,12 +2031,12 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel - deallocate(X_ovvo,Z_ovvo,Y_ovov) - + deallocate(X_ovvo,Z_ovvo,Y_ovov) + end ! K1 @@ -1982,7 +2071,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) ! do b = 1, nV ! K1(u,a,i,beta) = K1(u,a,i,beta) & - ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) + ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) ! enddo ! do j = 1, nO @@ -1991,19 +2080,19 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) ! - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo !enddo allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) - + !$omp parallel & !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV @@ -2015,8 +2104,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo !$omp end do nowait - !$omp do collapse(3) do i = 1, nO + !$omp do do a = 1, nV do j = 1, nO do b = 1, nV @@ -2024,11 +2113,11 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -2036,8 +2125,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo enddo enddo + !$omp end do enddo - !$omp end do !$omp end parallel call dgemm('N','N',nO*nV*nO,nV,nO, & @@ -2060,7 +2149,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp shared(nO,nV,K1,Z) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV @@ -2074,5 +2163,5 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp end parallel deallocate(X,Y,Z) - + end diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index 23e2cef1..a267cc45 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -269,8 +269,11 @@ subroutine run_ccsd_spin_orb write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r print*,'' - call write_t1(nO,nV,t1) - call write_t2(nO,nV,t2) + if (write_amplitudes) then + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + call ezfio_set_utils_cc_io_amplitudes('Read') + endif ! Deallocate if (cc_update_method == 'diis') then @@ -284,8 +287,9 @@ subroutine run_ccsd_spin_orb deallocate(v_ovoo,v_oovo) deallocate(v_ovvo,v_ovov,v_oovv) + double precision :: t_corr + t_corr = 0.d0 if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then - double precision :: t_corr print*,'CCSD(T) calculation...' call wall_time(ta) !allocate(v_vvvo(nV,nV,nV,nO)) @@ -307,8 +311,8 @@ subroutine run_ccsd_spin_orb write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha' print*,'' endif - print*,'Reference determinant:' - call print_det(det,N_int) + + call save_energy(uncorr_energy + energy, t_corr) deallocate(f_oo,f_ov,f_vv,f_o,f_v) deallocate(v_ooov,v_vvoo,t1,t2) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 3b762a06..1aab6bd7 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -10,51 +10,43 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) double precision, intent(out) :: energy - double precision, allocatable :: W(:,:,:,:,:,:) - double precision, allocatable :: V(:,:,:,:,:,:) - double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) - double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) - double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) - double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) - double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb, delta, delta_abc + double precision :: e,ta,tb - !allocate(W(nV,nV,nV,nO,nO,nO)) - !allocate(V(nV,nV,nV,nO,nO,nO)) - allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) - allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) - allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) - allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + call set_multiple_levels_omp(.False.) + + allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) + allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) - ! Temporary arrays !$OMP PARALLEL & - !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & !$OMP PRIVATE(a,b,c,d,i,j,k,l) & !$OMP DEFAULT(NONE) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & - !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO collapse(3) - do i = 1, nO - do a = 1, nV - do b = 1, nV + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO do d = 1, nV - X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) enddo enddo enddo enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do j = 1, nO - do k = 1, nO - do c = 1, nV + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO do d = 1, nV - T_vvoo(d,c,k,j) = t2(k,j,c,d) + T_voov(d,k,j,c) = t2(k,j,c,d) enddo enddo enddo @@ -62,191 +54,399 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END DO nowait !v_vooo(c,j,k,l) * t2(i,l,a,b) & - !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO collapse(3) - do k = 1, nO - do j = 1, nO - do c = 1, nV - do l = 1, nO - X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) - enddo - enddo - enddo - enddo - !$OMP END DO nowait - - !$OMP DO collapse(3) - do i = 1, nO - do b = 1, nV - do a = 1, nV - do l = 1, nO - T_ovvo(l,a,b,i) = t2(i,l,a,b) - enddo - enddo - enddo - enddo - !$OMP END DO nowait - - !v_vvoo(b,c,j,k) * t1(i,a) & - !X_vvoo(b,c,k,j) * T1_vo(a,i) & - - !$OMP DO collapse(3) - do j = 1, nO + !$OMP DO + do c = 1, nV do k = 1, nO - do c = 1, nV - do b = 1, nV - X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + do j = 1, nO + do l = 1, nO + X_ooov(l,j,k,c) = v_vooo(c,j,k,l) enddo enddo enddo enddo !$OMP END DO nowait - !$OMP DO collapse(1) - do i = 1, nO + !$OMP DO + do b = 1, nV do a = 1, nV - T_vo(a,i) = t1(i,a) + do i = 1, nO + do l = 1, nO + T_oovv(l,i,a,b) = t2(i,l,a,b) + enddo + enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO nowait - call wall_time(ta) - energy = 0d0 + !X_oovv(j,k,b,c) * T1_vo(a,i) & + + !$OMP DO do c = 1, nV do b = 1, nV - do a = 1, nV - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) - call form_w_abc(nO,nV,b,c,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_bca) - call form_w_abc(nO,nV,c,a,b,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cab) - call form_w_abc(nO,nV,c,b,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cba) - - call form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W_abc,V_abc) - call form_v_abc(nO,nV,c,b,a,T_vo,X_vvoo,W_cba,V_cba) - !$OMP PARALLEL & - !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& - !$OMP PRIVATE(i,j,k,e,delta) & - !$OMP DEFAULT(NONE) - e = 0d0 - !$OMP DO - do i = 1, nO - do j = 1, nO - do k = 1, nO - delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) - e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& - * (V_abc(i,j,k) - V_cba(i,j,k)) * delta - enddo - enddo + do k = 1, nO + do j = 1, nO + X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - energy = energy + e - !$OMP END CRITICAL - !$OMP END PARALLEL enddo enddo - call wall_time(tb) - write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' enddo + !$OMP END DO nowait - energy = energy / 3d0 + !$OMP END PARALLEL - deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) - !deallocate(V,W) + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc + + !$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED) + e = 0d0 + !$OMP DO SCHEDULE(dynamic) + do a = 1, nV + do b = a+1, nV + do c = b+1, nV + e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + enddo + + e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + + e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + + !$OMP END PARALLEL + + energy = energy / 3.d0 + + deallocate(X_vovv,X_ooov,T_voov,T_oovv) end -subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) +double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b,c + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + + double precision :: delta, delta_abc + integer :: i,j,k + + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + delta_abc = f_v(a) + f_v(b) + f_v(c) + e = 0.d0 + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & + W_bca(i,j,k) - W_bac(i,j,k) + & + W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) +& + (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & + W_cba(i,j,k) - W_cab(i,j,k) + & + W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) +& + (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & + W_acb(i,j,k) - W_abc(i,j,k) + & + W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + enddo + enddo + enddo + + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + +end + +double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + + double precision :: delta, delta_abc + integer :: i,j,k + + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + + call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + delta_abc = f_v(a) + f_v(b) + f_v(a) + e = 0.d0 + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + + enddo + enddo + enddo + + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + +end + +subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) implicit none integer, intent(in) :: nO,nV,a,b,c - !double precision, intent(in) :: t2(nO,nO,nV,nV) - double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) - double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision, intent(out) :: W_abc(nO,nO,nO) + double precision, intent(out) :: W_cba(nO,nO,nO) + double precision, intent(out) :: W_bca(nO,nO,nO) + double precision, intent(out) :: W_cab(nO,nO,nO) + double precision, intent(out) :: W_bac(nO,nO,nO) + double precision, intent(out) :: W_acb(nO,nO,nO) integer :: l,i,j,k,d + double precision, allocatable, dimension(:,:,:,:) :: W_ikj + double precision, allocatable :: X(:,:,:,:) + allocate(W_ikj(nO,nO,nO,6)) + allocate(X(nV,nO,nO,3)) - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) & - !$OMP PRIVATE(i,j,k,d,l) & - !$OMP DEFAULT(NONE) - - !$OMP DO collapse(3) - do k = 1, nO - do j = 1, nO - do i = 1, nO - W_abc(i,j,k) = 0.d0 - - do d = 1, nV - W_abc(i,j,k) = W_abc(i,j,k) & - + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & - + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & - + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & - + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & - + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & - + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) - - enddo - - do l = 1, nO - W_abc(i,j,k) = W_abc(i,j,k) & - - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & - - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj - - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik - - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij - - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj - - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik - enddo - + do k=1,nO + do i=1,nO + do d=1,nV + X(d,i,k,1) = T_voov(d,k,i,a) + X(d,i,k,2) = T_voov(d,k,i,b) + X(d,i,k,3) = T_voov(d,k,i,c) enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + +! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk + + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,c), nV, 0.d0, W_acb, nO) + +! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_acb, nO*nO) +! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 1.d0, W_acb, nO*nO) + +! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj + + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,3), nV, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, X(1,1,1,3), nV, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,1), nV, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,1), nV, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, X(1,1,1,2), nV, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, X(1,1,1,2), nV, 1.d0, W_acb, nO) + +! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 0.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 0.d0, W_ikj(1,1,1,6), nO*nO) + +! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_ikj(1,1,1,6), nO*nO) + + deallocate(X) + + allocate(X(nO,nO,nO,3)) + + do k=1,nO + do j=1,nO + do l=1,nO + X(l,j,k,1) = X_ooov(l,k,j,a) + X(l,j,k,2) = X_ooov(l,k,j,b) + X(l,j,k,3) = X_ooov(l,k,j,c) + enddo + enddo + enddo + + +! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X_ooov(1,1,1,c), nO, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X_ooov(1,1,1,a), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X_ooov(1,1,1,a), nO, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X_ooov(1,1,1,b), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X_ooov(1,1,1,b), nO, 1.d0, W_acb, nO) + +! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,2), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X(1,1,1,1), nO, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,2), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X(1,1,1,3), nO, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,1), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X(1,1,1,3), nO, 1.d0, W_acb, nO) + +! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_acb, nO*nO) + +! - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_acb, nO*nO) + +! - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,6), nO*nO) + +! - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,6), nO*nO) + + do k=1,nO + do j=1,nO + do i=1,nO + W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j,1) + W_bac(i,j,k) = W_bac(i,j,k) + W_ikj(i,k,j,2) + W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,3) + W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,4) + W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,5) + W_acb(i,j,k) = W_acb(i,j,k) + W_ikj(i,k,j,6) + enddo + enddo + enddo + + deallocate(X,W_ikj) end ! V_abc -subroutine form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W,V) +subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) implicit none integer, intent(in) :: nO,nV,a,b,c - !double precision, intent(in) :: t1(nO,nV) - double precision, intent(in) :: T_vo(nV,nO) - double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) - double precision, intent(in) :: W(nO,nO,nO) - double precision, intent(out) :: V(nO,nO,nO) + double precision, intent(in) :: T_ov(nO,nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(out) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(out) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) integer :: i,j,k - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_vo,X_vvoo,W,V) & - !$OMP PRIVATE(i,j,k) & - !$OMP DEFAULT(NONE) - !$OMP DO collapse(2) do k = 1, nO do j = 1, nO do i = 1, nO - !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & - V(i,j,k) = W(i,j,k) & - + X_vvoo(b,c,k,j) * T_vo(a,i) & - + X_vvoo(a,c,k,i) * T_vo(b,j) & - + X_vvoo(a,b,j,i) * T_vo(c,k) + V_abc(i,j,k) = W_abc(i,j,k) & + + X_oovv(j,k,b,c) * T_ov(i,a) & + + X_oovv(i,k,a,c) * T_ov(j,b) & + + X_oovv(i,j,a,b) * T_ov(k,c) + + V_cba(i,j,k) = W_cba(i,j,k) & + + X_oovv(j,k,b,a) * T_ov(i,c) & + + X_oovv(i,k,c,a) * T_ov(j,b) & + + X_oovv(i,j,c,b) * T_ov(k,a) + + V_bca(i,j,k) = W_bca(i,j,k) & + + X_oovv(j,k,c,a) * T_ov(i,b) & + + X_oovv(i,k,b,a) * T_ov(j,c) & + + X_oovv(i,j,b,c) * T_ov(k,a) + + V_cab(i,j,k) = W_cab(i,j,k) & + + X_oovv(j,k,a,b) * T_ov(i,c) & + + X_oovv(i,k,c,b) * T_ov(j,a) & + + X_oovv(i,j,c,a) * T_ov(k,b) + + V_bac(i,j,k) = W_bac(i,j,k) & + + X_oovv(j,k,a,c) * T_ov(i,b) & + + X_oovv(i,k,b,c) * T_ov(j,a) & + + X_oovv(i,j,b,a) * T_ov(k,c) + + V_acb(i,j,k) = W_acb(i,j,k) & + + X_oovv(j,k,c,b) * T_ov(i,a) & + + X_oovv(i,k,a,b) * T_ov(j,c) & + + X_oovv(i,j,a,c) * T_ov(k,b) + enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL end diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f new file mode 100644 index 00000000..31fe67ce --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -0,0 +1,375 @@ +! Main +subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(inout) :: energy + + double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb,eccsd + + eccsd = energy + call set_multiple_levels_omp(.False.) + + allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) + allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) + + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO + do d = 1, nV + X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO + do d = 1, nV + T_voov(d,k,j,c) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & + + !$OMP DO + do c = 1, nV + do k = 1, nO + do j = 1, nO + do l = 1, nO + X_ooov(l,j,k,c) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO + do b = 1, nV + do a = 1, nV + do i = 1, nO + do l = 1, nO + T_oovv(l,i,a,b) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !X_oovv(j,k,b,c) * T1_vo(a,i) & + + !$OMP DO + do c = 1, nV + do b = 1, nV + do k = 1, nO + do j = 1, nO + X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP END PARALLEL + + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc +! logical, external :: omp_test_lock + + double precision, allocatable :: memo(:), Pabc(:), waccu(:) + integer*8, allocatable :: sampled(:) +! integer(omp_lock_kind), allocatable :: lock(:) + integer*2 , allocatable :: abc(:,:) + integer*8 :: Nabc, i8,kiter + integer*8, allocatable :: iorder(:) + double precision :: eocc + double precision :: norm + integer :: isample + + + ! Prepare table of triplets (a,b,c) + + Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV + allocate (memo(Nabc), sampled(Nabc), Pabc(Nabc), waccu(0:Nabc)) + allocate (abc(4,Nabc), iorder(Nabc)) !, lock(Nabc)) + +! eocc = 3.d0/dble(nO) * sum(f_o(1:nO)) + Nabc = 0_8 + do a = 1, nV + do b = a+1, nV + do c = b+1, nV + Nabc = Nabc + 1_8 + Pabc(Nabc) = -1.d0/(f_v(a) + f_v(b) + f_v(c)) + abc(1,Nabc) = int(a,2) + abc(2,Nabc) = int(b,2) + abc(3,Nabc) = int(c,2) + enddo + + Nabc = Nabc + 1_8 + abc(1,Nabc) = int(a,2) + abc(2,Nabc) = int(b,2) + abc(3,Nabc) = int(a,2) + Pabc(Nabc) = -1.d0/(2.d0*f_v(a) + f_v(b)) + + Nabc = Nabc + 1_8 + abc(1,Nabc) = int(b,2) + abc(2,Nabc) = int(a,2) + abc(3,Nabc) = int(b,2) + Pabc(Nabc) = -1.d0/(f_v(a) + 2.d0*f_v(b)) + enddo + enddo + + do i8=1,Nabc + iorder(i8) = i8 + enddo + + ! Sort triplets in decreasing Pabc + call dsort_big(Pabc, iorder, Nabc) + + ! Normalize + norm = 0.d0 + do i8=Nabc,1,-1 + norm = norm + Pabc(i8) + enddo + norm = 1.d0/norm + do i8=1,Nabc + Pabc(i8) = Pabc(i8) * norm + enddo + + call i8set_order_big(abc, iorder, Nabc) + + + ! Cumulative distribution for sampling + waccu(Nabc) = 0.d0 + do i8=Nabc-1,1,-1 + waccu(i8) = waccu(i8+1) - Pabc(i8+1) + enddo + waccu(:) = waccu(:) + 1.d0 + waccu(0) = 0.d0 + + logical :: converged, do_comp + double precision :: eta, variance, error, sample + double precision :: t00, t01 + integer*8 :: ieta, Ncomputed + integer*8, external :: binary_search + + integer :: nbuckets + nbuckets = 100 + + double precision, allocatable :: wsum(:) + allocate(wsum(nbuckets)) + + converged = .False. + Ncomputed = 0_8 + + energy = 0.d0 + variance = 0.d0 + memo(:) = 0.d0 + sampled(:) = -1_8 + + integer*8 :: ileft, iright, imin + ileft = 1_8 + iright = Nabc + integer*8, allocatable :: bounds(:,:) + + allocate (bounds(2,nbuckets)) + do isample=1,nbuckets + eta = 1.d0/dble(nbuckets) * dble(isample) + ieta = binary_search(waccu,eta,Nabc) + bounds(1,isample) = ileft + bounds(2,isample) = ieta + ileft = ieta+1 + wsum(isample) = sum( Pabc(bounds(1,isample):bounds(2,isample) ) ) + enddo + + Pabc(:) = 1.d0/Pabc(:) + + print '(A)', '' + print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' | E(CCSD(T)) | Error | % |' + print '(A)', ' +----------------------+--------------+----------+' + + + call wall_time(t00) + imin = 1_8 + !$OMP PARALLEL & + !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & + !$OMP DEFAULT(SHARED) + + do kiter=1,Nabc + + !$OMP MASTER + do while (imin <= Nabc) + if (sampled(imin)>-1_8) then + imin = imin+1 + else + exit + endif + enddo + + ! Deterministic part + if (imin < Nabc) then + ieta=imin + sampled(ieta) = 0_8 + a = abc(1,ieta) + b = abc(2,ieta) + c = abc(3,ieta) + Ncomputed += 1_8 + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + endif + !$OMP END TASK + endif + + ! Stochastic part + call random_number(eta) + do isample=1,nbuckets + if (imin >= bounds(2,isample)) then + cycle + endif + ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc) + + if (sampled(ieta) == -1_8) then + sampled(ieta) = 0_8 + a = abc(1,ieta) + b = abc(2,ieta) + c = abc(3,ieta) + Ncomputed += 1_8 + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + endif + !$OMP END TASK + endif + sampled(ieta) = sampled(ieta)+1_8 + + enddo + + call wall_time(t01) + if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then + t00 = t01 + + !$OMP TASKWAIT + + double precision :: ET, ET2 + double precision :: energy_stoch, energy_det + double precision :: scale + double precision :: w + double precision :: tmp + energy_stoch = 0.d0 + energy_det = 0.d0 + norm = 0.d0 + scale = 1.d0 + ET = 0.d0 + ET2 = 0.d0 + + + do isample=1,nbuckets + if (imin >= bounds(2,isample)) then + energy_det = energy_det + sum(memo(bounds(1,isample):bounds(2,isample))) + scale = scale - wsum(isample) + else + exit + endif + enddo + + isample = min(isample,nbuckets) + do ieta=bounds(1,isample), Nabc + w = dble(max(sampled(ieta),0_8)) + tmp = w * memo(ieta) * Pabc(ieta) + ET = ET + tmp + ET2 = ET2 + tmp * memo(ieta) * Pabc(ieta) + norm = norm + w + enddo + norm = norm/scale + if (norm > 0.d0) then + energy_stoch = ET / norm + variance = ET2 / norm - energy_stoch*energy_stoch + endif + + energy = energy_det + energy_stoch + + print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + endif + !$OMP END MASTER + if (imin >= Nabc) exit + enddo + + !$OMP END PARALLEL + print '(A)', ' +----------------------+--------------+----------+' + print '(A)', '' + + deallocate(X_vovv,X_ooov,T_voov,T_oovv) +end + + + +integer*8 function binary_search(arr, key, sze) + implicit none + BEGIN_DOC +! Searches the key in array arr(1:sze) between l_in and r_in, and returns its index + END_DOC + integer*8 :: sze, i, j, mid + double precision :: arr(0:sze) + double precision :: key + + if ( key < arr(1) ) then + binary_search = 0_8 + return + end if + + if ( key >= arr(sze) ) then + binary_search = sze + return + end if + + i = 0_8 + j = sze + 1_8 + + do while (.True.) + mid = (i + j) / 2_8 + if ( key >= arr(mid) ) then + i = mid + else + j = mid + end if + if (j-i <= 1_8) then + binary_search = i + return + endif + end do +end function binary_search + diff --git a/src/ccsd/save_energy.irp.f b/src/ccsd/save_energy.irp.f new file mode 100644 index 00000000..30d93ec3 --- /dev/null +++ b/src/ccsd/save_energy.irp.f @@ -0,0 +1,13 @@ +subroutine save_energy(E,ET) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E, ET + call ezfio_set_ccsd_energy(E) + if (ET /= 0.d0) then + call ezfio_set_ccsd_energy_t(E+ET) + endif +end + + diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 6f40a809..b8fa2895 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -76,6 +76,8 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) double precision, allocatable :: fock_diag_tmp(:,:) + if (csubset == 0) return + allocate(fock_diag_tmp(2,mo_num+1)) call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int) @@ -86,6 +88,10 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) ) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) ) enddo + if ((subset == 1).and.(sum(hole_mask(:,2)) == 0_bit_kind)) then + ! No beta electron to excite + call select_singles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b) + endif call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b,subset,csubset) deallocate(fock_diag_tmp) end subroutine @@ -140,7 +146,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d use selection_types implicit none BEGIN_DOC -! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted +! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted END_DOC integer, intent(in) :: i_generator, subset, csubset @@ -177,6 +183,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d monoAdo = .true. monoBdo = .true. + if (csubset == 0) return do k=1,N_int hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) @@ -234,7 +241,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d enddo ! Iterate on 0S alpha, and find betas TQ such that exc_degree <= 4 - ! Remove also contributions < 1.d-20) do j=1,N_det_alpha_unique call get_excitation_degree_spin(psi_det_alpha_unique(1,j), & psi_det_generators(1,1,i_generator), nt, N_int) @@ -477,7 +483,9 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d do s2=s1,2 sp = s1 - if(s1 /= s2) sp = 3 + if(s1 /= s2) then + sp = 3 + endif ib = 1 if(s1 == s2) ib = i1+1 @@ -525,7 +533,10 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) deallocate(banned, bannedOrb,mat) end subroutine -subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) + +BEGIN_TEMPLATE + +subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) use bitmasks use selection_types implicit none @@ -559,7 +570,20 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d s1 = sp s2 = sp end if - call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + + if ($IS_DOUBLE) then + if (h2 == 0) then + print *, 'h2=0 in '//trim(irp_here) + stop + endif + call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + else + if (h2 /= 0) then + print *, 'h2 /= in '//trim(irp_here) + stop + endif + call apply_hole(psi_det_generators(1,1,i_generator), s1, h1, mask, ok, N_int) + endif E_shift = 0.d0 if (h0_type == 'CFG') then @@ -567,12 +591,15 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d E_shift = psi_det_Hii(i_generator) - psi_configuration_Hii(j) endif - do p1=1,mo_num - if(bannedOrb(p1, s1)) cycle + $DO_p1 +! do p1=1,mo_num + + if (bannedOrb(p1, s1)) cycle ib = 1 if(sp /= 3) ib = p1+1 - do p2=ib,mo_num + $DO_p2 + ! do p2=ib,mo_num ! ----- ! /!\ Generating only single excited determinants doesn't work because a @@ -581,9 +608,10 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! detected as already generated when generating in the future with a ! double excitation. ! ----- - - if(bannedOrb(p2, s2)) cycle - if(banned(p1,p2)) cycle + if ($IS_DOUBLE) then + if(bannedOrb(p2, s2)) cycle + if(banned(p1,p2)) cycle + endif if(pseudo_sym)then if(dabs(mat(1, p1, p2)).lt.thresh_sym)then @@ -593,7 +621,11 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d val = maxval(abs(mat(1:N_states, p1, p2))) if( val == 0d0) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + if ($IS_DOUBLE) then + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + else + call apply_particle(mask, s1, p1, det, ok, N_int) + endif if (do_only_cas) then integer, external :: number_of_holes, number_of_particles @@ -794,7 +826,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d case(5) ! Variance selection if (h0_type == 'CFG') then - w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & + w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & / c0_weight(istate) else w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) @@ -854,10 +886,19 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(w <= buf%mini) then call add_to_selection_buffer(buf, det, w) end if - end do - end do + ! enddo + $ENDDO_p1 +! enddo + $ENDDO_p2 end +SUBST [ DOUBLE , DO_p1 , ENDDO_p1 , DO_p2 , ENDDO_p2 , IS_DOUBLE ] + +double ; do p1=1,mo_num ; enddo ; do p2=ib,mo_num ; enddo ; .True. ;; +single ; do p1=1,mo_num ; enddo ; p2=1 ; ; .False. ;; + +END_TEMPLATE + subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) use bitmasks implicit none @@ -879,6 +920,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere PROVIDE psi_selectors_coef_transp psi_det_sorted mat = 0d0 + p=0 do i=1,N_int negMask(i,1) = not(mask(i,1)) @@ -1432,7 +1474,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,sp) p2 = p(2,sp) do puti=1, mo_num - if(bannedOrb(puti, sp)) cycle + if (bannedOrb(puti, sp)) cycle call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) do putj=puti+1, mo_num @@ -1443,7 +1485,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) if (hij == 0.d0) cycle else - hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj)) + hij = hij_cache1(putj) - hij_cache2(putj) if (hij == 0.d0) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if @@ -1503,7 +1545,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none BEGIN_DOC -! Identify the determinants in det which are in the internal space. These are +! Identify the determinants in det that are in the internal space. These are ! the determinants that can be produced by creating two particles on the mask. END_DOC @@ -1531,7 +1573,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl end do - ! If det(i) < det(i_gen), it hs already been considered + ! If det(i) < det(i_gen), it has already been considered if(interesting(i) < i_gen) then fullMatch = .true. return @@ -1582,352 +1624,4 @@ end -! OLD unoptimized routines for debugging -! ====================================== - -subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(N_int,2) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - integer(bit_kind) :: det(N_int, 2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - integer :: i, j, s, h1, h2, p1, p2, puti, putj - double precision :: hij, phase - double precision, external :: get_phase_bi, mo_two_e_integral - logical :: ok - - integer :: bant - bant = 1 - - - if(sp == 3) then ! AB - h1 = p(1,1) - h2 = p(1,2) - do p1=1, mo_num - if(bannedOrb(p1, 1)) cycle - do p2=1, mo_num - if(bannedOrb(p2,2)) cycle - if(banned(p1, p2, bant)) cycle ! rentable? - if(p1 == h1 .or. p2 == h2) then - call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - else - phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = mo_two_e_integral(p1, p2, h1, h2) * phase - end if - mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij - end do - end do - else ! AA BB - p1 = p(1,sp) - p2 = p(2,sp) - do puti=1, mo_num - if(bannedOrb(puti, sp)) cycle - do putj=puti+1, mo_num - if(bannedOrb(putj, sp)) cycle - if(banned(puti, putj, bant)) cycle ! rentable? - if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then - call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - else - hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) - end if - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - end if -end - -subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(N_int,2) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - integer(bit_kind) :: det(N_int, 2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) - double precision, external :: get_phase_bi, mo_two_e_integral - logical :: ok - - logical, allocatable :: lbanned(:,:) - integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j - integer :: hfix, pfix, h1, h2, p1, p2, ib - - integer, parameter :: turn2(2) = (/2,1/) - integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - - integer :: bant - - - allocate (lbanned(mo_num, 2)) - lbanned = bannedOrb - - do i=1, p(0,1) - lbanned(p(i,1), 1) = .true. - end do - do i=1, p(0,2) - lbanned(p(i,2), 2) = .true. - end do - - ma = 1 - if(p(0,2) >= 2) ma = 2 - mi = turn2(ma) - - bant = 1 - - if(sp == 3) then - !move MA - if(ma == 2) bant = 2 - puti = p(1,mi) - hfix = h(1,ma) - p1 = p(1,ma) - p2 = p(2,ma) - if(.not. bannedOrb(puti, mi)) then - tmp_row = 0d0 - do putj=1, hfix-1 - if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle - hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) - end do - do putj=hfix+1, mo_num - if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle - hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) - end do - - if(ma == 1) then - mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num) - else - mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num) - end if - end if - - !MOVE MI - pfix = p(1,mi) - tmp_row = 0d0 - tmp_row2 = 0d0 - do puti=1,mo_num - if(lbanned(puti,mi)) cycle - !p1 fixed - putj = p1 - if(.not. banned(putj,puti,bant)) then - hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) - tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) - end if - - putj = p2 - if(.not. banned(putj,puti,bant)) then - hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) - tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) - end if - end do - - if(mi == 1) then - mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) - mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) - else - mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:) - mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:) - end if - else - if(p(0,ma) == 3) then - do i=1,3 - hfix = h(1,ma) - puti = p(i, ma) - p1 = p(turn3(1,i), ma) - p2 = p(turn3(2,i), ma) - tmp_row = 0d0 - do putj=1,hfix-1 - if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle - hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) - end do - do putj=hfix+1,mo_num - if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle - hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) - end do - - mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) - mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:) - end do - else - hfix = h(1,mi) - pfix = p(1,mi) - p1 = p(1,ma) - p2 = p(2,ma) - tmp_row = 0d0 - tmp_row2 = 0d0 - do puti=1,mo_num - if(lbanned(puti,ma)) cycle - putj = p2 - if(.not. banned(puti,putj,1)) then - hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) - tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) - end if - - putj = p1 - if(.not. banned(puti,putj,1)) then - hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) - tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) - end if - end do - mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) - mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:) - mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) - mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:) - end if - end if - deallocate(lbanned) - - !! MONO - if(sp == 3) then - s1 = 1 - s2 = 2 - else - s1 = sp - s2 = sp - end if - - do i1=1,p(0,s1) - ib = 1 - if(s1 == s2) ib = i1+1 - do i2=ib,p(0,s2) - p1 = p(i1,s1) - p2 = p(i2,s2) - if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij - end do - end do -end - -subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(2,N_int) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - double precision, external :: get_phase_bi, mo_two_e_integral - - integer :: i, j, tip, ma, mi, puti, putj - integer :: h1, h2, p1, p2, i1, i2 - double precision :: hij, phase - - integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) - integer, parameter :: turn2(2) = (/2, 1/) - integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - - integer :: bant - bant = 1 - - tip = p(0,1) * p(0,2) - - ma = sp - if(p(0,1) > p(0,2)) ma = 1 - if(p(0,1) < p(0,2)) ma = 2 - mi = mod(ma, 2) + 1 - - if(sp == 3) then - if(ma == 2) bant = 2 - - if(tip == 3) then - puti = p(1, mi) - do i = 1, 3 - putj = p(i, ma) - if(banned(putj,puti,bant)) cycle - i1 = turn3(1,i) - i2 = turn3(2,i) - p1 = p(i1, ma) - p2 = p(i2, ma) - h1 = h(1, ma) - h2 = h(2, ma) - - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) - if(ma == 1) then - mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij - else - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end if - end do - else - h1 = h(1,1) - h2 = h(1,2) - do j = 1,2 - putj = p(j, 2) - p2 = p(turn2(j), 2) - do i = 1,2 - puti = p(i, 1) - - if(banned(puti,putj,bant)) cycle - p1 = p(turn2(i), 1) - - hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - end if - - else - if(tip == 0) then - h1 = h(1, ma) - h2 = h(2, ma) - do i=1,3 - puti = p(i, ma) - do j=i+1,4 - putj = p(j, ma) - if(banned(puti,putj,1)) cycle - - i1 = turn2d(1, i, j) - i2 = turn2d(2, i, j) - p1 = p(i1, ma) - p2 = p(i2, ma) - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - else if(tip == 3) then - h1 = h(1, mi) - h2 = h(1, ma) - p1 = p(1, mi) - do i=1,3 - puti = p(turn3(1,i), ma) - putj = p(turn3(2,i), ma) - if(banned(puti,putj,1)) cycle - p2 = p(i, ma) - - hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int) - mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij - end do - else ! tip == 4 - puti = p(1, sp) - putj = p(2, sp) - if(.not. banned(puti,putj,1)) then - p1 = p(1, mi) - p2 = p(2, mi) - h1 = h(1, mi) - h2 = h(2, mi) - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end if - end if - end if -end - diff --git a/src/cipsi/selection_old.irp.f b/src/cipsi/selection_old.irp.f new file mode 100644 index 00000000..8fd5bc2b --- /dev/null +++ b/src/cipsi/selection_old.irp.f @@ -0,0 +1,350 @@ + +! OLD unoptimized routines for debugging +! ====================================== + +subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, s, h1, h2, p1, p2, puti, putj + double precision :: hij, phase + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + integer :: bant + bant = 1 + + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_num + if(bannedOrb(p1, 1)) cycle + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = mo_two_e_integral(p1, p2, h1, h2) * phase + end if + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_num +! do not cycle here? otherwise singles will be missed?? + if(bannedOrb(puti, sp)) cycle + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + end if + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + end if +end + +subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: hfix, pfix, h1, h2, p1, p2, ib + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + + + allocate (lbanned(mo_num, 2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then + tmp_row = 0d0 + do putj=1, hfix-1 + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + end do + do putj=hfix+1, mo_num + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + end do + + if(ma == 1) then + mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num) + else + mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num) + end if + end if + + !MOVE MI + pfix = p(1,mi) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_num + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) + end if + + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) + end if + end do + + if(mi == 1) then + mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) + mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) + else + mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:) + mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:) + end if + else + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) + tmp_row = 0d0 + do putj=1,hfix-1 + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + end do + do putj=hfix+1,mo_num + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + end do + + mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) + mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:) + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) + end if + end do + mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) + mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:) + mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) + mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:) + end if + end if + deallocate(lbanned) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=ib,p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do +end + +subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(2,N_int) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi, mo_two_e_integral + + integer :: i, j, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: hij, phase + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) + + ma = sp + if(p(0,1) > p(0,2)) ma = 1 + if(p(0,1) < p(0,2)) ma = 2 + mi = mod(ma, 2) + 1 + + if(sp == 3) then + if(ma == 2) bant = 2 + + if(tip == 3) then + puti = p(1, mi) + do i = 1, 3 + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + h1 = h(1, ma) + h2 = h(2, ma) + + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + if(ma == 1) then + mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij + else + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end if + end do + else + h1 = h(1,1) + h2 = h(1,2) + do j = 1,2 + putj = p(j, 2) + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + + if(banned(puti,putj,bant)) cycle + p1 = p(turn2(i), 1) + + hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + end if + + else + if(tip == 0) then + h1 = h(1, ma) + h2 = h(2, ma) + do i=1,3 + puti = p(i, ma) + do j=i+1,4 + putj = p(j, ma) + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + else if(tip == 3) then + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + do i=1,3 + puti = p(turn3(1,i), ma) + putj = p(turn3(2,i), ma) + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + + hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int) + mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij + end do + else ! tip == 4 + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end if + end if + end if +end + diff --git a/src/cipsi/selection_singles.irp.f b/src/cipsi/selection_singles.irp.f new file mode 100644 index 00000000..3821576c --- /dev/null +++ b/src/cipsi/selection_singles.irp.f @@ -0,0 +1,356 @@ +use bitmasks + +subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,buf) + use bitmasks + use selection_types + implicit none + BEGIN_DOC +! Select determinants connected to i_det by H + END_DOC + integer, intent(in) :: i_gen + integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) + double precision, intent(in) :: fock_diag_tmp(mo_num) + double precision, intent(in) :: E0(N_states) + type(pt2_type), intent(inout) :: pt2_data + type(selection_buffer), intent(inout) :: buf + + logical, allocatable :: banned(:,:), bannedOrb(:) + double precision, allocatable :: mat(:,:,:) + integer :: i, j, k + integer :: h1,h2,s1,s2,i1,i2,ib,sp + integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2) + logical :: fullMatch, ok + + + do k=1,N_int + hole (k,1) = iand(psi_det_generators(k,1,i_gen), hole_mask(k,1)) + hole (k,2) = iand(psi_det_generators(k,2,i_gen), hole_mask(k,2)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_gen)), particle_mask(k,1)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_gen)), particle_mask(k,2)) + enddo + + allocate(banned(mo_num,mo_num), bannedOrb(mo_num), mat(N_states, mo_num, 1)) + banned = .False. + + ! Create lists of holes and particles + ! ----------------------------------- + + integer :: N_holes(2), N_particles(2) + integer :: hole_list(N_int*bit_kind_size,2) + integer :: particle_list(N_int*bit_kind_size,2) + + call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) + call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + + do sp=1,2 + do i=1, N_holes(sp) + h1 = hole_list(i,sp) + call apply_hole(psi_det_generators(1,1,i_gen), sp, h1, mask, ok, N_int) + bannedOrb = .true. + do j=1,N_particles(sp) + bannedOrb(particle_list(j, sp)) = .false. + end do + call spot_hasBeen(mask, sp, psi_det_sorted, i_gen, N_det, bannedOrb, fullMatch) + if(fullMatch) cycle + mat = 0d0 + call splash_p(mask, sp, psi_selectors(1,1,i_gen), psi_selectors_coef_transp(1,i_gen), N_det_selectors - i_gen + 1, bannedOrb, mat(1,1,1)) + call fill_buffer_single(i_gen, sp, h1, 0, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) + end do + enddo +end subroutine + + +subroutine spot_hasBeen(mask, sp, det, i_gen, N, banned, fullMatch) + use bitmasks + implicit none + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) + integer, intent(in) :: i_gen, N, sp + logical, intent(inout) :: banned(mo_num) + logical, intent(out) :: fullMatch + + + integer :: i, j, na, nb, list(3), nt + integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) + + fullMatch = .false. + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + genl : do i=1, N + nt = 0 + + do j=1, N_int + myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) + myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) + nt += popcnt(myMask(j, 1)) + popcnt(myMask(j, 2)) + end do + + if(nt > 3) cycle + + if(nt <= 2 .and. i < i_gen) then + fullMatch = .true. + return + end if + + call bitstring_to_list(myMask(1,sp), list(1), na, N_int) + + if(nt == 3 .and. i < i_gen) then + do j=1,na + banned(list(j)) = .true. + end do + else if(nt == 1 .and. na == 1) then + banned(list(1)) = .true. + end if + end do genl +end subroutine + + +subroutine splash_p(mask, sp, det, coefs, N_sel, bannedOrb, vect) + use bitmasks + implicit none + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int,2,N_sel) + double precision, intent(in) :: coefs(N_states, N_sel) + integer, intent(in) :: sp, N_sel + logical, intent(inout) :: bannedOrb(mo_num) + double precision, intent(inout) :: vect(N_states, mo_num) + + integer :: i, j, h(0:2,2), p(0:3,2), nt + integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) + integer(bit_kind) :: phasemask(N_int, 2) + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + do i=1, N_sel + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt > 3) cycle + + do j=1,N_int + perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) + perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) + end do + + call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) + call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) + + call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) + + call get_mask_phase(psi_det_sorted(1,1,i), phasemask, N_int) + + if(nt == 3) then + call get_m2(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + else if(nt == 2) then + call get_m1(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + else + call get_m0(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + end if + end do +end subroutine + + +subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, j, h1, h2, p1, p2, sfix, hfix, pfix, hmob, pmob, puti + double precision :: hij + double precision, external :: get_phase_bi, mo_two_e_integral + + integer, parameter :: turn3_2(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + integer, parameter :: turn2(2) = (/2,1/) + + if(h(0,sp) == 2) then + h1 = h(1, sp) + h2 = h(2, sp) + do i=1,3 + puti = p(i, sp) + if(bannedOrb(puti)) cycle + p1 = p(turn3_2(1,i), sp) + p2 = p(turn3_2(2,i), sp) + hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2) + hij *= get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2) + vect(:, puti) += hij * coefs + end do + else if(h(0,sp) == 1) then + sfix = turn2(sp) + hfix = h(1,sfix) + pfix = p(1,sfix) + hmob = h(1,sp) + do j=1,2 + puti = p(j, sp) + if(bannedOrb(puti)) cycle + pmob = p(turn2(j), sp) + hij = mo_two_e_integral(pfix, pmob, hfix, hmob) + hij *= get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix) + vect(:, puti) += hij * coefs + end do + else + puti = p(1,sp) + if(.not. bannedOrb(puti)) then + sfix = turn2(sp) + p1 = p(1,sfix) + p2 = p(2,sfix) + h1 = h(1,sfix) + h2 = h(2,sfix) + hij = (mo_two_e_integral(p1,p2,h1,h2) - mo_two_e_integral(p2,p1,h1,h2)) + hij *= get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2) + vect(:, puti) += hij * coefs + end if + end if +end subroutine + +subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, hole, p1, p2, sh + logical :: ok, lbanned(mo_num) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + double precision, external :: get_phase_bi,mo_two_e_integral + + lbanned = bannedOrb + sh = 1 + if(h(0,2) == 1) sh = 2 + hole = h(1, sh) + lbanned(p(1,sp)) = .true. + if(p(0,sp) == 2) lbanned(p(2,sp)) = .true. + !print *, "SPm1", sp, sh + + p1 = p(1, sp) + + if(sp == sh) then + p2 = p(2, sp) + lbanned(p2) = .true. + + do i=1,hole-1 + if(lbanned(i)) cycle + hij = (mo_two_e_integral(p1, p2, i, hole) - mo_two_e_integral(p2, p1, i, hole)) + hij *= get_phase_bi(phasemask, sp, sp, i, p1, hole, p2) + vect(:,i) += hij * coefs + end do + do i=hole+1,mo_num + if(lbanned(i)) cycle + hij = (mo_two_e_integral(p1, p2, hole, i) - mo_two_e_integral(p2, p1, hole, i)) + hij *= get_phase_bi(phasemask, sp, sp, hole, p1, i, p2) + vect(:,i) += hij * coefs + end do + + call apply_particle(mask, sp, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, p2) += hij * coefs + else + p2 = p(1, sh) + do i=1,mo_num + if(lbanned(i)) cycle + hij = mo_two_e_integral(p1, p2, i, hole) + hij *= get_phase_bi(phasemask, sp, sh, i, p1, hole, p2) + vect(:,i) += hij * coefs + end do + end if + + call apply_particle(mask, sp, p1, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, p1) += hij * coefs +end subroutine + +subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i + logical :: ok, lbanned(mo_num) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + + lbanned = bannedOrb + lbanned(p(1,sp)) = .true. + do i=1,mo_num + if(lbanned(i)) cycle + call apply_particle(mask, sp, i, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, i) += hij * coefs + end do +end subroutine + + + +! +!subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) +! use bitmasks +! use selection_types +! implicit none +! +! integer, intent(in) :: i_generator, sp, h1 +! double precision, intent(in) :: vect(N_states, mo_num) +! logical, intent(in) :: bannedOrb(mo_num) +! double precision, intent(in) :: fock_diag_tmp(mo_num) +! double precision, intent(in) :: E0(N_states) +! double precision, intent(inout) :: pt2(N_states) +! type(selection_buffer), intent(inout) :: buf +! logical :: ok +! integer :: s1, s2, p1, p2, ib, istate +! integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) +! double precision :: e_pert, delta_E, val, Hii, max_e_pert, tmp +! double precision, external :: diag_H_mat_elem_fock +! +! +! call apply_hole(psi_det_generators(1,1,i_generator), sp, h1, mask, ok, N_int) +! +! do p1=1,mo_num +! if(bannedOrb(p1)) cycle +! if(vect(1, p1) == 0d0) cycle +! call apply_particle(mask, sp, p1, det, ok, N_int) +! +! +! Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) +! max_e_pert = 0d0 +! +! do istate=1,N_states +! val = vect(istate, p1) + vect(istate, p1) +! delta_E = E0(istate) - Hii +! tmp = dsqrt(delta_E * delta_E + val * val) +! if (delta_E < 0.d0) then +! tmp = -tmp +! endif +! e_pert = 0.5d0 * ( tmp - delta_E) +! pt2(istate) += e_pert +! if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert +! end do +! +! if(dabs(max_e_pert) > buf%mini) call add_to_selection_buffer(buf, det, max_e_pert) +! end do +!end subroutine +! diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 4c271a4b..77377554 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -868,7 +868,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! and transpose ! ------------------------------------------- -! call htilde_mu_mat_bi_ortho_tot(det, det, N_int, Hii) double precision :: hmono, htwoe, hthree call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii) do istate = 1,N_states @@ -878,8 +877,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 do iii = 1, N_det_selectors - call htilde_mu_mat_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + call htilde_mu_mat_bi_ortho_tot_slow(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_bi_ortho_tot_slow(det, psi_selectors(1,1,iii), N_int, alpha_h_i) call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) if(degree == 0)then print*,'problem !!!' diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg deleted file mode 100644 index 8edeecd0..00000000 --- a/src/cosgtos_ao_int/EZFIO.cfg +++ /dev/null @@ -1,19 +0,0 @@ -[ao_expoim_cosgtos] -type: double precision -doc: imag part for Exponents for each primitive of each cosGTOs |AO| -size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) -interface: ezfio, provider - -[use_cosgtos] -type: logical -doc: If true, use cosgtos for AO integrals -interface: ezfio,provider,ocaml -default: False - -[ao_integrals_threshold] -type: Threshold -doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero -interface: ezfio,provider,ocaml -default: 1.e-15 -ezfio_name: threshold_ao - diff --git a/src/cosgtos_ao_int/NEED b/src/cosgtos_ao_int/NEED deleted file mode 100644 index 932f88a3..00000000 --- a/src/cosgtos_ao_int/NEED +++ /dev/null @@ -1,2 +0,0 @@ -ezfio_files -ao_basis diff --git a/src/cosgtos_ao_int/README.rst b/src/cosgtos_ao_int/README.rst deleted file mode 100644 index 01f25d6d..00000000 --- a/src/cosgtos_ao_int/README.rst +++ /dev/null @@ -1,4 +0,0 @@ -============== -cosgtos_ao_int -============== - diff --git a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f deleted file mode 100644 index d65dfba5..00000000 --- a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f +++ /dev/null @@ -1,7 +0,0 @@ -program cosgtos_ao_int - implicit none - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' -end diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f index f3226307..48082506 100644 --- a/src/davidson/davidson_parallel.irp.f +++ b/src/davidson/davidson_parallel.irp.f @@ -545,11 +545,6 @@ end - - - - - integer function zmq_put_N_states_diag(zmq_to_qp_run_socket,worker_id) use f77_zmq implicit none diff --git a/src/ezfio_files/00.create.bats b/src/ezfio_files/00.create.bats index cfa6247d..49430a0b 100644 --- a/src/ezfio_files/00.create.bats +++ b/src/ezfio_files/00.create.bats @@ -23,6 +23,34 @@ function run { qp set mo_two_e_ints io_mo_two_e_integrals "Write" } +@test "H2_1" { + run h2_1.xyz 1 0 cc-pvdz +} + +@test "H2_3" { + run h2_3.xyz 3 0 cc-pvdz +} + +@test "H3_2" { + run h3_2.xyz 2 0 cc-pvdz +} + +@test "H3_4" { + run h3_4.xyz 4 0 cc-pvdz +} + +@test "H4_1" { + run h4_1.xyz 1 0 cc-pvdz +} + +@test "H4_3" { + run h4_3.xyz 3 0 cc-pvdz +} + +@test "H4_5" { + run h4_5.xyz 5 0 cc-pvdz +} + @test "B-B" { qp set_file b2_stretched.ezfio diff --git a/src/fci/40.fci.bats b/src/fci/40.fci.bats index 4523d0e0..889bf90a 100644 --- a/src/fci/40.fci.bats +++ b/src/fci/40.fci.bats @@ -10,8 +10,8 @@ function run() { qp set perturbation do_pt2 False qp set determinants n_det_max 8000 qp set determinants n_states 1 - qp set davidson threshold_davidson 1.e-10 - qp set davidson n_states_diag 8 + qp set davidson_keywords threshold_davidson 1.e-10 + qp set davidson_keywords n_states_diag 8 qp run fci energy1="$(ezfio get fci energy | tr '[]' ' ' | cut -d ',' -f 1)" eq $energy1 $1 $thresh @@ -24,99 +24,134 @@ function run_stoch() { qp set perturbation do_pt2 True qp set determinants n_det_max $3 qp set determinants n_states 1 - qp set davidson threshold_davidson 1.e-10 - qp set davidson n_states_diag 1 + qp set davidson_keywords threshold_davidson 1.e-10 + qp set davidson_keywords n_states_diag 1 qp run fci energy1="$(ezfio get fci energy_pt2 | tr '[]' ' ' | cut -d ',' -f 1)" eq $energy1 $1 $thresh } -@test "B-B" { +@test "H2_1" { # 1s + qp set_file h2_1.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.06415255 1.e-8 10000 +} + +@test "H2_3" { # 1s + qp set_file h2_3.ezfio + qp set perturbation pt2_max 0. + run_stoch -0.96029881 1.e-8 10000 +} + +@test "H3_2" { # 3s + qp set_file h3_2.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.61003132 1.e-8 10000 +} + +@test "H3_4" { # 2s + qp set_file h3_4.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.02434843 1.e-8 10000 +} + +@test "H4_1" { # 13s + qp set_file h4_1.ezfio + qp set perturbation pt2_max 0. + run_stoch -2.01675062 1.e-8 10000 +} + +@test "H4_3" { # 10s + qp set_file h4_3.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.95927626 1.e-8 10000 +} + +@test "H4_5" { # 3s + qp set_file h4_5.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.25852765 1.e-8 10000 +} + +@test "B-B" { # 10s qp set_file b2_stretched.ezfio qp set determinants n_det_max 10000 qp set_frozen_core run_stoch -49.14103054419 3.e-4 10000 } -@test "F2" { # 4.07m - [[ -n $TRAVIS ]] && skip - qp set_file f2.ezfio - qp set_frozen_core - run_stoch -199.304922384814 3.e-3 100000 -} - -@test "NH3" { # 10.6657s +@test "NH3" { # 8s qp set_file nh3.ezfio qp set_mo_class --core="[1-4]" --act="[5-72]" run -56.244753429144986 3.e-4 100000 } -@test "DHNO" { # 11.4721s +@test "DHNO" { # 8s qp set_file dhno.ezfio qp set_mo_class --core="[1-7]" --act="[8-64]" - run -130.459020029816 3.e-4 100000 + run -130.466208113547 3.e-4 100000 } -@test "HCO" { # 12.2868s +@test "HCO" { # 32s qp set_file hco.ezfio - run -113.393356604085 1.e-3 100000 + run -113.395751656985 1.e-3 100000 } -@test "H2O2" { # 12.9214s +@test "H2O2" { # 21s qp set_file h2o2.ezfio qp set_mo_class --core="[1-2]" --act="[3-24]" --del="[25-38]" run -151.005848404095 1.e-3 100000 } -@test "HBO" { # 13.3144s +@test "HBO" { # 18s [[ -n $TRAVIS ]] && skip qp set_file hbo.ezfio - run -100.213 1.5e-3 100000 + run -100.214 1.5e-3 100000 } -@test "H2O" { # 11.3727s +@test "H2O" { # 16s [[ -n $TRAVIS ]] && skip qp set_file h2o.ezfio - run -76.2361605151999 5.e-4 100000 + run -76.238051555276 5.e-4 100000 } -@test "ClO" { # 13.3755s +@test "ClO" { # 47s [[ -n $TRAVIS ]] && skip qp set_file clo.ezfio - run -534.546453546852 1.e-3 100000 + run -534.548529710256 1.e-3 100000 } -@test "SO" { # 13.4952s +@test "SO" { # 23s [[ -n $TRAVIS ]] && skip qp set_file so.ezfio run -26.015 3.e-3 100000 } -@test "H2S" { # 13.6745s +@test "H2S" { # 37s [[ -n $TRAVIS ]] && skip qp set_file h2s.ezfio - run -398.859577605891 5.e-4 100000 + run -398.864853669111 5.e-4 100000 } -@test "OH" { # 13.865s +@test "OH" { # 12s [[ -n $TRAVIS ]] && skip qp set_file oh.ezfio - run -75.6121856748294 3.e-4 100000 + run -75.615 1.5e-3 100000 } -@test "SiH2_3B1" { # 13.938ss +@test "SiH2_3B1" { # 10s [[ -n $TRAVIS ]] && skip qp set_file sih2_3b1.ezfio - run -290.0175411299477 3.e-4 100000 + run -290.0206626734517 3.e-4 100000 } -@test "H3COH" { # 14.7299s +@test "H3COH" { # 33s [[ -n $TRAVIS ]] && skip qp set_file h3coh.ezfio - run -115.205632960026 1.e-3 100000 + run -115.206784386204 1.e-3 100000 } -@test "SiH3" { # 15.99s +@test "SiH3" { # 15s [[ -n $TRAVIS ]] && skip qp set_file sih3.ezfio run -5.572 1.e-3 100000 @@ -132,7 +167,7 @@ function run_stoch() { @test "ClF" { # 16.8864s [[ -n $TRAVIS ]] && skip qp set_file clf.ezfio - run -559.169748890031 1.5e-3 100000 + run -559.174371468224 1.5e-3 100000 } @test "SO2" { # 17.5645s @@ -170,12 +205,11 @@ function run_stoch() { run -187.970184372047 1.6e-3 100000 } - @test "[Cu(NH3)4]2+" { # 25.0417s [[ -n $TRAVIS ]] && skip qp set_file cu_nh3_4_2plus.ezfio qp set_mo_class --core="[1-24]" --act="[25-45]" --del="[46-87]" - run -1862.9869374387192 3.e-04 100000 + run -1862.98320066637 3.e-04 100000 } @test "HCN" { # 20.3273s @@ -185,3 +219,10 @@ function run_stoch() { run -93.078 2.e-3 100000 } +@test "F2" { # 4.07m + [[ -n $TRAVIS ]] && skip + qp set_file f2.ezfio + qp set_frozen_core + run_stoch -199.304922384814 3.e-3 100000 +} + diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f index b6ec073f..df753449 100644 --- a/src/fci_tc_bi/diagonalize_ci.irp.f +++ b/src/fci_tc_bi/diagonalize_ci.irp.f @@ -39,7 +39,7 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) write(*,'(A28,X,I10,X,100(F16.8,X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc + pt2_tmp/norm,E_tc + rpt2_tmp/norm,abs_pt2 print*,'*****' endif - psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) + psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) - nuclear_repulsion psi_s2(1:N_states) = s2_eigvec_tc_bi_orth(1:N_states) E_tc = eigval_right_tc_bi_orth(1) diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index ed75c882..3e6f229b 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -54,14 +54,18 @@ subroutine run_cipsi_tc implicit none - if (.not.is_zmq_slave) then + if (.not. is_zmq_slave) then + PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e - if(elec_alpha_num+elec_beta_num.ge.3)then + + if(elec_alpha_num+elec_beta_num .ge. 3) then if(three_body_h_tc)then - call provide_all_three_ints_bi_ortho + call provide_all_three_ints_bi_ortho() endif endif - ! --- + + FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp + write(json_unit,json_array_open_fmt) 'fci_tc' if (do_pt2) then @@ -76,13 +80,16 @@ subroutine run_cipsi_tc call json_close else + PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks + if(elec_alpha_num+elec_beta_num.ge.3)then if(three_body_h_tc)then call provide_all_three_ints_bi_ortho endif endif - ! --- + + FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp call run_slave_cipsi diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index df566032..6e7d0233 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -43,11 +43,39 @@ python write_pt_charges.py ${EZFIO} qp set nuclei point_charges True qp run scf | tee ${EZFIO}.pt_charges.out energy="$(ezfio get hartree_fock energy)" -good=-92.76613324421798 +good=-92.79920682236470 eq $energy $good $thresh rm -rf $EZFIO } +@test "H2_1" { # 1s + run h2_1.ezfio -1.005924963288527 +} + +@test "H2_3" { # 1s + run h2_3.ezfio -0.9591011604845440 +} + +@test "H3_2" { # 1s + run h3_2.ezfio -1.558273529860488 +} + +@test "H3_4" { # 1s + run h3_4.ezfio -1.0158684760025190 +} + +@test "H4_1" { # 1s + run h4_1.ezfio -1.932022805374405 +} + +@test "H4_3" { # 1s + run h4_3.ezfio -1.8948449927787350 +} + +@test "H4_5" { # 1s + run h4_5.ezfio -1.2408338805496990 +} + @test "point charges" { run_pt_charges } @@ -56,6 +84,8 @@ rm -rf $EZFIO run hcn.ezfio -92.88717500035233 } + + @test "B-B" { # 3s run b2_stretched.ezfio -48.9950585434279 } diff --git a/src/mo_optimization/my_providers.irp.f b/src/mo_optimization/my_providers.irp.f deleted file mode 100644 index 7469ffd5..00000000 --- a/src/mo_optimization/my_providers.irp.f +++ /dev/null @@ -1,141 +0,0 @@ -! Dimensions of MOs - - -BEGIN_PROVIDER [ integer, n_mo_dim ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of MOs we can build, - ! with i>j - END_DOC - - n_mo_dim = mo_num*(mo_num-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_core ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of core MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_act ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of active MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_inact ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of inactive MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_virt ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of virtual MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 - -END_PROVIDER - -! Energies/criterions - -BEGIN_PROVIDER [ double precision, my_st_av_energy ] - implicit none - BEGIN_DOC - ! State average CI energy - END_DOC - - !call update_st_av_ci_energy(my_st_av_energy) - call state_average_energy(my_st_av_energy) - -END_PROVIDER - -! With all the MOs - -BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] -&BEGIN_PROVIDER [ double precision, my_CC1_opt ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, for all the MOs. - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision :: norm_grad - - PROVIDE mo_two_e_integrals_in_map - - call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, for all the MOs. - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision, allocatable :: h_f(:,:,:,:) - - PROVIDE mo_two_e_integrals_in_map - - allocate(h_f(mo_num, mo_num, mo_num, mo_num)) - - call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) - -END_PROVIDER - -! With the list of active MOs -! Can be generalized to any mo_class by changing the list/dimension - -BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] -&BEGIN_PROVIDER [ double precision, my_CC2_opt ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision :: norm_grad - - PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals - - call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision, allocatable :: h_f(:,:,:,:) - - PROVIDE mo_two_e_integrals_in_map - - allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) - - call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) - -END_PROVIDER diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 14d3c696..32c0dccd 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -6,11 +6,44 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num integer :: k + call set_multiple_levels_omp(.False.) + print *, 'AO->MO Transformation of Cholesky vectors' !$OMP PARALLEL DO PRIVATE(k) do k=1,cholesky_ao_num call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) enddo !$OMP END PARALLEL DO + print *, '' + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + integer :: i,j,k + double precision, allocatable :: buffer(:,:) + + print *, 'AO->MO Transformation of Cholesky vectors .' + + call set_multiple_levels_omp(.False.) + !$OMP PARALLEL PRIVATE(i,j,k,buffer) + allocate(buffer(mo_num,mo_num)) + !$OMP DO SCHEDULE(static) + do k=1,cholesky_ao_num + call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num) + do j=1,mo_num + do i=1,mo_num + cholesky_mo_transp(k,i,j) = buffer(i,j) + enddo + enddo + enddo + !$OMP END DO + deallocate(buffer) + !$OMP END PARALLEL + print *, '' END_PROVIDER diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index 4ffb0134..d807f619 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -4,24 +4,68 @@ BEGIN_DOC ! big_array_coulomb_integrals(j,i,k) = = (ik|jj) ! - ! big_array_exchange_integrals(i,j,k) = = (ij|kj) + ! big_array_exchange_integrals(j,i,k) = = (ij|kj) END_DOC - integer :: i,j,k,l + integer :: i,j,k,l,a double precision :: get_two_e_integral double precision :: integral - do k = 1, mo_num - do i = 1, mo_num - do j = 1, mo_num - l = j - integral = get_two_e_integral(i,j,k,l,mo_integrals_map) - big_array_coulomb_integrals(j,i,k) = integral - l = j - integral = get_two_e_integral(i,j,l,k,mo_integrals_map) - big_array_exchange_integrals(j,i,k) = integral + if (do_ao_cholesky) then + + double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) + allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num)) + do j=1,mo_num + buffer_jj(:,j) = cholesky_mo_transp(:,j,j) + enddo + + call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + buffer_jj, cholesky_ao_num, 0.d0, & + buffer, mo_num*mo_num) + + do k = 1, mo_num + do i = 1, mo_num + do j = 1, mo_num + big_array_coulomb_integrals(j,i,k) = buffer(i,k,j) + enddo + enddo + enddo + deallocate(buffer_jj) + + allocate(buffer_jj(mo_num,mo_num)) + + do j = 1, mo_num + + call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, & + cholesky_mo_transp(1,1,j), cholesky_ao_num, & + cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, & + buffer_jj, mo_num) + + do k=1,mo_num + do i=1,mo_num + big_array_exchange_integrals(j,i,k) = buffer_jj(i,k) + enddo + enddo + enddo + + deallocate(buffer_jj) + + else + + do k = 1, mo_num + do i = 1, mo_num + do j = 1, mo_num + l = j + integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + big_array_coulomb_integrals(j,i,k) = integral + l = j + integral = get_two_e_integral(i,j,l,k,mo_integrals_map) + big_array_exchange_integrals(j,i,k) = integral + enddo + enddo enddo - enddo - enddo + + endif END_PROVIDER diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index b7ef901d..a461504e 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -1353,15 +1353,30 @@ END_PROVIDER integer :: i,j double precision :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map - mo_two_e_integrals_jj = 0.d0 - mo_two_e_integrals_jj_exchange = 0.d0 + + if (do_ao_cholesky) then + do j=1,mo_num + do i=1,mo_num + !TODO: use dgemm + mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j)) + mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i)) + enddo + enddo + + else + + do j=1,mo_num + do i=1,mo_num + mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map) + mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map) + enddo + enddo + + endif do j=1,mo_num do i=1,mo_num - mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map) - mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map) - mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j) + mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j) enddo enddo diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 3f1a9bf5..44a6ae65 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -231,6 +231,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time0) PROVIDE j1b_type + PROVIDE int2_grad1u2_grad2u2_j1b2 do ipoint = 1, n_points_final_grid tmp1 = v_1b(ipoint) @@ -242,6 +243,8 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g enddo enddo + FREE int2_grad1u2_grad2u2_j1b2 + !if(j1b_type .eq. 0) then ! grad12_j12 = 0.d0 ! do ipoint = 1, n_points_final_grid @@ -262,6 +265,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 + call print_memory_usage() END_PROVIDER @@ -278,6 +282,9 @@ BEGIN_PROVIDER [double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_g print*, ' providing u12sq_j1bsq ...' call wall_time(time0) + ! do not free here + PROVIDE int2_u2_j1b2 + do ipoint = 1, n_points_final_grid tmp_x = v_1b_grad(1,ipoint) tmp_y = v_1b_grad(2,ipoint) @@ -292,6 +299,7 @@ BEGIN_PROVIDER [double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for u12sq_j1bsq = ', time1 - time0 + call print_memory_usage() END_PROVIDER @@ -310,6 +318,9 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, print*, ' providing u12_grad1_u12_j1b_grad1_j1b ...' call wall_time(time0) + PROVIDE int2_u_grad1u_j1b2 + PROVIDE int2_u_grad1u_x_j1b2 + do ipoint = 1, n_points_final_grid x = final_grid_points(1,ipoint) @@ -340,14 +351,17 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, enddo enddo + FREE int2_u_grad1u_j1b2 + FREE int2_u_grad1u_x_j1b2 + call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b = ', time1 - time0 + call print_memory_usage() END_PROVIDER ! --- - BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)] BEGIN_DOC @@ -401,6 +415,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 0.d0, tc_grad_square_ao, ao_num*ao_num) + FREE int2_grad1_u12_square_ao + ! --- if(((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) .and. use_ipp) then @@ -442,6 +458,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_u2_j1b2(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao, ao_num*ao_num) + + FREE int2_u2_j1b2 endif ! --- @@ -478,6 +496,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao call wall_time(time1) print*, ' Wall time for tc_grad_square_ao = ', time1 - time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f index 9b91a8ed..ac077fe0 100644 --- a/src/non_h_ints_mu/j12_nucl_utils.irp.f +++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f @@ -35,7 +35,7 @@ BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)] elseif(j1b_type .eq. 4) then - ! v(r) = 1 - \sum_{a} \exp(-\alpha_a (r - r_a)^2) + ! v(r) = 1 - \sum_{a} \beta_a \exp(-\alpha_a (r - r_a)^2) do ipoint = 1, n_points_final_grid @@ -51,7 +51,7 @@ BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)] dz = z - nucl_coord(j,3) d = dx*dx + dy*dy + dz*dz - fact_r = fact_r - dexp(-a*d) + fact_r = fact_r - j1b_pen_coef(j) * dexp(-a*d) enddo v_1b(ipoint) = fact_r @@ -125,7 +125,7 @@ BEGIN_PROVIDER [double precision, v_1b_grad, (3, n_points_final_grid)] elseif(j1b_type .eq. 4) then - ! v(r) = 1 - \sum_{a} \exp(-\alpha_a (r - r_a)^2) + ! v(r) = 1 - \sum_{a} \beta_a \exp(-\alpha_a (r - r_a)^2) do ipoint = 1, n_points_final_grid @@ -144,7 +144,7 @@ BEGIN_PROVIDER [double precision, v_1b_grad, (3, n_points_final_grid)] r2 = dx*dx + dy*dy + dz*dz a = j1b_pen(j) - e = a * dexp(-a * r2) + e = a * j1b_pen_coef(j) * dexp(-a * r2) ax_der += e * dx ay_der += e * dy diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index cbd0b406..bd7ff6b7 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -187,6 +187,19 @@ end function j12_mu subroutine grad1_j12_mu(r1, r2, grad) + BEGIN_DOC +! gradient of j(mu(r1,r2),r12) form of jastrow. +! +! if mu(r1,r2) = cst ---> j1b_type < 200 and +! +! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) +! +! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and +! +! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) +! +! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + END_DOC include 'constants.include.F' implicit none @@ -283,7 +296,7 @@ double precision function j1b_nucl(r) d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl = j1b_nucl - dexp(-a*d) + j1b_nucl = j1b_nucl - j1b_pen_coef(i) * dexp(-a*d) enddo elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then @@ -350,7 +363,7 @@ double precision function j1b_nucl_square(r) d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl_square = j1b_nucl_square - dexp(-a*d) + j1b_nucl_square = j1b_nucl_square - j1b_pen_coef(i) * dexp(-a*d) enddo j1b_nucl_square = j1b_nucl_square * j1b_nucl_square @@ -462,7 +475,7 @@ subroutine grad1_j1b_nucl(r, grad) y = r(2) - nucl_coord(i,2) z = r(3) - nucl_coord(i,3) d = x*x + y*y + z*z - e = a * dexp(-a*d) + e = a * j1b_pen_coef(i) * dexp(-a*d) fact_x += e * x fact_y += e * y @@ -515,6 +528,9 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) double precision :: r(3) double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) double precision :: dm_tot, tmp1, tmp2, tmp3 + double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot + double precision :: f_rho1, f_rho2, d_drho_f_rho1 + double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume if(j1b_type .eq. 200) then @@ -578,8 +594,84 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) - else + elseif(j1b_type .eq. 202) then + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 exp(-rho) + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) - mu0 exp(-rho(r1))] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf * exp(-rho) + call get_all_f_rho(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 203) then + + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 204) then + + ! mu(r1,r2) = 1/2 * (f[rho(r1)] + f[rho(r2)]} + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/2 * d/dx1 (rho(r1) f[rho(r1)]) + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + mu_val = 0.5d0 * ( f_rho1 + f_rho2) + mu_der(1:3) = d_dx_rho_f_rho(1:3) + else print *, ' j1b_type = ', j1b_type, 'not implemented yet' stop @@ -684,3 +776,76 @@ end function j12_mu_square ! --- +subroutine f_mu_and_deriv_mu(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 * exp(-rho) +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 * dexp(-rho) + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - mu0 * dexp(-rho) + +end + + +subroutine get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + implicit none + BEGIN_DOC +! returns the density in r1,r2 and grad_rho at r1 + END_DOC + double precision, intent(in) :: r1(3),r2(3) + double precision, intent(out):: grad_rho1(3),rho1,rho2 + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + call density_and_grad_alpha_beta(r1, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho1 = dm_a(1) + dm_b(1) + grad_rho1(1:3) = grad_dm_a(1:3,1) + grad_dm_b(1:3,1) + call density_and_grad_alpha_beta(r2, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho2 = dm_a(1) + dm_b(1) +end + +subroutine get_all_f_rho(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu(rho2,alpha,mu0,beta,f_rho2,tmp) +end + + +subroutine get_all_f_rho_simple(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu_simple(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu_simple(rho2,alpha,mu0,beta,f_rho2,tmp) +end + +subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) + +end + diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 24e7e743..499ffe9d 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -284,6 +284,7 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, call wall_time(time1) print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/plot_mu_of_r.irp.f b/src/non_h_ints_mu/plot_mu_of_r.irp.f new file mode 100644 index 00000000..1100cd7c --- /dev/null +++ b/src/non_h_ints_mu/plot_mu_of_r.irp.f @@ -0,0 +1,33 @@ +program plot_mu_of_r + implicit none + read_wf = .False. + touch read_wf + call routine_print + +end + + +subroutine routine_print + implicit none + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.mu_of_r' + i_unit_output = getUnitAndOpen(output,'w') + integer :: ipoint,nx + double precision :: xmax,xmin,r(3),dx + double precision :: mu_val, mu_der(3),dm_a,dm_b,grad + xmax = 5.D0 + xmin = -5.D0 + nx = 10000 + dx = (xmax - xmin)/dble(nx) + r = 0.d0 + r(1) = xmin + do ipoint = 1, nx + call mu_r_val_and_grad(r, r, mu_val, mu_der) + call dm_dft_alpha_beta_at_r(r,dm_a,dm_b) + grad = mu_der(1)**2 + mu_der(2)**2 + mu_der(3)**2 + grad = dsqrt(grad) + write(i_unit_output,'(100(F16.7,X))')r(1),mu_val,dm_a+dm_b,grad + r(1) += dx + enddo +end diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index d5995ae5..b2c0df31 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -100,6 +100,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f !$OMP END DO !$OMP END PARALLEL + FREE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + elseif(j1b_type .ge. 100) then PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra @@ -176,6 +178,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f call wall_time(time1) print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + call print_memory_usage() END_PROVIDER @@ -242,6 +245,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL + FREE u12sq_j1bsq grad12_j12 + else PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 @@ -262,6 +267,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL + FREE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + endif elseif(j1b_type .ge. 100) then @@ -324,6 +331,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p call wall_time(time1) print*, ' wall time for int2_grad1_u12_square_ao =', time1-time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 450bbef0..afa10305 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -84,8 +84,11 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao enddo endif + FREE tc_grad_square_ao tc_grad_and_lapl_ao ao_two_e_coul + call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/nuclei/nuclei.irp.f b/src/nuclei/nuclei.irp.f index fabdc42e..bb8cc782 100644 --- a/src/nuclei/nuclei.irp.f +++ b/src/nuclei/nuclei.irp.f @@ -206,7 +206,12 @@ BEGIN_PROVIDER [ double precision, nuclear_repulsion ] enddo nuclear_repulsion *= 0.5d0 if(point_charges)then - nuclear_repulsion += pt_chrg_nuclei_interaction + pt_chrg_interaction + print*,'bear nuclear repulsion = ',nuclear_repulsion + print*,'adding the interaction between the nuclein and the point charges' + print*,'to the usual nuclear repulsion ' + nuclear_repulsion += pt_chrg_nuclei_interaction + print*,'new nuclear repulsion = ',nuclear_repulsion + print*,'WARNING: we do not add the interaction between the point charges themselves' endif end if diff --git a/src/nuclei/point_charges.irp.f b/src/nuclei/point_charges.irp.f index b955537f..66905c8c 100644 --- a/src/nuclei/point_charges.irp.f +++ b/src/nuclei/point_charges.irp.f @@ -205,5 +205,8 @@ BEGIN_PROVIDER [ double precision, pt_chrg_nuclei_interaction] enddo print*,'Interaction between point charges and nuclei' print*,'pt_chrg_nuclei_interaction = ',pt_chrg_nuclei_interaction + if(point_charges)then + provide pt_chrg_interaction + endif END_PROVIDER diff --git a/src/scf_utils/fock_matrix.irp.f b/src/scf_utils/fock_matrix.irp.f index 61633d3b..1942e542 100644 --- a/src/scf_utils/fock_matrix.irp.f +++ b/src/scf_utils/fock_matrix.irp.f @@ -5,6 +5,90 @@ ! Fock matrix on the MO basis. ! For open shells, the ROHF Fock Matrix is :: ! + ! | Rcc | F^b | Fcv | + ! |-----------------------| + ! | F^b | Roo | F^a | + ! |-----------------------| + ! | Fcv | F^a | Rvv | + ! + ! C: Core, O: Open, V: Virtual + ! + ! Rcc = Acc Fcc^a + Bcc Fcc^b + ! Roo = Aoo Foo^a + Boo Foo^b + ! Rvv = Avv Fvv^a + Bvv Fvv^b + ! Fcv = (F^a + F^b)/2 + ! + ! F^a: Fock matrix alpha (MO), F^b: Fock matrix beta (MO) + ! A,B: Coupling parameters + ! + ! J. Chem. Phys. 133, 141102 (2010), https://doi.org/10.1063/1.3503173 + ! Coupling parameters from J. Chem. Phys. 125, 204110 (2006); https://doi.org/10.1063/1.2393223. + ! cc oo vv + ! A -0.5 0.5 1.5 + ! B 1.5 0.5 -0.5 + ! + END_DOC + integer :: i,j,n + if (elec_alpha_num == elec_beta_num) then + Fock_matrix_mo = Fock_matrix_mo_alpha + else + ! Core + do j = 1, elec_beta_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = - 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 1.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = fock_matrix_mo_beta(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + enddo + ! Open + do j = elec_beta_num+1, elec_alpha_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = fock_matrix_mo_alpha(i,j) + enddo + enddo + ! Virtual + do j = elec_alpha_num+1, mo_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = fock_matrix_mo_alpha(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = 1.5d0 * fock_matrix_mo_alpha(i,j) & + - 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + enddo + endif + + ! Old + ! BEGIN_DOC + ! Fock matrix on the MO basis. + ! For open shells, the ROHF Fock Matrix is :: + ! ! | F-K | F + K/2 | F | ! |---------------------------------| ! | F + K/2 | F | F - K/2 | @@ -16,64 +100,64 @@ ! ! K = Fb - Fa ! - END_DOC - integer :: i,j,n - if (elec_alpha_num == elec_beta_num) then - Fock_matrix_mo = Fock_matrix_mo_alpha - else + ! END_DOC + !integer :: i,j,n + !if (elec_alpha_num == elec_beta_num) then + ! Fock_matrix_mo = Fock_matrix_mo_alpha + !else - do j=1,elec_beta_num - ! F-K - do i=1,elec_beta_num !CC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F+K/2 - do i=elec_beta_num+1,elec_alpha_num !CA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F - do i=elec_alpha_num+1, mo_num !CV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - enddo + ! do j=1,elec_beta_num + ! ! F-K + ! do i=1,elec_beta_num !CC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F+K/2 + ! do i=elec_beta_num+1,elec_alpha_num !CA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F + ! do i=elec_alpha_num+1, mo_num !CV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! enddo - do j=elec_beta_num+1,elec_alpha_num - ! F+K/2 - do i=1,elec_beta_num !AC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F - do i=elec_beta_num+1,elec_alpha_num !AA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_alpha_num+1, mo_num !AV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - enddo + ! do j=elec_beta_num+1,elec_alpha_num + ! ! F+K/2 + ! do i=1,elec_beta_num !AC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F + ! do i=elec_beta_num+1,elec_alpha_num !AA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! ! F-K/2 + ! do i=elec_alpha_num+1, mo_num !AV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! enddo - do j=elec_alpha_num+1, mo_num - ! F - do i=1,elec_beta_num !VC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_beta_num+1,elec_alpha_num !VA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F+K - do i=elec_alpha_num+1,mo_num !VV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) & - + (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - enddo + ! do j=elec_alpha_num+1, mo_num + ! ! F + ! do i=1,elec_beta_num !VC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! ! F-K/2 + ! do i=elec_beta_num+1,elec_alpha_num !VA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F+K + ! do i=elec_alpha_num+1,mo_num !VV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) & + ! + (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! enddo - endif + !endif do i = 1, mo_num Fock_matrix_diag_mo(i) = Fock_matrix_mo(i,i) @@ -115,8 +199,6 @@ END_PROVIDER - - BEGIN_PROVIDER [ double precision, Fock_matrix_mo_alpha, (mo_num,mo_num) ] implicit none BEGIN_DOC diff --git a/src/tc_bi_ortho/31.tc_bi_ortho.bats b/src/tc_bi_ortho/31.tc_bi_ortho.bats index f5b9d8c0..93bed2ab 100644 --- a/src/tc_bi_ortho/31.tc_bi_ortho.bats +++ b/src/tc_bi_ortho/31.tc_bi_ortho.bats @@ -4,46 +4,50 @@ source $QP_ROOT/tests/bats/common.bats.sh source $QP_ROOT/quantum_package.rc +function get_e() { + grep "eigval_right_tc_bi_orth" $1 | cut -d '=' -f 2 | xargs +} + function run_Ne() { - qp set_file Ne_tc_scf - qp run cisd - qp run tc_bi_ortho | tee Ne_tc_scf.cisd_tc_bi_ortho.out + qp set_file Ne_tc_scf + qp run cisd + qp run tc_bi_ortho | tee Ne_tc_scf.cisd_tc_bi_ortho.out eref=-128.77020441279302 - energy="$(grep "eigval_right_tc_bi_orth =" Ne_tc_scf.cisd_tc_bi_ortho.out)" + energy=$(get_e Ne_tc_scf.cisd_tc_bi_ortho.out) eq $energy $eref 1e-6 } @test "Ne" { - run_Ne + run_Ne } function run_C() { - qp set_file C_tc_scf - qp run cisd - qp run tc_bi_ortho | tee C_tc_scf.cisd_tc_bi_ortho.out + qp set_file C_tc_scf + qp run cisd + qp run tc_bi_ortho | tee C_tc_scf.cisd_tc_bi_ortho.out eref=-37.757536149952514 - energy="$(grep "eigval_right_tc_bi_orth =" C_tc_scf.cisd_tc_bi_ortho.out)" + energy=$(get_e C_tc_scf.cisd_tc_bi_ortho.out) eq $energy $eref 1e-6 } @test "C" { - run_C + run_C } function run_O() { - qp set_file C_tc_scf - qp run cisd - qp run tc_bi_ortho | tee O_tc_scf.cisd_tc_bi_ortho.out + qp set_file C_tc_scf + qp run cisd + qp run tc_bi_ortho | tee O_tc_scf.cisd_tc_bi_ortho.out eref=-74.908518517716161 - energy="$(grep "eigval_right_tc_bi_orth =" O_tc_scf.cisd_tc_bi_ortho.out)" + energy=$(get_e O_tc_scf.cisd_tc_bi_ortho.out) eq $energy $eref 1e-6 } @test "O" { - run_O + run_O } diff --git a/src/tc_bi_ortho/dressing_vectors_lr.irp.f b/src/tc_bi_ortho/dressing_vectors_lr.irp.f index 08913bab..ed663f02 100644 --- a/src/tc_bi_ortho/dressing_vectors_lr.irp.f +++ b/src/tc_bi_ortho/dressing_vectors_lr.irp.f @@ -27,7 +27,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) call hmat_bi_ortho (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta = 0.d0 @@ -39,7 +39,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I | Htilde | J > - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) ! < I | H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) @@ -78,7 +78,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -88,7 +88,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I | Htilde | J > - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta(i) = delta(i) + psicoef(j) * htc_tot enddo diff --git a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f index 3a715b44..6d5c3b21 100644 --- a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f +++ b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f @@ -2,7 +2,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_00] implicit none double precision :: hmono,htwoe,hthree,htot - call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) e_tilde_00 = htot END_PROVIDER @@ -18,11 +18,11 @@ do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_pt2_tc_bi_orth += coef_pt1 * htilde_ij if(degree == 1)then e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij @@ -37,7 +37,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_bi_orth_00] implicit none double precision :: hmono,htwoe,hthree,htilde_ij - call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) e_tilde_bi_orth_00 += nuclear_repulsion END_PROVIDER @@ -57,7 +57,7 @@ e_corr_double_bi_orth = 0.d0 do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) if(degree == 1)then e_corr_single_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) e_corr_single_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)) @@ -80,7 +80,7 @@ do i = 1, N_det accu += reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(i,1) do j = 1, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_tc_left_right += htilde_ij * reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(j,1) enddo enddo @@ -99,8 +99,8 @@ BEGIN_PROVIDER [ double precision, coef_pt1_bi_ortho, (N_det)] if(degree==0)then coef_pt1_bi_ortho(i) = 1.d0 else - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e coef_pt1_bi_ortho(i)= coef_pt1 diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index b7129d36..1d1b26cc 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -1,4 +1,4 @@ -subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) +subroutine htc_bi_ortho_calc_tdav_slow(v, u, N_st, sze) use bitmasks @@ -27,7 +27,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -36,7 +36,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) do istate = 1, N_st do i = 1, sze do j = 1, sze - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v(i,istate) = v(i,istate) + htot * u(j,istate) enddo enddo @@ -45,7 +45,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) end -subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) +subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) use bitmasks @@ -71,7 +71,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v = 0.d0 @@ -81,7 +81,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) do istate = 1, N_st do i = 1, sze do j = 1, sze - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) v(i,istate) = v(i,istate) + htot * u(j,istate) enddo enddo diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index f9728d05..ca5875c9 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -1,6 +1,1036 @@ ! --- +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_v0, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordering of the three body interaction on the HF density + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + double precision :: wall0, wall1, walli, wallf + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + + print*,' Providing normal_two_body_bi_orth_v0 ...' + call wall_time(walli) + + if(read_tc_norm_ord) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_v0', action="read") + read(11) normal_two_body_bi_orth_v0 + close(11) + + else + + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + double precision, allocatable :: tmp(:,:,:,:) + + PROVIDE N_int + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) + + ! --- + ! aba contraction + + print*,' Providing aba_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmp_2d) + + tmp = -0.5d0 * tmp + call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for aba_contraction', wall1-wall0 + + normal_two_body_bi_orth_v0 = tmp + + ! --- + ! aab contraction + + print*,' Providing aab_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpvec_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aab_contraction', wall1-wall0 + + normal_two_body_bi_orth_v0 += tmp + + ! --- + ! aaa contraction + + if(Ne(2) .ge. 3) then + + print*,' Providing aaa_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aaa_contraction', wall1-wall0 + + normal_two_body_bi_orth_v0 += tmp + endif ! Ne(2) .ge. 3 + + deallocate(tmp) + + endif ! read_tc_norm_ord + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_v0', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth_v0 + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(wallf) + print*,' Wall time for normal_two_body_bi_orth_v0 ', wallf-walli + +END_PROVIDER + +! --- + BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC @@ -11,17 +1041,16 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ implicit none - integer :: i,h1,p1,h2,p2 - integer :: hh1,hh2,pp1,pp2 + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) + double precision :: wall0, wall1, walli, wallf integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree_aba,hthree_aaa,hthree_aab - double precision :: wall0,wall1 print*,' Providing normal_two_body_bi_orth ...' - call wall_time(wall0) - + call wall_time(walli) + if(read_tc_norm_ord) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") @@ -30,6 +1059,11 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ else + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + double precision, allocatable :: tmp(:,:,:,:) + PROVIDE N_int allocate( occ(N_int*bit_kind_size,2) ) @@ -37,57 +1071,406 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ if(core_tc_op) then do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - normal_two_body_bi_orth = 0.d0 + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - ! all contributions from the 3-e terms to the double excitations - ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - + ! --- + ! aba contraction - ! opposite spin double excitations : s1 /= s2 - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + print*,' Providing aba_contraction ...' + call wall_time(wall0) - ! same spin double excitations : s1 == s2 - if(h1h2 - ! same spin double excitations with same spin contributions - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then - ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + tmp = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp) + + allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + tmp_3d = 0.d0 + tmp_2d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + + !$OMP DO + + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp) + + Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + Allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + Allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + Allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + Tmp_3d = 0.d0 + Tmp_2d = 0.d0 + Tmp1 = 0.d0 + Tmp2 = 0.d0 + Tmpval_1 = 0.d0 + Tmpval_2 = 0.d0 + Tmpvec_1 = 0.d0 + Tmpvec_2 = 0.d0 + + !$OMP DO + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo !i + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + endif + + tmp = -0.5d0 * tmp + call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for aba_contraction', wall1-wall0 + + normal_two_body_bi_orth = tmp + + ! --- + ! aab contraction + + print*,' Providing aab_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP tmpval_1, tmpvec_1) & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpvec_1 = 0.d0 + + !$OMP DO + + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + + !$OMP END DO + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + !$OMP END PARALLEL + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 enddo enddo enddo @@ -95,9 +1478,481 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP END DO !$OMP END PARALLEL - deallocate( occ ) - deallocate( key_i_core ) - endif + call wall_time(wall1) + print*,' Wall time for aab_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + + ! --- + ! aaa contraction + + if(Ne(2) .ge. 3) then + + print*,' Providing aaa_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo !i + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + endif + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aaa_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + endif ! Ne(2) .ge. 3 + + deallocate(tmp) + + endif ! read_tc_norm_ord if(write_tc_norm_ord.and.mpi_master) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") @@ -107,245 +1962,10 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call ezfio_set_tc_keywords_io_tc_integ('Read') endif - call wall_time(wall1) - print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 + call wall_time(wallf) + print*,' Wall time for normal_two_body_bi_orth ', wallf-walli END_PROVIDER +! --- - -subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii, i - double precision :: int_direct, int_exc_12, int_exc_13, integral - - !!!! double alpha/beta - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) - int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) - int_exc_13 = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2, i,p1,i,h2,h1,integral) - int_exc_12 = -1.d0 * integral - hthree += 2.d0 * int_direct - 1.d0 * ( int_exc_13 + int_exc_12) - enddo - do ii = Ne(2) + 1, Ne(1) ! purely open-shell part - i = occ(ii,1) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) - int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) - int_exc_13 = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2, i,p1,i,h2,h1,integral) - int_exc_12 = -1.d0 * integral - hthree += 1.d0 * int_direct - 0.5d0* ( int_exc_13 + int_exc_12) - enddo - -end subroutine give_aba_contraction - - - -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: h1, p1, h2, p2, i - integer :: hh1, hh2, pp1, pp2 - integer :: Ne(2) - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree - - PROVIDE N_int - - allocate( key_i_core(N_int,2) ) - allocate( occ(N_int*bit_kind_size,2) ) - - if(core_tc_op)then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) - else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) - endif - normal_two_body_bi_orth_ab = 0.d0 - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) - normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree - enddo - enddo - enddo - enddo - - deallocate( key_i_core ) - deallocate( occ ) - -END_PROVIDER - - - -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] - - BEGIN_DOC - ! Normal ordered two-body sector of the three-body terms for same spin double excitations - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i,ii,j,h1,p1,h2,p2 - integer :: hh1,hh2,pp1,pp2 - integer :: Ne(2) - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree_aab, hthree_aaa - - PROVIDE N_int - - allocate( key_i_core(N_int,2) ) - allocate( occ(N_int*bit_kind_size,2) ) - - if(core_tc_op)then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - normal_two_body_bi_orth_aa_bb = 0.d0 - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1 , n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1 , n_act_orb - p2 = list_act(pp2) - if(h1h2 - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - endif - normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa - enddo - enddo - enddo - enddo - - deallocate( key_i_core ) - deallocate( occ ) - -END_PROVIDER - - - -subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - BEGIN_DOC -! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 - END_DOC - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii,i - double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 - double precision :: integral,int_exc_l,int_exc_ll - - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) - int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral) - int_exc_l = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral) - int_exc_ll= -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral) - int_exc_12= -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) - int_exc_13= -1.d0 * integral - call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral) - int_exc_23= -1.d0 * integral - - hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23 ) - enddo - do ii = Ne(2)+1,Ne(1) ! purely open-shell part - i = occ(ii,1) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) - int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral) - int_exc_l = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral) - int_exc_ll= -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral) - int_exc_12= -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) - int_exc_13= -1.d0 * integral - call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral) - int_exc_23= -1.d0 * integral - - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23 )) - enddo - -end subroutine give_aaa_contraction - - - -subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - implicit none - use bitmasks ! you need to include the bitmasks_module.f90 features - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii, i - double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 - double precision :: integral, int_exc_l, int_exc_ll - - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - call give_integrals_3_body_bi_ort(p2,p1,i,h2,h1,i,integral) - int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2,i,h2,h1,i,integral) - int_exc_23= -1.d0 * integral - hthree += 1.d0 * int_direct - int_exc_23 - enddo - -end subroutine give_aab_contraction diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f new file mode 100644 index 00000000..d11c6727 --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -0,0 +1,2022 @@ + +! --- + +BEGIN_PROVIDER [ double precision, no_aba_contraction_v0, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aba_contraction_v0 ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + no_aba_contraction_v0 = -0.5d0 * no_aba_contraction_v0 + call sum_A_At(no_aba_contraction_v0(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for no_aba_contraction_v0', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aab_contraction_v0, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction_v0 ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpvec_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction_v0(p2,h2,p1,h1) = no_aab_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction_v0(p2,h2,p1,h1) = no_aab_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + no_aab_contraction_v0 = -0.5d0 * no_aab_contraction_v0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction_v0, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction_v0(p2,h2,p1,h1) -= no_aab_contraction_v0(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction_v0(p2,h2,p1,h1) = -no_aab_contraction_v0(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction_v0(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction_v0', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction_v0, (mo_num,mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! + ! if: + ! h1 < h2 + ! p1 > p2 + ! + ! no_aaa_contraction_v0(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! else: + ! + ! no_aaa_contraction_v0(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! + ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) + ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > + ! + < p2 p1 i | i h2 h1 > + ! + < p1 i p2 | i h2 h1 > ] + ! + ! + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction_v0 ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction_v0 = 0.d0 + + else + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2, tmp3) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + + no_aaa_contraction_v0 = -0.5d0 * no_aaa_contraction_v0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction_v0, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction_v0(p2,h2,p1,h1) -= no_aaa_contraction_v0(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction_v0(p2,h2,p1,h1) = -no_aaa_contraction_v0(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction_v0(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction_v0', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aba_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aba_contraction) + + allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + tmp_3d = 0.d0 + tmp_2d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + + !$OMP DO + + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aba_contraction) + + Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + Allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + Allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + Allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + Tmp_3d = 0.d0 + Tmp_2d = 0.d0 + Tmp1 = 0.d0 + Tmp2 = 0.d0 + Tmpval_1 = 0.d0 + Tmpval_2 = 0.d0 + Tmpvec_1 = 0.d0 + Tmpvec_2 = 0.d0 + + !$OMP DO + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo !i + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + endif + + no_aba_contraction = -0.5d0 * no_aba_contraction + call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for no_aba_contraction', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP tmpval_1, tmpvec_1) & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aab_contraction) + + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpvec_1 = 0.d0 + + !$OMP DO + + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + + !$OMP END DO + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + !$OMP END PARALLEL + + no_aab_contraction = -0.5d0 * no_aab_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aaa_contraction) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aaa_contraction) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo !i + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + endif + + no_aaa_contraction = -0.5d0 * no_aaa_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction', wall1-wall0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f new file mode 100644 index 00000000..417580dd --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -0,0 +1,392 @@ + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordering of the three body interaction on the HF density + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + + integer :: i, h1, p1, h2, p2 + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + + print*,' Providing normal_two_body_bi_orth_old ...' + call wall_time(wall0) + + PROVIDE N_int + + if(read_tc_norm_ord) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="read") + read(11) normal_two_body_bi_orth_old + close(11) + + else + + PROVIDE N_int + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth_old = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth_old) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations + ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant + + + ! opposite spin double excitations : s1 /= s2 + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + + ! same spin double excitations : s1 == s2 + if(h1h2 + ! same spin double excitations with same spin contributions + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + + normal_two_body_bi_orth_old(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate( occ ) + deallocate( key_i_core ) + endif + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth_old + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(wall1) + print*,' Wall time for normal_two_body_bi_orth_old ', wall1-wall0 + +END_PROVIDER + +! --- + +subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii, i + double precision :: int_direct, int_exc_12, int_exc_13, integral + + !!!! double alpha/beta + hthree = 0.d0 + + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) + enddo + + do ii = Ne(2) + 1, Ne(1) ! purely open-shell part + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + enddo + + return +end + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: h1, p1, h2, p2, i + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision :: hthree + + PROVIDE N_int + + allocate( key_i_core(N_int,2) ) + allocate( occ(N_int*bit_kind_size,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth_ab = 0.d0 + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) + + normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree + enddo + enddo + enddo + enddo + + deallocate( key_i_core ) + deallocate( occ ) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] + + BEGIN_DOC + ! Normal ordered two-body sector of the three-body terms for same spin double excitations + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i,ii,j,h1,p1,h2,p2 + integer :: hh1,hh2,pp1,pp2 + integer :: Ne(2) + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision :: hthree_aab, hthree_aaa + + PROVIDE N_int + + allocate( key_i_core(N_int,2) ) + allocate( occ(N_int*bit_kind_size,2) ) + + if(core_tc_op)then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + normal_two_body_bi_orth_aa_bb = 0.d0 + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1 , n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1 , n_act_orb + p2 = list_act(pp2) + if(h1h2 + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa + enddo + enddo + enddo + enddo + + deallocate( key_i_core ) + deallocate( occ ) + +END_PROVIDER + +! --- + +subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + BEGIN_DOC + ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii,i + double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 + double precision :: integral,int_exc_l,int_exc_ll + + hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll= -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12= -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13= -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23= -1.d0 * integral + + hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) + enddo + + do ii = Ne(2)+1,Ne(1) ! purely open-shell part + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23 = -1.d0 * integral + + !hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + hthree += 0.5d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + enddo + + return +end + +! --- + +subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii, i + double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 + double precision :: integral, int_exc_l, int_exc_ll + + hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) + int_exc_23= -1.d0 * integral + + hthree += 1.d0 * int_direct - int_exc_23 + enddo + + return +end + +! --- + diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f index 0cf3ca87..0c4198a9 100644 --- a/src/tc_bi_ortho/print_tc_wf.irp.f +++ b/src/tc_bi_ortho/print_tc_wf.irp.f @@ -49,12 +49,12 @@ subroutine routine do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) contrib_pt = coef_pt1 * htilde_ij e_pt2 += contrib_pt diff --git a/src/tc_bi_ortho/pt2_tc_cisd.irp.f b/src/tc_bi_ortho/pt2_tc_cisd.irp.f index 50d9dd45..9cb9a600 100644 --- a/src/tc_bi_ortho/pt2_tc_cisd.irp.f +++ b/src/tc_bi_ortho/pt2_tc_cisd.irp.f @@ -36,11 +36,11 @@ subroutine routine e_corr_abs = 0.d0 e_corr_pos = 0.d0 e_corr_neg = 0.d0 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) do i = 2, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i),degree,N_int) call get_excitation(psi_det(1,1,1), psi_det(1,1,i),exc,degree,phase,N_int) call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f similarity index 74% rename from src/tc_bi_ortho/slater_tc_3e.irp.f rename to src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 7b73d5f2..49977f37 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -1,23 +1,5 @@ -subroutine provide_all_three_ints_bi_ortho - implicit none - BEGIN_DOC -! routine that provides all necessary three-electron integrals - END_DOC - if(three_body_h_tc)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort - endif -if(.not.double_normal_ord)then - PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort - PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort -else - PROVIDE normal_two_body_bi_orth -endif -end -subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) +subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) BEGIN_DOC ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS @@ -50,28 +32,28 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) if(Ne(1)+Ne(2).ge.3)then !! ! alpha/alpha/beta three-body do i = 1, Ne(1) - ii = occ(i,1) + ii = occ(i,1) do j = i+1, Ne(1) - jj = occ(j,1) + jj = occ(j,1) do m = 1, Ne(2) - mm = occ(m,2) -! direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) USES THE 6-IDX TENSOR -! exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) USES THE 6-IDX TENSOR - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR - exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR + mm = occ(m,2) +! direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) USES THE 6-IDX TENSOR +! exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) USES THE 6-IDX TENSOR + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR hthree += direct_int - exchange_int enddo enddo enddo - + ! beta/beta/alpha three-body do i = 1, Ne(2) - ii = occ(i,2) + ii = occ(i,2) do j = i+1, Ne(2) - jj = occ(j,2) + jj = occ(j,2) do m = 1, Ne(1) - mm = occ(m,1) - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) + mm = occ(m,1) + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) hthree += direct_int - exchange_int enddo @@ -82,10 +64,10 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) do i = 1, Ne(1) ii = occ(i,1) ! 1 do j = i+1, Ne(1) - jj = occ(j,1) ! 2 + jj = occ(j,1) ! 2 do m = j+1, Ne(1) - mm = occ(m,1) ! 3 -! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR + mm = occ(m,1) ! 3 +! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS enddo enddo @@ -98,7 +80,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) jj = occ(j,2) ! 2 do m = j+1, Ne(2) mm = occ(m,2) ! 3 -! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR +! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS enddo enddo @@ -108,13 +90,13 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) end -subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) +subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC ! for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC @@ -128,7 +110,7 @@ subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) integer :: Ne(2),i,j,ii,jj,ispin,jspin,k,kk integer :: degree,exc(0:2,2,2) integer :: h1, p1, h2, p2, s1, s2 - double precision :: direct_int,phase,exchange_int,three_e_single_parrallel_spin + double precision :: direct_int,phase,exchange_int,three_e_single_parrallel_spin double precision :: sym_3_e_int_from_6_idx_tensor integer :: other_spin(2) integer(bit_kind) :: key_j_core(Nint,2),key_i_core(Nint,2) @@ -160,26 +142,26 @@ subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) ! alpha/alpha/beta three-body ! print*,'IN SLAT RULES' if(Ne(1)+Ne(2).ge.3)then - ! hole of spin s1 :: contribution from purely other spin + ! hole of spin s1 :: contribution from purely other spin ispin = other_spin(s1) ! ispin is the other spin than s1 - do i = 1, Ne(ispin) ! i is the orbitals of the other spin than s1 - ii = occ(i,ispin) - do j = i+1, Ne(ispin) ! j has the same spin than s1 - jj = occ(j,ispin) + do i = 1, Ne(ispin) ! i is the orbitals of the other spin than s1 + ii = occ(i,ispin) + do j = i+1, Ne(ispin) ! j has the same spin than s1 + jj = occ(j,ispin) ! is == ispin in ::: s1 is is s1 is is s1 is is s1 is is ! < h1 j i | p1 j i > - < h1 j i | p1 i j > - ! - direct_int = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1) - exchange_int = three_e_4_idx_exch23_bi_ort(jj,ii,p1,h1) + ! + direct_int = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1) + exchange_int = three_e_4_idx_exch23_bi_ort(jj,ii,p1,h1) hthree += direct_int - exchange_int enddo enddo - + ! hole of spin s1 :: contribution from mixed other spin / same spin - do i = 1, Ne(ispin) ! other spin - ii = occ(i,ispin) ! other spin - do j = 1, Ne(s1) ! same spin - jj = occ(j,s1) ! same spin + do i = 1, Ne(ispin) ! other spin + ii = occ(i,ispin) ! other spin + do j = 1, Ne(s1) ! same spin + jj = occ(j,s1) ! same spin direct_int = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1) exchange_int = three_e_4_idx_exch13_bi_ort(jj,ii,p1,h1) ! < h1 j i | p1 j i > - < h1 j i | j p1 i > @@ -192,8 +174,8 @@ subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) ii = occ(i,s1) do j = i+1, Ne(s1) jj = occ(j,s1) -! ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) - hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR +! ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) + hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR enddo enddo endif @@ -203,13 +185,13 @@ end ! --- -subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) +subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC ! for double excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC @@ -253,29 +235,30 @@ subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) call get_double_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) - + if(Ne(1)+Ne(2).ge.3)then - if(s1==s2)then ! same spin excitation + if(s1==s2)then ! same spin excitation ispin = other_spin(s1) do m = 1, Ne(ispin) ! direct(other_spin) - exchange(s1) mm = occ(m,ispin) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(mm,p2,h1,p1,h2) hthree += direct_int - exchange_int enddo - do m = 1, Ne(s1) ! pure contribution from s1 + do m = 1, Ne(s1) ! pure contribution from s1 mm = occ(m,s1) hthree += three_e_double_parrallel_spin(mm,p2,h2,p1,h1) - enddo - else ! different spin excitation + enddo + else ! different spin excitation do m = 1, Ne(s1) - mm = occ(m,s1) ! - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + mm = occ(m,s1) ! + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1) hthree += direct_int - exchange_int enddo do m = 1, Ne(s2) - mm = occ(m,s2) ! + mm = occ(m,s2) ! direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1) hthree += direct_int - exchange_int diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index a19d4688..ceefbfb8 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,3 +1,37 @@ +! --- + +subroutine provide_all_three_ints_bi_ortho() + + BEGIN_DOC + ! routine that provides all necessary three-electron integrals + END_DOC + + implicit none + + if(three_body_h_tc) then + + if(three_e_3_idx_term) then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + + if(three_e_4_idx_term) then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort + endif + + if(.not. double_normal_ord .and. three_e_5_idx_term) then + PROVIDE three_e_5_idx_direct_bi_ort + elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then + PROVIDE normal_two_body_bi_orth + endif + + endif + + return +end + +! --- + subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 5a3f9935..531f0141 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -7,11 +7,11 @@ ! Various component of the TC energy for the reference "HF" Slater determinant END_DOC double precision :: hmono, htwoe, htot, hthree - call diag_htilde_mu_mat_bi_ortho(N_int,HF_bitmask , hmono, htwoe, htot) + call diag_htilde_mu_mat_bi_ortho_slow(N_int,HF_bitmask , hmono, htwoe, htot) ref_tc_energy_1e = hmono ref_tc_energy_2e = htwoe if(three_body_h_tc)then - call diag_htilde_three_body_ints_bi_ort(N_int, HF_bitmask, hthree) + call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) ref_tc_energy_3e = hthree else ref_tc_energy_3e = 0.d0 @@ -156,7 +156,7 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc.and.elec_num.gt.2)then + if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na @@ -243,7 +243,7 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc.and.elec_num.gt.2)then + if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index 1b0e43bb..12bbbec0 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -2,17 +2,17 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) BEGIN_DOC - ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC use bitmasks implicit none - integer, intent(in) :: Nint + integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) double precision, intent(out) :: hmono, htwoe, hthree, htot integer :: occ(Nint*bit_kind_size,2) @@ -39,33 +39,33 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) if(s1.ne.s2)then - ! opposite spin two-body - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! opposite spin two-body + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then - if(.not.double_normal_ord)then + if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num.gt.2)then + elseif(double_normal_ord)then htwoe += normal_two_body_bi_orth(p2,h2,p1,h1) endif endif else - ! same spin two-body - ! direct terms - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - ! exchange terms - htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + ! same spin two-body + ! direct terms + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! exchange terms + htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then - if(.not.double_normal_ord)then + if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num.gt.2)then + elseif(double_normal_ord)then htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2) htwoe += normal_two_body_bi_orth(h1,p1,h2,p2) endif @@ -112,72 +112,76 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) !DIR$ FORCEINLINE call bitstring_to_list_ab(particle, occ_particle, tmp, N_int) ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha - ASSERT (tmp(2) == nexc(2)) ! Number of particle beta + ASSERT (tmp(2) == nexc(2)) ! Number of particle beta !DIR$ FORCEINLINE call bitstring_to_list_ab(hole, occ_hole, tmp, N_int) ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha - ASSERT (tmp(2) == nexc(2)) ! Number of holes beta + ASSERT (tmp(2) == nexc(2)) ! Number of holes beta if(s1==s2.and.s1==1)then !!!!!!!!!!!!!!!!!!!!!!!!!! alpha/alpha double exc - hthree = eff_2_e_from_3_e_aa(p2,p1,h2,h1) - if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant - !!!!!!!! the matrix element is already exact - !!!!!!!! else you need to take care of holes and particles + hthree = eff_2_e_from_3_e_aa(p2,p1,h2,h1) + if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant + !!!!!!!! the matrix element is already exact + !!!!!!!! else you need to take care of holes and particles !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!! ispin = 1 ! i==alpha ==> pure same spin terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ipart=occ_particle(i,ispin) hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1) ihole=occ_hole(i,ispin) hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1) enddo ispin = 2 ! i==beta ==> alpha/alpha/beta terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h1,p1) and (h2,p2) ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ipart,p2,h1,p1,h2) hthree += direct_int - exchange_int ihole=occ_hole(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ihole,p2,h1,p1,h2) hthree -= direct_int - exchange_int enddo !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - elseif(s1==s2.and.s1==2)then + elseif(s1==s2.and.s1==2)then !!!!!!!!!!!!!!!!!!!!!!!!!! beta/beta double exc hthree = eff_2_e_from_3_e_bb(p2,p1,h2,h1) - if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant - !!!!!!!! the matrix element is already exact - !!!!!!!! else you need to take care of holes and particles + if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant + !!!!!!!! the matrix element is already exact + !!!!!!!! else you need to take care of holes and particles !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!! ispin = 2 ! i==beta ==> pure same spin terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ipart=occ_particle(i,ispin) hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1) ihole=occ_hole(i,ispin) hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1) enddo ispin = 1 ! i==alpha==> beta/beta/alpha terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h1,p1) and (h2,p2) ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ipart,p2,h1,p1,h2) hthree += direct_int - exchange_int ihole=occ_hole(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ihole,p2,h1,p1,h2) hthree -= direct_int - exchange_int enddo - else ! (h1,p1) == alpha/(h2,p2) == beta + else ! (h1,p1) == alpha/(h2,p2) == beta hthree = eff_2_e_from_3_e_ab(p2,p1,h2,h1) - if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant - !!!!!!!! the matrix element is already exact - !!!!!!!! else you need to take care of holes and particles + if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant + !!!!!!!! the matrix element is already exact + !!!!!!!! else you need to take care of holes and particles !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!! - ispin = 1 ! i==alpha ==> alpha/beta/alpha terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + ispin = 1 ! i==alpha ==> alpha/beta/alpha terms + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h1,p1) and i ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) @@ -188,8 +192,8 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) exchange_int = three_e_5_idx_exch13_bi_ort(ihole,p2,h2,p1,h1) hthree -= direct_int - exchange_int enddo - ispin = 2 ! i==beta ==> alpha/beta/beta terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + ispin = 2 ! i==beta ==> alpha/beta/beta terms + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h2,p2) and i ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) @@ -207,7 +211,7 @@ end BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC -! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/beta double excitations +! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/beta double excitations ! ! from contraction with HF density = a^{dagger}_p1_alpha a^{dagger}_p2_beta a_h2_beta a_h1_alpha END_DOC @@ -222,16 +226,16 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, eff_2_e_from_3_e_ab = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_ab) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb !! alpha - h1 = list_act(hh1) - do hh2 = 1, n_act_orb !! beta - h2 = list_act(hh2) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb !! alpha + h1 = list_act(hh1) + do hh2 = 1, n_act_orb !! beta + h2 = list_act(hh2) do pp1 = 1, n_act_orb !! alpha p1 = list_act(pp1) - do pp2 = 1, n_act_orb !! beta + do pp2 = 1, n_act_orb !! beta p2 = list_act(pp2) call give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib) eff_2_e_from_3_e_ab(p2,p1,h2,h1) = contrib @@ -242,25 +246,25 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib) implicit none - BEGIN_DOC + BEGIN_DOC ! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_beta ! ! on top of a determinant whose occupied orbitals is in (occ, Ne) END_DOC integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2) double precision, intent(out) :: contrib - integer :: mm,m + integer :: mm,m double precision :: direct_int, exchange_int - !! h1,p1 == alpha + !! h1,p1 == alpha !! h2,p2 == beta contrib = 0.d0 - do mm = 1, Ne(1) !! alpha + do mm = 1, Ne(1) !! alpha m = occ(mm,1) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h1,p1) and m exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1) contrib += direct_int - exchange_int @@ -268,7 +272,7 @@ subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib) do mm = 1, Ne(2) !! beta m = occ(mm,2) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h2,p2) and m exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1) contrib += direct_int - exchange_int @@ -278,11 +282,11 @@ end BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC -! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/alpha double excitations +! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/alpha double excitations ! ! from contractionelec_alpha_num with HF density = a^{dagger}_p1_alpha a^{dagger}_p2_alpha a_h2_alpha a_h1_alpha ! -! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill +! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill ! ! |||| h2>h1, p2>p1 |||| END_DOC @@ -297,13 +301,13 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, eff_2_e_from_3_e_aa = 100000000.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_aa) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb !! alpha - h1 = list_act(hh1) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb !! alpha + h1 = list_act(hh1) do hh2 = hh1+1, n_act_orb !! alpha - h2 = list_act(hh2) + h2 = list_act(hh2) do pp1 = 1, n_act_orb !! alpha p1 = list_act(pp1) do pp2 = pp1+1, n_act_orb !! alpha @@ -317,20 +321,20 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER subroutine give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib) implicit none - BEGIN_DOC + BEGIN_DOC ! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_alpha ! ! on top of a determinant whose occupied orbitals is in (occ, Ne) END_DOC integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2) double precision, intent(out) :: contrib - integer :: mm,m + integer :: mm,m double precision :: direct_int, exchange_int - !! h1,p1 == alpha + !! h1,p1 == alpha !! h2,p2 == alpha contrib = 0.d0 do mm = 1, Ne(1) !! alpha ==> pure parallele spin contribution @@ -340,9 +344,10 @@ subroutine give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib) do mm = 1, Ne(2) !! beta m = occ(mm,2) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h1,p1) and (h2,p2) - exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(mm,p2,h1,p1,h2) contrib += direct_int - exchange_int enddo end @@ -351,11 +356,11 @@ end BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC -! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for beta/beta double excitations +! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for beta/beta double excitations ! ! from contractionelec_beta_num with HF density = a^{dagger}_p1_beta a^{dagger}_p2_beta a_h2_beta a_h1_beta ! -! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill +! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill ! ! |||| h2>h1, p2>p1 |||| END_DOC @@ -370,13 +375,13 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, eff_2_e_from_3_e_bb = 100000000.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_bb) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb !! beta - h1 = list_act(hh1) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb !! beta + h1 = list_act(hh1) do hh2 = hh1+1, n_act_orb !! beta - h2 = list_act(hh2) + h2 = list_act(hh2) do pp1 = 1, n_act_orb !! beta p1 = list_act(pp1) do pp2 = pp1+1, n_act_orb !! beta @@ -390,18 +395,18 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib) implicit none - BEGIN_DOC + BEGIN_DOC ! gives the contribution for a double excitation (h1,p1)_beta (h2,p2)_beta ! ! on top of a determinant whose occupied orbitals is in (occ, Ne) END_DOC integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2) double precision, intent(out) :: contrib - integer :: mm,m + integer :: mm,m double precision :: direct_int, exchange_int !! h1,p1 == beta !! h2,p2 == beta @@ -413,9 +418,10 @@ subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib) do mm = 1, Ne(1) !! alpha m = occ(mm,1) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h1,p1) and (h2,p2) - exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(mm,p2,h1,p1,h2) contrib += direct_int - exchange_int enddo end @@ -424,17 +430,17 @@ end subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) BEGIN_DOC - ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC use bitmasks implicit none - integer, intent(in) :: Nint + integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) double precision, intent(out) :: htot double precision :: hmono, htwoe @@ -461,17 +467,17 @@ subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) if(s1.ne.s2)then - ! opposite spin two-body - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! opposite spin two-body + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) else - ! same spin two-body - ! direct terms - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - ! exchange terms - htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + ! same spin two-body + ! direct terms + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! exchange terms + htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) endif htwoe *= phase - htot = htwoe + htot = htwoe end diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 2f9d83bf..9719a6e7 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -106,7 +106,7 @@ subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,h htwoe -= buffer_x(i) enddo hthree = 0.d0 - if (three_body_h_tc.and.elec_num.gt.2)then + if (three_body_h_tc.and.elec_num.gt.2.and.three_e_4_idx_term)then call three_comp_fock_elem(key_i,h,p,spin,hthree) endif @@ -243,7 +243,9 @@ subroutine fock_ac_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree, do j = 1, nb jj = occ(j,other_spin) direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR - exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR + ! TODO + ! use transpose + exchange_int = three_e_4_idx_exch13_bi_ort(iorb,jj,p_fock,h_fock) ! USES 4-IDX TENSOR hthree += direct_int - exchange_int enddo else !! ispin NE to ispin_fock @@ -322,7 +324,8 @@ subroutine fock_a_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,N do j = 1, nb jj = occ(j,other_spin) direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR - exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR + ! TODO use transpose + exchange_int = three_e_4_idx_exch13_bi_ort(iorb,jj,p_fock,h_fock) ! USES 4-IDX TENSOR hthree -= direct_int - exchange_int enddo else !! ispin NE to ispin_fock diff --git a/src/tc_bi_ortho/slater_tc.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f similarity index 85% rename from src/tc_bi_ortho/slater_tc.irp.f rename to src/tc_bi_ortho/slater_tc_slow.irp.f index 2c0ae2ca..1833d20f 100644 --- a/src/tc_bi_ortho/slater_tc.irp.f +++ b/src/tc_bi_ortho/slater_tc_slow.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot) +subroutine htilde_mu_mat_bi_ortho_tot_slow(key_j, key_i, Nint, htot) BEGIN_DOC ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis @@ -24,14 +24,14 @@ subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot) if(degree.gt.2)then htot = 0.d0 else - call htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) endif -end subroutine htilde_mu_mat_bi_ortho_tot +end subroutine htilde_mu_mat_bi_ortho_tot_slow ! -- -subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) +subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) BEGIN_DOC ! @@ -61,22 +61,22 @@ subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot if(degree.gt.2) return if(degree == 0)then - call diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot) + call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) else if (degree == 1)then - call single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) + call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) else if(degree == 2)then - call double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) + call double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) endif if(three_body_h_tc) then if(degree == 2) then - if(.not.double_normal_ord) then - call double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) + if(.not.double_normal_ord.and.elec_num.gt.2.and.three_e_5_idx_term) then + call double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) endif - else if(degree == 1) then - call single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) - else if(degree == 0) then - call diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) + else if(degree == 1.and.elec_num.gt.2.and.three_e_4_idx_term) then + call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) + else if(degree == 0.and.elec_num.gt.2.and.three_e_3_idx_term) then + call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) endif endif @@ -89,7 +89,7 @@ end ! --- -subroutine diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot) +subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) BEGIN_DOC ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS @@ -188,7 +188,7 @@ end -subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) +subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS @@ -227,18 +227,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) return endif -! if(core_tc_op)then -! print*,'core_tc_op not already taken into account for bi ortho' -! print*,'stopping ...' -! stop -! do i = 1, Nint -! key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1)) -! key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2)) -! enddo -! call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) -! else call bitstring_to_list_ab(key_i, occ, Ne, Nint) -! endif call get_double_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) @@ -246,7 +235,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) ! opposite spin two-body ! key_j, key_i htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - if(double_normal_ord.and.+Ne(1).gt.2)then + if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ??? endif else @@ -255,7 +244,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) ! exchange terms htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) - if(double_normal_ord.and.+Ne(1).gt.2)then + if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ??? htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ??? endif @@ -266,7 +255,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) end -subroutine single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) +subroutine single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS diff --git a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f index e4f7ca93..3180d946 100644 --- a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f +++ b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f @@ -96,9 +96,11 @@ double precision function three_e_single_parrallel_spin(m,j,k,i) implicit none integer, intent(in) :: i,k,j,m three_e_single_parrallel_spin = three_e_4_idx_direct_bi_ort(m,j,k,i) ! direct - three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_2_bi_ort(m,j,k,i) & ! two cyclic permutations + three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_1_bi_ort(j,m,k,i) & ! two cyclic permutations - three_e_4_idx_exch23_bi_ort(m,j,k,i) - three_e_4_idx_exch13_bi_ort(m,j,k,i) & ! two first exchange - - three_e_4_idx_exch12_bi_ort(m,j,k,i) ! last exchange + - three_e_4_idx_exch13_bi_ort(j,m,k,i) ! last exchange + ! TODO + ! use transpose end double precision function three_e_double_parrallel_spin(m,l,j,k,i) @@ -107,5 +109,6 @@ double precision function three_e_double_parrallel_spin(m,l,j,k,i) three_e_double_parrallel_spin = three_e_5_idx_direct_bi_ort(m,l,j,k,i) ! direct three_e_double_parrallel_spin += three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) & ! two cyclic permutations - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) & ! two first exchange - - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ! last exchange +! - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ! last exchange + - three_e_5_idx_direct_bi_ort(m,l,i,k,j) ! last exchange end diff --git a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f index 4ae44148..4c3c0788 100644 --- a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f @@ -11,10 +11,10 @@ allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states)) dressing_dets = 0.d0 do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) endif enddo reigvec_tc_bi_orth_tmp = 0.d0 @@ -29,7 +29,7 @@ vec_tmp(istate,istate) = 1.d0 enddo print*,'Diagonalizing the TC CISD ' - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) do i = 1, N_det e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) enddo @@ -41,8 +41,8 @@ it = 0 dressing_dets = 0.d0 double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:) - external htc_bi_ortho_calc_tdav - external htcdag_bi_ortho_calc_tdav + external htc_bi_ortho_calc_tdav_slow + external htcdag_bi_ortho_calc_tdav_slow logical :: converged do while (dabs(E_before-E_current).gt.thr) it += 1 @@ -66,7 +66,7 @@ do istate = N_states+1, n_states_diag vec_tmp(istate,istate) = 1.d0 enddo - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) print*,'outside Davidson' print*,'eigval_tmp(1) = ',eigval_tmp(1) do i = 1, N_det diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index a83d6cd0..fa946d6a 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -207,8 +207,6 @@ end else ! n_det > N_det_max_full double precision, allocatable :: H_jj(:),vec_tmp(:,:) - external htc_bi_ortho_calc_tdav - external htcdag_bi_ortho_calc_tdav external H_tc_u_0_opt external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt @@ -217,7 +215,7 @@ end allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo print*,'---------------------------------' @@ -259,7 +257,6 @@ end do istate = N_states+1, n_states_diag vec_tmp(istate,istate) = 1.d0 enddo - !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) converged = .False. i_it = 0 diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index 3353d3e7..ec072531 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -9,33 +9,25 @@ implicit none integer :: i, j - double precision :: hmono,htwoe,hthree,htot + double precision :: htot PROVIDE N_int i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j,hmono, htwoe, hthree, htot) & + !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j, htot) & !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho) do i = 1, N_det do j = 1, N_det ! < J | Htilde | I > - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - !print *, ' hmono = ', hmono - !print *, ' htwoe = ', htwoe - !print *, ' hthree = ', hthree htilde_matrix_elmt_bi_ortho(j,i) = htot enddo enddo !$OMP END PARALLEL DO -! print*,'htilde_matrix_elmt_bi_ortho = ' -! do i = 1, min(100,N_det) -! write(*,'(100(F16.10,X))')htilde_matrix_elmt_bi_ortho(1:min(100,N_det),i) -! enddo - END_PROVIDER diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f index 291c52ef..a7e4d09e 100644 --- a/src/tc_bi_ortho/tc_som.irp.f +++ b/src/tc_bi_ortho/tc_som.irp.f @@ -56,8 +56,8 @@ subroutine main() U_SOM = 0.d0 do i = 1, N_det if(i == i_HF) cycle - call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) U_SOM += htot_1 * htot_2 enddo U_SOM = 0.5d0 * U_SOM diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index f8f648e8..9023e2f0 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -12,7 +12,7 @@ subroutine write_tc_energy() do i = 1, N_det do j = 1, N_det !htot = htilde_matrix_elmt_bi_ortho(i,j) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot enddo @@ -38,15 +38,16 @@ subroutine write_tc_var() implicit none integer :: i, j, k - double precision :: hmono, htwoe, hthree, htot + double precision :: hmono, htwoe, hthree, htot_1j, htot_j1 double precision :: SIGMA_TC do k = 1, n_states SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) - SIGMA_TC = SIGMA_TC + htot * htot + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo print *, " state : ", k diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f index 118e481a..cb0c355c 100644 --- a/src/tc_bi_ortho/test_normal_order.irp.f +++ b/src/tc_bi_ortho/test_normal_order.irp.f @@ -35,7 +35,7 @@ subroutine test det_i = ref_bitmask call do_single_excitation(det_i,h1,p1,s1,i_ok) call do_single_excitation(det_i,h2,p2,s2,i_ok) - call htilde_mu_mat_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) hthree *= phase @@ -67,7 +67,7 @@ do h1 = 1, elec_alpha_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) integer :: hh1, pp1, hh2, pp2, ss1, ss2 @@ -103,7 +103,7 @@ do h1 = 1, elec_beta_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4debe2e2..1f7bdfda 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -91,7 +91,7 @@ subroutine routine_test_s2_davidson external H_tc_s2_u_0_opt allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo ! Preparing the left-eigenvector print*,'Computing the left-eigenvector ' diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 6721c285..902f7295 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -11,12 +11,17 @@ program tc_bi_ortho touch read_wf touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call test_h_u0 +! call test_h_u0 ! call test_slater_tc_opt ! call timing_tot ! call timing_diag ! call timing_single ! call timing_double + + call test_no() + !call test_no_aba() + !call test_no_aab() + !call test_no_aaa() end subroutine test_h_u0 @@ -31,7 +36,7 @@ subroutine test_h_u0 u_0(i) = psi_r_coef_bi_ortho(i,1) enddo call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right) - call htc_bi_ortho_calc_tdav (v_0_ref,u_0,N_states,N_det) + call htc_bi_ortho_calc_tdav_slow (v_0_ref,u_0,N_states,N_det) print*,'difference right ' accu = 0.d0 do i = 1, N_det @@ -42,7 +47,7 @@ subroutine test_h_u0 do_right = .False. v_0_new = 0.d0 call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right) - call htcdag_bi_ortho_calc_tdav(v_0_ref_dagger,u_0,N_states,N_det, do_right) + call htcdag_bi_ortho_calc_tdav_slow(v_0_ref_dagger,u_0,N_states,N_det, do_right) print*,'difference left' accu = 0.d0 do i = 1, N_det @@ -63,7 +68,7 @@ subroutine test_slater_tc_opt i_count = 0.d0 do i = 1, N_det do j = 1,N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot) if(dabs(htot).gt.1.d-15)then i_count += 1.D0 @@ -99,7 +104,7 @@ subroutine timing_tot do j = 1, N_det ! call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int) i_count += 1.d0 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -146,7 +151,7 @@ subroutine timing_diag do i = 1, N_det do j = i,i i_count += 1.d0 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -183,7 +188,7 @@ subroutine timing_single if(degree.ne.1)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo @@ -225,7 +230,7 @@ subroutine timing_double if(degree.ne.2)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo @@ -252,3 +257,169 @@ subroutine timing_double end +! --- + +subroutine test_no() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing normal_two_body_bi_orth ...' + + thr = 1d-8 + + PROVIDE normal_two_body_bi_orth_old + PROVIDE normal_two_body_bi_orth + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = normal_two_body_bi_orth (l,k,j,i) + ref = normal_two_body_bi_orth_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on normal_two_body_bi_orth' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on normal_two_body_bi_orth = ', accu / dble(mo_num)**4 + + return +end + +! --- + +subroutine test_no_aba() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing no_aba_contraction ...' + + thr = 1d-8 + + PROVIDE no_aba_contraction_v0 + PROVIDE no_aba_contraction + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_aba_contraction (l,k,j,i) + ref = no_aba_contraction_v0(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on no_aba_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on no_aba_contraction = ', accu / dble(mo_num)**4 + + return +end + +! --- + + +subroutine test_no_aab() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing no_aab_contraction ...' + + thr = 1d-8 + + PROVIDE no_aab_contraction_v0 + PROVIDE no_aab_contraction + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_aab_contraction (l,k,j,i) + ref = no_aab_contraction_v0(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on no_aab_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on no_aab_contraction = ', accu / dble(mo_num)**4 + + return +end + +! --- + +subroutine test_no_aaa() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing no_aaa_contraction ...' + + thr = 1d-8 + + PROVIDE no_aaa_contraction_v0 + PROVIDE no_aaa_contraction + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_aaa_contraction (l,k,j,i) + ref = no_aaa_contraction_v0(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on no_aaa_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on no_aaa_contraction = ', accu / dble(mo_num)**4 + + return +end + +! --- diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index ebd43a7a..b7de067f 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -25,8 +25,7 @@ subroutine test_3e implicit none double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu double precision :: hmono, htwoe, hthree, htot - call htilde_mu_mat_bi_ortho(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) -! call diag_htilde_three_body_ints_bi_ort(N_int, ref_bitmask, hthree) + call htilde_mu_mat_bi_ortho_slow(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) print*,'hmono = ',hmono print*,'htwoe = ',htwoe print*,'hthree= ',hthree @@ -88,7 +87,7 @@ subroutine routine_3() print*, ' excited det' call debug_det(det_i, N_int) - call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) if(dabs(hthree).lt.1.d-10)cycle ref = hthree if(s1 == 1)then @@ -156,7 +155,7 @@ subroutine routine_tot() stop endif - call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index de638da9..ea1503c3 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -16,6 +16,24 @@ doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml default: True +[three_e_3_idx_term] +type: logical +doc: If |true|, the diagonal 3-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + +[three_e_4_idx_term] +type: logical +doc: If |true|, the off-diagonal 4-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + +[three_e_5_idx_term] +type: logical +doc: If |true|, the off-diagonal 5-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + [pure_three_body_h_tc] type: logical doc: If |true|, pure triple excitation three-body terms are included @@ -112,6 +130,12 @@ doc: exponents of the 1-body Jastrow interface: ezfio size: (nuclei.nucl_num) +[j1b_pen_coef] +type: double precision +doc: coefficients of the 1-body Jastrow +interface: ezfio +size: (nuclei.nucl_num) + [j1b_coeff] type: double precision doc: coeff of the 1-body Jastrow @@ -130,6 +154,12 @@ doc: a parameter used to define mu(r) interface: ezfio, provider, ocaml default: 6.203504908994001e-1 +[beta_rho_power] +type: double precision +doc: a parameter used to define mu(r) +interface: ezfio, provider, ocaml +default: 0.5 + [thr_degen_tc] type: Threshold doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue diff --git a/src/tc_keywords/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f index 57250b52..ebcd5107 100644 --- a/src/tc_keywords/j1b_pen.irp.f +++ b/src/tc_keywords/j1b_pen.irp.f @@ -1,17 +1,22 @@ ! --- -BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ] + BEGIN_PROVIDER [ double precision, j1b_pen , (nucl_num) ] +&BEGIN_PROVIDER [ double precision, j1b_pen_coef, (nucl_num) ] BEGIN_DOC - ! exponents of the 1-body Jastrow + ! parameters of the 1-body Jastrow END_DOC implicit none logical :: exists + integer :: i + integer :: ierr PROVIDE ezfio_filename + ! --- + if (mpi_master) then call ezfio_has_tc_keywords_j1b_pen(exists) endif @@ -23,7 +28,6 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ] IRP_IF MPI include 'mpif.h' - integer :: ierr call MPI_BCAST(j1b_pen, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then stop 'Unable to read j1b_pen with MPI' @@ -31,7 +35,6 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ] IRP_ENDIF if (exists) then - if (mpi_master) then write(6,'(A)') '.. >>>>> [ IO READ: j1b_pen ] <<<<< ..' call ezfio_get_tc_keywords_j1b_pen(j1b_pen) @@ -42,19 +45,54 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ] endif IRP_ENDIF endif - else - - integer :: i do i = 1, nucl_num j1b_pen(i) = 1d5 enddo - endif - print*,'parameters for nuclei jastrow' - do i = 1, nucl_num - print*,'i,Z,j1b_pen(i)',i,nucl_charge(i),j1b_pen(i) - enddo + + ! --- + + if (mpi_master) then + call ezfio_has_tc_keywords_j1b_pen_coef(exists) + endif + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + + IRP_IF MPI + call MPI_BCAST(j1b_pen_coef, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read j1b_pen_coef with MPI' + endif + IRP_ENDIF + + if (exists) then + if (mpi_master) then + write(6,'(A)') '.. >>>>> [ IO READ: j1b_pen_coef ] <<<<< ..' + call ezfio_get_tc_keywords_j1b_pen_coef(j1b_pen_coef) + IRP_IF MPI + call MPI_BCAST(j1b_pen_coef, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read j1b_pen_coef with MPI' + endif + IRP_ENDIF + endif + else + do i = 1, nucl_num + j1b_pen_coef(i) = 1d0 + enddo + endif + + ! --- + + print *, ' parameters for nuclei jastrow' + print *, ' i, Z, j1b_pen, j1b_pen_coef' + do i = 1, nucl_num + print *, i, nucl_charge(i), j1b_pen(i), j1b_pen_coef(i) + enddo END_PROVIDER @@ -114,3 +152,4 @@ BEGIN_PROVIDER [ double precision, j1b_coeff, (nucl_num) ] END_PROVIDER ! --- + diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 20260a95..0504373c 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -11,6 +11,7 @@ subroutine rh_tcscf_diis() integer :: i, j, it integer :: dim_DIIS, index_dim_DIIS + logical :: converged double precision :: etc_tot, etc_1e, etc_2e, etc_3e, e_save, e_delta double precision :: tc_grad, g_save, g_delta, g_delta_th double precision :: level_shift_save, rate_th @@ -92,8 +93,9 @@ subroutine rh_tcscf_diis() PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot + converged = .false. !do while((tc_grad .gt. dsqrt(thresh_tcscf)) .and. (er_DIIS .gt. dsqrt(thresh_tcscf))) - do while(er_DIIS .gt. dsqrt(thresh_tcscf)) + do while(.not. converged) call wall_time(t0) @@ -218,21 +220,56 @@ subroutine rh_tcscf_diis() !g_delta_th = dabs(tc_grad) ! g_delta) er_delta_th = dabs(er_DIIS) !er_delta) + converged = er_DIIS .lt. dsqrt(thresh_tcscf) + call wall_time(t1) !write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & ! it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 + +! Write data in JSON file + + call lock_io + if (it == 1) then + write(json_unit, json_dict_uopen_fmt) + else + write(json_unit, json_dict_close_uopen_fmt) + endif + write(json_unit, json_int_fmt) ' iteration ', it + write(json_unit, json_real_fmt) ' SCF TC Energy ', etc_tot + write(json_unit, json_real_fmt) ' E(1e) ', etc_1e + write(json_unit, json_real_fmt) ' E(2e) ', etc_2e + write(json_unit, json_real_fmt) ' E(3e) ', etc_3e + write(json_unit, json_real_fmt) ' delta Energy ', e_delta + write(json_unit, json_real_fmt) ' DIIS error ', er_DIIS + write(json_unit, json_real_fmt) ' level_shift ', level_shift_tcscf + write(json_unit, json_real_fmt) ' DIIS ', dim_DIIS + write(json_unit, json_real_fmt) ' Wall time (min)', (t1-t0)/60.d0 + call unlock_io + if(er_delta .lt. 0.d0) then call ezfio_set_tc_scf_bitc_energy(etc_tot) call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + write(json_unit, json_true_fmt) 'saved' + else + write(json_unit, json_false_fmt) 'saved' endif + call lock_io + if (converged) then + write(json_unit, json_true_fmtx) 'converged' + else + write(json_unit, json_false_fmtx) 'converged' + endif + call unlock_io if(qp_stop()) exit enddo + write(json_unit, json_dict_close_fmtx) + ! --- print *, ' TCSCF DIIS converged !' diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 88ddd26c..04c4f92d 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -8,6 +8,8 @@ program tc_scf implicit none + write(json_unit,json_array_open_fmt) 'tc-scf' + print *, ' starting ...' my_grid_becke = .True. @@ -57,6 +59,8 @@ program tc_scf endif + write(json_unit,json_array_close_fmtx) + call json_close end diff --git a/src/tools/print_wf.irp.f b/src/tools/print_wf.irp.f index 64eb1a1f..9621ee89 100644 --- a/src/tools/print_wf.irp.f +++ b/src/tools/print_wf.irp.f @@ -47,6 +47,7 @@ subroutine routine do i = 1, min(N_det_print_wf,N_det) print*,'' print*,'i = ',i + print *,psi_det_sorted(1,1,i) call debug_det(psi_det_sorted(1,1,i),N_int) call get_excitation_degree(psi_det_sorted(1,1,i),psi_det_sorted(1,1,1),degree,N_int) print*,'degree = ',degree diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg index 8606e908..8c11478e 100644 --- a/src/trexio/EZFIO.cfg +++ b/src/trexio/EZFIO.cfg @@ -10,11 +10,17 @@ doc: Name of the exported TREXIO file interface: ezfio, ocaml, provider default: None -[export_rdm] +[export_basis] type: logical -doc: If True, export two-body reduced density matrix +doc: If True, export basis set and AOs interface: ezfio, ocaml, provider -default: False +default: True + +[export_mos] +type: logical +doc: If True, export basis set and AOs +interface: ezfio, ocaml, provider +default: True [export_ao_one_e_ints] type: logical @@ -22,12 +28,6 @@ doc: If True, export one-electron integrals in AO basis interface: ezfio, ocaml, provider default: False -[export_mo_one_e_ints] -type: logical -doc: If True, export one-electron integrals in MO basis -interface: ezfio, ocaml, provider -default: False - [export_ao_two_e_ints] type: logical doc: If True, export two-electron integrals in AO basis @@ -40,6 +40,12 @@ doc: If True, export Cholesky-decomposed two-electron integrals in AO basis interface: ezfio, ocaml, provider default: False +[export_mo_one_e_ints] +type: logical +doc: If True, export one-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + [export_mo_two_e_ints] type: logical doc: If True, export two-electron integrals in MO basis @@ -52,3 +58,9 @@ doc: If True, export Cholesky-decomposed two-electron integrals in MO basis interface: ezfio, ocaml, provider default: False +[export_rdm] +type: logical +doc: If True, export two-body reduced density matrix +interface: ezfio, ocaml, provider +default: False + diff --git a/src/trexio/export_trexio.irp.f b/src/trexio/export_trexio.irp.f index 3ae0dcb4..f9ecc17f 100644 --- a/src/trexio/export_trexio.irp.f +++ b/src/trexio/export_trexio.irp.f @@ -2,6 +2,6 @@ program export_trexio_prog implicit none read_wf = .True. SOFT_TOUCH read_wf - call export_trexio + call export_trexio(.False.) end diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index c55ddc5e..f25ae370 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f @@ -1,15 +1,17 @@ -subroutine export_trexio +subroutine export_trexio(update) use trexio implicit none BEGIN_DOC ! Exports the wave function in TREXIO format END_DOC + logical, intent(in) :: update integer(trexio_t) :: f(N_states) ! TREXIO file handle integer(trexio_exit_code) :: rc integer :: k double precision, allocatable :: factor(:) character*(256) :: filenames(N_states) + character :: rw filenames(1) = trexio_filename do k=2,N_states @@ -18,15 +20,26 @@ subroutine export_trexio do k=1,N_states print *, 'TREXIO file : ', trim(filenames(k)) - call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + if (update) then + call system('test -f '//trim(filenames(k))//' && cp -r '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + else + call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + endif enddo print *, '' + if (update) then + rw = 'u' + else + rw = 'w' + endif + + do k=1,N_states if (backend == 0) then - f(k) = trexio_open(filenames(k), 'u', TREXIO_HDF5, rc) + f(k) = trexio_open(filenames(k), rw, TREXIO_HDF5, rc) else if (backend == 1) then - f(k) = trexio_open(filenames(k), 'u', TREXIO_TEXT, rc) + f(k) = trexio_open(filenames(k), rw, TREXIO_TEXT, rc) endif if (f(k) == 0_8) then print *, 'Unable to open TREXIO file for writing' @@ -171,92 +184,95 @@ subroutine export_trexio endif + if (export_basis) then + ! Basis ! ----- - print *, 'Basis' + print *, 'Basis' + rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_prim_num(f(1), prim_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_prim_num(f(1), prim_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_shell_num(f(1), shell_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_num(f(1), shell_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) - call trexio_assert(rc, TREXIO_SUCCESS) + allocate(factor(shell_num)) +! if (ao_normalized) then +! factor(1:shell_num) = shell_normalization_factor(1:shell_num) +! else + factor(1:shell_num) = 1.d0 +! endif + rc = trexio_write_basis_shell_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) - allocate(factor(shell_num)) - if (ao_normalized) then - factor(1:shell_num) = shell_normalization_factor(1:shell_num) - else - factor(1:shell_num) = 1.d0 - endif - rc = trexio_write_basis_shell_factor(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) - deallocate(factor) + rc = trexio_write_basis_shell_index(f(1), shell_index) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_index(f(1), shell_index) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_exponent(f(1), prim_expo) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_exponent(f(1), prim_expo) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_coefficient(f(1), prim_coef) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_coefficient(f(1), prim_coef) - call trexio_assert(rc, TREXIO_SUCCESS) - - allocate(factor(prim_num)) - if (primitives_normalized) then - factor(1:prim_num) = prim_normalization_factor(1:prim_num) - else - factor(1:prim_num) = 1.d0 - endif - rc = trexio_write_basis_prim_factor(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) + allocate(factor(prim_num)) + if (primitives_normalized) then + factor(1:prim_num) = prim_normalization_factor(1:prim_num) + else + factor(1:prim_num) = 1.d0 + endif + rc = trexio_write_basis_prim_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) ! Atomic orbitals ! --------------- - print *, 'AOs' + print *, 'AOs' - rc = trexio_write_ao_num(f(1), ao_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_num(f(1), ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_cartesian(f(1), 1) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_cartesian(f(1), 1) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_shell(f(1), ao_shell) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_shell(f(1), ao_shell) + call trexio_assert(rc, TREXIO_SUCCESS) - integer :: i, pow0(3), powA(3), j, l, nz - double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c - nz=100 + integer :: i, pow0(3), powA(3), j, l, nz + double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c + nz=100 - C_A(1) = 0.d0 - C_A(2) = 0.d0 - C_A(3) = 0.d0 + C_A(1) = 0.d0 + C_A(2) = 0.d0 + C_A(3) = 0.d0 + + allocate(factor(ao_num)) + if (ao_normalized) then + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + else + factor(:) = 1.d0 + endif + rc = trexio_write_ao_normalization(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) - allocate(factor(ao_num)) - if (ao_normalized) then - do i=1,ao_num - l = ao_first_of_shell(ao_shell(i)) - factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) - enddo - else - factor(:) = 1.d0 endif - rc = trexio_write_ao_normalization(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) ! One-e AO integrals ! ------------------ @@ -375,28 +391,30 @@ subroutine export_trexio ! Molecular orbitals ! ------------------ - print *, 'MOs' + if (export_mos) then + print *, 'MOs' - rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) - call trexio_assert(rc, TREXIO_SUCCESS) - - do k=1,N_states - rc = trexio_write_mo_num(f(k), mo_num) + rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) call trexio_assert(rc, TREXIO_SUCCESS) - enddo - rc = trexio_write_mo_coefficient(f(1), mo_coef) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_mo_num(f(k), mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - if ( (trim(mo_label) == 'Canonical').and. & - (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then - rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + rc = trexio_write_mo_coefficient(f(1), mo_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + if ( (trim(mo_label) == 'Canonical').and. & + (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then + rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) - call trexio_assert(rc, TREXIO_SUCCESS) - ! One-e MO integrals ! ------------------ diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f index 9f9ad9d6..8c6b79d7 100644 --- a/src/trexio/import_trexio_integrals.irp.f +++ b/src/trexio/import_trexio_integrals.irp.f @@ -3,6 +3,7 @@ program import_integrals_ao implicit none integer(trexio_t) :: f ! TREXIO file handle integer(trexio_exit_code) :: rc + PROVIDE mo_num f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) if (f == 0_8) then @@ -42,10 +43,10 @@ subroutine run(f) if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then rc = trexio_read_nucleus_repulsion(f, s) - call trexio_assert(rc, TREXIO_SUCCESS) if (rc /= TREXIO_SUCCESS) then print *, irp_here, rc print *, 'Error reading nuclear repulsion' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_nuclei_nuclear_repulsion(s) @@ -63,6 +64,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO overlap' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A) @@ -74,6 +76,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO kinetic integrals' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A) @@ -85,6 +88,7 @@ subroutine run(f) ! if (rc /= TREXIO_SUCCESS) then ! print *, irp_here ! print *, 'Error reading AO ECP local integrals' +! call trexio_assert(rc, TREXIO_SUCCESS) ! stop -1 ! endif ! call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A) @@ -96,6 +100,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO potential N-e integrals' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A) @@ -106,41 +111,112 @@ subroutine run(f) ! AO 2e integrals ! --------------- - PROVIDE ao_integrals_map - integer*4 :: BUFSIZE - BUFSIZE=ao_num**2 - allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) - allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + rc = trexio_has_ao_2e_int(f) + PROVIDE ao_num + if (rc /= TREXIO_HAS_NOT) then + PROVIDE ao_integrals_map - integer*8 :: offset, icount + integer*4 :: BUFSIZE + BUFSIZE=ao_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) - offset = 0_8 - icount = BUFSIZE - rc = TREXIO_SUCCESS - do while (icount == size(V)) - rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) - do m=1,icount - i = Vi(1,m) - j = Vi(2,m) - k = Vi(3,m) - l = Vi(4,m) - integral = V(m) - call two_e_integrals_index(i, j, k, l, buffer_i(m) ) - buffer_values(m) = integral - enddo - call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) - offset = offset + icount + integer*8 :: offset, icount + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'AO integrals read from TREXIO file' + else + print *, 'AO integrals not found in TREXIO file' + endif + + ! MO integrals + ! ------------ + + allocate(A(mo_num, mo_num)) + if (trexio_has_mo_1e_int_core_hamiltonian(f) == TREXIO_SUCCESS) then + rc = trexio_read_mo_1e_int_core_hamiltonian(f, A) if (rc /= TREXIO_SUCCESS) then - exit + print *, irp_here + print *, 'Error reading MO 1e integrals' + call trexio_assert(rc, TREXIO_SUCCESS) + stop -1 endif - end do - n_integrals = offset + call ezfio_set_mo_one_e_ints_mo_one_e_integrals(A) + call ezfio_set_mo_one_e_ints_io_mo_one_e_integrals('Read') + endif + deallocate(A) - call map_sort(ao_integrals_map) - call map_unique(ao_integrals_map) + ! MO 2e integrals + ! --------------- - call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) - call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + rc = trexio_has_mo_2e_int(f) + if (rc /= TREXIO_HAS_NOT) then + + BUFSIZE=mo_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_mo_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call map_append(mo_integrals_map, buffer_i, buffer_values, int(icount,4)) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) + call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read') + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'MO integrals read from TREXIO file' + else + print *, 'MO integrals not found in TREXIO file' + endif end diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index ff17ee4e..b548b18a 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -56,7 +56,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) ! - ! WARNING ::: IF fact_k is too smal then: + ! WARNING ::: IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC @@ -86,7 +86,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, !DIR$ FORCEINLINE call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center) if (fact_k < thresh) then - ! IF fact_k is too smal then: + ! IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef P_center = 0.d0 @@ -468,8 +468,6 @@ end subroutine - - subroutine multiply_poly(b,nb,c,nc,d,nd) implicit none BEGIN_DOC @@ -484,33 +482,292 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) integer :: ndtmp integer :: ib, ic, id, k - if(ior(nc,nb) >= 0) then ! True if nc>=0 and nb>=0 - continue - else - return - endif - ndtmp = nb+nc + if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 + + select case (nb) + case (0) + call multiply_poly_b0(b,c,nc,d,nd) + return + case (1) + call multiply_poly_b1(b,c,nc,d,nd) + return + case (2) + call multiply_poly_b2(b,c,nc,d,nd) + return + end select + + select case (nc) + case (0) + call multiply_poly_c0(b,nb,c,d,nd) + return + case (1) + call multiply_poly_c1(b,nb,c,d,nd) + return + case (2) + call multiply_poly_c2(b,nb,c,d,nd) + return + end select + + do ib=0,nb + do ic = 0,nc + d(ib+ic) = d(ib+ic) + c(ic) * b(ib) + enddo + enddo + + do nd = nb+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_b0(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:0), c(0:nc) + double precision, intent(inout) :: d(0:nc) + + integer :: ndtmp + integer :: ic, id, k + if(nc < 0) return !False if nc>=0 do ic = 0,nc d(ic) = d(ic) + c(ic) * b(0) enddo - do ib=1,nb - d(ib) = d(ib) + c(0) * b(ib) - do ic = 1,nc - d(ib+ic) = d(ib+ic) + c(ic) * b(ib) - enddo - enddo - - do nd = ndtmp,0,-1 - if (d(nd) == 0.d0) then - cycle - endif - exit + do nd = nc,0,-1 + if (d(nd) /= 0.d0) exit enddo end +subroutine multiply_poly_b1(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:1), c(0:nc) + double precision, intent(inout) :: d(0:1+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nc < 0) return !False if nc>=0 + + + select case (nc) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + + case default + d(0) = d(0) + c(0) * b(0) + do ic = 1,nc + d(ic) = d(ic) + c(ic) * b(0) + c(ic-1) * b(1) + enddo + d(nc+1) = d(nc+1) + c(nc) * b(1) + + end select + + do nd = 1+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_b2(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:2), c(0:nc) + double precision, intent(inout) :: d(0:2+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nc < 0) return !False if nc>=0 + + select case (nc) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + d(2) = d(2) + c(0) * b(2) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + d(3) = d(3) + c(1) * b(2) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + c(1) * b(2) + d(4) = d(4) + c(2) * b(2) + + case default + + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ic = 2,nc + d(ic) = d(ic) + c(ic) * b(0) + c(ic-1) * b(1) + c(ic-2) * b(2) + enddo + d(nc+1) = d(nc+1) + c(nc) * b(1) + c(nc-1) * b(2) + d(nc+2) = d(nc+2) + c(nc) * b(2) + + end select + + do nd = 2+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c0(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:0) + double precision, intent(inout) :: d(0:nb) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + do ib=0,nb + d(ib) = d(ib) + c(0) * b(ib) + enddo + + do nd = nb,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c1(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:1) + double precision, intent(inout) :: d(0:nb+1) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + + case default + d(0) = d(0) + c(0) * b(0) + do ib=1,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + + end select + + do nd = nb+1,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c2(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:2) + double precision, intent(inout) :: d(0:nb+2) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + d(2) = d(2) + c(2) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(1) * b(2) + c(2) * b(1) + d(4) = d(4) + c(2) * b(2) + + case default + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ib=2,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1) + d(nb+2) = d(nb+2) + c(2) * b(nb) + + end select + + do nd = nb+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + + + subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points) implicit none BEGIN_DOC @@ -685,11 +942,11 @@ end subroutine recentered_poly2_v subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, n_points) BEGIN_DOC - ! + ! ! Recenter two polynomials. Special case for b=(0,0,0) - ! + ! ! (x - A)^a (x - B)^0 = (x - P + P - A)^a (x - Q + Q - B)^0 - ! = (x - P + P - A)^a + ! = (x - P + P - A)^a ! END_DOC diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 3b43d607..76a539a6 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1823,41 +1823,39 @@ subroutine pivoted_cholesky( A, rank, tol, ndim, U) ! U is allocated inside this subroutine ! rank is the number of Cholesky vectors depending on tol ! -integer :: ndim -integer, intent(inout) :: rank -double precision, dimension(ndim, ndim), intent(inout) :: A -double precision, dimension(ndim, rank), intent(out) :: U -double precision, intent(in) :: tol +integer :: ndim +integer, intent(inout) :: rank +double precision, intent(inout) :: A(ndim, ndim) +double precision, intent(out) :: U(ndim, rank) +double precision, intent(in) :: tol integer, dimension(:), allocatable :: piv double precision, dimension(:), allocatable :: work -character, parameter :: uplo = "U" -integer :: N, LDA +character, parameter :: uplo = 'L' +integer :: LDA integer :: info integer :: k, l, rank0 -external :: dpstrf rank0 = rank -N = size(A, dim=1) -LDA = N -allocate(piv(N)) -allocate(work(2*N)) -call dpstrf(uplo, N, A, LDA, piv, rank, tol, work, info) +LDA = ndim +allocate(piv(ndim)) +allocate(work(2*ndim)) +call dpstrf(uplo, ndim, A, LDA, piv, rank, tol, work, info) if (rank > rank0) then print *, 'Bug: rank > rank0 in pivoted cholesky. Increase rank before calling' stop end if -do k = 1, N - A(k+1:, k) = 0.00D+0 +do k = 1, ndim + A(k,k+1:ndim) = 0.00D+0 end do ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer ! to do the swapping in-place U(:,:) = 0.00D+0 -do k = 1, N +do k = 1, ndim l = piv(k) - U(l, :) = A(1:rank, k) + U(l, 1:rank) = A(k,1:rank) end do end subroutine pivoted_cholesky diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index aba99c2b..a9f1a438 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -490,7 +490,7 @@ end subroutine check_sym subroutine sum_A_At(A, N) !BEGIN_DOC - ! useful for symmetrizing a tensor without a temporary tensor + ! add a tensor with its transpose without a temporary tensor !END_DOC implicit none @@ -521,3 +521,38 @@ subroutine sum_A_At(A, N) end +! --- + +subroutine sub_A_At(A, N) + + !BEGIN_DOC + ! substruct a tensor with its transpose without a temporary tensor + !END_DOC + + implicit none + integer, intent(in) :: N + double precision, intent(inout) :: A(N,N) + integer :: i, j + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j) & + !$OMP SHARED (A, N) + !$OMP DO + do j = 1, N + do i = j, N + A(i,j) -= A(j,i) + enddo + enddo + !$OMP END DO + + !$OMP DO + do j = 2, N + do i = 1, j-1 + A(i,j) = -A(j,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end diff --git a/src/utils_cc/EZFIO.cfg b/src/utils_cc/EZFIO.cfg index 71ee87e3..fb6d9034 100644 --- a/src/utils_cc/EZFIO.cfg +++ b/src/utils_cc/EZFIO.cfg @@ -46,17 +46,11 @@ doc: Guess used to initialize the T2 amplitudes. none -> 0, MP -> perturbation t interface: ezfio,ocaml,provider default: MP -[cc_write_t1] -type: logical -doc: If true, it will write on disk the T1 amplitudes at the end of the calculation. -interface: ezfio,ocaml,provider -default: False - -[cc_write_t2] -type: logical -doc: If true, it will write on disk the T2 amplitudes at the end of the calculation. -interface: ezfio,ocaml,provider -default: False +[io_amplitudes] +type: Disk_access +doc: Read/Write |CCSD| amplitudes from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None [cc_par_t] type: logical diff --git a/src/utils_cc/energy.irp.f b/src/utils_cc/energy.irp.f index 33e0cbae..fc1451ba 100644 --- a/src/utils_cc/energy.irp.f +++ b/src/utils_cc/energy.irp.f @@ -5,9 +5,8 @@ subroutine det_energy(det,energy) integer(bit_kind), intent(in) :: det double precision, intent(out) :: energy + double precision, external :: diag_H_mat_elem - call i_H_j(det,det,N_int,energy) + energy = diag_H_mat_elem(det,N_int) + nuclear_repulsion - energy = energy + nuclear_repulsion - end diff --git a/src/utils_cc/guess_t.irp.f b/src/utils_cc/guess_t.irp.f index 42acdf78..bb26e133 100644 --- a/src/utils_cc/guess_t.irp.f +++ b/src/utils_cc/guess_t.irp.f @@ -91,16 +91,17 @@ subroutine write_t1(nO,nV,t1) double precision, intent(in) :: t1(nO, nV) ! internal - integer :: i,a + integer :: i,a, iunit + integer, external :: getunitandopen - if (cc_write_t1) then - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1') + if (write_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T1','w') do a = 1, nV do i = 1, nO - write(11,'(F20.12)') t1(i,a) + write(iunit,'(F20.12)') t1(i,a) enddo enddo - close(11) + close(iunit) endif end @@ -120,20 +121,21 @@ subroutine write_t2(nO,nV,t2) double precision, intent(in) :: t2(nO, nO, nV, nV) ! internal - integer :: i,j,a,b + integer :: i,j,a,b, iunit + integer, external :: getunitandopen - if (cc_write_t2) then - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2') + if (write_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T2','w') do b = 1, nV do a = 1, nV do j = 1, nO do i = 1, nO - write(11,'(F20.12)') t2(i,j,a,b) + write(iunit,'(F20.12)') t2(i,j,a,b) enddo enddo enddo enddo - close(11) + close(iunit) endif end @@ -153,23 +155,19 @@ subroutine read_t1(nO,nV,t1) double precision, intent(out) :: t1(nO, nV) ! internal - integer :: i,a + integer :: i,a, iunit logical :: ok + integer, external :: getunitandopen - inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok) - if (.not. ok) then - print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1' - print*, 'Do a first calculation with cc_write_t1 = True' - print*, 'and cc_guess_t1 /= read before setting cc_guess_t1 = read' - call abort - endif - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1') - do a = 1, nV - do i = 1, nO - read(11,'(F20.12)') t1(i,a) + if (read_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T1','r') + do a = 1, nV + do i = 1, nO + read(iunit,'(F20.12)') t1(i,a) + enddo enddo - enddo - close(11) + close(iunit) + endif end @@ -188,26 +186,23 @@ subroutine read_t2(nO,nV,t2) double precision, intent(out) :: t2(nO, nO, nV, nV) ! internal - integer :: i,j,a,b + integer :: i,j,a,b, iunit logical :: ok - inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok) - if (.not. ok) then - print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1' - print*, 'Do a first calculation with cc_write_t2 = True' - print*, 'and cc_guess_t2 /= read before setting cc_guess_t2 = read' - call abort - endif - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2') - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - read(11,'(F20.12)') t2(i,j,a,b) + integer, external :: getunitandopen + + if (read_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T2','r') + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + read(iunit,'(F20.12)') t2(i,j,a,b) + enddo enddo enddo enddo - enddo - close(11) + close(iunit) + endif end diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 9e244d82..485d7002 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -13,7 +13,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f) integer :: i1,i2,idx1,idx2 allocate(tmp_F(mo_num,mo_num)) - + call get_fock_matrix_spin(det,1,tmp_F) !$OMP PARALLEL & @@ -32,7 +32,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f) !$OMP END PARALLEL deallocate(tmp_F) - + end ! V @@ -45,63 +45,66 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer, intent(in) :: list1(n1),list2(n2),list3(n3),list4(n4) double precision, intent(out) :: v(n1,n2,n3,n4) - integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4 - double precision :: get_two_e_integral - - PROVIDE mo_two_e_integrals_in_map + integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k + double precision, allocatable :: buffer(:,:,:) !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)& + !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO do i4 = 1, n4 - do i3 = 1, n3 - do i2 = 1, n2 + idx4 = list4(i4) + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, n2 + idx2 = list2(i2) + do i3 = 1, n3 + idx3 = list3(i3) do i1 = 1, n1 - idx4 = list4(i4) - idx3 = list3(i3) - idx2 = list2(i2) idx1 = list1(i1) - v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map) + v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) enddo enddo enddo enddo !$OMP END DO + deallocate(buffer) !$OMP END PARALLEL - + + end ! full BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] - implicit none - - integer :: i,j,k,l - double precision :: get_two_e_integral - - PROVIDE mo_two_e_integrals_in_map - + integer :: i1,i2,i3,i4,k + double precision, allocatable :: buffer(:,:,:) !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) & - !$OMP PRIVATE(i,j,k,l) & + !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& !$OMP DEFAULT(NONE) - - !$OMP DO collapse(3) - do l = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do i = 1, mo_num - cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO + do i4 = 1, mo_num + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, mo_num + do i3 = 1, mo_num + do i1 = 1, mo_num + cc_space_v(i1,i2,i3,i4) = buffer(i1,i3,i2) enddo enddo enddo enddo !$OMP END DO + deallocate(buffer) !$OMP END PARALLEL - + END_PROVIDER ! oooo @@ -280,7 +283,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo)) call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v) - + do q = 1, cc_n_mo do p = 1, cc_n_mo cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q) @@ -382,7 +385,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)] enddo FREE cc_space_v_vvvv - + END_PROVIDER ! iaia @@ -467,7 +470,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n integer :: i,j,a,b allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa)) - + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v) !$OMP PARALLEL & @@ -501,7 +504,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n integer :: i,j,a,b allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa)) - + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v) !$OMP PARALLEL & @@ -613,7 +616,7 @@ subroutine shift_idx_spin(s,n_S,shift) else shift = n_S(1) endif - + end ! F @@ -626,21 +629,22 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) ! Compute the Fock matrix corresponding to two lists of spin orbitals. ! Ex: occ/occ, occ/vir,... END_DOC - + integer(bit_kind), intent(in) :: det(N_int,2) integer, intent(in) :: n1,n2, n1_S(2), n2_S(2) integer, intent(in) :: list1(n1,2), list2(n2,2) integer, intent(in) :: dim1, dim2 - + double precision, intent(out) :: f(dim1, dim2) double precision, allocatable :: tmp_F(:,:) integer :: i,j, idx_i,idx_j,i_shift,j_shift integer :: tmp_i,tmp_j integer :: si,sj,s + PROVIDE big_array_exchange_integrals big_array_coulomb_integrals allocate(tmp_F(mo_num,mo_num)) - + do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 @@ -669,9 +673,9 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) enddo enddo - + deallocate(tmp_F) - + end ! Get F @@ -683,12 +687,12 @@ subroutine get_fock_matrix_spin(det,s,f) BEGIN_DOC ! Fock matrix alpha or beta of an arbitrary det END_DOC - + integer(bit_kind), intent(in) :: det(N_int,2) integer, intent(in) :: s - + double precision, intent(out) :: f(mo_num,mo_num) - + integer :: p,q,i,s1,s2 integer(bit_kind) :: res(N_int,2) logical :: ok @@ -701,9 +705,11 @@ subroutine get_fock_matrix_spin(det,s,f) s1 = 2 s2 = 1 endif - + + PROVIDE big_array_coulomb_integrals big_array_exchange_integrals + !$OMP PARALLEL & - !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) & + !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals,big_array_coulomb_integrals,big_array_exchange_integrals) & !$OMP PRIVATE(p,q,ok,i,res)& !$OMP DEFAULT(NONE) !$OMP DO collapse(1) @@ -713,20 +719,21 @@ subroutine get_fock_matrix_spin(det,s,f) do i = 1, mo_num call apply_hole(det, s1, i, res, ok, N_int) if (ok) then - f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) +! f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) + f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) - big_array_exchange_integrals(i,p,q) endif enddo do i = 1, mo_num call apply_hole(det, s2, i, res, ok, N_int) if (ok) then - f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) + f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) endif enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - + end ! V @@ -752,14 +759,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, integer :: si,sj,sk,sl,s PROVIDE cc_space_v - + !$OMP PARALLEL & !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sk = 1, 2 @@ -768,7 +775,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then @@ -776,7 +783,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -792,14 +799,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(3) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -815,14 +822,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(3) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -843,7 +850,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -859,13 +866,13 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo !$OMP END DO endif - + enddo enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx @@ -900,28 +907,28 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, call shift_idx_spin(sl,n4_S,l_shift) tmp_l = idx_l - l_shift l = list4(tmp_l,sl) - + !$OMP PARALLEL & !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & !$OMP i,j,k,idx_i,idx_j,idx_k,& !$OMP tmp_i,tmp_j,tmp_k)& !$OMP DEFAULT(NONE) - + do sk = 1, 2 call shift_idx_spin(sk,n3_S,k_shift) do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -934,13 +941,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -953,13 +960,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -976,7 +983,7 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -989,12 +996,12 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx_ij_l @@ -1029,28 +1036,28 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l call shift_idx_spin(sk,n3_S,k_shift) tmp_k = idx_k - k_shift k = list3(tmp_k,sk) - + !$OMP PARALLEL & !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & !$OMP i,j,l,idx_i,idx_j,idx_l,& !$OMP tmp_i,tmp_j,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1063,13 +1070,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1082,13 +1089,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1105,7 +1112,7 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1118,12 +1125,12 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx_i_kl @@ -1158,28 +1165,28 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l call shift_idx_spin(sj,n2_S,j_shift) tmp_j = idx_j - j_shift j = list2(tmp_j,sj) - + !$OMP PARALLEL & !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & !$OMP i,k,l,idx_i,idx_k,idx_l,& !$OMP tmp_i,tmp_k,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sk = 1, 2 call shift_idx_spin(sk,n3_S,k_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1192,13 +1199,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1211,13 +1218,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1234,7 +1241,7 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1247,10 +1254,10 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end diff --git a/src/utils_cc/org/phase.org b/src/utils_cc/org/phase.org index 5f67859c..2156a251 100644 --- a/src/utils_cc/org/phase.org +++ b/src/utils_cc/org/phase.org @@ -137,6 +137,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n1(j) + if (k > n_anni(j)) exit if (l1(i,j) /= list_anni(k,j)) cycle pos_anni(k,j) = i k = k + 1 @@ -147,6 +148,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n2(j) + if (k > n_crea(j)) exit if (l2(i,j) /= list_crea(k,j)) cycle pos_crea(k,j) = i k = k + 1 diff --git a/src/utils_cc/phase.irp.f b/src/utils_cc/phase.irp.f index 01b41f49..e0703fb8 100644 --- a/src/utils_cc/phase.irp.f +++ b/src/utils_cc/phase.irp.f @@ -96,6 +96,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n1(j) + if (k > n_anni(j)) exit if (l1(i,j) /= list_anni(k,j)) cycle pos_anni(k,j) = i k = k + 1 @@ -106,6 +107,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n2(j) + if (k > n_crea(j)) exit if (l2(i,j) /= list_crea(k,j)) cycle pos_crea(k,j) = i k = k + 1 diff --git a/src/utils_cc/update_t.irp.f b/src/utils_cc/update_t.irp.f index dbd4f4bd..0cf8626c 100644 --- a/src/utils_cc/update_t.irp.f +++ b/src/utils_cc/update_t.irp.f @@ -22,7 +22,7 @@ subroutine update_t1(nO,nV,f_o,f_v,r1,t1) !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) & !$OMP PRIVATE(i,a) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(1) + !$OMP DO do a = 1, nV do i = 1, nO t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) @@ -57,7 +57,7 @@ subroutine update_t2(nO,nV,f_o,f_v,r2,t2) !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO diff --git a/tests/input/h2_1.xyz b/tests/input/h2_1.xyz new file mode 100644 index 00000000..8ecd7dab --- /dev/null +++ b/tests/input/h2_1.xyz @@ -0,0 +1,6 @@ +2 +H2 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 + + diff --git a/tests/input/h2_3.xyz b/tests/input/h2_3.xyz new file mode 100644 index 00000000..8ecd7dab --- /dev/null +++ b/tests/input/h2_3.xyz @@ -0,0 +1,6 @@ +2 +H2 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 + + diff --git a/tests/input/h3_2.xyz b/tests/input/h3_2.xyz new file mode 100644 index 00000000..7c251c35 --- /dev/null +++ b/tests/input/h3_2.xyz @@ -0,0 +1,7 @@ +3 +h3 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.0 0.0 + + diff --git a/tests/input/h3_4.xyz b/tests/input/h3_4.xyz new file mode 100644 index 00000000..7c251c35 --- /dev/null +++ b/tests/input/h3_4.xyz @@ -0,0 +1,7 @@ +3 +h3 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.0 0.0 + + diff --git a/tests/input/h4_1.xyz b/tests/input/h4_1.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_1.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 + diff --git a/tests/input/h4_3.xyz b/tests/input/h4_3.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_3.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 + diff --git a/tests/input/h4_5.xyz b/tests/input/h4_5.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_5.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 +