diff --git a/configure b/configure index 66bc9419..48e6fd12 100755 --- a/configure +++ b/configure @@ -215,7 +215,6 @@ EOF cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} --without-hdf5 make -j 8 && make -j 8 check && make -j 8 install - cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz mv ninja "\${QP_ROOT}"/bin/ EOF @@ -229,7 +228,6 @@ EOF cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} make -j 8 && make -j 8 check && make -j 8 install - cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files EOF diff --git a/ocaml/Input_ao_basis.ml b/ocaml/Input_ao_basis.ml index 841089ea..506cf069 100644 --- a/ocaml/Input_ao_basis.ml +++ b/ocaml/Input_ao_basis.ml @@ -44,8 +44,12 @@ end = struct let get_default = Qpackage.get_ezfio_default "ao_basis";; let read_ao_basis () = - Ezfio.get_ao_basis_ao_basis () - |> AO_basis_name.of_string + let result = + Ezfio.get_ao_basis_ao_basis () + in + if result <> "None" then + AO_basis_name.of_string result + else failwith "No basis" ;; let read_ao_num () = @@ -192,7 +196,7 @@ end = struct ao_expo ; ao_cartesian ; ao_normalized ; - primitives_normalized ; + primitives_normalized ; } = b in write_md5 b ; @@ -207,7 +211,7 @@ end = struct Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; - let ao_nucl = + let ao_nucl = Array.to_list ao_nucl |> list_map Nucl_number.to_int in @@ -215,7 +219,7 @@ end = struct ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; let ao_power = - let l = Array.to_list ao_power in + let l = Array.to_list ao_power in List.concat [ (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.x) l) ; (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.y) l) ; @@ -227,7 +231,7 @@ end = struct Ezfio.set_ao_basis_ao_cartesian(ao_cartesian); Ezfio.set_ao_basis_ao_normalized(ao_normalized); Ezfio.set_ao_basis_primitives_normalized(primitives_normalized); - + let ao_coef = Array.to_list ao_coef |> list_map AO_coef.to_float @@ -267,7 +271,10 @@ end = struct |> Ezfio.set_ao_basis_ao_md5 ; Some result with - | _ -> (Ezfio.set_ao_basis_ao_md5 "None" ; None) + | _ -> ( "None" + |> Digest.string + |> Digest.to_hex + |> Ezfio.set_ao_basis_ao_md5 ; None) ;; @@ -276,7 +283,7 @@ end = struct to_basis b |> Long_basis.of_basis |> Array.of_list - and unordered_basis = + and unordered_basis = to_long_basis b |> Array.of_list in @@ -289,15 +296,15 @@ end = struct (a.(i) <- None ; i) else find x a (i+1) - and find2 (s,g,n) a i = + and find2 (s,g,n) a i = if i = Array.length a then -1 else - match a.(i) with + match a.(i) with | None -> find2 (s,g,n) a (i+1) | Some (s', g', n') -> if s <> s' || n <> n' then find2 (s,g,n) a (i+1) else - let lc = list_map (fun (prim, _) -> prim) g.Gto.lc + let lc = list_map (fun (prim, _) -> prim) g.Gto.lc and lc' = list_map (fun (prim, _) -> prim) g'.Gto.lc in if lc <> lc' then find2 (s,g,n) a (i+1) else (a.(i) <- None ; i) @@ -313,13 +320,13 @@ end = struct let ao_num = List.length long_basis |> AO_number.of_int in let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc - |> AO_prim_number.of_int ) long_basis + |> AO_prim_number.of_int ) long_basis |> Array.of_list and ao_nucl = - list_map (fun (_,_,n) -> n) long_basis + list_map (fun (_,_,n) -> n) long_basis |> Array.of_list and ao_power = - list_map (fun (x,_,_) -> x) long_basis + list_map (fun (x,_,_) -> x) long_basis |> Array.of_list in let ao_prim_num_max = Array.fold_left (fun s x -> @@ -329,16 +336,16 @@ end = struct in let gtos = - list_map (fun (_,x,_) -> x) long_basis + list_map (fun (_,x,_) -> x) long_basis in let create_expo_coef ec = let coefs = begin match ec with | `Coefs -> list_map (fun x-> - list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos + list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos | `Expos -> list_map (fun x-> list_map (fun (prim,_) -> AO_expo.to_float - prim.GaussianPrimitive.expo) x.Gto.lc ) gtos + prim.GaussianPrimitive.expo) x.Gto.lc ) gtos end in let rec get_n n accu = function @@ -360,7 +367,7 @@ end = struct let ao_coef = create_expo_coef `Coefs |> Array.of_list |> Array.map AO_coef.of_float - and ao_expo = create_expo_coef `Expos + and ao_expo = create_expo_coef `Expos |> Array.of_list |> Array.map AO_expo.of_float in @@ -372,7 +379,7 @@ end = struct } ;; - let reorder b = + let reorder b = let order = ordering b in let f a = Array.init (Array.length a) (fun i -> a.(order.(i))) in let ao_prim_num_max = AO_prim_number.to_int b.ao_prim_num_max @@ -464,7 +471,7 @@ Basis set (read-only) :: | line :: tail -> let line = String.trim line in if line = "Basis set (read-only) ::" then - String.concat "\n" tail + String.concat "\n" tail else extract_basis tail in diff --git a/ocaml/Input_mo_basis.ml b/ocaml/Input_mo_basis.ml index a4e6176a..832b464e 100644 --- a/ocaml/Input_mo_basis.ml +++ b/ocaml/Input_mo_basis.ml @@ -56,7 +56,10 @@ end = struct let read_ao_md5 () = let ao_md5 = match (Input_ao_basis.Ao_basis.read ()) with - | None -> failwith "Unable to read AO basis" + | None -> ("None" + |> Digest.string + |> Digest.to_hex + |> MD5.of_string) | Some result -> Input_ao_basis.Ao_basis.to_md5 result in let result = diff --git a/src/trexio/qp_import_trexio.py b/scripts/qp_import_trexio.py similarity index 69% rename from src/trexio/qp_import_trexio.py rename to scripts/qp_import_trexio.py index de8d1269..89096387 100755 --- a/src/trexio/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -13,12 +13,17 @@ Options: import sys import os -import trexio import numpy as np from functools import reduce from ezfio import ezfio from docopt import docopt +try: + import trexio +except ImportError: + print("Error: trexio python module is not found. Try python3 -m pip install trexio") + sys.exit(1) + try: QP_ROOT = os.environ["QP_ROOT"] @@ -90,14 +95,15 @@ def write_ezfio(trexio_filename, filename): p = re.compile(r'(\d*)$') label = [p.sub("", x).capitalize() for x in label] ezfio.set_nuclei_nucl_label(label) + print("OK") else: ezfio.set_nuclei_nucl_num(1) ezfio.set_nuclei_nucl_charge([0.]) ezfio.set_nuclei_nucl_coord([0.,0.,0.]) ezfio.set_nuclei_nucl_label(["X"]) + print("None") - print("OK") print("Electrons\t...\t", end=' ') @@ -105,12 +111,12 @@ def write_ezfio(trexio_filename, filename): try: num_beta = trexio.read_electron_dn_num(trexio_file) except: - num_beta = sum(charge)//2 + num_beta = int(sum(charge))//2 try: num_alpha = trexio.read_electron_up_num(trexio_file) except: - num_alpha = sum(charge) - num_beta + num_alpha = int(sum(charge)) - num_beta if num_alpha == 0: print("\n\nError: There are zero electrons in the TREXIO file.\n\n") @@ -118,7 +124,7 @@ def write_ezfio(trexio_filename, filename): ezfio.set_electrons_elec_alpha_num(num_alpha) ezfio.set_electrons_elec_beta_num(num_beta) - print("OK") + print(f"{num_alpha} {num_beta}") print("Basis\t\t...\t", end=' ') @@ -126,60 +132,113 @@ def write_ezfio(trexio_filename, filename): try: basis_type = trexio.read_basis_type(trexio_file) - if basis_type.lower() not in ["gaussian", "slater"]: - raise TypeError + if basis_type.lower() in ["gaussian", "slater"]: + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = trexio.read_basis_prim_num(trexio_file) + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = trexio.read_basis_exponent(trexio_file) + coefficient = trexio.read_basis_coefficient(trexio_file) + shell_index = trexio.read_basis_shell_index(trexio_file) + ao_shell = trexio.read_ao_shell(trexio_file) - shell_num = trexio.read_basis_shell_num(trexio_file) - prim_num = trexio.read_basis_prim_num(trexio_file) - ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) - nucl_index = trexio.read_basis_nucleus_index(trexio_file) - exponent = trexio.read_basis_exponent(trexio_file) - coefficient = trexio.read_basis_coefficient(trexio_file) - shell_index = trexio.read_basis_shell_index(trexio_file) - ao_shell = trexio.read_ao_shell(trexio_file) + ezfio.set_basis_basis("Read from TREXIO") + ezfio.set_ao_basis_ao_basis("Read from TREXIO") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) - ezfio.set_basis_basis("Read from TREXIO") - ezfio.set_basis_shell_num(shell_num) - ezfio.set_basis_prim_num(prim_num) - ezfio.set_basis_shell_ang_mom(ang_mom) - ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) - ezfio.set_basis_prim_expo(exponent) - ezfio.set_basis_prim_coef(coefficient) + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) - nucl_shell_num = [] - prev = None - m = 0 - for i in ao_shell: - if i != prev: - m += 1 - if prev is None or nucl_index[i] != nucl_index[prev]: - nucl_shell_num.append(m) - m = 0 - prev = i - assert (len(nucl_shell_num) == nucl_num) + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) - shell_prim_num = [] - prev = shell_index[0] - count = 0 - for i in shell_index: - if i != prev: - shell_prim_num.append(count) - count = 0 - count += 1 - prev = i - shell_prim_num.append(count) + assert (len(shell_prim_num) == shell_num) - assert (len(shell_prim_num) == shell_num) - - ezfio.set_basis_shell_prim_num(shell_prim_num) - ezfio.set_basis_shell_index([x+1 for x in shell_index]) - ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) - shell_factor = trexio.read_basis_shell_factor(trexio_file) - prim_factor = trexio.read_basis_prim_factor(trexio_file) + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = trexio.read_basis_prim_factor(trexio_file) - print("OK") + elif basis_type.lower() == "numerical": + + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = shell_num + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = [1.]*prim_num + coefficient = [1.]*prim_num + shell_index = [i for i in range(shell_num)] + ao_shell = trexio.read_ao_shell(trexio_file) + + ezfio.set_basis_basis("None") + ezfio.set_ao_basis_ao_basis("None") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) + + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) + + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) + + assert (len(shell_prim_num) == shell_num) + + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = [1.]*prim_num + else: + raise TypeError + + print(basis_type) except: print("None") ezfio.set_ao_basis_ao_cartesian(True) @@ -256,9 +315,11 @@ def write_ezfio(trexio_filename, filename): # ezfio.set_ao_basis_ao_prim_num_max(prim_num_max) ezfio.set_ao_basis_ao_coef(coef) ezfio.set_ao_basis_ao_expo(expo) - ezfio.set_ao_basis_ao_basis("Read from TREXIO") - print("OK") + print("OK") + + else: + print("None") # _ @@ -279,6 +340,7 @@ def write_ezfio(trexio_filename, filename): except: label = "None" ezfio.set_mo_basis_mo_label(label) + ezfio.set_determinants_mo_label(label) try: clss = trexio.read_mo_class(trexio_file) @@ -303,10 +365,10 @@ def write_ezfio(trexio_filename, filename): for i in range(num_beta): mo_occ[i] += 1. ezfio.set_mo_basis_mo_occ(mo_occ) + print("OK") except: - pass + print("None") - print("OK") print("Pseudos\t\t...\t", end=' ') @@ -386,9 +448,10 @@ def write_ezfio(trexio_filename, filename): ezfio.set_pseudo_pseudo_n_kl(pseudo_n_kl) ezfio.set_pseudo_pseudo_v_kl(pseudo_v_kl) ezfio.set_pseudo_pseudo_dz_kl(pseudo_dz_kl) + print("OK") - - print("OK") + else: + print("None") diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg index 51d726da..6ad9b998 100644 --- a/src/ao_basis/EZFIO.cfg +++ b/src/ao_basis/EZFIO.cfg @@ -67,3 +67,15 @@ doc: Use normalized primitive functions interface: ezfio, provider default: true +[ao_expoim_cosgtos] +type: double precision +doc: imag part for Exponents for each primitive of each cosGTOs |AO| +size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) +interface: ezfio, provider + +[use_cosgtos] +type: logical +doc: If true, use cosgtos for AO integrals +interface: ezfio +default: False + diff --git a/src/ao_basis/cosgtos.irp.f b/src/ao_basis/cosgtos.irp.f new file mode 100644 index 00000000..721a3e57 --- /dev/null +++ b/src/ao_basis/cosgtos.irp.f @@ -0,0 +1,33 @@ +BEGIN_PROVIDER [ logical, use_cosgtos ] + implicit none + BEGIN_DOC +! If true, use cosgtos for AO integrals + END_DOC + + logical :: has + PROVIDE ezfio_filename + if (mpi_master) then + call ezfio_has_ao_basis_use_cosgtos(has) + if (has) then +! write(6,'(A)') '.. >>>>> [ IO READ: use_cosgtos ] <<<<< ..' + call ezfio_get_ao_basis_use_cosgtos(use_cosgtos) + else + use_cosgtos = .False. + endif + endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( use_cosgtos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read use_cosgtos with MPI' + endif + IRP_ENDIF + +! call write_time(6) + +END_PROVIDER diff --git a/src/ao_one_e_ints/NEED b/src/ao_one_e_ints/NEED index b9caaf5d..61d23b1e 100644 --- a/src/ao_one_e_ints/NEED +++ b/src/ao_one_e_ints/NEED @@ -1,3 +1,2 @@ ao_basis pseudo -cosgtos_ao_int diff --git a/src/cosgtos_ao_int/aos_cosgtos.irp.f b/src/ao_one_e_ints/aos_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/aos_cosgtos.irp.f rename to src/ao_one_e_ints/aos_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f b/src/ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/one_e_Coul_integrals.irp.f rename to src/ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/one_e_kin_integrals.irp.f b/src/ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/one_e_kin_integrals.irp.f rename to src/ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index 928053ad..446bf730 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -455,10 +455,12 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(c) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny=0 call I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,Y,ny,n_pt_in) - call multiply_poly(Y,ny,R1x,2,d,nd) +! call multiply_poly(Y,ny,R1x,2,d,nd) + call multiply_poly_c2(Y,ny,R1x,d,nd) else do ix=0,n_pt_in X(ix) = 0.d0 @@ -469,7 +471,8 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(a-1) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) nx = nd do ix=0,n_pt_in @@ -479,10 +482,12 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(c) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny=0 call I_x1_pol_mult_one_e(a-1,c,R1x,R1xp,R2x,Y,ny,n_pt_in) - call multiply_poly(Y,ny,R1x,2,d,nd) +! call multiply_poly(Y,ny,R1x,2,d,nd) + call multiply_poly_c2(Y,ny,R1x,d,nd) endif end @@ -519,7 +524,8 @@ recursive subroutine I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,d,nd,dim) do ix=0,nx X(ix) *= dble(c-1) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny = 0 do ix=0,dim Y(ix) = 0.d0 @@ -527,7 +533,8 @@ recursive subroutine I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,d,nd,dim) call I_x1_pol_mult_one_e(0,c-1,R1x,R1xp,R2x,Y,ny,dim) if(ny.ge.0)then - call multiply_poly(Y,ny,R1xp,2,d,nd) +! call multiply_poly(Y,ny,R1xp,2,d,nd) + call multiply_poly_c2(Y,ny,R1xp,d,nd) endif endif end diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index d4c995e6..9f523fca 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -4,6 +4,19 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[ao_integrals_threshold] +type: Threshold +doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero +interface: ezfio,provider,ocaml +default: 1.e-15 +ezfio_name: threshold_ao + +[ao_cholesky_threshold] +type: Threshold +doc: If | (ii|jj) | < `ao_cholesky_threshold` then (ii|jj) is zero +interface: ezfio,provider,ocaml +default: 1.e-12 + [do_direct_integrals] type: logical doc: Compute integrals on the fly (very slow, only for debugging) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d4c201aa..bb81b141 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -4,29 +4,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] ! Number of Cholesky vectors in AO basis END_DOC - integer :: i,j,k,l - double precision :: xnorm0, x, integral - double precision, external :: ao_two_e_integral - - cholesky_ao_num_guess = 0 - xnorm0 = 0.d0 - x = 0.d0 - do j=1,ao_num - do i=1,ao_num - integral = ao_two_e_integral(i,i,j,j) - if (integral > ao_integrals_threshold) then - cholesky_ao_num_guess += 1 - else - x += integral - endif - enddo - enddo - print *, 'Cholesky decomposition of AO integrals' - print *, '--------------------------------------' - print *, '' - print *, 'Estimated Error: ', x - print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)' - + cholesky_ao_num_guess = ao_num*ao_num / 2 END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -39,7 +17,7 @@ END_PROVIDER END_DOC type(c_ptr) :: ptr - integer :: fd, i,j,k,l, rank + integer :: fd, i,j,k,l,m,rank double precision, pointer :: ao_integrals(:,:,:,:) double precision, external :: ao_two_e_integral @@ -49,28 +27,90 @@ END_PROVIDER 8, fd, .False., ptr) call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) - double precision :: integral + print*, 'Providing the AO integrals (Cholesky)' + call wall_time(wall_1) + call cpu_time(cpu_1) + + ao_integrals = 0.d0 + + double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 logical, external :: ao_two_e_integral_zero - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic) - do l=1,ao_num - do j=1,l - do k=1,ao_num - do i=1,k - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - enddo + double precision, external :: get_ao_two_e_integral + + if (read_ao_two_e_integrals) then + PROVIDE ao_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP MASTER + call wall_time(wall_2) + print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL + + else + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP MASTER + call wall_time(wall_2) + print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + call wall_time(wall_2) + call cpu_time(cpu_2) + print*, 'AO integrals provided:' + print*, ' cpu time :',cpu_2 - cpu_1, 's' + print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' + + endif ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess - call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao) + call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' ! Remove mmap diff --git a/src/cosgtos_ao_int/gauss_legendre.irp.f b/src/ao_two_e_ints/gauss_legendre.irp.f similarity index 100% rename from src/cosgtos_ao_int/gauss_legendre.irp.f rename to src/ao_two_e_ints/gauss_legendre.irp.f diff --git a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/two_e_Coul_integrals.irp.f rename to src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 82ffbc90..85ff5bcf 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -590,8 +590,20 @@ double precision function general_primitive_integral(dim, & d_poly(i)=0.d0 enddo - !DIR$ FORCEINLINE - call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) +! call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) + integer :: ib, ic + if (ior(n_Ix,n_Iy) >= 0) then + do ib=0,n_Ix + do ic = 0,n_Iy + d_poly(ib+ic) = d_poly(ib+ic) + Iy_pol(ic) * Ix_pol(ib) + enddo + enddo + + do n_pt_tmp = n_Ix+n_Iy, 0, -1 + if (d_poly(n_pt_tmp) /= 0.d0) exit + enddo + endif + if (n_pt_tmp == -1) then return endif @@ -600,8 +612,21 @@ double precision function general_primitive_integral(dim, & d1(i)=0.d0 enddo - !DIR$ FORCEINLINE - call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) +! call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) + if (ior(n_pt_tmp,n_Iz) >= 0) then + ! Bottleneck here + do ib=0,n_pt_tmp + do ic = 0,n_Iz + d1(ib+ic) = d1(ib+ic) + Iz_pol(ic) * d_poly(ib) + enddo + enddo + + do n_pt_out = n_pt_tmp+n_Iz, 0, -1 + if (d1(n_pt_out) /= 0.d0) exit + enddo + endif + + double precision :: rint_sum accu = accu + rint_sum(n_pt_out,const,d1) @@ -948,8 +973,9 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= dble(a-1) enddo - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_10,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_10,2,d,nd) + call multiply_poly_c2(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -970,8 +996,9 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= c enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + call multiply_poly_c2(X,nx,B_00,d,nd) endif ny=0 @@ -988,9 +1015,9 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt call I_x1_pol_mult_recurs(a-1,c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) endif - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) - +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + call multiply_poly_c2(Y,ny,C_00,d,nd) end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1028,8 +1055,9 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + call multiply_poly_c2(X,nx,B_00,d,nd) ny=0 @@ -1039,8 +1067,9 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + call multiply_poly_c2(Y,ny,C_00,d,nd) end @@ -1067,8 +1096,9 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) nx = 0 call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_10,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_10,2,d,nd) + call multiply_poly_c2(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -1086,8 +1116,9 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + call multiply_poly_c2(X,nx,B_00,d,nd) ny=0 !DIR$ LOOP COUNT(8) @@ -1097,9 +1128,9 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) !DIR$ FORCEINLINE call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) - +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + call multiply_poly_c2(Y,ny,C_00,d,nd) end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1146,8 +1177,10 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(1) = D_00(1) Y(2) = D_00(2) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,D_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,D_00,2,d,nd) + call multiply_poly_c2(Y,ny,D_00,d,nd) + return case default @@ -1164,8 +1197,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) X(ix) *= dble(c-1) enddo - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_01,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_01,2,d,nd) + call multiply_poly_c2(X,nx,B_01,d,nd) ny = 0 !DIR$ LOOP COUNT(6) @@ -1174,8 +1208,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,D_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,D_00,2,d,nd) + call multiply_poly_c2(Y,ny,D_00,d,nd) end select end @@ -1233,3 +1268,34 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) enddo end + + +subroutine multiply_poly_local(b,nb,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb, nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:nc) + double precision, intent(inout) :: d(0:nb+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 + + do ib=0,nb + do ic = 0,nc + d(ib+ic) = d(ib+ic) + c(ic) * b(ib) + enddo + enddo + + do nd = nb+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index ca50dd56..f7a42f37 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -7,7 +7,13 @@ program bi_ort_ints my_n_pt_r_grid = 10 my_n_pt_a_grid = 14 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call test_3e +! call test_3e + call test_5idx +! call test_5idx2 +end + +subroutine test_5idx2 + PROVIDE three_e_5_idx_cycle_2_bi_ort end subroutine test_3e @@ -16,11 +22,12 @@ subroutine test_3e double precision :: accu, contrib,new,ref i = 1 k = 1 + n = 0 accu = 0.d0 do i = 1, mo_num - do k = 1, mo_num + do k = 1, mo_num do j = 1, mo_num - do l = 1, mo_num + do l = 1, mo_num do m = 1, mo_num do n = 1, mo_num call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) @@ -31,6 +38,7 @@ subroutine test_3e print*,'pb !!' print*,i,k,j,l,m,n print*,ref,new,contrib + stop endif enddo enddo @@ -42,3 +50,93 @@ subroutine test_3e end + +subroutine test_5idx + implicit none + integer :: i,k,j,l,m,n,ipoint + double precision :: accu, contrib,new,ref + i = 1 + k = 1 + n = 0 + accu = 0.d0 + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + + new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'direct' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + + new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'exch12' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif +! + new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'cycle1' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + + new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'cycle2' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + + new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'exch23' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + + new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'exch13' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + + enddo + enddo + enddo + enddo + enddo + print*,'accu = ',accu/dble(mo_num)**5 + + +end diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index 507408e5..1b2e777e 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -1,7 +1,11 @@ - ! --- -BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -14,289 +18,221 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, implicit none integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_direct_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_direct_bi_ort ...' - call wall_time(wall0) + double precision :: wall1, wall0 + integer :: ipoint + double precision, allocatable :: grad_mli(:,:,:), orb_mat(:,:,:) + double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) + double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + print *, ' Providing the three_e_5_idx_bi_ort ...' + call wall_time(wall0) + + do m = 1, mo_num + + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP grad_mli, orb_mat) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + + orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + + enddo + enddo + enddo + + !$OMP END DO + !$OMP END PARALLEL + + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,k,j) - tmp_mat(k,j,l,i) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 - call print_memory_usage() + deallocate(orb_mat,grad_mli) -END_PROVIDER -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_cycle_1_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + + rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + + rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 - call print_memory_usage() -END_PROVIDER + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_cycle_2_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - do l = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_exch23_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,j,k) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = - tmp_mat(k,j,i,l) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = - tmp_mat(k,i,j,l) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = - tmp_mat(l,j,i,k) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 - call print_memory_usage() -END_PROVIDER + deallocate(lm_grad_ik) -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_exch13_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_exch13_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l,i) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k,i) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(k,i,l,j) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 - call print_memory_usage() -END_PROVIDER + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_exch12_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_exch12_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,j,i,k) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,i,j,l) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,j,i,l) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,i,j,k) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO + + deallocate(lk_grad_mi) + deallocate(rm_grad_ik) + deallocate(rk_grad_im) + enddo + call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0 call print_memory_usage() -END_PROVIDER - -! --- +END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmkl_old.irp.f b/src/bi_ort_ints/three_body_ijmkl_old.irp.f new file mode 100644 index 00000000..105cd179 --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmkl_old.irp.f @@ -0,0 +1,295 @@ + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_direct_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_direct_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) + three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_direct_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_cycle_1_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_cycle_1_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) + three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_1_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_cycle_2_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_cycle_2_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + do l = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) + three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_2_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_exch23_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch23_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) + three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch23_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_exch13_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch13_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) + three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch13_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + three_e_5_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) + three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch12_bi_ort_old', wall1 - wall0 + +END_PROVIDER + diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index f82e8725..d8145c3e 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -4,7 +4,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC -! matrix element of the -L three-body operator +! matrix element of the -L three-body operator ! ! notice the -1 sign: in this way three_body_ints_bi_ort can be directly used to compute Slater rules :) END_DOC @@ -12,7 +12,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n implicit none integer :: i, j, k, l, m, n double precision :: integral, wall1, wall0 - character*(128) :: name_file + character*(128) :: name_file three_body_ints_bi_ort = 0.d0 print *, ' Providing the three_body_ints_bi_ort ...' @@ -27,12 +27,12 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n ! call read_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file) ! else - !provide x_W_ki_bi_ortho_erf_rk + !provide x_W_ki_bi_ortho_erf_rk provide mos_r_in_r_array_transp mos_l_in_r_array_transp !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,n,integral) & + !$OMP PRIVATE (i,j,k,l,m,n,integral) & !$OMP SHARED (mo_num,three_body_ints_bi_ort) !$OMP DO SCHEDULE (dynamic) do i = 1, mo_num @@ -43,7 +43,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n do n = 1, mo_num call give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) - three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral + three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral enddo enddo enddo @@ -64,7 +64,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n ! call ezfio_set_three_body_ints_bi_ort_io_three_body_ints_bi_ort("Read") ! endif -END_PROVIDER +END_PROVIDER ! --- @@ -72,7 +72,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS ! END_DOC @@ -86,22 +86,25 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) PROVIDE int2_grad1_u12_bimo_t integral = 0.d0 + ! (n, l, k, m, j, i) do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + tmp = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) & + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) ) - integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + + tmp = tmp + mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) - integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & + + tmp = tmp + mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) + integral = integral + tmp * final_weight_at_r_vector(ipoint) enddo end subroutine give_integrals_3_body_bi_ort @@ -112,7 +115,7 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS ! END_DOC @@ -124,13 +127,13 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) integral = 0.d0 do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) + weight = final_weight_at_r_vector(ipoint) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & +! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & ! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & +! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & ! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i) ) @@ -139,11 +142,11 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & +! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & ! * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,l,j,ipoint) & ! + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,l,j,ipoint) & ! + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,l,j,ipoint) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & +! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & ! * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint) & ! + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint) & ! + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint) ) @@ -152,13 +155,13 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) ! + int2_grad1_u12_bimo(2,l,j,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint) & ! + int2_grad1_u12_bimo(3,l,j,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint) ) -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(l,j,1,ipoint) & + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(l,j,2,ipoint) & + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(l,j,3,ipoint) ) - integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(k,i,1,ipoint) & + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(k,i,2,ipoint) & + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(k,i,3,ipoint) ) @@ -177,7 +180,7 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS ! END_DOC @@ -189,13 +192,13 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral) integral = 0.d0 do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) + weight = final_weight_at_r_vector(ipoint) - integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & + integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j) & + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j) & + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) ) - integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & + integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i) & + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i) & + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) ) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index b63375cf..1467d9a4 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1,5 +1,5 @@ subroutine run_ccsd_space_orb - + implicit none integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d @@ -12,34 +12,30 @@ subroutine run_ccsd_space_orb double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) double precision, allocatable :: t1(:,:), r1(:,:) double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) - + double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) integer(bit_kind) :: det(N_int,2) - integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) - - PROVIDE mo_two_e_integrals_in_map + integer :: nO, nV, nOa, nVa + +! PROVIDE mo_two_e_integrals_in_map det = psi_det(:,:,cc_ref) print*,'Reference determinant:' call print_det(det,N_int) - ! Extract number of occ/vir alpha/beta spin orbitals - !call extract_n_spin(det,n_spin) - nOa = cc_nOa !n_spin(1) - nOb = cc_nOb !n_spin(2) - nVa = cc_nVa !n_spin(3) - nVb = cc_nVb !n_spin(4) + nOa = cc_nOa + nVa = cc_nVa ! Check that the reference is a closed shell determinant if (cc_ref_is_open_shell) then call abort endif - + ! Number of occ/vir spatial orb nO = nOa nV = nVa - + allocate(list_occ(nO),list_vir(nV)) list_occ = cc_list_occ list_vir = cc_list_vir @@ -47,7 +43,7 @@ subroutine run_ccsd_space_orb !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) !print*,'occ',list_occ !print*,'vir',list_vir - + allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) allocate(tau(nO,nO,nV,nV)) allocate(t1(nO,nV), r1(nO,nV)) @@ -76,7 +72,7 @@ subroutine run_ccsd_space_orb print*,'Det energy', uncorr_energy call ccsd_energy_space(nO,nV,tau,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy - + nb_iter = 0 not_converged = .True. max_r1 = 0d0 @@ -86,9 +82,9 @@ subroutine run_ccsd_space_orb write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' write(*,'(A77)') ' -----------------------------------------------------------------------------' call wall_time(ta) - + do while (not_converged) - + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) call compute_H_vv(nO,nV,t1,t2,tau,H_vv) call compute_H_vo(nO,nV,t1,t2,H_vo) @@ -97,7 +93,7 @@ subroutine run_ccsd_space_orb call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) max_r = max(max_r1,max_r2) - + ! Update if (cc_update_method == 'diis') then !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) @@ -109,7 +105,7 @@ subroutine run_ccsd_space_orb call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) else - print*,'Unkonw cc_method_method: '//cc_update_method + print*,'Unkown cc_method_method: '//cc_update_method endif call update_tau_space(nO,nV,t1,t2,tau) @@ -122,7 +118,7 @@ subroutine run_ccsd_space_orb if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then not_converged = .False. endif - + enddo write(*,'(A77)') ' -----------------------------------------------------------------------------' call wall_time(tb) @@ -141,18 +137,18 @@ subroutine run_ccsd_space_orb call write_t1(nO,nV,t1) call write_t2(nO,nV,t2) - + ! Deallocation if (cc_update_method == 'diis') then deallocate(all_err,all_t) endif deallocate(H_vv,H_oo,H_vo,r1,r2,tau) - + ! CCSD(T) double precision :: e_t - if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then + if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then ! Dumb way !call wall_time(ta) @@ -169,8 +165,13 @@ subroutine run_ccsd_space_orb ! New print*,'Computing (T) correction...' call wall_time(ta) - call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & +! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & +! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + + e_t = uncorr_energy + energy ! For print in next call + call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call wall_time(tb) print*,'Time: ',tb-ta, ' s' @@ -180,7 +181,7 @@ subroutine run_ccsd_space_orb write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' print*,'' endif - + print*,'Reference determinant:' call print_det(det,N_int) @@ -211,8 +212,8 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) !$omp default(none) e = 0d0 !$omp do - do i = 1, nO - do a = 1, nV + do a = 1, nV + do i = 1, nO e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) enddo enddo @@ -232,7 +233,7 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) energy = energy + e !$omp end critical !$omp end parallel - + end ! Tau @@ -250,12 +251,12 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) ! internal integer :: i,j,a,b - + !$OMP PARALLEL & !$OMP SHARED(nO,nV,tau,t2,t1) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO @@ -267,7 +268,7 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) enddo !$OMP END DO !$OMP END PARALLEL - + end ! R1 @@ -283,7 +284,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! out double precision, intent(out) :: r1(nO,nV), max_r1 - + ! internal integer :: u,i,j,beta,a,b @@ -304,7 +305,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO) ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV) ! cc_space_f_vo(a,i) * t1(u,a) -> X1(nO,nO), O(nO*nO*nV) - ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) + ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO @@ -324,7 +325,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call dgemm('T','N', nO, nV, nO, & 1d0, X_oo, size(X_oo,2), & t1 , size(t1,1), & - 1d0, r1 , size(r1,1)) + 1d0, r1 , size(r1,1)) deallocate(X_oo) ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) @@ -373,7 +374,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,X_voov,t2,t1) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do i = 1, nO @@ -385,16 +386,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do !$omp end parallel - + call dgemv('T', nV*nO, nO*nV, & 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & H_vo , 1, & 1d0, r1 , 1) - + deallocate(X_voov) ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) - ! <=> + ! <=> ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta) !do beta = 1, nV ! do u = 1, nO @@ -412,7 +413,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nv @@ -429,17 +430,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & t1 , 1, & 1d0, r1 , 1) - + deallocate(X_ovov) - ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) - ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO ! do a = 1, nV ! do b = 1, nV - ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) ! enddo ! enddo ! enddo @@ -452,24 +453,24 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) enddo enddo enddo enddo !$omp end do nowait - !$omp do collapse(3) - do i = 1, nO - do b = 1, nV - do a = 1, nV - do u = 1, nO - T_vvoo(a,b,i,u) = tau(i,u,a,b) + !$omp do + do u = 1, nO + do i = 1, nO + do b = 1, nV + do a = 1, nV + T_vvoo(a,b,i,u) = tau(i,u,a,b) enddo enddo enddo @@ -481,17 +482,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & 1d0, r1 , size(r1,1)) - + deallocate(W_vvov,T_vvoo) - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) - ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO ! do j = 1, nO ! do a = 1, nV - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) ! enddo ! enddo ! enddo @@ -504,8 +505,8 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & !$omp private(u,a,i,j) & !$omp default(none) - !$omp do collapse(3) do u = 1, nO + !$omp do do a = 1, nV do j = 1, nO do i = 1, nO @@ -513,23 +514,21 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('T','N', nO, nV, nO*nO*nV, & -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & tau , size(tau,1) * size(tau,2) * size(tau,3), & 1d0, r1 , size(r1,1)) - + deallocate(W_oovo) max_r1 = 0d0 do a = 1, nV do i = 1, nO - if (dabs(r1(i,a)) > max_r1) then - max_r1 = dabs(r1(i,a)) - endif + max_r1 = max(dabs(r1(i,a)), max_r1) enddo enddo @@ -538,7 +537,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,r1) & !$omp private(a,i) & !$omp default(none) - !$omp do + !$omp do do a = 1, nV do i = 1, nO r1(i,a) = -r1(i,a) @@ -546,7 +545,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do !$omp end parallel - + end ! H_oo @@ -578,7 +577,7 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) ! enddo ! enddo ! enddo - ! + ! ! enddo !enddo @@ -601,8 +600,8 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) call dgemm('N','T', nO, nO, nO*nV*nV, & 1d0, tau , size(tau,1), & cc_space_w_oovv, size(cc_space_w_oovv,1), & - 1d0, H_oo , size(H_oo,1)) - + 1d0, H_oo , size(H_oo,1)) + end ! H_vv @@ -633,7 +632,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! enddo ! enddo ! enddo - ! + ! ! enddo !enddo @@ -656,13 +655,13 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) - - !$omp do collapse(3) + + !$omp do do beta = 1, nV do j = 1, nO do i = 1, nO do b = 1, nV - tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) enddo enddo enddo @@ -676,7 +675,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) 1d0, H_vv , size(H_vv,1)) deallocate(tmp_tau) - + end ! H_vo @@ -704,7 +703,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! enddo ! enddo - ! + ! ! enddo !enddo @@ -727,7 +726,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -746,7 +745,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) 1d0, H_vo, 1) deallocate(w) - + end ! R2 @@ -765,13 +764,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! internal double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) - double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:), B1_gam(:,:,:) integer :: u,v,i,j,beta,gam,a,b allocate(g_occ(nO,nO), g_vir(nV,nV)) allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) allocate(A1(nO,nO,nO,nO)) - + call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) call compute_A1(nO,nV,t1,t2,tau,A1) @@ -787,7 +786,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,cc_space_v_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -835,13 +834,18 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! enddo !enddo - allocate(B1(nV,nV,nV,nV)) - call compute_B1(nO,nV,t1,t2,B1) - call dgemm('N','N',nO*nO,nV*nV,nV*nV, & - 1d0, tau, size(tau,1) * size(tau,2), & - B1 , size(B1,1) * size(B1,2), & - 1d0, r2, size(r2,1) * size(r2,2)) - deallocate(B1) +! allocate(B1(nV,nV,nV,nV)) +! call compute_B1(nO,nV,t1,t2,B1) + allocate(B1_gam(nV,nV,nV)) + do gam=1,nV + call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam) + call dgemm('N','N',nO*nO,nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1_gam , size(B1_gam,1) * size(B1_gam,2), & + 1d0, r2(1,1,1,gam), size(r2,1) * size(r2,2)) + enddo + deallocate(B1_gam) + !do gam = 1, nV ! do beta = 1, nV @@ -863,7 +867,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,t2,X_oovv) & !$omp private(u,v,gam,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do a = 1, nV do gam = 1, nV do v = 1, nO @@ -875,7 +879,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nO*nV,nV,nV, & 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & g_vir, size(g_vir,1), & @@ -885,7 +889,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -921,7 +925,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -957,7 +961,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & !$omp private(u,a,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do u = 1, nO @@ -979,7 +983,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -991,7 +995,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1009,13 +1013,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !enddo double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) - + !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & !$omp private(u,v,gam,i) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do u = 1, nO do a = 1, nV @@ -1023,8 +1027,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','N',nV*nO*nV,nV,nO, & @@ -1036,12 +1040,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, t1, size(t1,1), & Y_vovv, size(Y_vovv,1), & 0d0, X_oovv, size(X_oovv,1)) - + !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1055,7 +1059,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end parallel deallocate(X_vovo,Y_vovv) - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1079,7 +1083,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1092,7 +1096,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1111,13 +1115,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, allocatable :: Y_oovo(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) - + !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & !$omp private(a,v,gam,i) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1125,8 +1129,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','N',nO,nO*nV*nO,nV, & @@ -1138,12 +1142,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & t1 , size(t1,1), & 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) - + !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1155,7 +1159,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + deallocate(X_vovo,Y_oovo) !do gam = 1, nV @@ -1182,19 +1186,19 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do a = 1, nV do beta = 1, nV do u = 1, nO - X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) + X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) + !$omp do do gam = 1, nV do v = 1, nO do i = 1, nO @@ -1206,17 +1210,17 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_voov, size(Y_voov,1) * size(Y_voov,2), & 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1228,9 +1232,9 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + deallocate(X_ovvo,Y_voov) - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1252,7 +1256,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nV @@ -1264,7 +1268,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1281,12 +1285,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1298,7 +1302,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1319,7 +1323,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do a = 1, nV do i = 1, nO do gam = 1, nV @@ -1331,7 +1335,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do beta = 1, nV do v = 1, nO do a = 1, nV @@ -1343,17 +1347,17 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1367,13 +1371,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end parallel deallocate(X_ovov,Y_ovov,Z_ovov) - + ! Change the sign for consistency with the code in spin orbitals !$omp parallel & !$omp shared(nO,nV,r2) & !$omp private(i,j,a,b) & !$omp default(none) - !$omp do collapse(3) + !$omp do do b = 1, nV do a = 1, nV do j = 1, nO @@ -1385,22 +1389,20 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + max_r2 = 0d0 do b = 1, nV do a = 1, nV do j = 1, nO do i = 1, nO - if (dabs(r2(i,j,a,b)) > max_r2) then - max_r2 = dabs(r2(i,j,a,b)) - endif + max_r2 = max(r2(i,j,a,b), max_r2) enddo enddo enddo enddo deallocate(g_occ,g_vir,J1,K1,A1) - + end ! A1 @@ -1429,12 +1431,12 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) ! A1(u,v,i,j) = A1(u,v,i,j) & ! + cc_space_v_ovoo(u,a,i,j) * t1(v,a) & ! + cc_space_v_vooo(a,v,i,j) * t1(u,a) - ! + ! ! do b = 1, nV ! A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) - ! enddo + ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo @@ -1442,13 +1444,13 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) - + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) !$omp parallel & !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & !$omp private(u,v,i,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO @@ -1462,7 +1464,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do u = 1, nO @@ -1484,7 +1486,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) !$omp shared(nO,nV,A1,Y_oooo) & !$omp private(u,v,i,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO @@ -1496,7 +1498,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) enddo !$omp end do !$omp end parallel - + deallocate(X_vooo,Y_oooo) ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) @@ -1510,11 +1512,95 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) 1d0, tau , size(tau,1) * size(tau,2), & cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & 1d0, A1 , size(A1,1) * size(A1,2)) - + end ! B1 +subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) + + implicit none + + integer, intent(in) :: nO,nV,gam + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: B1(nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + +! do beta = 1, nV +! do b = 1, nV +! do a = 1, nV +! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) +! +! do i = 1, nO +! B1(a,b,beta) = B1(a,b,beta) & +! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & +! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) +! enddo +! +! enddo +! enddo +! enddo + + double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) + allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) +! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + !$omp parallel & + !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & + !$omp private(a,b,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) + enddo + enddo + enddo + !$omp end do nowait + do i = 1, nO + !$omp do + do b = 1, nV + do a = 1, nV + X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + +! ! B1(a,b,beta) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + call dgemm('N','N', nV*nV*nV, 1, nO, & + -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & + t1(1,gam), size(t1,1), & + 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) + + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + call dgemm('N','N', nV*nV, nV, nO, & + -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2), & + t1 , size(t1,1), & + 0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2)) + + !$omp parallel & + !$omp shared(nV,B1,Y_vvvv,gam) & + !$omp private(a,b,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta) = B1(a,b,beta) + Y_vvvv(a,b,beta) + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vvvo,Y_vvvv) + +end + subroutine compute_B1(nO,nV,t1,t2,B1) implicit none @@ -1532,28 +1618,28 @@ subroutine compute_B1(nO,nV,t1,t2,B1) ! do beta = 1, nV ! do b = 1, nV ! do a = 1, nV - ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) ! do i = 1, nO ! B1(a,b,beta,gam) = B1(a,b,beta,gam) & ! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & ! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) ! enddo - ! + ! ! enddo ! enddo ! enddo !enddo - + double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:) allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV)) - ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1564,8 +1650,8 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo enddo !$omp end do nowait - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do b = 1, nV do a = 1, nV @@ -1573,17 +1659,17 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel - + ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & call dgemm('N','N', nV*nV*nV, nV, nO, & -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & t1 , size(t1,1), & 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) - + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) call dgemm('N','N', nV*nV*nV, nV, nO, & -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), & @@ -1594,7 +1680,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp shared(nV,B1,Y_vvvv) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1606,9 +1692,9 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo !$omp end do !$omp end parallel - + deallocate(X_vvvo,Y_vvvv) - + end ! g_occ @@ -1629,14 +1715,14 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) !do i = 1, nO ! do u = 1, nO ! g_occ(u,i) = H_oo(u,i) - ! + ! ! do a = 1, nV ! g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a) - ! + ! ! do j = 1, nO ! g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) ! enddo - ! + ! ! enddo ! enddo !enddo @@ -1657,8 +1743,8 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo enddo !$omp end do - - !$omp do collapse(1) + + !$omp do do i = 1, nO do j = 1, nO do a = 1, nV @@ -1670,7 +1756,7 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo !$omp end do !$omp end parallel - + end ! g_vir @@ -1691,23 +1777,23 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) !do beta = 1, nV ! do a = 1, nV ! g_vir(a,beta) = H_vv(a,beta) - ! + ! ! do i = 1, nO ! g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta) - ! + ! ! do b = 1, nV ! g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) ! enddo - ! + ! ! enddo ! enddo !enddo - + call dgemm('N','N',nV,nV,nO, & -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & t1 , size(t1,1), & 0d0, g_vir, size(g_vir,1)) - + !$omp parallel & !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & !$omp private(i,b,a,beta) & @@ -1720,7 +1806,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do - !$omp do collapse(1) + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV @@ -1732,7 +1818,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do !$omp end parallel - + end ! J1 @@ -1765,7 +1851,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) ! do b = 1, nV ! J1(u,a,beta,i) = J1(u,a,beta,i) & - ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) + ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) ! enddo ! do j = 1, nO @@ -1775,7 +1861,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) ! + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo @@ -1783,13 +1869,13 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) - + !$omp parallel & !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & !$omp private(i,j,a,u,beta) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1797,10 +1883,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do a = 1, nV @@ -1812,7 +1898,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nV*nO,nV,nO, & -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & t1 , size(t1,1), & @@ -1822,8 +1908,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Y_ovov) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1831,8 +1917,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel deallocate(X_ovoo) @@ -1849,7 +1935,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & !$omp private(i,beta,a,u,b,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do beta = 1, nV @@ -1861,7 +1947,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -1886,8 +1972,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1895,12 +1981,12 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do i = 1, nO do a = 1, nV @@ -1908,11 +1994,11 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - - !$omp do collapse(3) + do j = 1, nO + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -1920,10 +2006,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel - + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & @@ -1933,8 +2019,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Z_ovvo) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1942,12 +2028,12 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel - deallocate(X_ovvo,Z_ovvo,Y_ovov) - + deallocate(X_ovvo,Z_ovvo,Y_ovov) + end ! K1 @@ -1982,7 +2068,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) ! do b = 1, nV ! K1(u,a,i,beta) = K1(u,a,i,beta) & - ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) + ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) ! enddo ! do j = 1, nO @@ -1991,19 +2077,19 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) ! - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo !enddo allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) - + !$omp parallel & !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV @@ -2015,8 +2101,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo !$omp end do nowait - !$omp do collapse(3) do i = 1, nO + !$omp do do a = 1, nV do j = 1, nO do b = 1, nV @@ -2024,11 +2110,11 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -2036,8 +2122,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo enddo enddo + !$omp end do enddo - !$omp end do !$omp end parallel call dgemm('N','N',nO*nV*nO,nV,nO, & @@ -2060,7 +2146,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp shared(nO,nV,K1,Z) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV @@ -2074,5 +2160,5 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp end parallel deallocate(X,Y,Z) - + end diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 3b762a06..1aab6bd7 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -10,51 +10,43 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) double precision, intent(out) :: energy - double precision, allocatable :: W(:,:,:,:,:,:) - double precision, allocatable :: V(:,:,:,:,:,:) - double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) - double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) - double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) - double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) - double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb, delta, delta_abc + double precision :: e,ta,tb - !allocate(W(nV,nV,nV,nO,nO,nO)) - !allocate(V(nV,nV,nV,nO,nO,nO)) - allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) - allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) - allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) - allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + call set_multiple_levels_omp(.False.) + + allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) + allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) - ! Temporary arrays !$OMP PARALLEL & - !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & !$OMP PRIVATE(a,b,c,d,i,j,k,l) & !$OMP DEFAULT(NONE) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & - !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO collapse(3) - do i = 1, nO - do a = 1, nV - do b = 1, nV + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO do d = 1, nV - X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) enddo enddo enddo enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do j = 1, nO - do k = 1, nO - do c = 1, nV + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO do d = 1, nV - T_vvoo(d,c,k,j) = t2(k,j,c,d) + T_voov(d,k,j,c) = t2(k,j,c,d) enddo enddo enddo @@ -62,191 +54,399 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END DO nowait !v_vooo(c,j,k,l) * t2(i,l,a,b) & - !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO collapse(3) - do k = 1, nO - do j = 1, nO - do c = 1, nV - do l = 1, nO - X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) - enddo - enddo - enddo - enddo - !$OMP END DO nowait - - !$OMP DO collapse(3) - do i = 1, nO - do b = 1, nV - do a = 1, nV - do l = 1, nO - T_ovvo(l,a,b,i) = t2(i,l,a,b) - enddo - enddo - enddo - enddo - !$OMP END DO nowait - - !v_vvoo(b,c,j,k) * t1(i,a) & - !X_vvoo(b,c,k,j) * T1_vo(a,i) & - - !$OMP DO collapse(3) - do j = 1, nO + !$OMP DO + do c = 1, nV do k = 1, nO - do c = 1, nV - do b = 1, nV - X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + do j = 1, nO + do l = 1, nO + X_ooov(l,j,k,c) = v_vooo(c,j,k,l) enddo enddo enddo enddo !$OMP END DO nowait - !$OMP DO collapse(1) - do i = 1, nO + !$OMP DO + do b = 1, nV do a = 1, nV - T_vo(a,i) = t1(i,a) + do i = 1, nO + do l = 1, nO + T_oovv(l,i,a,b) = t2(i,l,a,b) + enddo + enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO nowait - call wall_time(ta) - energy = 0d0 + !X_oovv(j,k,b,c) * T1_vo(a,i) & + + !$OMP DO do c = 1, nV do b = 1, nV - do a = 1, nV - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) - call form_w_abc(nO,nV,b,c,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_bca) - call form_w_abc(nO,nV,c,a,b,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cab) - call form_w_abc(nO,nV,c,b,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cba) - - call form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W_abc,V_abc) - call form_v_abc(nO,nV,c,b,a,T_vo,X_vvoo,W_cba,V_cba) - !$OMP PARALLEL & - !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& - !$OMP PRIVATE(i,j,k,e,delta) & - !$OMP DEFAULT(NONE) - e = 0d0 - !$OMP DO - do i = 1, nO - do j = 1, nO - do k = 1, nO - delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) - e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& - * (V_abc(i,j,k) - V_cba(i,j,k)) * delta - enddo - enddo + do k = 1, nO + do j = 1, nO + X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - energy = energy + e - !$OMP END CRITICAL - !$OMP END PARALLEL enddo enddo - call wall_time(tb) - write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' enddo + !$OMP END DO nowait - energy = energy / 3d0 + !$OMP END PARALLEL - deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) - !deallocate(V,W) + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc + + !$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED) + e = 0d0 + !$OMP DO SCHEDULE(dynamic) + do a = 1, nV + do b = a+1, nV + do c = b+1, nV + e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + enddo + + e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + + e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + + !$OMP END PARALLEL + + energy = energy / 3.d0 + + deallocate(X_vovv,X_ooov,T_voov,T_oovv) end -subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) +double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b,c + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + + double precision :: delta, delta_abc + integer :: i,j,k + + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + delta_abc = f_v(a) + f_v(b) + f_v(c) + e = 0.d0 + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & + W_bca(i,j,k) - W_bac(i,j,k) + & + W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) +& + (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & + W_cba(i,j,k) - W_cab(i,j,k) + & + W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) +& + (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & + W_acb(i,j,k) - W_abc(i,j,k) + & + W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + enddo + enddo + enddo + + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + +end + +double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + + double precision :: delta, delta_abc + integer :: i,j,k + + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + + call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + delta_abc = f_v(a) + f_v(b) + f_v(a) + e = 0.d0 + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + + enddo + enddo + enddo + + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + +end + +subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) implicit none integer, intent(in) :: nO,nV,a,b,c - !double precision, intent(in) :: t2(nO,nO,nV,nV) - double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) - double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision, intent(out) :: W_abc(nO,nO,nO) + double precision, intent(out) :: W_cba(nO,nO,nO) + double precision, intent(out) :: W_bca(nO,nO,nO) + double precision, intent(out) :: W_cab(nO,nO,nO) + double precision, intent(out) :: W_bac(nO,nO,nO) + double precision, intent(out) :: W_acb(nO,nO,nO) integer :: l,i,j,k,d + double precision, allocatable, dimension(:,:,:,:) :: W_ikj + double precision, allocatable :: X(:,:,:,:) + allocate(W_ikj(nO,nO,nO,6)) + allocate(X(nV,nO,nO,3)) - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) & - !$OMP PRIVATE(i,j,k,d,l) & - !$OMP DEFAULT(NONE) - - !$OMP DO collapse(3) - do k = 1, nO - do j = 1, nO - do i = 1, nO - W_abc(i,j,k) = 0.d0 - - do d = 1, nV - W_abc(i,j,k) = W_abc(i,j,k) & - + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & - + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & - + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & - + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & - + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & - + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) - - enddo - - do l = 1, nO - W_abc(i,j,k) = W_abc(i,j,k) & - - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & - - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj - - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik - - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij - - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj - - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik - enddo - + do k=1,nO + do i=1,nO + do d=1,nV + X(d,i,k,1) = T_voov(d,k,i,a) + X(d,i,k,2) = T_voov(d,k,i,b) + X(d,i,k,3) = T_voov(d,k,i,c) enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + +! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk + + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,c), nV, 0.d0, W_acb, nO) + +! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_acb, nO*nO) +! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 1.d0, W_acb, nO*nO) + +! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj + + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,3), nV, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, X(1,1,1,3), nV, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,1), nV, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,1), nV, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, X(1,1,1,2), nV, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, X(1,1,1,2), nV, 1.d0, W_acb, nO) + +! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 0.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 0.d0, W_ikj(1,1,1,6), nO*nO) + +! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_ikj(1,1,1,6), nO*nO) + + deallocate(X) + + allocate(X(nO,nO,nO,3)) + + do k=1,nO + do j=1,nO + do l=1,nO + X(l,j,k,1) = X_ooov(l,k,j,a) + X(l,j,k,2) = X_ooov(l,k,j,b) + X(l,j,k,3) = X_ooov(l,k,j,c) + enddo + enddo + enddo + + +! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X_ooov(1,1,1,c), nO, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X_ooov(1,1,1,a), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X_ooov(1,1,1,a), nO, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X_ooov(1,1,1,b), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X_ooov(1,1,1,b), nO, 1.d0, W_acb, nO) + +! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,2), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X(1,1,1,1), nO, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,2), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X(1,1,1,3), nO, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,1), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X(1,1,1,3), nO, 1.d0, W_acb, nO) + +! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_acb, nO*nO) + +! - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_acb, nO*nO) + +! - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,6), nO*nO) + +! - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,6), nO*nO) + + do k=1,nO + do j=1,nO + do i=1,nO + W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j,1) + W_bac(i,j,k) = W_bac(i,j,k) + W_ikj(i,k,j,2) + W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,3) + W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,4) + W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,5) + W_acb(i,j,k) = W_acb(i,j,k) + W_ikj(i,k,j,6) + enddo + enddo + enddo + + deallocate(X,W_ikj) end ! V_abc -subroutine form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W,V) +subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) implicit none integer, intent(in) :: nO,nV,a,b,c - !double precision, intent(in) :: t1(nO,nV) - double precision, intent(in) :: T_vo(nV,nO) - double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) - double precision, intent(in) :: W(nO,nO,nO) - double precision, intent(out) :: V(nO,nO,nO) + double precision, intent(in) :: T_ov(nO,nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(out) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(out) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) integer :: i,j,k - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_vo,X_vvoo,W,V) & - !$OMP PRIVATE(i,j,k) & - !$OMP DEFAULT(NONE) - !$OMP DO collapse(2) do k = 1, nO do j = 1, nO do i = 1, nO - !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & - V(i,j,k) = W(i,j,k) & - + X_vvoo(b,c,k,j) * T_vo(a,i) & - + X_vvoo(a,c,k,i) * T_vo(b,j) & - + X_vvoo(a,b,j,i) * T_vo(c,k) + V_abc(i,j,k) = W_abc(i,j,k) & + + X_oovv(j,k,b,c) * T_ov(i,a) & + + X_oovv(i,k,a,c) * T_ov(j,b) & + + X_oovv(i,j,a,b) * T_ov(k,c) + + V_cba(i,j,k) = W_cba(i,j,k) & + + X_oovv(j,k,b,a) * T_ov(i,c) & + + X_oovv(i,k,c,a) * T_ov(j,b) & + + X_oovv(i,j,c,b) * T_ov(k,a) + + V_bca(i,j,k) = W_bca(i,j,k) & + + X_oovv(j,k,c,a) * T_ov(i,b) & + + X_oovv(i,k,b,a) * T_ov(j,c) & + + X_oovv(i,j,b,c) * T_ov(k,a) + + V_cab(i,j,k) = W_cab(i,j,k) & + + X_oovv(j,k,a,b) * T_ov(i,c) & + + X_oovv(i,k,c,b) * T_ov(j,a) & + + X_oovv(i,j,c,a) * T_ov(k,b) + + V_bac(i,j,k) = W_bac(i,j,k) & + + X_oovv(j,k,a,c) * T_ov(i,b) & + + X_oovv(i,k,b,c) * T_ov(j,a) & + + X_oovv(i,j,b,a) * T_ov(k,c) + + V_acb(i,j,k) = W_acb(i,j,k) & + + X_oovv(j,k,c,b) * T_ov(i,a) & + + X_oovv(i,k,a,b) * T_ov(j,c) & + + X_oovv(i,j,a,c) * T_ov(k,b) + enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL end diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f new file mode 100644 index 00000000..b669025e --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -0,0 +1,363 @@ +! Main +subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(inout) :: energy + + double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb,eccsd + + eccsd = energy + call set_multiple_levels_omp(.False.) + + allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) + allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) + + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO + do d = 1, nV + X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO + do d = 1, nV + T_voov(d,k,j,c) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & + + !$OMP DO + do c = 1, nV + do k = 1, nO + do j = 1, nO + do l = 1, nO + X_ooov(l,j,k,c) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO + do b = 1, nV + do a = 1, nV + do i = 1, nO + do l = 1, nO + T_oovv(l,i,a,b) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !X_oovv(j,k,b,c) * T1_vo(a,i) & + + !$OMP DO + do c = 1, nV + do b = 1, nV + do k = 1, nO + do j = 1, nO + X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP END PARALLEL + + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc +! logical, external :: omp_test_lock + + double precision, allocatable :: memo(:), Pabc(:), waccu(:) + integer*8, allocatable :: sampled(:) +! integer(omp_lock_kind), allocatable :: lock(:) + integer*2 , allocatable :: abc(:,:) + integer*8 :: Nabc, i8 + integer*8, allocatable :: iorder(:) + double precision :: eocc + double precision :: norm + integer :: kiter, isample + + + ! Prepare table of triplets (a,b,c) + + Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV + allocate (memo(Nabc), sampled(Nabc), Pabc(Nabc), waccu(Nabc)) + allocate (abc(4,Nabc), iorder(Nabc)) !, lock(Nabc)) + +! eocc = 3.d0/dble(nO) * sum(f_o(1:nO)) + Nabc = 0_8 + do a = 1, nV + do b = a+1, nV + do c = b+1, nV + Nabc = Nabc + 1_8 + Pabc(Nabc) = -1.d0/(f_v(a) + f_v(b) + f_v(c)) + abc(1,Nabc) = a + abc(2,Nabc) = b + abc(3,Nabc) = c + enddo + + Nabc = Nabc + 1_8 + abc(1,Nabc) = a + abc(2,Nabc) = b + abc(3,Nabc) = a + Pabc(Nabc) = -1.d0/(2.d0*f_v(a) + f_v(b)) + + Nabc = Nabc + 1_8 + abc(1,Nabc) = b + abc(2,Nabc) = a + abc(3,Nabc) = b + Pabc(Nabc) = -1.d0/(f_v(a) + 2.d0*f_v(b)) + enddo + enddo + + do i8=1,Nabc + iorder(i8) = i8 + enddo + + ! Sort triplets in decreasing Pabc + call dsort_big(Pabc, iorder, Nabc) + + ! Normalize + norm = 0.d0 + do i8=Nabc,1,-1 + norm = norm + Pabc(i8) + enddo + norm = 1.d0/norm + do i8=1,Nabc + Pabc(i8) = Pabc(i8) * norm + enddo + + call i8set_order_big(abc, iorder, Nabc) + + + ! Cumulative distribution for sampling + waccu(Nabc) = 0.d0 + do i8=Nabc-1,1,-1 + waccu(i8) = waccu(i8+1) - Pabc(i8+1) + enddo + waccu(:) = waccu(:) + 1.d0 + + logical :: converged, do_comp + double precision :: eta, variance, error, sample + double precision :: t00, t01 + integer*8 :: ieta, Ncomputed + integer*8, external :: binary_search + + integer :: nbuckets + nbuckets = 100 + + double precision, allocatable :: wsum(:) + allocate(wsum(nbuckets)) + + converged = .False. + Ncomputed = 0_8 + + energy = 0.d0 + variance = 0.d0 + memo(:) = 0.d0 + sampled(:) = -1_8 + + integer*8 :: ileft, iright, imin + ileft = 1_8 + iright = Nabc + integer*8, allocatable :: bounds(:,:) + + allocate (bounds(2,nbuckets)) + do isample=1,nbuckets + eta = 1.d0/dble(nbuckets) * dble(isample) + ieta = binary_search(waccu,eta,Nabc) + bounds(1,isample) = ileft + bounds(2,isample) = ieta + ileft = ieta+1 + wsum(isample) = sum( Pabc(bounds(1,isample):bounds(2,isample) ) ) + enddo + + Pabc(:) = 1.d0/Pabc(:) + + print '(A)', '' + print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' | E(CCSD(T)) | Error | % |' + print '(A)', ' +----------------------+--------------+----------+' + + + call wall_time(t00) + imin = 1_8 + !$OMP PARALLEL & + !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & + !$OMP DEFAULT(SHARED) + + do kiter=1,Nabc + + !$OMP MASTER + do while ((imin <= Nabc).and.(sampled(imin)>-1_8)) + imin = imin+1 + enddo + + ! Deterministic part + if (imin < Nabc) then + ieta=imin + sampled(ieta) = 0_8 + a = abc(1,ieta) + b = abc(2,ieta) + c = abc(3,ieta) + Ncomputed += 1_8 + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + endif + !$OMP END TASK + endif + + ! Stochastic part + call random_number(eta) + do isample=1,nbuckets + if (imin >= bounds(2,isample)) then + cycle + endif + ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc) + + if (sampled(ieta) == -1_8) then + sampled(ieta) = 0_8 + a = abc(1,ieta) + b = abc(2,ieta) + c = abc(3,ieta) + Ncomputed += 1_8 + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + endif + !$OMP END TASK + endif + sampled(ieta) = sampled(ieta)+1_8 + + enddo + + call wall_time(t01) + if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then + t00 = t01 + + !$OMP TASKWAIT + + double precision :: ET, ET2 + double precision :: energy_stoch, energy_det + double precision :: scale + double precision :: w + double precision :: tmp + energy_stoch = 0.d0 + energy_det = 0.d0 + norm = 0.d0 + scale = 1.d0 + ET = 0.d0 + ET2 = 0.d0 + + + do isample=1,nbuckets + if (imin >= bounds(2,isample)) then + energy_det = energy_det + sum(memo(bounds(1,isample):bounds(2,isample))) + scale = scale - wsum(isample) + else + exit + endif + enddo + + do ieta=bounds(1,isample), Nabc + w = dble(max(sampled(ieta),0_8)) + tmp = w * memo(ieta) * Pabc(ieta) + ET = ET + tmp + ET2 = ET2 + tmp * memo(ieta) * Pabc(ieta) + norm = norm + w + enddo + norm = norm/scale + if (norm > 0.d0) then + energy_stoch = ET / norm + variance = ET2 / norm - energy_stoch*energy_stoch + endif + + energy = energy_det + energy_stoch + + print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + endif + !$OMP END MASTER + if (imin >= Nabc) exit + enddo + + !$OMP END PARALLEL + print '(A)', ' +----------------------+--------------+----------+' + print '(A)', '' + + deallocate(X_vovv,X_ooov,T_voov,T_oovv) +end + + + +integer*8 function binary_search(arr, key, size) + implicit none + BEGIN_DOC +! Searches the key in array arr(1:size) between l_in and r_in, and returns its index + END_DOC + integer*8 :: size, i, j, mid, l_in, r_in + double precision, dimension(size) :: arr(1:size) + double precision :: key + + i = 1_8 + j = size + + do while (j >= i) + mid = i + (j - i) / 2 + if (arr(mid) >= key) then + if (mid > 1 .and. arr(mid - 1) < key) then + binary_search = mid + return + end if + j = mid - 1 + else if (arr(mid) < key) then + i = mid + 1 + else + binary_search = mid + 1 + return + end if + end do + binary_search = i +end function binary_search + diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 6f40a809..0705d103 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -76,6 +76,8 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) double precision, allocatable :: fock_diag_tmp(:,:) + if (csubset == 0) return + allocate(fock_diag_tmp(2,mo_num+1)) call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int) @@ -177,6 +179,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d monoAdo = .true. monoBdo = .true. + if (csubset == 0) return do k=1,N_int hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 4c271a4b..77377554 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -868,7 +868,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! and transpose ! ------------------------------------------- -! call htilde_mu_mat_bi_ortho_tot(det, det, N_int, Hii) double precision :: hmono, htwoe, hthree call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii) do istate = 1,N_states @@ -878,8 +877,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 do iii = 1, N_det_selectors - call htilde_mu_mat_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + call htilde_mu_mat_bi_ortho_tot_slow(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_bi_ortho_tot_slow(det, psi_selectors(1,1,iii), N_int, alpha_h_i) call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) if(degree == 0)then print*,'problem !!!' diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg deleted file mode 100644 index 8edeecd0..00000000 --- a/src/cosgtos_ao_int/EZFIO.cfg +++ /dev/null @@ -1,19 +0,0 @@ -[ao_expoim_cosgtos] -type: double precision -doc: imag part for Exponents for each primitive of each cosGTOs |AO| -size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) -interface: ezfio, provider - -[use_cosgtos] -type: logical -doc: If true, use cosgtos for AO integrals -interface: ezfio,provider,ocaml -default: False - -[ao_integrals_threshold] -type: Threshold -doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero -interface: ezfio,provider,ocaml -default: 1.e-15 -ezfio_name: threshold_ao - diff --git a/src/cosgtos_ao_int/NEED b/src/cosgtos_ao_int/NEED deleted file mode 100644 index 932f88a3..00000000 --- a/src/cosgtos_ao_int/NEED +++ /dev/null @@ -1,2 +0,0 @@ -ezfio_files -ao_basis diff --git a/src/cosgtos_ao_int/README.rst b/src/cosgtos_ao_int/README.rst deleted file mode 100644 index 01f25d6d..00000000 --- a/src/cosgtos_ao_int/README.rst +++ /dev/null @@ -1,4 +0,0 @@ -============== -cosgtos_ao_int -============== - diff --git a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f deleted file mode 100644 index d65dfba5..00000000 --- a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f +++ /dev/null @@ -1,7 +0,0 @@ -program cosgtos_ao_int - implicit none - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' -end diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f index b6ec073f..df753449 100644 --- a/src/fci_tc_bi/diagonalize_ci.irp.f +++ b/src/fci_tc_bi/diagonalize_ci.irp.f @@ -39,7 +39,7 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) write(*,'(A28,X,I10,X,100(F16.8,X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc + pt2_tmp/norm,E_tc + rpt2_tmp/norm,abs_pt2 print*,'*****' endif - psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) + psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) - nuclear_repulsion psi_s2(1:N_states) = s2_eigvec_tc_bi_orth(1:N_states) E_tc = eigval_right_tc_bi_orth(1) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 14d3c696..8b1e6e1c 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -6,11 +6,42 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num integer :: k + call set_multiple_levels_omp(.False.) + print *, 'AO->MO Transformation of Cholesky vectors' !$OMP PARALLEL DO PRIVATE(k) do k=1,cholesky_ao_num call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) enddo !$OMP END PARALLEL DO + print *, '' + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + integer :: i,j,k + double precision, allocatable :: buffer(:,:) + + print *, 'AO->MO Transformation of Cholesky vectors .' + !$OMP PARALLEL PRIVATE(i,j,k,buffer) + allocate(buffer(mo_num,mo_num)) + !$OMP DO SCHEDULE(static) + do k=1,cholesky_ao_num + call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num) + do j=1,mo_num + do i=1,mo_num + cholesky_mo_transp(k,i,j) = buffer(i,j) + enddo + enddo + enddo + !$OMP END DO + deallocate(buffer) + !$OMP END PARALLEL + print *, '' END_PROVIDER diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index 4ffb0134..d807f619 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -4,24 +4,68 @@ BEGIN_DOC ! big_array_coulomb_integrals(j,i,k) = = (ik|jj) ! - ! big_array_exchange_integrals(i,j,k) = = (ij|kj) + ! big_array_exchange_integrals(j,i,k) = = (ij|kj) END_DOC - integer :: i,j,k,l + integer :: i,j,k,l,a double precision :: get_two_e_integral double precision :: integral - do k = 1, mo_num - do i = 1, mo_num - do j = 1, mo_num - l = j - integral = get_two_e_integral(i,j,k,l,mo_integrals_map) - big_array_coulomb_integrals(j,i,k) = integral - l = j - integral = get_two_e_integral(i,j,l,k,mo_integrals_map) - big_array_exchange_integrals(j,i,k) = integral + if (do_ao_cholesky) then + + double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) + allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num)) + do j=1,mo_num + buffer_jj(:,j) = cholesky_mo_transp(:,j,j) + enddo + + call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + buffer_jj, cholesky_ao_num, 0.d0, & + buffer, mo_num*mo_num) + + do k = 1, mo_num + do i = 1, mo_num + do j = 1, mo_num + big_array_coulomb_integrals(j,i,k) = buffer(i,k,j) + enddo + enddo + enddo + deallocate(buffer_jj) + + allocate(buffer_jj(mo_num,mo_num)) + + do j = 1, mo_num + + call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, & + cholesky_mo_transp(1,1,j), cholesky_ao_num, & + cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, & + buffer_jj, mo_num) + + do k=1,mo_num + do i=1,mo_num + big_array_exchange_integrals(j,i,k) = buffer_jj(i,k) + enddo + enddo + enddo + + deallocate(buffer_jj) + + else + + do k = 1, mo_num + do i = 1, mo_num + do j = 1, mo_num + l = j + integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + big_array_coulomb_integrals(j,i,k) = integral + l = j + integral = get_two_e_integral(i,j,l,k,mo_integrals_map) + big_array_exchange_integrals(j,i,k) = integral + enddo + enddo enddo - enddo - enddo + + endif END_PROVIDER diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index b7ef901d..a461504e 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -1353,15 +1353,30 @@ END_PROVIDER integer :: i,j double precision :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map - mo_two_e_integrals_jj = 0.d0 - mo_two_e_integrals_jj_exchange = 0.d0 + + if (do_ao_cholesky) then + do j=1,mo_num + do i=1,mo_num + !TODO: use dgemm + mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j)) + mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i)) + enddo + enddo + + else + + do j=1,mo_num + do i=1,mo_num + mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map) + mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map) + enddo + enddo + + endif do j=1,mo_num do i=1,mo_num - mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map) - mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map) - mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j) + mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j) enddo enddo diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index cbd0b406..5e99600e 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -187,6 +187,19 @@ end function j12_mu subroutine grad1_j12_mu(r1, r2, grad) + BEGIN_DOC +! gradient of j(mu(r1,r2),r12) form of jastrow. +! +! if mu(r1,r2) = cst ---> j1b_type < 200 and +! +! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) +! +! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and +! +! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) +! +! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + END_DOC include 'constants.include.F' implicit none @@ -515,6 +528,9 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) double precision :: r(3) double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) double precision :: dm_tot, tmp1, tmp2, tmp3 + double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot + double precision :: f_rho1, f_rho2, d_drho_f_rho1 + double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume if(j1b_type .eq. 200) then @@ -578,8 +594,84 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) - else + elseif(j1b_type .eq. 202) then + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 exp(-rho) + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) - mu0 exp(-rho(r1))] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf * exp(-rho) + call get_all_f_rho(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 203) then + + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 204) then + + ! mu(r1,r2) = 1/2 * (f[rho(r1)] + f[rho(r2)]} + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/2 * d/dx1 (rho(r1) f[rho(r1)]) + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + mu_val = 0.5d0 * ( f_rho1 + f_rho2) + mu_der(1:3) = d_dx_rho_f_rho(1:3) + else print *, ' j1b_type = ', j1b_type, 'not implemented yet' stop @@ -684,3 +776,76 @@ end function j12_mu_square ! --- +subroutine f_mu_and_deriv_mu(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 * exp(-rho) +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 * dexp(-rho) + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - mu0 * dexp(-rho) + +end + + +subroutine get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + implicit none + BEGIN_DOC +! returns the density in r1,r2 and grad_rho at r1 + END_DOC + double precision, intent(in) :: r1(3),r2(3) + double precision, intent(out):: grad_rho1(3),rho1,rho2 + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + call density_and_grad_alpha_beta(r1, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho1 = dm_a(1) + dm_b(1) + grad_rho1(1:3) = grad_dm_a(1:3,1) + grad_dm_b(1:3,1) + call density_and_grad_alpha_beta(r2, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho2 = dm_a(1) + dm_b(1) +end + +subroutine get_all_f_rho(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu(rho2,alpha,mu0,beta,f_rho2,tmp) +end + + +subroutine get_all_f_rho_simple(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu_simple(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu_simple(rho2,alpha,mu0,beta,f_rho2,tmp) +end + +subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) + +end + diff --git a/src/non_h_ints_mu/plot_mu_of_r.irp.f b/src/non_h_ints_mu/plot_mu_of_r.irp.f new file mode 100644 index 00000000..1100cd7c --- /dev/null +++ b/src/non_h_ints_mu/plot_mu_of_r.irp.f @@ -0,0 +1,33 @@ +program plot_mu_of_r + implicit none + read_wf = .False. + touch read_wf + call routine_print + +end + + +subroutine routine_print + implicit none + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.mu_of_r' + i_unit_output = getUnitAndOpen(output,'w') + integer :: ipoint,nx + double precision :: xmax,xmin,r(3),dx + double precision :: mu_val, mu_der(3),dm_a,dm_b,grad + xmax = 5.D0 + xmin = -5.D0 + nx = 10000 + dx = (xmax - xmin)/dble(nx) + r = 0.d0 + r(1) = xmin + do ipoint = 1, nx + call mu_r_val_and_grad(r, r, mu_val, mu_der) + call dm_dft_alpha_beta_at_r(r,dm_a,dm_b) + grad = mu_der(1)**2 + mu_der(2)**2 + mu_der(3)**2 + grad = dsqrt(grad) + write(i_unit_output,'(100(F16.7,X))')r(1),mu_val,dm_a+dm_b,grad + r(1) += dx + enddo +end diff --git a/src/tc_bi_ortho/dressing_vectors_lr.irp.f b/src/tc_bi_ortho/dressing_vectors_lr.irp.f index 08913bab..ed663f02 100644 --- a/src/tc_bi_ortho/dressing_vectors_lr.irp.f +++ b/src/tc_bi_ortho/dressing_vectors_lr.irp.f @@ -27,7 +27,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) call hmat_bi_ortho (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta = 0.d0 @@ -39,7 +39,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I | Htilde | J > - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) ! < I | H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) @@ -78,7 +78,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -88,7 +88,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I | Htilde | J > - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta(i) = delta(i) + psicoef(j) * htc_tot enddo diff --git a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f index 3a715b44..6d5c3b21 100644 --- a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f +++ b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f @@ -2,7 +2,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_00] implicit none double precision :: hmono,htwoe,hthree,htot - call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) e_tilde_00 = htot END_PROVIDER @@ -18,11 +18,11 @@ do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_pt2_tc_bi_orth += coef_pt1 * htilde_ij if(degree == 1)then e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij @@ -37,7 +37,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_bi_orth_00] implicit none double precision :: hmono,htwoe,hthree,htilde_ij - call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) e_tilde_bi_orth_00 += nuclear_repulsion END_PROVIDER @@ -57,7 +57,7 @@ e_corr_double_bi_orth = 0.d0 do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) if(degree == 1)then e_corr_single_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) e_corr_single_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)) @@ -80,7 +80,7 @@ do i = 1, N_det accu += reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(i,1) do j = 1, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_tc_left_right += htilde_ij * reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(j,1) enddo enddo @@ -99,8 +99,8 @@ BEGIN_PROVIDER [ double precision, coef_pt1_bi_ortho, (N_det)] if(degree==0)then coef_pt1_bi_ortho(i) = 1.d0 else - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e coef_pt1_bi_ortho(i)= coef_pt1 diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index b7129d36..1d1b26cc 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -1,4 +1,4 @@ -subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) +subroutine htc_bi_ortho_calc_tdav_slow(v, u, N_st, sze) use bitmasks @@ -27,7 +27,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -36,7 +36,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) do istate = 1, N_st do i = 1, sze do j = 1, sze - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v(i,istate) = v(i,istate) + htot * u(j,istate) enddo enddo @@ -45,7 +45,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) end -subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) +subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) use bitmasks @@ -71,7 +71,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v = 0.d0 @@ -81,7 +81,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) do istate = 1, N_st do i = 1, sze do j = 1, sze - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) v(i,istate) = v(i,istate) + htot * u(j,istate) enddo enddo diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f index 0cf3ca87..0c4198a9 100644 --- a/src/tc_bi_ortho/print_tc_wf.irp.f +++ b/src/tc_bi_ortho/print_tc_wf.irp.f @@ -49,12 +49,12 @@ subroutine routine do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) contrib_pt = coef_pt1 * htilde_ij e_pt2 += contrib_pt diff --git a/src/tc_bi_ortho/pt2_tc_cisd.irp.f b/src/tc_bi_ortho/pt2_tc_cisd.irp.f index 50d9dd45..9cb9a600 100644 --- a/src/tc_bi_ortho/pt2_tc_cisd.irp.f +++ b/src/tc_bi_ortho/pt2_tc_cisd.irp.f @@ -36,11 +36,11 @@ subroutine routine e_corr_abs = 0.d0 e_corr_pos = 0.d0 e_corr_neg = 0.d0 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) do i = 2, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i),degree,N_int) call get_excitation(psi_det(1,1,1), psi_det(1,1,i),exc,degree,phase,N_int) call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f similarity index 88% rename from src/tc_bi_ortho/slater_tc_3e.irp.f rename to src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 7b73d5f2..6abb6b78 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -1,23 +1,5 @@ -subroutine provide_all_three_ints_bi_ortho - implicit none - BEGIN_DOC -! routine that provides all necessary three-electron integrals - END_DOC - if(three_body_h_tc)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort - endif -if(.not.double_normal_ord)then - PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort - PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort -else - PROVIDE normal_two_body_bi_orth -endif -end -subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) +subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) BEGIN_DOC ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS @@ -108,7 +90,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) end -subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) +subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC ! for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS @@ -203,7 +185,7 @@ end ! --- -subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) +subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC ! for double excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index a19d4688..3fd2576a 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,3 +1,26 @@ +subroutine provide_all_three_ints_bi_ortho + implicit none + BEGIN_DOC +! routine that provides all necessary three-electron integrals + END_DOC + if(three_body_h_tc)then + if(three_e_3_idx_term)then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + if(three_e_4_idx_term)then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + endif + if(.not.double_normal_ord.and.three_e_5_idx_term)then + PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort + PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort + elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then + PROVIDE normal_two_body_bi_orth + endif + endif +end + subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 5a3f9935..531f0141 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -7,11 +7,11 @@ ! Various component of the TC energy for the reference "HF" Slater determinant END_DOC double precision :: hmono, htwoe, htot, hthree - call diag_htilde_mu_mat_bi_ortho(N_int,HF_bitmask , hmono, htwoe, htot) + call diag_htilde_mu_mat_bi_ortho_slow(N_int,HF_bitmask , hmono, htwoe, htot) ref_tc_energy_1e = hmono ref_tc_energy_2e = htwoe if(three_body_h_tc)then - call diag_htilde_three_body_ints_bi_ort(N_int, HF_bitmask, hthree) + call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) ref_tc_energy_3e = hthree else ref_tc_energy_3e = 0.d0 @@ -156,7 +156,7 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc.and.elec_num.gt.2)then + if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na @@ -243,7 +243,7 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc.and.elec_num.gt.2)then + if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index 1b0e43bb..2d6bfb27 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -42,13 +42,13 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, ! opposite spin two-body htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then - if(.not.double_normal_ord)then + if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num.gt.2)then + elseif(double_normal_ord)then htwoe += normal_two_body_bi_orth(p2,h2,p1,h1) endif endif @@ -59,13 +59,13 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, ! exchange terms htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then - if(.not.double_normal_ord)then + if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num.gt.2)then + elseif(double_normal_ord)then htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2) htwoe += normal_two_body_bi_orth(h1,p1,h2,p2) endif diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 2f9d83bf..7178d6d9 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -106,7 +106,7 @@ subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,h htwoe -= buffer_x(i) enddo hthree = 0.d0 - if (three_body_h_tc.and.elec_num.gt.2)then + if (three_body_h_tc.and.elec_num.gt.2.and.three_e_4_idx_term)then call three_comp_fock_elem(key_i,h,p,spin,hthree) endif diff --git a/src/tc_bi_ortho/slater_tc.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f similarity index 85% rename from src/tc_bi_ortho/slater_tc.irp.f rename to src/tc_bi_ortho/slater_tc_slow.irp.f index 2c0ae2ca..1833d20f 100644 --- a/src/tc_bi_ortho/slater_tc.irp.f +++ b/src/tc_bi_ortho/slater_tc_slow.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot) +subroutine htilde_mu_mat_bi_ortho_tot_slow(key_j, key_i, Nint, htot) BEGIN_DOC ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis @@ -24,14 +24,14 @@ subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot) if(degree.gt.2)then htot = 0.d0 else - call htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) endif -end subroutine htilde_mu_mat_bi_ortho_tot +end subroutine htilde_mu_mat_bi_ortho_tot_slow ! -- -subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) +subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) BEGIN_DOC ! @@ -61,22 +61,22 @@ subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot if(degree.gt.2) return if(degree == 0)then - call diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot) + call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) else if (degree == 1)then - call single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) + call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) else if(degree == 2)then - call double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) + call double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) endif if(three_body_h_tc) then if(degree == 2) then - if(.not.double_normal_ord) then - call double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) + if(.not.double_normal_ord.and.elec_num.gt.2.and.three_e_5_idx_term) then + call double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) endif - else if(degree == 1) then - call single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) - else if(degree == 0) then - call diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) + else if(degree == 1.and.elec_num.gt.2.and.three_e_4_idx_term) then + call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) + else if(degree == 0.and.elec_num.gt.2.and.three_e_3_idx_term) then + call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) endif endif @@ -89,7 +89,7 @@ end ! --- -subroutine diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot) +subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) BEGIN_DOC ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS @@ -188,7 +188,7 @@ end -subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) +subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS @@ -227,18 +227,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) return endif -! if(core_tc_op)then -! print*,'core_tc_op not already taken into account for bi ortho' -! print*,'stopping ...' -! stop -! do i = 1, Nint -! key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1)) -! key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2)) -! enddo -! call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) -! else call bitstring_to_list_ab(key_i, occ, Ne, Nint) -! endif call get_double_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) @@ -246,7 +235,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) ! opposite spin two-body ! key_j, key_i htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - if(double_normal_ord.and.+Ne(1).gt.2)then + if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ??? endif else @@ -255,7 +244,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) ! exchange terms htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) - if(double_normal_ord.and.+Ne(1).gt.2)then + if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ??? htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ??? endif @@ -266,7 +255,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) end -subroutine single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) +subroutine single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS diff --git a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f index 4ae44148..4c3c0788 100644 --- a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f @@ -11,10 +11,10 @@ allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states)) dressing_dets = 0.d0 do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) endif enddo reigvec_tc_bi_orth_tmp = 0.d0 @@ -29,7 +29,7 @@ vec_tmp(istate,istate) = 1.d0 enddo print*,'Diagonalizing the TC CISD ' - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) do i = 1, N_det e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) enddo @@ -41,8 +41,8 @@ it = 0 dressing_dets = 0.d0 double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:) - external htc_bi_ortho_calc_tdav - external htcdag_bi_ortho_calc_tdav + external htc_bi_ortho_calc_tdav_slow + external htcdag_bi_ortho_calc_tdav_slow logical :: converged do while (dabs(E_before-E_current).gt.thr) it += 1 @@ -66,7 +66,7 @@ do istate = N_states+1, n_states_diag vec_tmp(istate,istate) = 1.d0 enddo - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) print*,'outside Davidson' print*,'eigval_tmp(1) = ',eigval_tmp(1) do i = 1, N_det diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index a83d6cd0..fa946d6a 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -207,8 +207,6 @@ end else ! n_det > N_det_max_full double precision, allocatable :: H_jj(:),vec_tmp(:,:) - external htc_bi_ortho_calc_tdav - external htcdag_bi_ortho_calc_tdav external H_tc_u_0_opt external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt @@ -217,7 +215,7 @@ end allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo print*,'---------------------------------' @@ -259,7 +257,6 @@ end do istate = N_states+1, n_states_diag vec_tmp(istate,istate) = 1.d0 enddo - !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) converged = .False. i_it = 0 diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index 3353d3e7..ec072531 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -9,33 +9,25 @@ implicit none integer :: i, j - double precision :: hmono,htwoe,hthree,htot + double precision :: htot PROVIDE N_int i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j,hmono, htwoe, hthree, htot) & + !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j, htot) & !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho) do i = 1, N_det do j = 1, N_det ! < J | Htilde | I > - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - !print *, ' hmono = ', hmono - !print *, ' htwoe = ', htwoe - !print *, ' hthree = ', hthree htilde_matrix_elmt_bi_ortho(j,i) = htot enddo enddo !$OMP END PARALLEL DO -! print*,'htilde_matrix_elmt_bi_ortho = ' -! do i = 1, min(100,N_det) -! write(*,'(100(F16.10,X))')htilde_matrix_elmt_bi_ortho(1:min(100,N_det),i) -! enddo - END_PROVIDER diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f index 291c52ef..a7e4d09e 100644 --- a/src/tc_bi_ortho/tc_som.irp.f +++ b/src/tc_bi_ortho/tc_som.irp.f @@ -56,8 +56,8 @@ subroutine main() U_SOM = 0.d0 do i = 1, N_det if(i == i_HF) cycle - call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) U_SOM += htot_1 * htot_2 enddo U_SOM = 0.5d0 * U_SOM diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index f8f648e8..24bb7017 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -12,7 +12,7 @@ subroutine write_tc_energy() do i = 1, N_det do j = 1, N_det !htot = htilde_matrix_elmt_bi_ortho(i,j) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot enddo @@ -45,7 +45,7 @@ subroutine write_tc_var() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) SIGMA_TC = SIGMA_TC + htot * htot enddo diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f index 118e481a..cb0c355c 100644 --- a/src/tc_bi_ortho/test_normal_order.irp.f +++ b/src/tc_bi_ortho/test_normal_order.irp.f @@ -35,7 +35,7 @@ subroutine test det_i = ref_bitmask call do_single_excitation(det_i,h1,p1,s1,i_ok) call do_single_excitation(det_i,h2,p2,s2,i_ok) - call htilde_mu_mat_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) hthree *= phase @@ -67,7 +67,7 @@ do h1 = 1, elec_alpha_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) integer :: hh1, pp1, hh2, pp2, ss1, ss2 @@ -103,7 +103,7 @@ do h1 = 1, elec_beta_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4debe2e2..1f7bdfda 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -91,7 +91,7 @@ subroutine routine_test_s2_davidson external H_tc_s2_u_0_opt allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo ! Preparing the left-eigenvector print*,'Computing the left-eigenvector ' diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 6721c285..df86ea65 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -31,7 +31,7 @@ subroutine test_h_u0 u_0(i) = psi_r_coef_bi_ortho(i,1) enddo call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right) - call htc_bi_ortho_calc_tdav (v_0_ref,u_0,N_states,N_det) + call htc_bi_ortho_calc_tdav_slow (v_0_ref,u_0,N_states,N_det) print*,'difference right ' accu = 0.d0 do i = 1, N_det @@ -42,7 +42,7 @@ subroutine test_h_u0 do_right = .False. v_0_new = 0.d0 call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right) - call htcdag_bi_ortho_calc_tdav(v_0_ref_dagger,u_0,N_states,N_det, do_right) + call htcdag_bi_ortho_calc_tdav_slow(v_0_ref_dagger,u_0,N_states,N_det, do_right) print*,'difference left' accu = 0.d0 do i = 1, N_det @@ -63,7 +63,7 @@ subroutine test_slater_tc_opt i_count = 0.d0 do i = 1, N_det do j = 1,N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot) if(dabs(htot).gt.1.d-15)then i_count += 1.D0 @@ -99,7 +99,7 @@ subroutine timing_tot do j = 1, N_det ! call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int) i_count += 1.d0 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -146,7 +146,7 @@ subroutine timing_diag do i = 1, N_det do j = i,i i_count += 1.d0 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -183,7 +183,7 @@ subroutine timing_single if(degree.ne.1)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo @@ -225,7 +225,7 @@ subroutine timing_double if(degree.ne.2)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index ebd43a7a..b7de067f 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -25,8 +25,7 @@ subroutine test_3e implicit none double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu double precision :: hmono, htwoe, hthree, htot - call htilde_mu_mat_bi_ortho(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) -! call diag_htilde_three_body_ints_bi_ort(N_int, ref_bitmask, hthree) + call htilde_mu_mat_bi_ortho_slow(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) print*,'hmono = ',hmono print*,'htwoe = ',htwoe print*,'hthree= ',hthree @@ -88,7 +87,7 @@ subroutine routine_3() print*, ' excited det' call debug_det(det_i, N_int) - call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) if(dabs(hthree).lt.1.d-10)cycle ref = hthree if(s1 == 1)then @@ -156,7 +155,7 @@ subroutine routine_tot() stop endif - call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index de638da9..a69f5bac 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -16,6 +16,24 @@ doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml default: True +[three_e_3_idx_term] +type: logical +doc: If |true|, the diagonal 3-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + +[three_e_4_idx_term] +type: logical +doc: If |true|, the off-diagonal 4-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + +[three_e_5_idx_term] +type: logical +doc: If |true|, the off-diagonal 5-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + [pure_three_body_h_tc] type: logical doc: If |true|, pure triple excitation three-body terms are included @@ -130,6 +148,12 @@ doc: a parameter used to define mu(r) interface: ezfio, provider, ocaml default: 6.203504908994001e-1 +[beta_rho_power] +type: double precision +doc: a parameter used to define mu(r) +interface: ezfio, provider, ocaml +default: 0.5 + [thr_degen_tc] type: Threshold doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg index 8606e908..8c11478e 100644 --- a/src/trexio/EZFIO.cfg +++ b/src/trexio/EZFIO.cfg @@ -10,11 +10,17 @@ doc: Name of the exported TREXIO file interface: ezfio, ocaml, provider default: None -[export_rdm] +[export_basis] type: logical -doc: If True, export two-body reduced density matrix +doc: If True, export basis set and AOs interface: ezfio, ocaml, provider -default: False +default: True + +[export_mos] +type: logical +doc: If True, export basis set and AOs +interface: ezfio, ocaml, provider +default: True [export_ao_one_e_ints] type: logical @@ -22,12 +28,6 @@ doc: If True, export one-electron integrals in AO basis interface: ezfio, ocaml, provider default: False -[export_mo_one_e_ints] -type: logical -doc: If True, export one-electron integrals in MO basis -interface: ezfio, ocaml, provider -default: False - [export_ao_two_e_ints] type: logical doc: If True, export two-electron integrals in AO basis @@ -40,6 +40,12 @@ doc: If True, export Cholesky-decomposed two-electron integrals in AO basis interface: ezfio, ocaml, provider default: False +[export_mo_one_e_ints] +type: logical +doc: If True, export one-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + [export_mo_two_e_ints] type: logical doc: If True, export two-electron integrals in MO basis @@ -52,3 +58,9 @@ doc: If True, export Cholesky-decomposed two-electron integrals in MO basis interface: ezfio, ocaml, provider default: False +[export_rdm] +type: logical +doc: If True, export two-body reduced density matrix +interface: ezfio, ocaml, provider +default: False + diff --git a/src/trexio/export_trexio.irp.f b/src/trexio/export_trexio.irp.f index 3ae0dcb4..f9ecc17f 100644 --- a/src/trexio/export_trexio.irp.f +++ b/src/trexio/export_trexio.irp.f @@ -2,6 +2,6 @@ program export_trexio_prog implicit none read_wf = .True. SOFT_TOUCH read_wf - call export_trexio + call export_trexio(.False.) end diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index c55ddc5e..f25ae370 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f @@ -1,15 +1,17 @@ -subroutine export_trexio +subroutine export_trexio(update) use trexio implicit none BEGIN_DOC ! Exports the wave function in TREXIO format END_DOC + logical, intent(in) :: update integer(trexio_t) :: f(N_states) ! TREXIO file handle integer(trexio_exit_code) :: rc integer :: k double precision, allocatable :: factor(:) character*(256) :: filenames(N_states) + character :: rw filenames(1) = trexio_filename do k=2,N_states @@ -18,15 +20,26 @@ subroutine export_trexio do k=1,N_states print *, 'TREXIO file : ', trim(filenames(k)) - call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + if (update) then + call system('test -f '//trim(filenames(k))//' && cp -r '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + else + call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + endif enddo print *, '' + if (update) then + rw = 'u' + else + rw = 'w' + endif + + do k=1,N_states if (backend == 0) then - f(k) = trexio_open(filenames(k), 'u', TREXIO_HDF5, rc) + f(k) = trexio_open(filenames(k), rw, TREXIO_HDF5, rc) else if (backend == 1) then - f(k) = trexio_open(filenames(k), 'u', TREXIO_TEXT, rc) + f(k) = trexio_open(filenames(k), rw, TREXIO_TEXT, rc) endif if (f(k) == 0_8) then print *, 'Unable to open TREXIO file for writing' @@ -171,92 +184,95 @@ subroutine export_trexio endif + if (export_basis) then + ! Basis ! ----- - print *, 'Basis' + print *, 'Basis' + rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_prim_num(f(1), prim_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_prim_num(f(1), prim_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_shell_num(f(1), shell_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_num(f(1), shell_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) - call trexio_assert(rc, TREXIO_SUCCESS) + allocate(factor(shell_num)) +! if (ao_normalized) then +! factor(1:shell_num) = shell_normalization_factor(1:shell_num) +! else + factor(1:shell_num) = 1.d0 +! endif + rc = trexio_write_basis_shell_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) - allocate(factor(shell_num)) - if (ao_normalized) then - factor(1:shell_num) = shell_normalization_factor(1:shell_num) - else - factor(1:shell_num) = 1.d0 - endif - rc = trexio_write_basis_shell_factor(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) - deallocate(factor) + rc = trexio_write_basis_shell_index(f(1), shell_index) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_index(f(1), shell_index) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_exponent(f(1), prim_expo) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_exponent(f(1), prim_expo) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_coefficient(f(1), prim_coef) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_coefficient(f(1), prim_coef) - call trexio_assert(rc, TREXIO_SUCCESS) - - allocate(factor(prim_num)) - if (primitives_normalized) then - factor(1:prim_num) = prim_normalization_factor(1:prim_num) - else - factor(1:prim_num) = 1.d0 - endif - rc = trexio_write_basis_prim_factor(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) + allocate(factor(prim_num)) + if (primitives_normalized) then + factor(1:prim_num) = prim_normalization_factor(1:prim_num) + else + factor(1:prim_num) = 1.d0 + endif + rc = trexio_write_basis_prim_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) ! Atomic orbitals ! --------------- - print *, 'AOs' + print *, 'AOs' - rc = trexio_write_ao_num(f(1), ao_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_num(f(1), ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_cartesian(f(1), 1) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_cartesian(f(1), 1) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_shell(f(1), ao_shell) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_shell(f(1), ao_shell) + call trexio_assert(rc, TREXIO_SUCCESS) - integer :: i, pow0(3), powA(3), j, l, nz - double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c - nz=100 + integer :: i, pow0(3), powA(3), j, l, nz + double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c + nz=100 - C_A(1) = 0.d0 - C_A(2) = 0.d0 - C_A(3) = 0.d0 + C_A(1) = 0.d0 + C_A(2) = 0.d0 + C_A(3) = 0.d0 + + allocate(factor(ao_num)) + if (ao_normalized) then + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + else + factor(:) = 1.d0 + endif + rc = trexio_write_ao_normalization(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) - allocate(factor(ao_num)) - if (ao_normalized) then - do i=1,ao_num - l = ao_first_of_shell(ao_shell(i)) - factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) - enddo - else - factor(:) = 1.d0 endif - rc = trexio_write_ao_normalization(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) ! One-e AO integrals ! ------------------ @@ -375,28 +391,30 @@ subroutine export_trexio ! Molecular orbitals ! ------------------ - print *, 'MOs' + if (export_mos) then + print *, 'MOs' - rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) - call trexio_assert(rc, TREXIO_SUCCESS) - - do k=1,N_states - rc = trexio_write_mo_num(f(k), mo_num) + rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) call trexio_assert(rc, TREXIO_SUCCESS) - enddo - rc = trexio_write_mo_coefficient(f(1), mo_coef) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_mo_num(f(k), mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - if ( (trim(mo_label) == 'Canonical').and. & - (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then - rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + rc = trexio_write_mo_coefficient(f(1), mo_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + if ( (trim(mo_label) == 'Canonical').and. & + (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then + rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) - call trexio_assert(rc, TREXIO_SUCCESS) - ! One-e MO integrals ! ------------------ diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f index 9f9ad9d6..8c6b79d7 100644 --- a/src/trexio/import_trexio_integrals.irp.f +++ b/src/trexio/import_trexio_integrals.irp.f @@ -3,6 +3,7 @@ program import_integrals_ao implicit none integer(trexio_t) :: f ! TREXIO file handle integer(trexio_exit_code) :: rc + PROVIDE mo_num f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) if (f == 0_8) then @@ -42,10 +43,10 @@ subroutine run(f) if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then rc = trexio_read_nucleus_repulsion(f, s) - call trexio_assert(rc, TREXIO_SUCCESS) if (rc /= TREXIO_SUCCESS) then print *, irp_here, rc print *, 'Error reading nuclear repulsion' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_nuclei_nuclear_repulsion(s) @@ -63,6 +64,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO overlap' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A) @@ -74,6 +76,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO kinetic integrals' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A) @@ -85,6 +88,7 @@ subroutine run(f) ! if (rc /= TREXIO_SUCCESS) then ! print *, irp_here ! print *, 'Error reading AO ECP local integrals' +! call trexio_assert(rc, TREXIO_SUCCESS) ! stop -1 ! endif ! call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A) @@ -96,6 +100,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO potential N-e integrals' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A) @@ -106,41 +111,112 @@ subroutine run(f) ! AO 2e integrals ! --------------- - PROVIDE ao_integrals_map - integer*4 :: BUFSIZE - BUFSIZE=ao_num**2 - allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) - allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + rc = trexio_has_ao_2e_int(f) + PROVIDE ao_num + if (rc /= TREXIO_HAS_NOT) then + PROVIDE ao_integrals_map - integer*8 :: offset, icount + integer*4 :: BUFSIZE + BUFSIZE=ao_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) - offset = 0_8 - icount = BUFSIZE - rc = TREXIO_SUCCESS - do while (icount == size(V)) - rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) - do m=1,icount - i = Vi(1,m) - j = Vi(2,m) - k = Vi(3,m) - l = Vi(4,m) - integral = V(m) - call two_e_integrals_index(i, j, k, l, buffer_i(m) ) - buffer_values(m) = integral - enddo - call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) - offset = offset + icount + integer*8 :: offset, icount + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'AO integrals read from TREXIO file' + else + print *, 'AO integrals not found in TREXIO file' + endif + + ! MO integrals + ! ------------ + + allocate(A(mo_num, mo_num)) + if (trexio_has_mo_1e_int_core_hamiltonian(f) == TREXIO_SUCCESS) then + rc = trexio_read_mo_1e_int_core_hamiltonian(f, A) if (rc /= TREXIO_SUCCESS) then - exit + print *, irp_here + print *, 'Error reading MO 1e integrals' + call trexio_assert(rc, TREXIO_SUCCESS) + stop -1 endif - end do - n_integrals = offset + call ezfio_set_mo_one_e_ints_mo_one_e_integrals(A) + call ezfio_set_mo_one_e_ints_io_mo_one_e_integrals('Read') + endif + deallocate(A) - call map_sort(ao_integrals_map) - call map_unique(ao_integrals_map) + ! MO 2e integrals + ! --------------- - call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) - call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + rc = trexio_has_mo_2e_int(f) + if (rc /= TREXIO_HAS_NOT) then + + BUFSIZE=mo_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_mo_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call map_append(mo_integrals_map, buffer_i, buffer_values, int(icount,4)) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) + call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read') + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'MO integrals read from TREXIO file' + else + print *, 'MO integrals not found in TREXIO file' + endif end diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index ff17ee4e..b548b18a 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -56,7 +56,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) ! - ! WARNING ::: IF fact_k is too smal then: + ! WARNING ::: IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC @@ -86,7 +86,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, !DIR$ FORCEINLINE call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center) if (fact_k < thresh) then - ! IF fact_k is too smal then: + ! IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef P_center = 0.d0 @@ -468,8 +468,6 @@ end subroutine - - subroutine multiply_poly(b,nb,c,nc,d,nd) implicit none BEGIN_DOC @@ -484,33 +482,292 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) integer :: ndtmp integer :: ib, ic, id, k - if(ior(nc,nb) >= 0) then ! True if nc>=0 and nb>=0 - continue - else - return - endif - ndtmp = nb+nc + if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 + + select case (nb) + case (0) + call multiply_poly_b0(b,c,nc,d,nd) + return + case (1) + call multiply_poly_b1(b,c,nc,d,nd) + return + case (2) + call multiply_poly_b2(b,c,nc,d,nd) + return + end select + + select case (nc) + case (0) + call multiply_poly_c0(b,nb,c,d,nd) + return + case (1) + call multiply_poly_c1(b,nb,c,d,nd) + return + case (2) + call multiply_poly_c2(b,nb,c,d,nd) + return + end select + + do ib=0,nb + do ic = 0,nc + d(ib+ic) = d(ib+ic) + c(ic) * b(ib) + enddo + enddo + + do nd = nb+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_b0(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:0), c(0:nc) + double precision, intent(inout) :: d(0:nc) + + integer :: ndtmp + integer :: ic, id, k + if(nc < 0) return !False if nc>=0 do ic = 0,nc d(ic) = d(ic) + c(ic) * b(0) enddo - do ib=1,nb - d(ib) = d(ib) + c(0) * b(ib) - do ic = 1,nc - d(ib+ic) = d(ib+ic) + c(ic) * b(ib) - enddo - enddo - - do nd = ndtmp,0,-1 - if (d(nd) == 0.d0) then - cycle - endif - exit + do nd = nc,0,-1 + if (d(nd) /= 0.d0) exit enddo end +subroutine multiply_poly_b1(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:1), c(0:nc) + double precision, intent(inout) :: d(0:1+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nc < 0) return !False if nc>=0 + + + select case (nc) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + + case default + d(0) = d(0) + c(0) * b(0) + do ic = 1,nc + d(ic) = d(ic) + c(ic) * b(0) + c(ic-1) * b(1) + enddo + d(nc+1) = d(nc+1) + c(nc) * b(1) + + end select + + do nd = 1+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_b2(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:2), c(0:nc) + double precision, intent(inout) :: d(0:2+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nc < 0) return !False if nc>=0 + + select case (nc) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + d(2) = d(2) + c(0) * b(2) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + d(3) = d(3) + c(1) * b(2) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + c(1) * b(2) + d(4) = d(4) + c(2) * b(2) + + case default + + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ic = 2,nc + d(ic) = d(ic) + c(ic) * b(0) + c(ic-1) * b(1) + c(ic-2) * b(2) + enddo + d(nc+1) = d(nc+1) + c(nc) * b(1) + c(nc-1) * b(2) + d(nc+2) = d(nc+2) + c(nc) * b(2) + + end select + + do nd = 2+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c0(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:0) + double precision, intent(inout) :: d(0:nb) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + do ib=0,nb + d(ib) = d(ib) + c(0) * b(ib) + enddo + + do nd = nb,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c1(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:1) + double precision, intent(inout) :: d(0:nb+1) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + + case default + d(0) = d(0) + c(0) * b(0) + do ib=1,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + + end select + + do nd = nb+1,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c2(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:2) + double precision, intent(inout) :: d(0:nb+2) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + d(2) = d(2) + c(2) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(1) * b(2) + c(2) * b(1) + d(4) = d(4) + c(2) * b(2) + + case default + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ib=2,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1) + d(nb+2) = d(nb+2) + c(2) * b(nb) + + end select + + do nd = nb+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + + + subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points) implicit none BEGIN_DOC @@ -685,11 +942,11 @@ end subroutine recentered_poly2_v subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, n_points) BEGIN_DOC - ! + ! ! Recenter two polynomials. Special case for b=(0,0,0) - ! + ! ! (x - A)^a (x - B)^0 = (x - P + P - A)^a (x - Q + Q - B)^0 - ! = (x - P + P - A)^a + ! = (x - P + P - A)^a ! END_DOC diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 3b43d607..69873bc0 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1823,41 +1823,39 @@ subroutine pivoted_cholesky( A, rank, tol, ndim, U) ! U is allocated inside this subroutine ! rank is the number of Cholesky vectors depending on tol ! -integer :: ndim -integer, intent(inout) :: rank -double precision, dimension(ndim, ndim), intent(inout) :: A -double precision, dimension(ndim, rank), intent(out) :: U -double precision, intent(in) :: tol +integer :: ndim +integer, intent(inout) :: rank +double precision, intent(inout) :: A(ndim, ndim) +double precision, intent(out) :: U(ndim, rank) +double precision, intent(in) :: tol integer, dimension(:), allocatable :: piv double precision, dimension(:), allocatable :: work character, parameter :: uplo = "U" -integer :: N, LDA +integer :: LDA integer :: info integer :: k, l, rank0 -external :: dpstrf rank0 = rank -N = size(A, dim=1) -LDA = N -allocate(piv(N)) -allocate(work(2*N)) -call dpstrf(uplo, N, A, LDA, piv, rank, tol, work, info) +LDA = ndim +allocate(piv(ndim)) +allocate(work(2*ndim)) +call dpstrf(uplo, ndim, A, LDA, piv, rank, tol, work, info) if (rank > rank0) then print *, 'Bug: rank > rank0 in pivoted cholesky. Increase rank before calling' stop end if -do k = 1, N - A(k+1:, k) = 0.00D+0 +do k = 1, ndim + A(k+1:ndim, k) = 0.00D+0 end do ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer ! to do the swapping in-place U(:,:) = 0.00D+0 -do k = 1, N +do k = 1, ndim l = piv(k) - U(l, :) = A(1:rank, k) + U(l, 1:rank) = A(1:rank, k) end do end subroutine pivoted_cholesky diff --git a/src/utils_cc/energy.irp.f b/src/utils_cc/energy.irp.f index 33e0cbae..fc1451ba 100644 --- a/src/utils_cc/energy.irp.f +++ b/src/utils_cc/energy.irp.f @@ -5,9 +5,8 @@ subroutine det_energy(det,energy) integer(bit_kind), intent(in) :: det double precision, intent(out) :: energy + double precision, external :: diag_H_mat_elem - call i_H_j(det,det,N_int,energy) + energy = diag_H_mat_elem(det,N_int) + nuclear_repulsion - energy = energy + nuclear_repulsion - end diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 9e244d82..485d7002 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -13,7 +13,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f) integer :: i1,i2,idx1,idx2 allocate(tmp_F(mo_num,mo_num)) - + call get_fock_matrix_spin(det,1,tmp_F) !$OMP PARALLEL & @@ -32,7 +32,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f) !$OMP END PARALLEL deallocate(tmp_F) - + end ! V @@ -45,63 +45,66 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer, intent(in) :: list1(n1),list2(n2),list3(n3),list4(n4) double precision, intent(out) :: v(n1,n2,n3,n4) - integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4 - double precision :: get_two_e_integral - - PROVIDE mo_two_e_integrals_in_map + integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k + double precision, allocatable :: buffer(:,:,:) !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)& + !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO do i4 = 1, n4 - do i3 = 1, n3 - do i2 = 1, n2 + idx4 = list4(i4) + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, n2 + idx2 = list2(i2) + do i3 = 1, n3 + idx3 = list3(i3) do i1 = 1, n1 - idx4 = list4(i4) - idx3 = list3(i3) - idx2 = list2(i2) idx1 = list1(i1) - v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map) + v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) enddo enddo enddo enddo !$OMP END DO + deallocate(buffer) !$OMP END PARALLEL - + + end ! full BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] - implicit none - - integer :: i,j,k,l - double precision :: get_two_e_integral - - PROVIDE mo_two_e_integrals_in_map - + integer :: i1,i2,i3,i4,k + double precision, allocatable :: buffer(:,:,:) !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) & - !$OMP PRIVATE(i,j,k,l) & + !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& !$OMP DEFAULT(NONE) - - !$OMP DO collapse(3) - do l = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do i = 1, mo_num - cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO + do i4 = 1, mo_num + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, mo_num + do i3 = 1, mo_num + do i1 = 1, mo_num + cc_space_v(i1,i2,i3,i4) = buffer(i1,i3,i2) enddo enddo enddo enddo !$OMP END DO + deallocate(buffer) !$OMP END PARALLEL - + END_PROVIDER ! oooo @@ -280,7 +283,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo)) call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v) - + do q = 1, cc_n_mo do p = 1, cc_n_mo cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q) @@ -382,7 +385,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)] enddo FREE cc_space_v_vvvv - + END_PROVIDER ! iaia @@ -467,7 +470,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n integer :: i,j,a,b allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa)) - + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v) !$OMP PARALLEL & @@ -501,7 +504,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n integer :: i,j,a,b allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa)) - + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v) !$OMP PARALLEL & @@ -613,7 +616,7 @@ subroutine shift_idx_spin(s,n_S,shift) else shift = n_S(1) endif - + end ! F @@ -626,21 +629,22 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) ! Compute the Fock matrix corresponding to two lists of spin orbitals. ! Ex: occ/occ, occ/vir,... END_DOC - + integer(bit_kind), intent(in) :: det(N_int,2) integer, intent(in) :: n1,n2, n1_S(2), n2_S(2) integer, intent(in) :: list1(n1,2), list2(n2,2) integer, intent(in) :: dim1, dim2 - + double precision, intent(out) :: f(dim1, dim2) double precision, allocatable :: tmp_F(:,:) integer :: i,j, idx_i,idx_j,i_shift,j_shift integer :: tmp_i,tmp_j integer :: si,sj,s + PROVIDE big_array_exchange_integrals big_array_coulomb_integrals allocate(tmp_F(mo_num,mo_num)) - + do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 @@ -669,9 +673,9 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) enddo enddo - + deallocate(tmp_F) - + end ! Get F @@ -683,12 +687,12 @@ subroutine get_fock_matrix_spin(det,s,f) BEGIN_DOC ! Fock matrix alpha or beta of an arbitrary det END_DOC - + integer(bit_kind), intent(in) :: det(N_int,2) integer, intent(in) :: s - + double precision, intent(out) :: f(mo_num,mo_num) - + integer :: p,q,i,s1,s2 integer(bit_kind) :: res(N_int,2) logical :: ok @@ -701,9 +705,11 @@ subroutine get_fock_matrix_spin(det,s,f) s1 = 2 s2 = 1 endif - + + PROVIDE big_array_coulomb_integrals big_array_exchange_integrals + !$OMP PARALLEL & - !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) & + !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals,big_array_coulomb_integrals,big_array_exchange_integrals) & !$OMP PRIVATE(p,q,ok,i,res)& !$OMP DEFAULT(NONE) !$OMP DO collapse(1) @@ -713,20 +719,21 @@ subroutine get_fock_matrix_spin(det,s,f) do i = 1, mo_num call apply_hole(det, s1, i, res, ok, N_int) if (ok) then - f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) +! f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) + f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) - big_array_exchange_integrals(i,p,q) endif enddo do i = 1, mo_num call apply_hole(det, s2, i, res, ok, N_int) if (ok) then - f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) + f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) endif enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - + end ! V @@ -752,14 +759,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, integer :: si,sj,sk,sl,s PROVIDE cc_space_v - + !$OMP PARALLEL & !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sk = 1, 2 @@ -768,7 +775,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then @@ -776,7 +783,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -792,14 +799,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(3) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -815,14 +822,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(3) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -843,7 +850,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -859,13 +866,13 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo !$OMP END DO endif - + enddo enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx @@ -900,28 +907,28 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, call shift_idx_spin(sl,n4_S,l_shift) tmp_l = idx_l - l_shift l = list4(tmp_l,sl) - + !$OMP PARALLEL & !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & !$OMP i,j,k,idx_i,idx_j,idx_k,& !$OMP tmp_i,tmp_j,tmp_k)& !$OMP DEFAULT(NONE) - + do sk = 1, 2 call shift_idx_spin(sk,n3_S,k_shift) do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -934,13 +941,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -953,13 +960,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -976,7 +983,7 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -989,12 +996,12 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx_ij_l @@ -1029,28 +1036,28 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l call shift_idx_spin(sk,n3_S,k_shift) tmp_k = idx_k - k_shift k = list3(tmp_k,sk) - + !$OMP PARALLEL & !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & !$OMP i,j,l,idx_i,idx_j,idx_l,& !$OMP tmp_i,tmp_j,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1063,13 +1070,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1082,13 +1089,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1105,7 +1112,7 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1118,12 +1125,12 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx_i_kl @@ -1158,28 +1165,28 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l call shift_idx_spin(sj,n2_S,j_shift) tmp_j = idx_j - j_shift j = list2(tmp_j,sj) - + !$OMP PARALLEL & !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & !$OMP i,k,l,idx_i,idx_k,idx_l,& !$OMP tmp_i,tmp_k,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sk = 1, 2 call shift_idx_spin(sk,n3_S,k_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1192,13 +1199,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1211,13 +1218,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1234,7 +1241,7 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1247,10 +1254,10 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end diff --git a/src/utils_cc/update_t.irp.f b/src/utils_cc/update_t.irp.f index dbd4f4bd..0cf8626c 100644 --- a/src/utils_cc/update_t.irp.f +++ b/src/utils_cc/update_t.irp.f @@ -22,7 +22,7 @@ subroutine update_t1(nO,nV,f_o,f_v,r1,t1) !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) & !$OMP PRIVATE(i,a) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(1) + !$OMP DO do a = 1, nV do i = 1, nO t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) @@ -57,7 +57,7 @@ subroutine update_t2(nO,nV,f_o,f_v,r2,t2) !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO