diff --git a/configure b/configure index 66bc9419..48e6fd12 100755 --- a/configure +++ b/configure @@ -215,7 +215,6 @@ EOF cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} --without-hdf5 make -j 8 && make -j 8 check && make -j 8 install - cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz mv ninja "\${QP_ROOT}"/bin/ EOF @@ -229,7 +228,6 @@ EOF cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} make -j 8 && make -j 8 check && make -j 8 install - cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files EOF diff --git a/ocaml/Input_ao_basis.ml b/ocaml/Input_ao_basis.ml index 841089ea..506cf069 100644 --- a/ocaml/Input_ao_basis.ml +++ b/ocaml/Input_ao_basis.ml @@ -44,8 +44,12 @@ end = struct let get_default = Qpackage.get_ezfio_default "ao_basis";; let read_ao_basis () = - Ezfio.get_ao_basis_ao_basis () - |> AO_basis_name.of_string + let result = + Ezfio.get_ao_basis_ao_basis () + in + if result <> "None" then + AO_basis_name.of_string result + else failwith "No basis" ;; let read_ao_num () = @@ -192,7 +196,7 @@ end = struct ao_expo ; ao_cartesian ; ao_normalized ; - primitives_normalized ; + primitives_normalized ; } = b in write_md5 b ; @@ -207,7 +211,7 @@ end = struct Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; - let ao_nucl = + let ao_nucl = Array.to_list ao_nucl |> list_map Nucl_number.to_int in @@ -215,7 +219,7 @@ end = struct ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; let ao_power = - let l = Array.to_list ao_power in + let l = Array.to_list ao_power in List.concat [ (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.x) l) ; (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.y) l) ; @@ -227,7 +231,7 @@ end = struct Ezfio.set_ao_basis_ao_cartesian(ao_cartesian); Ezfio.set_ao_basis_ao_normalized(ao_normalized); Ezfio.set_ao_basis_primitives_normalized(primitives_normalized); - + let ao_coef = Array.to_list ao_coef |> list_map AO_coef.to_float @@ -267,7 +271,10 @@ end = struct |> Ezfio.set_ao_basis_ao_md5 ; Some result with - | _ -> (Ezfio.set_ao_basis_ao_md5 "None" ; None) + | _ -> ( "None" + |> Digest.string + |> Digest.to_hex + |> Ezfio.set_ao_basis_ao_md5 ; None) ;; @@ -276,7 +283,7 @@ end = struct to_basis b |> Long_basis.of_basis |> Array.of_list - and unordered_basis = + and unordered_basis = to_long_basis b |> Array.of_list in @@ -289,15 +296,15 @@ end = struct (a.(i) <- None ; i) else find x a (i+1) - and find2 (s,g,n) a i = + and find2 (s,g,n) a i = if i = Array.length a then -1 else - match a.(i) with + match a.(i) with | None -> find2 (s,g,n) a (i+1) | Some (s', g', n') -> if s <> s' || n <> n' then find2 (s,g,n) a (i+1) else - let lc = list_map (fun (prim, _) -> prim) g.Gto.lc + let lc = list_map (fun (prim, _) -> prim) g.Gto.lc and lc' = list_map (fun (prim, _) -> prim) g'.Gto.lc in if lc <> lc' then find2 (s,g,n) a (i+1) else (a.(i) <- None ; i) @@ -313,13 +320,13 @@ end = struct let ao_num = List.length long_basis |> AO_number.of_int in let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc - |> AO_prim_number.of_int ) long_basis + |> AO_prim_number.of_int ) long_basis |> Array.of_list and ao_nucl = - list_map (fun (_,_,n) -> n) long_basis + list_map (fun (_,_,n) -> n) long_basis |> Array.of_list and ao_power = - list_map (fun (x,_,_) -> x) long_basis + list_map (fun (x,_,_) -> x) long_basis |> Array.of_list in let ao_prim_num_max = Array.fold_left (fun s x -> @@ -329,16 +336,16 @@ end = struct in let gtos = - list_map (fun (_,x,_) -> x) long_basis + list_map (fun (_,x,_) -> x) long_basis in let create_expo_coef ec = let coefs = begin match ec with | `Coefs -> list_map (fun x-> - list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos + list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos | `Expos -> list_map (fun x-> list_map (fun (prim,_) -> AO_expo.to_float - prim.GaussianPrimitive.expo) x.Gto.lc ) gtos + prim.GaussianPrimitive.expo) x.Gto.lc ) gtos end in let rec get_n n accu = function @@ -360,7 +367,7 @@ end = struct let ao_coef = create_expo_coef `Coefs |> Array.of_list |> Array.map AO_coef.of_float - and ao_expo = create_expo_coef `Expos + and ao_expo = create_expo_coef `Expos |> Array.of_list |> Array.map AO_expo.of_float in @@ -372,7 +379,7 @@ end = struct } ;; - let reorder b = + let reorder b = let order = ordering b in let f a = Array.init (Array.length a) (fun i -> a.(order.(i))) in let ao_prim_num_max = AO_prim_number.to_int b.ao_prim_num_max @@ -464,7 +471,7 @@ Basis set (read-only) :: | line :: tail -> let line = String.trim line in if line = "Basis set (read-only) ::" then - String.concat "\n" tail + String.concat "\n" tail else extract_basis tail in diff --git a/ocaml/Input_mo_basis.ml b/ocaml/Input_mo_basis.ml index a4e6176a..832b464e 100644 --- a/ocaml/Input_mo_basis.ml +++ b/ocaml/Input_mo_basis.ml @@ -56,7 +56,10 @@ end = struct let read_ao_md5 () = let ao_md5 = match (Input_ao_basis.Ao_basis.read ()) with - | None -> failwith "Unable to read AO basis" + | None -> ("None" + |> Digest.string + |> Digest.to_hex + |> MD5.of_string) | Some result -> Input_ao_basis.Ao_basis.to_md5 result in let result = diff --git a/src/trexio/qp_import_trexio.py b/scripts/qp_import_trexio.py similarity index 69% rename from src/trexio/qp_import_trexio.py rename to scripts/qp_import_trexio.py index de8d1269..89096387 100755 --- a/src/trexio/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -13,12 +13,17 @@ Options: import sys import os -import trexio import numpy as np from functools import reduce from ezfio import ezfio from docopt import docopt +try: + import trexio +except ImportError: + print("Error: trexio python module is not found. Try python3 -m pip install trexio") + sys.exit(1) + try: QP_ROOT = os.environ["QP_ROOT"] @@ -90,14 +95,15 @@ def write_ezfio(trexio_filename, filename): p = re.compile(r'(\d*)$') label = [p.sub("", x).capitalize() for x in label] ezfio.set_nuclei_nucl_label(label) + print("OK") else: ezfio.set_nuclei_nucl_num(1) ezfio.set_nuclei_nucl_charge([0.]) ezfio.set_nuclei_nucl_coord([0.,0.,0.]) ezfio.set_nuclei_nucl_label(["X"]) + print("None") - print("OK") print("Electrons\t...\t", end=' ') @@ -105,12 +111,12 @@ def write_ezfio(trexio_filename, filename): try: num_beta = trexio.read_electron_dn_num(trexio_file) except: - num_beta = sum(charge)//2 + num_beta = int(sum(charge))//2 try: num_alpha = trexio.read_electron_up_num(trexio_file) except: - num_alpha = sum(charge) - num_beta + num_alpha = int(sum(charge)) - num_beta if num_alpha == 0: print("\n\nError: There are zero electrons in the TREXIO file.\n\n") @@ -118,7 +124,7 @@ def write_ezfio(trexio_filename, filename): ezfio.set_electrons_elec_alpha_num(num_alpha) ezfio.set_electrons_elec_beta_num(num_beta) - print("OK") + print(f"{num_alpha} {num_beta}") print("Basis\t\t...\t", end=' ') @@ -126,60 +132,113 @@ def write_ezfio(trexio_filename, filename): try: basis_type = trexio.read_basis_type(trexio_file) - if basis_type.lower() not in ["gaussian", "slater"]: - raise TypeError + if basis_type.lower() in ["gaussian", "slater"]: + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = trexio.read_basis_prim_num(trexio_file) + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = trexio.read_basis_exponent(trexio_file) + coefficient = trexio.read_basis_coefficient(trexio_file) + shell_index = trexio.read_basis_shell_index(trexio_file) + ao_shell = trexio.read_ao_shell(trexio_file) - shell_num = trexio.read_basis_shell_num(trexio_file) - prim_num = trexio.read_basis_prim_num(trexio_file) - ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) - nucl_index = trexio.read_basis_nucleus_index(trexio_file) - exponent = trexio.read_basis_exponent(trexio_file) - coefficient = trexio.read_basis_coefficient(trexio_file) - shell_index = trexio.read_basis_shell_index(trexio_file) - ao_shell = trexio.read_ao_shell(trexio_file) + ezfio.set_basis_basis("Read from TREXIO") + ezfio.set_ao_basis_ao_basis("Read from TREXIO") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) - ezfio.set_basis_basis("Read from TREXIO") - ezfio.set_basis_shell_num(shell_num) - ezfio.set_basis_prim_num(prim_num) - ezfio.set_basis_shell_ang_mom(ang_mom) - ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) - ezfio.set_basis_prim_expo(exponent) - ezfio.set_basis_prim_coef(coefficient) + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) - nucl_shell_num = [] - prev = None - m = 0 - for i in ao_shell: - if i != prev: - m += 1 - if prev is None or nucl_index[i] != nucl_index[prev]: - nucl_shell_num.append(m) - m = 0 - prev = i - assert (len(nucl_shell_num) == nucl_num) + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) - shell_prim_num = [] - prev = shell_index[0] - count = 0 - for i in shell_index: - if i != prev: - shell_prim_num.append(count) - count = 0 - count += 1 - prev = i - shell_prim_num.append(count) + assert (len(shell_prim_num) == shell_num) - assert (len(shell_prim_num) == shell_num) - - ezfio.set_basis_shell_prim_num(shell_prim_num) - ezfio.set_basis_shell_index([x+1 for x in shell_index]) - ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) - shell_factor = trexio.read_basis_shell_factor(trexio_file) - prim_factor = trexio.read_basis_prim_factor(trexio_file) + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = trexio.read_basis_prim_factor(trexio_file) - print("OK") + elif basis_type.lower() == "numerical": + + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = shell_num + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = [1.]*prim_num + coefficient = [1.]*prim_num + shell_index = [i for i in range(shell_num)] + ao_shell = trexio.read_ao_shell(trexio_file) + + ezfio.set_basis_basis("None") + ezfio.set_ao_basis_ao_basis("None") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) + + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) + + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) + + assert (len(shell_prim_num) == shell_num) + + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = [1.]*prim_num + else: + raise TypeError + + print(basis_type) except: print("None") ezfio.set_ao_basis_ao_cartesian(True) @@ -256,9 +315,11 @@ def write_ezfio(trexio_filename, filename): # ezfio.set_ao_basis_ao_prim_num_max(prim_num_max) ezfio.set_ao_basis_ao_coef(coef) ezfio.set_ao_basis_ao_expo(expo) - ezfio.set_ao_basis_ao_basis("Read from TREXIO") - print("OK") + print("OK") + + else: + print("None") # _ @@ -279,6 +340,7 @@ def write_ezfio(trexio_filename, filename): except: label = "None" ezfio.set_mo_basis_mo_label(label) + ezfio.set_determinants_mo_label(label) try: clss = trexio.read_mo_class(trexio_file) @@ -303,10 +365,10 @@ def write_ezfio(trexio_filename, filename): for i in range(num_beta): mo_occ[i] += 1. ezfio.set_mo_basis_mo_occ(mo_occ) + print("OK") except: - pass + print("None") - print("OK") print("Pseudos\t\t...\t", end=' ') @@ -386,9 +448,10 @@ def write_ezfio(trexio_filename, filename): ezfio.set_pseudo_pseudo_n_kl(pseudo_n_kl) ezfio.set_pseudo_pseudo_v_kl(pseudo_v_kl) ezfio.set_pseudo_pseudo_dz_kl(pseudo_dz_kl) + print("OK") - - print("OK") + else: + print("None") diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index d4c995e6..9f523fca 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -4,6 +4,19 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[ao_integrals_threshold] +type: Threshold +doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero +interface: ezfio,provider,ocaml +default: 1.e-15 +ezfio_name: threshold_ao + +[ao_cholesky_threshold] +type: Threshold +doc: If | (ii|jj) | < `ao_cholesky_threshold` then (ii|jj) is zero +interface: ezfio,provider,ocaml +default: 1.e-12 + [do_direct_integrals] type: logical doc: Compute integrals on the fly (very slow, only for debugging) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d4c201aa..bb81b141 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -4,29 +4,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] ! Number of Cholesky vectors in AO basis END_DOC - integer :: i,j,k,l - double precision :: xnorm0, x, integral - double precision, external :: ao_two_e_integral - - cholesky_ao_num_guess = 0 - xnorm0 = 0.d0 - x = 0.d0 - do j=1,ao_num - do i=1,ao_num - integral = ao_two_e_integral(i,i,j,j) - if (integral > ao_integrals_threshold) then - cholesky_ao_num_guess += 1 - else - x += integral - endif - enddo - enddo - print *, 'Cholesky decomposition of AO integrals' - print *, '--------------------------------------' - print *, '' - print *, 'Estimated Error: ', x - print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)' - + cholesky_ao_num_guess = ao_num*ao_num / 2 END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -39,7 +17,7 @@ END_PROVIDER END_DOC type(c_ptr) :: ptr - integer :: fd, i,j,k,l, rank + integer :: fd, i,j,k,l,m,rank double precision, pointer :: ao_integrals(:,:,:,:) double precision, external :: ao_two_e_integral @@ -49,28 +27,90 @@ END_PROVIDER 8, fd, .False., ptr) call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) - double precision :: integral + print*, 'Providing the AO integrals (Cholesky)' + call wall_time(wall_1) + call cpu_time(cpu_1) + + ao_integrals = 0.d0 + + double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 logical, external :: ao_two_e_integral_zero - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic) - do l=1,ao_num - do j=1,l - do k=1,ao_num - do i=1,k - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - enddo + double precision, external :: get_ao_two_e_integral + + if (read_ao_two_e_integrals) then + PROVIDE ao_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP MASTER + call wall_time(wall_2) + print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL + + else + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP MASTER + call wall_time(wall_2) + print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + call wall_time(wall_2) + call cpu_time(cpu_2) + print*, 'AO integrals provided:' + print*, ' cpu time :',cpu_2 - cpu_1, 's' + print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' + + endif ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess - call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao) + call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' ! Remove mmap diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 82ffbc90..835dc89a 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -590,8 +590,20 @@ double precision function general_primitive_integral(dim, & d_poly(i)=0.d0 enddo - !DIR$ FORCEINLINE - call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) +! call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) + integer :: ib, ic + if (ior(n_Ix,n_Iy) >= 0) then + do ib=0,n_Ix + do ic = 0,n_Iy + d_poly(ib+ic) = d_poly(ib+ic) + Iy_pol(ic) * Ix_pol(ib) + enddo + enddo + + do n_pt_tmp = n_Ix+n_Iy, 0, -1 + if (d_poly(n_pt_tmp) /= 0.d0) exit + enddo + endif + if (n_pt_tmp == -1) then return endif @@ -600,8 +612,21 @@ double precision function general_primitive_integral(dim, & d1(i)=0.d0 enddo - !DIR$ FORCEINLINE - call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) +! call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) + if (ior(n_pt_tmp,n_Iz) >= 0) then + ! Bottleneck here + do ib=0,n_pt_tmp + do ic = 0,n_Iz + d1(ib+ic) = d1(ib+ic) + Iz_pol(ic) * d_poly(ib) + enddo + enddo + + do n_pt_out = n_pt_tmp+n_Iz, 0, -1 + if (d1(n_pt_out) /= 0.d0) exit + enddo + endif + + double precision :: rint_sum accu = accu + rint_sum(n_pt_out,const,d1) @@ -948,8 +973,20 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= dble(a-1) enddo - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_10,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_10,2,d,nd) + if (nx >= 0) then + integer :: ib + do ib=0,nx + d(ib ) = d(ib ) + B_10(0) * X(ib) + d(ib+1) = d(ib+1) + B_10(1) * X(ib) + d(ib+2) = d(ib+2) + B_10(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif nx = nd !DIR$ LOOP COUNT(8) @@ -970,8 +1007,19 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= c enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + if (nx >= 0) then + do ib=0,nx + d(ib ) = d(ib ) + B_00(0) * X(ib) + d(ib+1) = d(ib+1) + B_00(1) * X(ib) + d(ib+2) = d(ib+2) + B_00(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif endif ny=0 @@ -988,9 +1036,19 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt call I_x1_pol_mult_recurs(a-1,c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) endif - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + C_00(0) * Y(ib) + d(ib+1) = d(ib+1) + C_00(1) * Y(ib) + d(ib+2) = d(ib+2) + C_00(2) * Y(ib) + enddo + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1028,8 +1086,20 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + if (nx >= 0) then + integer :: ib + do ib=0,nx + d(ib ) = d(ib ) + B_00(0) * X(ib) + d(ib+1) = d(ib+1) + B_00(1) * X(ib) + d(ib+2) = d(ib+2) + B_00(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif ny=0 @@ -1039,8 +1109,19 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + C_00(0) * Y(ib) + d(ib+1) = d(ib+1) + C_00(1) * Y(ib) + d(ib+2) = d(ib+2) + C_00(2) * Y(ib) + enddo + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end @@ -1067,8 +1148,20 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) nx = 0 call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_10,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_10,2,d,nd) + if (nx >= 0) then + integer :: ib + do ib=0,nx + d(ib ) = d(ib ) + B_10(0) * X(ib) + d(ib+1) = d(ib+1) + B_10(1) * X(ib) + d(ib+2) = d(ib+2) + B_10(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif nx = nd !DIR$ LOOP COUNT(8) @@ -1086,8 +1179,19 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + if (nx >= 0) then + do ib=0,nx + d(ib ) = d(ib ) + B_00(0) * X(ib) + d(ib+1) = d(ib+1) + B_00(1) * X(ib) + d(ib+2) = d(ib+2) + B_00(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif ny=0 !DIR$ LOOP COUNT(8) @@ -1097,9 +1201,19 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) !DIR$ FORCEINLINE call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + C_00(0) * Y(ib) + d(ib+1) = d(ib+1) + C_00(1) * Y(ib) + d(ib+2) = d(ib+2) + C_00(2) * Y(ib) + enddo + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1146,8 +1260,21 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(1) = D_00(1) Y(2) = D_00(2) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,D_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,D_00,2,d,nd) + if (ny >= 0) then + integer :: ib + do ib=0,ny + d(ib ) = d(ib ) + D_00(0) * Y(ib) + d(ib+1) = d(ib+1) + D_00(1) * Y(ib) + d(ib+2) = d(ib+2) + D_00(2) * Y(ib) + enddo + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif + return case default @@ -1164,8 +1291,19 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) X(ix) *= dble(c-1) enddo - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_01,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_01,2,d,nd) + if (nx >= 0) then + do ib=0,nx + d(ib ) = d(ib ) + B_01(0) * X(ib) + d(ib+1) = d(ib+1) + B_01(1) * X(ib) + d(ib+2) = d(ib+2) + B_01(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif ny = 0 !DIR$ LOOP COUNT(6) @@ -1174,8 +1312,19 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,D_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,D_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + D_00(0) * Y(ib) + d(ib+1) = d(ib+1) + D_00(1) * Y(ib) + d(ib+2) = d(ib+2) + D_00(2) * Y(ib) + enddo + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end select end @@ -1233,3 +1382,34 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) enddo end + + +subroutine multiply_poly_local(b,nb,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb, nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:nc) + double precision, intent(inout) :: d(0:nb+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 + + do ib=0,nb + do ic = 0,nc + d(ib+ic) = d(ib+ic) + c(ic) * b(ib) + enddo + enddo + + do nd = nb+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index b63375cf..1467d9a4 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1,5 +1,5 @@ subroutine run_ccsd_space_orb - + implicit none integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d @@ -12,34 +12,30 @@ subroutine run_ccsd_space_orb double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) double precision, allocatable :: t1(:,:), r1(:,:) double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) - + double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) integer(bit_kind) :: det(N_int,2) - integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) - - PROVIDE mo_two_e_integrals_in_map + integer :: nO, nV, nOa, nVa + +! PROVIDE mo_two_e_integrals_in_map det = psi_det(:,:,cc_ref) print*,'Reference determinant:' call print_det(det,N_int) - ! Extract number of occ/vir alpha/beta spin orbitals - !call extract_n_spin(det,n_spin) - nOa = cc_nOa !n_spin(1) - nOb = cc_nOb !n_spin(2) - nVa = cc_nVa !n_spin(3) - nVb = cc_nVb !n_spin(4) + nOa = cc_nOa + nVa = cc_nVa ! Check that the reference is a closed shell determinant if (cc_ref_is_open_shell) then call abort endif - + ! Number of occ/vir spatial orb nO = nOa nV = nVa - + allocate(list_occ(nO),list_vir(nV)) list_occ = cc_list_occ list_vir = cc_list_vir @@ -47,7 +43,7 @@ subroutine run_ccsd_space_orb !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) !print*,'occ',list_occ !print*,'vir',list_vir - + allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) allocate(tau(nO,nO,nV,nV)) allocate(t1(nO,nV), r1(nO,nV)) @@ -76,7 +72,7 @@ subroutine run_ccsd_space_orb print*,'Det energy', uncorr_energy call ccsd_energy_space(nO,nV,tau,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy - + nb_iter = 0 not_converged = .True. max_r1 = 0d0 @@ -86,9 +82,9 @@ subroutine run_ccsd_space_orb write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' write(*,'(A77)') ' -----------------------------------------------------------------------------' call wall_time(ta) - + do while (not_converged) - + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) call compute_H_vv(nO,nV,t1,t2,tau,H_vv) call compute_H_vo(nO,nV,t1,t2,H_vo) @@ -97,7 +93,7 @@ subroutine run_ccsd_space_orb call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) max_r = max(max_r1,max_r2) - + ! Update if (cc_update_method == 'diis') then !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) @@ -109,7 +105,7 @@ subroutine run_ccsd_space_orb call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) else - print*,'Unkonw cc_method_method: '//cc_update_method + print*,'Unkown cc_method_method: '//cc_update_method endif call update_tau_space(nO,nV,t1,t2,tau) @@ -122,7 +118,7 @@ subroutine run_ccsd_space_orb if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then not_converged = .False. endif - + enddo write(*,'(A77)') ' -----------------------------------------------------------------------------' call wall_time(tb) @@ -141,18 +137,18 @@ subroutine run_ccsd_space_orb call write_t1(nO,nV,t1) call write_t2(nO,nV,t2) - + ! Deallocation if (cc_update_method == 'diis') then deallocate(all_err,all_t) endif deallocate(H_vv,H_oo,H_vo,r1,r2,tau) - + ! CCSD(T) double precision :: e_t - if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then + if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then ! Dumb way !call wall_time(ta) @@ -169,8 +165,13 @@ subroutine run_ccsd_space_orb ! New print*,'Computing (T) correction...' call wall_time(ta) - call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & +! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & +! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + + e_t = uncorr_energy + energy ! For print in next call + call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call wall_time(tb) print*,'Time: ',tb-ta, ' s' @@ -180,7 +181,7 @@ subroutine run_ccsd_space_orb write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' print*,'' endif - + print*,'Reference determinant:' call print_det(det,N_int) @@ -211,8 +212,8 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) !$omp default(none) e = 0d0 !$omp do - do i = 1, nO - do a = 1, nV + do a = 1, nV + do i = 1, nO e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) enddo enddo @@ -232,7 +233,7 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) energy = energy + e !$omp end critical !$omp end parallel - + end ! Tau @@ -250,12 +251,12 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) ! internal integer :: i,j,a,b - + !$OMP PARALLEL & !$OMP SHARED(nO,nV,tau,t2,t1) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO @@ -267,7 +268,7 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) enddo !$OMP END DO !$OMP END PARALLEL - + end ! R1 @@ -283,7 +284,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! out double precision, intent(out) :: r1(nO,nV), max_r1 - + ! internal integer :: u,i,j,beta,a,b @@ -304,7 +305,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO) ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV) ! cc_space_f_vo(a,i) * t1(u,a) -> X1(nO,nO), O(nO*nO*nV) - ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) + ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO @@ -324,7 +325,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call dgemm('T','N', nO, nV, nO, & 1d0, X_oo, size(X_oo,2), & t1 , size(t1,1), & - 1d0, r1 , size(r1,1)) + 1d0, r1 , size(r1,1)) deallocate(X_oo) ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) @@ -373,7 +374,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,X_voov,t2,t1) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do i = 1, nO @@ -385,16 +386,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do !$omp end parallel - + call dgemv('T', nV*nO, nO*nV, & 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & H_vo , 1, & 1d0, r1 , 1) - + deallocate(X_voov) ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) - ! <=> + ! <=> ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta) !do beta = 1, nV ! do u = 1, nO @@ -412,7 +413,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nv @@ -429,17 +430,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & t1 , 1, & 1d0, r1 , 1) - + deallocate(X_ovov) - ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) - ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO ! do a = 1, nV ! do b = 1, nV - ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) ! enddo ! enddo ! enddo @@ -452,24 +453,24 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) enddo enddo enddo enddo !$omp end do nowait - !$omp do collapse(3) - do i = 1, nO - do b = 1, nV - do a = 1, nV - do u = 1, nO - T_vvoo(a,b,i,u) = tau(i,u,a,b) + !$omp do + do u = 1, nO + do i = 1, nO + do b = 1, nV + do a = 1, nV + T_vvoo(a,b,i,u) = tau(i,u,a,b) enddo enddo enddo @@ -481,17 +482,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & 1d0, r1 , size(r1,1)) - + deallocate(W_vvov,T_vvoo) - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) - ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO ! do j = 1, nO ! do a = 1, nV - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) ! enddo ! enddo ! enddo @@ -504,8 +505,8 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & !$omp private(u,a,i,j) & !$omp default(none) - !$omp do collapse(3) do u = 1, nO + !$omp do do a = 1, nV do j = 1, nO do i = 1, nO @@ -513,23 +514,21 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('T','N', nO, nV, nO*nO*nV, & -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & tau , size(tau,1) * size(tau,2) * size(tau,3), & 1d0, r1 , size(r1,1)) - + deallocate(W_oovo) max_r1 = 0d0 do a = 1, nV do i = 1, nO - if (dabs(r1(i,a)) > max_r1) then - max_r1 = dabs(r1(i,a)) - endif + max_r1 = max(dabs(r1(i,a)), max_r1) enddo enddo @@ -538,7 +537,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,r1) & !$omp private(a,i) & !$omp default(none) - !$omp do + !$omp do do a = 1, nV do i = 1, nO r1(i,a) = -r1(i,a) @@ -546,7 +545,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do !$omp end parallel - + end ! H_oo @@ -578,7 +577,7 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) ! enddo ! enddo ! enddo - ! + ! ! enddo !enddo @@ -601,8 +600,8 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) call dgemm('N','T', nO, nO, nO*nV*nV, & 1d0, tau , size(tau,1), & cc_space_w_oovv, size(cc_space_w_oovv,1), & - 1d0, H_oo , size(H_oo,1)) - + 1d0, H_oo , size(H_oo,1)) + end ! H_vv @@ -633,7 +632,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! enddo ! enddo ! enddo - ! + ! ! enddo !enddo @@ -656,13 +655,13 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) - - !$omp do collapse(3) + + !$omp do do beta = 1, nV do j = 1, nO do i = 1, nO do b = 1, nV - tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) enddo enddo enddo @@ -676,7 +675,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) 1d0, H_vv , size(H_vv,1)) deallocate(tmp_tau) - + end ! H_vo @@ -704,7 +703,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! enddo ! enddo - ! + ! ! enddo !enddo @@ -727,7 +726,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -746,7 +745,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) 1d0, H_vo, 1) deallocate(w) - + end ! R2 @@ -765,13 +764,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! internal double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) - double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:), B1_gam(:,:,:) integer :: u,v,i,j,beta,gam,a,b allocate(g_occ(nO,nO), g_vir(nV,nV)) allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) allocate(A1(nO,nO,nO,nO)) - + call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) call compute_A1(nO,nV,t1,t2,tau,A1) @@ -787,7 +786,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,cc_space_v_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -835,13 +834,18 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! enddo !enddo - allocate(B1(nV,nV,nV,nV)) - call compute_B1(nO,nV,t1,t2,B1) - call dgemm('N','N',nO*nO,nV*nV,nV*nV, & - 1d0, tau, size(tau,1) * size(tau,2), & - B1 , size(B1,1) * size(B1,2), & - 1d0, r2, size(r2,1) * size(r2,2)) - deallocate(B1) +! allocate(B1(nV,nV,nV,nV)) +! call compute_B1(nO,nV,t1,t2,B1) + allocate(B1_gam(nV,nV,nV)) + do gam=1,nV + call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam) + call dgemm('N','N',nO*nO,nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1_gam , size(B1_gam,1) * size(B1_gam,2), & + 1d0, r2(1,1,1,gam), size(r2,1) * size(r2,2)) + enddo + deallocate(B1_gam) + !do gam = 1, nV ! do beta = 1, nV @@ -863,7 +867,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,t2,X_oovv) & !$omp private(u,v,gam,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do a = 1, nV do gam = 1, nV do v = 1, nO @@ -875,7 +879,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nO*nV,nV,nV, & 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & g_vir, size(g_vir,1), & @@ -885,7 +889,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -921,7 +925,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -957,7 +961,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & !$omp private(u,a,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do u = 1, nO @@ -979,7 +983,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -991,7 +995,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1009,13 +1013,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !enddo double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) - + !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & !$omp private(u,v,gam,i) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do u = 1, nO do a = 1, nV @@ -1023,8 +1027,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','N',nV*nO*nV,nV,nO, & @@ -1036,12 +1040,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, t1, size(t1,1), & Y_vovv, size(Y_vovv,1), & 0d0, X_oovv, size(X_oovv,1)) - + !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1055,7 +1059,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end parallel deallocate(X_vovo,Y_vovv) - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1079,7 +1083,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1092,7 +1096,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1111,13 +1115,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, allocatable :: Y_oovo(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) - + !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & !$omp private(a,v,gam,i) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1125,8 +1129,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','N',nO,nO*nV*nO,nV, & @@ -1138,12 +1142,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & t1 , size(t1,1), & 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) - + !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1155,7 +1159,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + deallocate(X_vovo,Y_oovo) !do gam = 1, nV @@ -1182,19 +1186,19 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do a = 1, nV do beta = 1, nV do u = 1, nO - X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) + X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) + !$omp do do gam = 1, nV do v = 1, nO do i = 1, nO @@ -1206,17 +1210,17 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_voov, size(Y_voov,1) * size(Y_voov,2), & 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1228,9 +1232,9 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + deallocate(X_ovvo,Y_voov) - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1252,7 +1256,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nV @@ -1264,7 +1268,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1281,12 +1285,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1298,7 +1302,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1319,7 +1323,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do a = 1, nV do i = 1, nO do gam = 1, nV @@ -1331,7 +1335,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do beta = 1, nV do v = 1, nO do a = 1, nV @@ -1343,17 +1347,17 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1367,13 +1371,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end parallel deallocate(X_ovov,Y_ovov,Z_ovov) - + ! Change the sign for consistency with the code in spin orbitals !$omp parallel & !$omp shared(nO,nV,r2) & !$omp private(i,j,a,b) & !$omp default(none) - !$omp do collapse(3) + !$omp do do b = 1, nV do a = 1, nV do j = 1, nO @@ -1385,22 +1389,20 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + max_r2 = 0d0 do b = 1, nV do a = 1, nV do j = 1, nO do i = 1, nO - if (dabs(r2(i,j,a,b)) > max_r2) then - max_r2 = dabs(r2(i,j,a,b)) - endif + max_r2 = max(r2(i,j,a,b), max_r2) enddo enddo enddo enddo deallocate(g_occ,g_vir,J1,K1,A1) - + end ! A1 @@ -1429,12 +1431,12 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) ! A1(u,v,i,j) = A1(u,v,i,j) & ! + cc_space_v_ovoo(u,a,i,j) * t1(v,a) & ! + cc_space_v_vooo(a,v,i,j) * t1(u,a) - ! + ! ! do b = 1, nV ! A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) - ! enddo + ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo @@ -1442,13 +1444,13 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) - + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) !$omp parallel & !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & !$omp private(u,v,i,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO @@ -1462,7 +1464,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do u = 1, nO @@ -1484,7 +1486,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) !$omp shared(nO,nV,A1,Y_oooo) & !$omp private(u,v,i,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO @@ -1496,7 +1498,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) enddo !$omp end do !$omp end parallel - + deallocate(X_vooo,Y_oooo) ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) @@ -1510,11 +1512,95 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) 1d0, tau , size(tau,1) * size(tau,2), & cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & 1d0, A1 , size(A1,1) * size(A1,2)) - + end ! B1 +subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) + + implicit none + + integer, intent(in) :: nO,nV,gam + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: B1(nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + +! do beta = 1, nV +! do b = 1, nV +! do a = 1, nV +! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) +! +! do i = 1, nO +! B1(a,b,beta) = B1(a,b,beta) & +! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & +! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) +! enddo +! +! enddo +! enddo +! enddo + + double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) + allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) +! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + !$omp parallel & + !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & + !$omp private(a,b,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) + enddo + enddo + enddo + !$omp end do nowait + do i = 1, nO + !$omp do + do b = 1, nV + do a = 1, nV + X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + +! ! B1(a,b,beta) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + call dgemm('N','N', nV*nV*nV, 1, nO, & + -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & + t1(1,gam), size(t1,1), & + 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) + + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + call dgemm('N','N', nV*nV, nV, nO, & + -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2), & + t1 , size(t1,1), & + 0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2)) + + !$omp parallel & + !$omp shared(nV,B1,Y_vvvv,gam) & + !$omp private(a,b,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta) = B1(a,b,beta) + Y_vvvv(a,b,beta) + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vvvo,Y_vvvv) + +end + subroutine compute_B1(nO,nV,t1,t2,B1) implicit none @@ -1532,28 +1618,28 @@ subroutine compute_B1(nO,nV,t1,t2,B1) ! do beta = 1, nV ! do b = 1, nV ! do a = 1, nV - ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) ! do i = 1, nO ! B1(a,b,beta,gam) = B1(a,b,beta,gam) & ! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & ! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) ! enddo - ! + ! ! enddo ! enddo ! enddo !enddo - + double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:) allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV)) - ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1564,8 +1650,8 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo enddo !$omp end do nowait - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do b = 1, nV do a = 1, nV @@ -1573,17 +1659,17 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel - + ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & call dgemm('N','N', nV*nV*nV, nV, nO, & -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & t1 , size(t1,1), & 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) - + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) call dgemm('N','N', nV*nV*nV, nV, nO, & -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), & @@ -1594,7 +1680,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp shared(nV,B1,Y_vvvv) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1606,9 +1692,9 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo !$omp end do !$omp end parallel - + deallocate(X_vvvo,Y_vvvv) - + end ! g_occ @@ -1629,14 +1715,14 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) !do i = 1, nO ! do u = 1, nO ! g_occ(u,i) = H_oo(u,i) - ! + ! ! do a = 1, nV ! g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a) - ! + ! ! do j = 1, nO ! g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) ! enddo - ! + ! ! enddo ! enddo !enddo @@ -1657,8 +1743,8 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo enddo !$omp end do - - !$omp do collapse(1) + + !$omp do do i = 1, nO do j = 1, nO do a = 1, nV @@ -1670,7 +1756,7 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo !$omp end do !$omp end parallel - + end ! g_vir @@ -1691,23 +1777,23 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) !do beta = 1, nV ! do a = 1, nV ! g_vir(a,beta) = H_vv(a,beta) - ! + ! ! do i = 1, nO ! g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta) - ! + ! ! do b = 1, nV ! g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) ! enddo - ! + ! ! enddo ! enddo !enddo - + call dgemm('N','N',nV,nV,nO, & -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & t1 , size(t1,1), & 0d0, g_vir, size(g_vir,1)) - + !$omp parallel & !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & !$omp private(i,b,a,beta) & @@ -1720,7 +1806,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do - !$omp do collapse(1) + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV @@ -1732,7 +1818,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do !$omp end parallel - + end ! J1 @@ -1765,7 +1851,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) ! do b = 1, nV ! J1(u,a,beta,i) = J1(u,a,beta,i) & - ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) + ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) ! enddo ! do j = 1, nO @@ -1775,7 +1861,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) ! + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo @@ -1783,13 +1869,13 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) - + !$omp parallel & !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & !$omp private(i,j,a,u,beta) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1797,10 +1883,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do a = 1, nV @@ -1812,7 +1898,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nV*nO,nV,nO, & -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & t1 , size(t1,1), & @@ -1822,8 +1908,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Y_ovov) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1831,8 +1917,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel deallocate(X_ovoo) @@ -1849,7 +1935,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & !$omp private(i,beta,a,u,b,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do beta = 1, nV @@ -1861,7 +1947,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -1886,8 +1972,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1895,12 +1981,12 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do i = 1, nO do a = 1, nV @@ -1908,11 +1994,11 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - - !$omp do collapse(3) + do j = 1, nO + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -1920,10 +2006,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel - + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & @@ -1933,8 +2019,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Z_ovvo) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1942,12 +2028,12 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel - deallocate(X_ovvo,Z_ovvo,Y_ovov) - + deallocate(X_ovvo,Z_ovvo,Y_ovov) + end ! K1 @@ -1982,7 +2068,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) ! do b = 1, nV ! K1(u,a,i,beta) = K1(u,a,i,beta) & - ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) + ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) ! enddo ! do j = 1, nO @@ -1991,19 +2077,19 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) ! - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo !enddo allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) - + !$omp parallel & !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV @@ -2015,8 +2101,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo !$omp end do nowait - !$omp do collapse(3) do i = 1, nO + !$omp do do a = 1, nV do j = 1, nO do b = 1, nV @@ -2024,11 +2110,11 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -2036,8 +2122,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo enddo enddo + !$omp end do enddo - !$omp end do !$omp end parallel call dgemm('N','N',nO*nV*nO,nV,nO, & @@ -2060,7 +2146,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp shared(nO,nV,K1,Z) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV @@ -2074,5 +2160,5 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp end parallel deallocate(X,Y,Z) - + end diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 3b762a06..1aab6bd7 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -10,51 +10,43 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) double precision, intent(out) :: energy - double precision, allocatable :: W(:,:,:,:,:,:) - double precision, allocatable :: V(:,:,:,:,:,:) - double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) - double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) - double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) - double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) - double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb, delta, delta_abc + double precision :: e,ta,tb - !allocate(W(nV,nV,nV,nO,nO,nO)) - !allocate(V(nV,nV,nV,nO,nO,nO)) - allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) - allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) - allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) - allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + call set_multiple_levels_omp(.False.) + + allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) + allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) - ! Temporary arrays !$OMP PARALLEL & - !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & !$OMP PRIVATE(a,b,c,d,i,j,k,l) & !$OMP DEFAULT(NONE) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & - !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO collapse(3) - do i = 1, nO - do a = 1, nV - do b = 1, nV + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO do d = 1, nV - X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) enddo enddo enddo enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do j = 1, nO - do k = 1, nO - do c = 1, nV + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO do d = 1, nV - T_vvoo(d,c,k,j) = t2(k,j,c,d) + T_voov(d,k,j,c) = t2(k,j,c,d) enddo enddo enddo @@ -62,191 +54,399 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END DO nowait !v_vooo(c,j,k,l) * t2(i,l,a,b) & - !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO collapse(3) - do k = 1, nO - do j = 1, nO - do c = 1, nV - do l = 1, nO - X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) - enddo - enddo - enddo - enddo - !$OMP END DO nowait - - !$OMP DO collapse(3) - do i = 1, nO - do b = 1, nV - do a = 1, nV - do l = 1, nO - T_ovvo(l,a,b,i) = t2(i,l,a,b) - enddo - enddo - enddo - enddo - !$OMP END DO nowait - - !v_vvoo(b,c,j,k) * t1(i,a) & - !X_vvoo(b,c,k,j) * T1_vo(a,i) & - - !$OMP DO collapse(3) - do j = 1, nO + !$OMP DO + do c = 1, nV do k = 1, nO - do c = 1, nV - do b = 1, nV - X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + do j = 1, nO + do l = 1, nO + X_ooov(l,j,k,c) = v_vooo(c,j,k,l) enddo enddo enddo enddo !$OMP END DO nowait - !$OMP DO collapse(1) - do i = 1, nO + !$OMP DO + do b = 1, nV do a = 1, nV - T_vo(a,i) = t1(i,a) + do i = 1, nO + do l = 1, nO + T_oovv(l,i,a,b) = t2(i,l,a,b) + enddo + enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO nowait - call wall_time(ta) - energy = 0d0 + !X_oovv(j,k,b,c) * T1_vo(a,i) & + + !$OMP DO do c = 1, nV do b = 1, nV - do a = 1, nV - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) - call form_w_abc(nO,nV,b,c,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_bca) - call form_w_abc(nO,nV,c,a,b,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cab) - call form_w_abc(nO,nV,c,b,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cba) - - call form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W_abc,V_abc) - call form_v_abc(nO,nV,c,b,a,T_vo,X_vvoo,W_cba,V_cba) - !$OMP PARALLEL & - !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& - !$OMP PRIVATE(i,j,k,e,delta) & - !$OMP DEFAULT(NONE) - e = 0d0 - !$OMP DO - do i = 1, nO - do j = 1, nO - do k = 1, nO - delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) - e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& - * (V_abc(i,j,k) - V_cba(i,j,k)) * delta - enddo - enddo + do k = 1, nO + do j = 1, nO + X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - energy = energy + e - !$OMP END CRITICAL - !$OMP END PARALLEL enddo enddo - call wall_time(tb) - write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' enddo + !$OMP END DO nowait - energy = energy / 3d0 + !$OMP END PARALLEL - deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) - !deallocate(V,W) + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc + + !$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED) + e = 0d0 + !$OMP DO SCHEDULE(dynamic) + do a = 1, nV + do b = a+1, nV + do c = b+1, nV + e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + enddo + + e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + + e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + + !$OMP END PARALLEL + + energy = energy / 3.d0 + + deallocate(X_vovv,X_ooov,T_voov,T_oovv) end -subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) +double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b,c + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + + double precision :: delta, delta_abc + integer :: i,j,k + + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + delta_abc = f_v(a) + f_v(b) + f_v(c) + e = 0.d0 + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & + W_bca(i,j,k) - W_bac(i,j,k) + & + W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) +& + (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & + W_cba(i,j,k) - W_cab(i,j,k) + & + W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) +& + (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & + W_acb(i,j,k) - W_abc(i,j,k) + & + W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + enddo + enddo + enddo + + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + +end + +double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + + double precision :: delta, delta_abc + integer :: i,j,k + + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + + call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + delta_abc = f_v(a) + f_v(b) + f_v(a) + e = 0.d0 + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + + enddo + enddo + enddo + + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + +end + +subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) implicit none integer, intent(in) :: nO,nV,a,b,c - !double precision, intent(in) :: t2(nO,nO,nV,nV) - double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) - double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision, intent(out) :: W_abc(nO,nO,nO) + double precision, intent(out) :: W_cba(nO,nO,nO) + double precision, intent(out) :: W_bca(nO,nO,nO) + double precision, intent(out) :: W_cab(nO,nO,nO) + double precision, intent(out) :: W_bac(nO,nO,nO) + double precision, intent(out) :: W_acb(nO,nO,nO) integer :: l,i,j,k,d + double precision, allocatable, dimension(:,:,:,:) :: W_ikj + double precision, allocatable :: X(:,:,:,:) + allocate(W_ikj(nO,nO,nO,6)) + allocate(X(nV,nO,nO,3)) - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) & - !$OMP PRIVATE(i,j,k,d,l) & - !$OMP DEFAULT(NONE) - - !$OMP DO collapse(3) - do k = 1, nO - do j = 1, nO - do i = 1, nO - W_abc(i,j,k) = 0.d0 - - do d = 1, nV - W_abc(i,j,k) = W_abc(i,j,k) & - + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & - + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & - + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & - + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & - + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & - + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) - - enddo - - do l = 1, nO - W_abc(i,j,k) = W_abc(i,j,k) & - - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & - - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj - - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik - - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij - - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj - - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik - enddo - + do k=1,nO + do i=1,nO + do d=1,nV + X(d,i,k,1) = T_voov(d,k,i,a) + X(d,i,k,2) = T_voov(d,k,i,b) + X(d,i,k,3) = T_voov(d,k,i,c) enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + +! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk + + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,c), nV, 0.d0, W_acb, nO) + +! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_acb, nO*nO) +! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 1.d0, W_acb, nO*nO) + +! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj + + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,3), nV, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, X(1,1,1,3), nV, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,1), nV, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,1), nV, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, X(1,1,1,2), nV, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, X(1,1,1,2), nV, 1.d0, W_acb, nO) + +! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 0.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 0.d0, W_ikj(1,1,1,6), nO*nO) + +! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_ikj(1,1,1,6), nO*nO) + + deallocate(X) + + allocate(X(nO,nO,nO,3)) + + do k=1,nO + do j=1,nO + do l=1,nO + X(l,j,k,1) = X_ooov(l,k,j,a) + X(l,j,k,2) = X_ooov(l,k,j,b) + X(l,j,k,3) = X_ooov(l,k,j,c) + enddo + enddo + enddo + + +! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X_ooov(1,1,1,c), nO, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X_ooov(1,1,1,a), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X_ooov(1,1,1,a), nO, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X_ooov(1,1,1,b), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X_ooov(1,1,1,b), nO, 1.d0, W_acb, nO) + +! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,2), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X(1,1,1,1), nO, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,2), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X(1,1,1,3), nO, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,1), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X(1,1,1,3), nO, 1.d0, W_acb, nO) + +! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_acb, nO*nO) + +! - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_bac, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_acb, nO*nO) + +! - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,6), nO*nO) + +! - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,6), nO*nO) + + do k=1,nO + do j=1,nO + do i=1,nO + W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j,1) + W_bac(i,j,k) = W_bac(i,j,k) + W_ikj(i,k,j,2) + W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,3) + W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,4) + W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,5) + W_acb(i,j,k) = W_acb(i,j,k) + W_ikj(i,k,j,6) + enddo + enddo + enddo + + deallocate(X,W_ikj) end ! V_abc -subroutine form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W,V) +subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) implicit none integer, intent(in) :: nO,nV,a,b,c - !double precision, intent(in) :: t1(nO,nV) - double precision, intent(in) :: T_vo(nV,nO) - double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) - double precision, intent(in) :: W(nO,nO,nO) - double precision, intent(out) :: V(nO,nO,nO) + double precision, intent(in) :: T_ov(nO,nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(out) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(out) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) integer :: i,j,k - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_vo,X_vvoo,W,V) & - !$OMP PRIVATE(i,j,k) & - !$OMP DEFAULT(NONE) - !$OMP DO collapse(2) do k = 1, nO do j = 1, nO do i = 1, nO - !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & - V(i,j,k) = W(i,j,k) & - + X_vvoo(b,c,k,j) * T_vo(a,i) & - + X_vvoo(a,c,k,i) * T_vo(b,j) & - + X_vvoo(a,b,j,i) * T_vo(c,k) + V_abc(i,j,k) = W_abc(i,j,k) & + + X_oovv(j,k,b,c) * T_ov(i,a) & + + X_oovv(i,k,a,c) * T_ov(j,b) & + + X_oovv(i,j,a,b) * T_ov(k,c) + + V_cba(i,j,k) = W_cba(i,j,k) & + + X_oovv(j,k,b,a) * T_ov(i,c) & + + X_oovv(i,k,c,a) * T_ov(j,b) & + + X_oovv(i,j,c,b) * T_ov(k,a) + + V_bca(i,j,k) = W_bca(i,j,k) & + + X_oovv(j,k,c,a) * T_ov(i,b) & + + X_oovv(i,k,b,a) * T_ov(j,c) & + + X_oovv(i,j,b,c) * T_ov(k,a) + + V_cab(i,j,k) = W_cab(i,j,k) & + + X_oovv(j,k,a,b) * T_ov(i,c) & + + X_oovv(i,k,c,b) * T_ov(j,a) & + + X_oovv(i,j,c,a) * T_ov(k,b) + + V_bac(i,j,k) = W_bac(i,j,k) & + + X_oovv(j,k,a,c) * T_ov(i,b) & + + X_oovv(i,k,b,c) * T_ov(j,a) & + + X_oovv(i,j,b,a) * T_ov(k,c) + + V_acb(i,j,k) = W_acb(i,j,k) & + + X_oovv(j,k,c,b) * T_ov(i,a) & + + X_oovv(i,k,a,b) * T_ov(j,c) & + + X_oovv(i,j,a,c) * T_ov(k,b) + enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL end diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f new file mode 100644 index 00000000..1f3bebc2 --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -0,0 +1,363 @@ +! Main +subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(inout) :: energy + + double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb,eccsd + + eccsd = energy + call set_multiple_levels_omp(.False.) + + allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) + allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) + + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO + do d = 1, nV + X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO + do d = 1, nV + T_voov(d,k,j,c) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & + + !$OMP DO + do c = 1, nV + do k = 1, nO + do j = 1, nO + do l = 1, nO + X_ooov(l,j,k,c) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO + do b = 1, nV + do a = 1, nV + do i = 1, nO + do l = 1, nO + T_oovv(l,i,a,b) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !X_oovv(j,k,b,c) * T1_vo(a,i) & + + !$OMP DO + do c = 1, nV + do b = 1, nV + do k = 1, nO + do j = 1, nO + X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP END PARALLEL + + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc +! logical, external :: omp_test_lock + + double precision, allocatable :: memo(:), Pabc(:), waccu(:) + integer*8, allocatable :: sampled(:) +! integer(omp_lock_kind), allocatable :: lock(:) + integer*2 , allocatable :: abc(:,:) + integer*8 :: Nabc, i8 + integer*8, allocatable :: iorder(:) + double precision :: eocc + double precision :: norm + integer :: kiter, isample + + + ! Prepare table of triplets (a,b,c) + + Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV + allocate (memo(Nabc), sampled(Nabc), Pabc(Nabc), waccu(Nabc)) + allocate (abc(4,Nabc), iorder(Nabc)) !, lock(Nabc)) + +! eocc = 3.d0/dble(nO) * sum(f_o(1:nO)) + Nabc = 0_8 + do a = 1, nV + do b = a+1, nV + do c = b+1, nV + Nabc = Nabc + 1_8 + Pabc(Nabc) = -1.d0/(f_v(a) + f_v(b) + f_v(c)) + abc(1,Nabc) = a + abc(2,Nabc) = b + abc(3,Nabc) = c + enddo + + Nabc = Nabc + 1_8 + abc(1,Nabc) = a + abc(2,Nabc) = b + abc(3,Nabc) = a + Pabc(Nabc) = -1.d0/(2.d0*f_v(a) + f_v(b)) + + Nabc = Nabc + 1_8 + abc(1,Nabc) = b + abc(2,Nabc) = a + abc(3,Nabc) = b + Pabc(Nabc) = -1.d0/(f_v(a) + 2.d0*f_v(b)) + enddo + enddo + + do i8=1,Nabc + iorder(i8) = i8 + enddo + + ! Sort triplets in decreasing Pabc + call dsort_big(Pabc, iorder, Nabc) + + ! Normalize + norm = 0.d0 + do i8=Nabc,1,-1 + norm = norm + Pabc(i8) + enddo + norm = 1.d0/norm + do i8=1,Nabc + Pabc(i8) = Pabc(i8) * norm + enddo + + call i8set_order_big(abc, iorder, Nabc) + + + ! Cumulative distribution for sampling + waccu(Nabc) = 0.d0 + do i8=Nabc-1,1,-1 + waccu(i8) = waccu(i8+1) - Pabc(i8+1) + enddo + waccu(:) = waccu(:) + 1.d0 + + logical :: converged, do_comp + double precision :: eta, variance, error, sample + double precision :: t00, t01 + integer*8 :: ieta, Ncomputed + integer*8, external :: binary_search + + integer :: nbuckets + nbuckets = 100 + + double precision, allocatable :: wsum(:) + allocate(wsum(nbuckets)) + + converged = .False. + Ncomputed = 0_8 + + energy = 0.d0 + variance = 0.d0 + memo(:) = 0.d0 + sampled(:) = -1_8 + + integer*8 :: ileft, iright, imin + ileft = 1_8 + iright = Nabc + integer*8, allocatable :: bounds(:,:) + + allocate (bounds(2,nbuckets)) + do isample=1,nbuckets + eta = 1.d0/dble(nbuckets) * dble(isample) + ieta = binary_search(waccu,eta,Nabc,ileft,iright) + bounds(1,isample) = ileft + bounds(2,isample) = ieta + ileft = ieta+1 + wsum(isample) = sum( Pabc(bounds(1,isample):bounds(2,isample) ) ) + enddo + + Pabc(:) = 1.d0/Pabc(:) + + print '(A)', '' + print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' | E(CCSD(T)) | Error | % |' + print '(A)', ' +----------------------+--------------+----------+' + + + call wall_time(t00) + imin = 1_8 + !$OMP PARALLEL & + !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & + !$OMP DEFAULT(SHARED) + + do kiter=1,Nabc + + !$OMP MASTER + do while ((imin <= Nabc).and.(sampled(imin)>-1_8)) + imin = imin+1 + enddo + + ! Deterministic part + if (imin < Nabc) then + ieta=imin + sampled(ieta) = 0_8 + a = abc(1,ieta) + b = abc(2,ieta) + c = abc(3,ieta) + Ncomputed += 1_8 + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + endif + !$OMP END TASK + endif + + ! Stochastic part + call random_number(eta) + do isample=1,nbuckets + if (imin >= bounds(2,isample)) then + cycle + endif + ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc) + + if (sampled(ieta) == -1_8) then + sampled(ieta) = 0_8 + a = abc(1,ieta) + b = abc(2,ieta) + c = abc(3,ieta) + Ncomputed += 1_8 + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + endif + !$OMP END TASK + endif + sampled(ieta) = sampled(ieta)+1_8 + + enddo + + call wall_time(t01) + if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then + t00 = t01 + + !$OMP TASKWAIT + + double precision :: ET, ET2 + double precision :: energy_stoch, energy_det + double precision :: scale + double precision :: w + double precision :: tmp + energy_stoch = 0.d0 + energy_det = 0.d0 + norm = 0.d0 + scale = 1.d0 + ET = 0.d0 + ET2 = 0.d0 + + + do isample=1,nbuckets + if (imin >= bounds(2,isample)) then + energy_det = energy_det + sum(memo(bounds(1,isample):bounds(2,isample))) + scale = scale - wsum(isample) + else + exit + endif + enddo + + do ieta=bounds(1,isample), Nabc + w = dble(max(sampled(ieta),0_8)) + tmp = w * memo(ieta) * Pabc(ieta) + ET = ET + tmp + ET2 = ET2 + tmp * memo(ieta) * Pabc(ieta) + norm = norm + w + enddo + norm = norm/scale + if (norm > 0.d0) then + energy_stoch = ET / norm + variance = ET2 / norm - energy_stoch*energy_stoch + endif + + energy = energy_det + energy_stoch + + print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + endif + !$OMP END MASTER + if (imin >= Nabc) exit + enddo + + !$OMP END PARALLEL + print '(A)', ' +----------------------+--------------+----------+' + print '(A)', '' + + deallocate(X_vovv,X_ooov,T_voov,T_oovv) +end + + + +integer*8 function binary_search(arr, key, size) + implicit none + BEGIN_DOC +! Searches the key in array arr(1:size) between l_in and r_in, and returns its index + END_DOC + integer*8 :: size, i, j, mid, l_in, r_in + double precision, dimension(size) :: arr(1:size) + double precision :: key + + i = 1_8 + j = size + + do while (j >= i) + mid = i + (j - i) / 2 + if (arr(mid) >= key) then + if (mid > 1 .and. arr(mid - 1) < key) then + binary_search = mid + return + end if + j = mid - 1 + else if (arr(mid) < key) then + i = mid + 1 + else + binary_search = mid + 1 + return + end if + end do + binary_search = i +end function binary_search + diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 14d3c696..b5b39b3b 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -6,11 +6,41 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num integer :: k + print *, 'AO->MO Transformation of Cholesky vectors' !$OMP PARALLEL DO PRIVATE(k) do k=1,cholesky_ao_num call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) enddo !$OMP END PARALLEL DO + print *, '' + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + integer :: i,j,k + double precision, allocatable :: buffer(:,:) + + print *, 'AO->MO Transformation of Cholesky vectors .' + !$OMP PARALLEL PRIVATE(i,j,k,buffer) + allocate(buffer(mo_num,mo_num)) + !$OMP DO SCHEDULE(static) + do k=1,cholesky_ao_num + call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num) + do j=1,mo_num + do i=1,mo_num + cholesky_mo_transp(k,i,j) = buffer(i,j) + enddo + enddo + enddo + !$OMP END DO + deallocate(buffer) + !$OMP END PARALLEL + print *, '' END_PROVIDER diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index 4ffb0134..d807f619 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -4,24 +4,68 @@ BEGIN_DOC ! big_array_coulomb_integrals(j,i,k) = = (ik|jj) ! - ! big_array_exchange_integrals(i,j,k) = = (ij|kj) + ! big_array_exchange_integrals(j,i,k) = = (ij|kj) END_DOC - integer :: i,j,k,l + integer :: i,j,k,l,a double precision :: get_two_e_integral double precision :: integral - do k = 1, mo_num - do i = 1, mo_num - do j = 1, mo_num - l = j - integral = get_two_e_integral(i,j,k,l,mo_integrals_map) - big_array_coulomb_integrals(j,i,k) = integral - l = j - integral = get_two_e_integral(i,j,l,k,mo_integrals_map) - big_array_exchange_integrals(j,i,k) = integral + if (do_ao_cholesky) then + + double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) + allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num)) + do j=1,mo_num + buffer_jj(:,j) = cholesky_mo_transp(:,j,j) + enddo + + call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + buffer_jj, cholesky_ao_num, 0.d0, & + buffer, mo_num*mo_num) + + do k = 1, mo_num + do i = 1, mo_num + do j = 1, mo_num + big_array_coulomb_integrals(j,i,k) = buffer(i,k,j) + enddo + enddo + enddo + deallocate(buffer_jj) + + allocate(buffer_jj(mo_num,mo_num)) + + do j = 1, mo_num + + call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, & + cholesky_mo_transp(1,1,j), cholesky_ao_num, & + cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, & + buffer_jj, mo_num) + + do k=1,mo_num + do i=1,mo_num + big_array_exchange_integrals(j,i,k) = buffer_jj(i,k) + enddo + enddo + enddo + + deallocate(buffer_jj) + + else + + do k = 1, mo_num + do i = 1, mo_num + do j = 1, mo_num + l = j + integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + big_array_coulomb_integrals(j,i,k) = integral + l = j + integral = get_two_e_integral(i,j,l,k,mo_integrals_map) + big_array_exchange_integrals(j,i,k) = integral + enddo + enddo enddo - enddo - enddo + + endif END_PROVIDER diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index b7ef901d..a461504e 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -1353,15 +1353,30 @@ END_PROVIDER integer :: i,j double precision :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map - mo_two_e_integrals_jj = 0.d0 - mo_two_e_integrals_jj_exchange = 0.d0 + + if (do_ao_cholesky) then + do j=1,mo_num + do i=1,mo_num + !TODO: use dgemm + mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j)) + mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i)) + enddo + enddo + + else + + do j=1,mo_num + do i=1,mo_num + mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map) + mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map) + enddo + enddo + + endif do j=1,mo_num do i=1,mo_num - mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map) - mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map) - mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j) + mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j) enddo enddo diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg index 8606e908..8c11478e 100644 --- a/src/trexio/EZFIO.cfg +++ b/src/trexio/EZFIO.cfg @@ -10,11 +10,17 @@ doc: Name of the exported TREXIO file interface: ezfio, ocaml, provider default: None -[export_rdm] +[export_basis] type: logical -doc: If True, export two-body reduced density matrix +doc: If True, export basis set and AOs interface: ezfio, ocaml, provider -default: False +default: True + +[export_mos] +type: logical +doc: If True, export basis set and AOs +interface: ezfio, ocaml, provider +default: True [export_ao_one_e_ints] type: logical @@ -22,12 +28,6 @@ doc: If True, export one-electron integrals in AO basis interface: ezfio, ocaml, provider default: False -[export_mo_one_e_ints] -type: logical -doc: If True, export one-electron integrals in MO basis -interface: ezfio, ocaml, provider -default: False - [export_ao_two_e_ints] type: logical doc: If True, export two-electron integrals in AO basis @@ -40,6 +40,12 @@ doc: If True, export Cholesky-decomposed two-electron integrals in AO basis interface: ezfio, ocaml, provider default: False +[export_mo_one_e_ints] +type: logical +doc: If True, export one-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + [export_mo_two_e_ints] type: logical doc: If True, export two-electron integrals in MO basis @@ -52,3 +58,9 @@ doc: If True, export Cholesky-decomposed two-electron integrals in MO basis interface: ezfio, ocaml, provider default: False +[export_rdm] +type: logical +doc: If True, export two-body reduced density matrix +interface: ezfio, ocaml, provider +default: False + diff --git a/src/trexio/export_trexio.irp.f b/src/trexio/export_trexio.irp.f index 3ae0dcb4..f9ecc17f 100644 --- a/src/trexio/export_trexio.irp.f +++ b/src/trexio/export_trexio.irp.f @@ -2,6 +2,6 @@ program export_trexio_prog implicit none read_wf = .True. SOFT_TOUCH read_wf - call export_trexio + call export_trexio(.False.) end diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index c55ddc5e..f25ae370 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f @@ -1,15 +1,17 @@ -subroutine export_trexio +subroutine export_trexio(update) use trexio implicit none BEGIN_DOC ! Exports the wave function in TREXIO format END_DOC + logical, intent(in) :: update integer(trexio_t) :: f(N_states) ! TREXIO file handle integer(trexio_exit_code) :: rc integer :: k double precision, allocatable :: factor(:) character*(256) :: filenames(N_states) + character :: rw filenames(1) = trexio_filename do k=2,N_states @@ -18,15 +20,26 @@ subroutine export_trexio do k=1,N_states print *, 'TREXIO file : ', trim(filenames(k)) - call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + if (update) then + call system('test -f '//trim(filenames(k))//' && cp -r '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + else + call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + endif enddo print *, '' + if (update) then + rw = 'u' + else + rw = 'w' + endif + + do k=1,N_states if (backend == 0) then - f(k) = trexio_open(filenames(k), 'u', TREXIO_HDF5, rc) + f(k) = trexio_open(filenames(k), rw, TREXIO_HDF5, rc) else if (backend == 1) then - f(k) = trexio_open(filenames(k), 'u', TREXIO_TEXT, rc) + f(k) = trexio_open(filenames(k), rw, TREXIO_TEXT, rc) endif if (f(k) == 0_8) then print *, 'Unable to open TREXIO file for writing' @@ -171,92 +184,95 @@ subroutine export_trexio endif + if (export_basis) then + ! Basis ! ----- - print *, 'Basis' + print *, 'Basis' + rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_prim_num(f(1), prim_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_prim_num(f(1), prim_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_shell_num(f(1), shell_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_num(f(1), shell_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) - call trexio_assert(rc, TREXIO_SUCCESS) + allocate(factor(shell_num)) +! if (ao_normalized) then +! factor(1:shell_num) = shell_normalization_factor(1:shell_num) +! else + factor(1:shell_num) = 1.d0 +! endif + rc = trexio_write_basis_shell_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) - allocate(factor(shell_num)) - if (ao_normalized) then - factor(1:shell_num) = shell_normalization_factor(1:shell_num) - else - factor(1:shell_num) = 1.d0 - endif - rc = trexio_write_basis_shell_factor(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) - deallocate(factor) + rc = trexio_write_basis_shell_index(f(1), shell_index) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_index(f(1), shell_index) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_exponent(f(1), prim_expo) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_exponent(f(1), prim_expo) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_coefficient(f(1), prim_coef) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_coefficient(f(1), prim_coef) - call trexio_assert(rc, TREXIO_SUCCESS) - - allocate(factor(prim_num)) - if (primitives_normalized) then - factor(1:prim_num) = prim_normalization_factor(1:prim_num) - else - factor(1:prim_num) = 1.d0 - endif - rc = trexio_write_basis_prim_factor(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) + allocate(factor(prim_num)) + if (primitives_normalized) then + factor(1:prim_num) = prim_normalization_factor(1:prim_num) + else + factor(1:prim_num) = 1.d0 + endif + rc = trexio_write_basis_prim_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) ! Atomic orbitals ! --------------- - print *, 'AOs' + print *, 'AOs' - rc = trexio_write_ao_num(f(1), ao_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_num(f(1), ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_cartesian(f(1), 1) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_cartesian(f(1), 1) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_shell(f(1), ao_shell) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_shell(f(1), ao_shell) + call trexio_assert(rc, TREXIO_SUCCESS) - integer :: i, pow0(3), powA(3), j, l, nz - double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c - nz=100 + integer :: i, pow0(3), powA(3), j, l, nz + double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c + nz=100 - C_A(1) = 0.d0 - C_A(2) = 0.d0 - C_A(3) = 0.d0 + C_A(1) = 0.d0 + C_A(2) = 0.d0 + C_A(3) = 0.d0 + + allocate(factor(ao_num)) + if (ao_normalized) then + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + else + factor(:) = 1.d0 + endif + rc = trexio_write_ao_normalization(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) - allocate(factor(ao_num)) - if (ao_normalized) then - do i=1,ao_num - l = ao_first_of_shell(ao_shell(i)) - factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) - enddo - else - factor(:) = 1.d0 endif - rc = trexio_write_ao_normalization(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) ! One-e AO integrals ! ------------------ @@ -375,28 +391,30 @@ subroutine export_trexio ! Molecular orbitals ! ------------------ - print *, 'MOs' + if (export_mos) then + print *, 'MOs' - rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) - call trexio_assert(rc, TREXIO_SUCCESS) - - do k=1,N_states - rc = trexio_write_mo_num(f(k), mo_num) + rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) call trexio_assert(rc, TREXIO_SUCCESS) - enddo - rc = trexio_write_mo_coefficient(f(1), mo_coef) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_mo_num(f(k), mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - if ( (trim(mo_label) == 'Canonical').and. & - (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then - rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + rc = trexio_write_mo_coefficient(f(1), mo_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + if ( (trim(mo_label) == 'Canonical').and. & + (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then + rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) - call trexio_assert(rc, TREXIO_SUCCESS) - ! One-e MO integrals ! ------------------ diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f index 9f9ad9d6..8c6b79d7 100644 --- a/src/trexio/import_trexio_integrals.irp.f +++ b/src/trexio/import_trexio_integrals.irp.f @@ -3,6 +3,7 @@ program import_integrals_ao implicit none integer(trexio_t) :: f ! TREXIO file handle integer(trexio_exit_code) :: rc + PROVIDE mo_num f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) if (f == 0_8) then @@ -42,10 +43,10 @@ subroutine run(f) if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then rc = trexio_read_nucleus_repulsion(f, s) - call trexio_assert(rc, TREXIO_SUCCESS) if (rc /= TREXIO_SUCCESS) then print *, irp_here, rc print *, 'Error reading nuclear repulsion' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_nuclei_nuclear_repulsion(s) @@ -63,6 +64,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO overlap' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A) @@ -74,6 +76,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO kinetic integrals' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A) @@ -85,6 +88,7 @@ subroutine run(f) ! if (rc /= TREXIO_SUCCESS) then ! print *, irp_here ! print *, 'Error reading AO ECP local integrals' +! call trexio_assert(rc, TREXIO_SUCCESS) ! stop -1 ! endif ! call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A) @@ -96,6 +100,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO potential N-e integrals' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A) @@ -106,41 +111,112 @@ subroutine run(f) ! AO 2e integrals ! --------------- - PROVIDE ao_integrals_map - integer*4 :: BUFSIZE - BUFSIZE=ao_num**2 - allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) - allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + rc = trexio_has_ao_2e_int(f) + PROVIDE ao_num + if (rc /= TREXIO_HAS_NOT) then + PROVIDE ao_integrals_map - integer*8 :: offset, icount + integer*4 :: BUFSIZE + BUFSIZE=ao_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) - offset = 0_8 - icount = BUFSIZE - rc = TREXIO_SUCCESS - do while (icount == size(V)) - rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) - do m=1,icount - i = Vi(1,m) - j = Vi(2,m) - k = Vi(3,m) - l = Vi(4,m) - integral = V(m) - call two_e_integrals_index(i, j, k, l, buffer_i(m) ) - buffer_values(m) = integral - enddo - call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) - offset = offset + icount + integer*8 :: offset, icount + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'AO integrals read from TREXIO file' + else + print *, 'AO integrals not found in TREXIO file' + endif + + ! MO integrals + ! ------------ + + allocate(A(mo_num, mo_num)) + if (trexio_has_mo_1e_int_core_hamiltonian(f) == TREXIO_SUCCESS) then + rc = trexio_read_mo_1e_int_core_hamiltonian(f, A) if (rc /= TREXIO_SUCCESS) then - exit + print *, irp_here + print *, 'Error reading MO 1e integrals' + call trexio_assert(rc, TREXIO_SUCCESS) + stop -1 endif - end do - n_integrals = offset + call ezfio_set_mo_one_e_ints_mo_one_e_integrals(A) + call ezfio_set_mo_one_e_ints_io_mo_one_e_integrals('Read') + endif + deallocate(A) - call map_sort(ao_integrals_map) - call map_unique(ao_integrals_map) + ! MO 2e integrals + ! --------------- - call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) - call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + rc = trexio_has_mo_2e_int(f) + if (rc /= TREXIO_HAS_NOT) then + + BUFSIZE=mo_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_mo_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call map_append(mo_integrals_map, buffer_i, buffer_values, int(icount,4)) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) + call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read') + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'MO integrals read from TREXIO file' + else + print *, 'MO integrals not found in TREXIO file' + endif end diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index ff17ee4e..b60e3bc1 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -468,6 +468,112 @@ end subroutine +subroutine multiply_poly_0c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:0), c(0:nc) + double precision, intent(inout) :: d(0:0+nc) + + integer :: ic + + do ic = 0,nc + d(ic) = d(ic) + c(ic) * b(0) + enddo + + do nd = nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + +subroutine multiply_poly_1c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:1), c(0:nc) + double precision, intent(inout) :: d(0:1+nc) + + integer :: ic, id + if(nc < 0) return + + do ic = 0,nc + d( ic) = d( ic) + c(ic) * b(0) + d(1+ic) = d(1+ic) + c(ic) * b(1) + enddo + + do nd = nc+1,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_2c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:2), c(0:nc) + double precision, intent(inout) :: d(0:2+nc) + + integer :: ic, id, k + if (nc <0) return + + do ic = 0,nc + d( ic) = d( ic) + c(ic) * b(0) + d(1+ic) = d(1+ic) + c(ic) * b(1) + d(2+ic) = d(2+ic) + c(ic) * b(2) + enddo + + do nd = nc+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + +subroutine multiply_poly_3c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:3), c(0:nc) + double precision, intent(inout) :: d(0:3+nc) + + integer :: ic, id + if (nc <0) return + + do ic = 1,nc + d( ic) = d(1+ic) + c(ic) * b(0) + d(1+ic) = d(1+ic) + c(ic) * b(1) + d(2+ic) = d(1+ic) + c(ic) * b(2) + d(3+ic) = d(1+ic) + c(ic) * b(3) + enddo + + do nd = nc+3,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + subroutine multiply_poly(b,nb,c,nc,d,nd) @@ -484,29 +590,16 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) integer :: ndtmp integer :: ib, ic, id, k - if(ior(nc,nb) >= 0) then ! True if nc>=0 and nb>=0 - continue - else - return - endif - ndtmp = nb+nc + if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 - do ic = 0,nc - d(ic) = d(ic) + c(ic) * b(0) - enddo - - do ib=1,nb - d(ib) = d(ib) + c(0) * b(ib) - do ic = 1,nc + do ib=0,nb + do ic = 0,nc d(ib+ic) = d(ib+ic) + c(ic) * b(ib) enddo enddo - do nd = ndtmp,0,-1 - if (d(nd) == 0.d0) then - cycle - endif - exit + do nd = nb+nc,0,-1 + if (d(nd) /= 0.d0) exit enddo end diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 3b43d607..69873bc0 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1823,41 +1823,39 @@ subroutine pivoted_cholesky( A, rank, tol, ndim, U) ! U is allocated inside this subroutine ! rank is the number of Cholesky vectors depending on tol ! -integer :: ndim -integer, intent(inout) :: rank -double precision, dimension(ndim, ndim), intent(inout) :: A -double precision, dimension(ndim, rank), intent(out) :: U -double precision, intent(in) :: tol +integer :: ndim +integer, intent(inout) :: rank +double precision, intent(inout) :: A(ndim, ndim) +double precision, intent(out) :: U(ndim, rank) +double precision, intent(in) :: tol integer, dimension(:), allocatable :: piv double precision, dimension(:), allocatable :: work character, parameter :: uplo = "U" -integer :: N, LDA +integer :: LDA integer :: info integer :: k, l, rank0 -external :: dpstrf rank0 = rank -N = size(A, dim=1) -LDA = N -allocate(piv(N)) -allocate(work(2*N)) -call dpstrf(uplo, N, A, LDA, piv, rank, tol, work, info) +LDA = ndim +allocate(piv(ndim)) +allocate(work(2*ndim)) +call dpstrf(uplo, ndim, A, LDA, piv, rank, tol, work, info) if (rank > rank0) then print *, 'Bug: rank > rank0 in pivoted cholesky. Increase rank before calling' stop end if -do k = 1, N - A(k+1:, k) = 0.00D+0 +do k = 1, ndim + A(k+1:ndim, k) = 0.00D+0 end do ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer ! to do the swapping in-place U(:,:) = 0.00D+0 -do k = 1, N +do k = 1, ndim l = piv(k) - U(l, :) = A(1:rank, k) + U(l, 1:rank) = A(1:rank, k) end do end subroutine pivoted_cholesky diff --git a/src/utils_cc/energy.irp.f b/src/utils_cc/energy.irp.f index 33e0cbae..fc1451ba 100644 --- a/src/utils_cc/energy.irp.f +++ b/src/utils_cc/energy.irp.f @@ -5,9 +5,8 @@ subroutine det_energy(det,energy) integer(bit_kind), intent(in) :: det double precision, intent(out) :: energy + double precision, external :: diag_H_mat_elem - call i_H_j(det,det,N_int,energy) + energy = diag_H_mat_elem(det,N_int) + nuclear_repulsion - energy = energy + nuclear_repulsion - end diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 9e244d82..485d7002 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -13,7 +13,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f) integer :: i1,i2,idx1,idx2 allocate(tmp_F(mo_num,mo_num)) - + call get_fock_matrix_spin(det,1,tmp_F) !$OMP PARALLEL & @@ -32,7 +32,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f) !$OMP END PARALLEL deallocate(tmp_F) - + end ! V @@ -45,63 +45,66 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer, intent(in) :: list1(n1),list2(n2),list3(n3),list4(n4) double precision, intent(out) :: v(n1,n2,n3,n4) - integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4 - double precision :: get_two_e_integral - - PROVIDE mo_two_e_integrals_in_map + integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k + double precision, allocatable :: buffer(:,:,:) !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)& + !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO do i4 = 1, n4 - do i3 = 1, n3 - do i2 = 1, n2 + idx4 = list4(i4) + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, n2 + idx2 = list2(i2) + do i3 = 1, n3 + idx3 = list3(i3) do i1 = 1, n1 - idx4 = list4(i4) - idx3 = list3(i3) - idx2 = list2(i2) idx1 = list1(i1) - v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map) + v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) enddo enddo enddo enddo !$OMP END DO + deallocate(buffer) !$OMP END PARALLEL - + + end ! full BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] - implicit none - - integer :: i,j,k,l - double precision :: get_two_e_integral - - PROVIDE mo_two_e_integrals_in_map - + integer :: i1,i2,i3,i4,k + double precision, allocatable :: buffer(:,:,:) !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) & - !$OMP PRIVATE(i,j,k,l) & + !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& !$OMP DEFAULT(NONE) - - !$OMP DO collapse(3) - do l = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do i = 1, mo_num - cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO + do i4 = 1, mo_num + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, mo_num + do i3 = 1, mo_num + do i1 = 1, mo_num + cc_space_v(i1,i2,i3,i4) = buffer(i1,i3,i2) enddo enddo enddo enddo !$OMP END DO + deallocate(buffer) !$OMP END PARALLEL - + END_PROVIDER ! oooo @@ -280,7 +283,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo)) call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v) - + do q = 1, cc_n_mo do p = 1, cc_n_mo cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q) @@ -382,7 +385,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)] enddo FREE cc_space_v_vvvv - + END_PROVIDER ! iaia @@ -467,7 +470,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n integer :: i,j,a,b allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa)) - + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v) !$OMP PARALLEL & @@ -501,7 +504,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n integer :: i,j,a,b allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa)) - + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v) !$OMP PARALLEL & @@ -613,7 +616,7 @@ subroutine shift_idx_spin(s,n_S,shift) else shift = n_S(1) endif - + end ! F @@ -626,21 +629,22 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) ! Compute the Fock matrix corresponding to two lists of spin orbitals. ! Ex: occ/occ, occ/vir,... END_DOC - + integer(bit_kind), intent(in) :: det(N_int,2) integer, intent(in) :: n1,n2, n1_S(2), n2_S(2) integer, intent(in) :: list1(n1,2), list2(n2,2) integer, intent(in) :: dim1, dim2 - + double precision, intent(out) :: f(dim1, dim2) double precision, allocatable :: tmp_F(:,:) integer :: i,j, idx_i,idx_j,i_shift,j_shift integer :: tmp_i,tmp_j integer :: si,sj,s + PROVIDE big_array_exchange_integrals big_array_coulomb_integrals allocate(tmp_F(mo_num,mo_num)) - + do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 @@ -669,9 +673,9 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) enddo enddo - + deallocate(tmp_F) - + end ! Get F @@ -683,12 +687,12 @@ subroutine get_fock_matrix_spin(det,s,f) BEGIN_DOC ! Fock matrix alpha or beta of an arbitrary det END_DOC - + integer(bit_kind), intent(in) :: det(N_int,2) integer, intent(in) :: s - + double precision, intent(out) :: f(mo_num,mo_num) - + integer :: p,q,i,s1,s2 integer(bit_kind) :: res(N_int,2) logical :: ok @@ -701,9 +705,11 @@ subroutine get_fock_matrix_spin(det,s,f) s1 = 2 s2 = 1 endif - + + PROVIDE big_array_coulomb_integrals big_array_exchange_integrals + !$OMP PARALLEL & - !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) & + !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals,big_array_coulomb_integrals,big_array_exchange_integrals) & !$OMP PRIVATE(p,q,ok,i,res)& !$OMP DEFAULT(NONE) !$OMP DO collapse(1) @@ -713,20 +719,21 @@ subroutine get_fock_matrix_spin(det,s,f) do i = 1, mo_num call apply_hole(det, s1, i, res, ok, N_int) if (ok) then - f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) +! f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) + f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) - big_array_exchange_integrals(i,p,q) endif enddo do i = 1, mo_num call apply_hole(det, s2, i, res, ok, N_int) if (ok) then - f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) + f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) endif enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - + end ! V @@ -752,14 +759,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, integer :: si,sj,sk,sl,s PROVIDE cc_space_v - + !$OMP PARALLEL & !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sk = 1, 2 @@ -768,7 +775,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then @@ -776,7 +783,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -792,14 +799,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(3) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -815,14 +822,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(3) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -843,7 +850,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -859,13 +866,13 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo !$OMP END DO endif - + enddo enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx @@ -900,28 +907,28 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, call shift_idx_spin(sl,n4_S,l_shift) tmp_l = idx_l - l_shift l = list4(tmp_l,sl) - + !$OMP PARALLEL & !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & !$OMP i,j,k,idx_i,idx_j,idx_k,& !$OMP tmp_i,tmp_j,tmp_k)& !$OMP DEFAULT(NONE) - + do sk = 1, 2 call shift_idx_spin(sk,n3_S,k_shift) do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -934,13 +941,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -953,13 +960,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -976,7 +983,7 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -989,12 +996,12 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx_ij_l @@ -1029,28 +1036,28 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l call shift_idx_spin(sk,n3_S,k_shift) tmp_k = idx_k - k_shift k = list3(tmp_k,sk) - + !$OMP PARALLEL & !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & !$OMP i,j,l,idx_i,idx_j,idx_l,& !$OMP tmp_i,tmp_j,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1063,13 +1070,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1082,13 +1089,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1105,7 +1112,7 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1118,12 +1125,12 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx_i_kl @@ -1158,28 +1165,28 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l call shift_idx_spin(sj,n2_S,j_shift) tmp_j = idx_j - j_shift j = list2(tmp_j,sj) - + !$OMP PARALLEL & !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & !$OMP i,k,l,idx_i,idx_k,idx_l,& !$OMP tmp_i,tmp_k,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sk = 1, 2 call shift_idx_spin(sk,n3_S,k_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1192,13 +1199,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1211,13 +1218,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1234,7 +1241,7 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1247,10 +1254,10 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end diff --git a/src/utils_cc/update_t.irp.f b/src/utils_cc/update_t.irp.f index dbd4f4bd..0cf8626c 100644 --- a/src/utils_cc/update_t.irp.f +++ b/src/utils_cc/update_t.irp.f @@ -22,7 +22,7 @@ subroutine update_t1(nO,nV,f_o,f_v,r1,t1) !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) & !$OMP PRIVATE(i,a) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(1) + !$OMP DO do a = 1, nV do i = 1, nO t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) @@ -57,7 +57,7 @@ subroutine update_t2(nO,nV,f_o,f_v,r2,t2) !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO