From 2d06e8fdaefff2905933a0b5ff52e93f8193f0a7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 9 May 2023 10:52:36 +0200 Subject: [PATCH 01/79] Cholesky false by default --- external/qp2-dependencies | 2 +- src/ao_two_e_ints/EZFIO.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index e0d0e02e..6e23ebac 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 +Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index caed4698..4ab080ec 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -22,4 +22,4 @@ ezfio_name: direct type: logical doc: Perform Cholesky decomposition of AO integrals interface: ezfio,provider,ocaml -default: True +default: False From b8804f058a2872976af4712248609fab5bf6edaf Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 21:38:01 +0200 Subject: [PATCH 02/79] Moved qp_import_trexio.py --- {src/trexio => scripts}/qp_import_trexio.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) rename {src/trexio => scripts}/qp_import_trexio.py (98%) diff --git a/src/trexio/qp_import_trexio.py b/scripts/qp_import_trexio.py similarity index 98% rename from src/trexio/qp_import_trexio.py rename to scripts/qp_import_trexio.py index de8d1269..d8a19160 100755 --- a/src/trexio/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -13,12 +13,17 @@ Options: import sys import os -import trexio import numpy as np from functools import reduce from ezfio import ezfio from docopt import docopt +try: + import trexio +except ImportError: + print("Error: trexio python module is not found. Try python3 -m pip install trexio") + sys.exit(1) + try: QP_ROOT = os.environ["QP_ROOT"] From 6289508c1e4e1ae7abce6388cf42fa12b5d28752 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 13:32:52 +0200 Subject: [PATCH 03/79] Swapped indices in CCSD(T) --- scripts/qp_import_trexio.py | 23 +++--- src/ccsd/ccsd_space_orb_sub.irp.f | 2 +- src/ccsd/ccsd_t_space_orb_abc.irp.f | 108 +++++++++++++--------------- src/utils/linear_algebra.irp.f | 30 ++++---- 4 files changed, 79 insertions(+), 84 deletions(-) diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index d8a19160..eb19e16b 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -95,14 +95,15 @@ def write_ezfio(trexio_filename, filename): p = re.compile(r'(\d*)$') label = [p.sub("", x).capitalize() for x in label] ezfio.set_nuclei_nucl_label(label) + print("OK") else: ezfio.set_nuclei_nucl_num(1) ezfio.set_nuclei_nucl_charge([0.]) ezfio.set_nuclei_nucl_coord([0.,0.,0.]) ezfio.set_nuclei_nucl_label(["X"]) + print("None") - print("OK") print("Electrons\t...\t", end=' ') @@ -110,12 +111,12 @@ def write_ezfio(trexio_filename, filename): try: num_beta = trexio.read_electron_dn_num(trexio_file) except: - num_beta = sum(charge)//2 + num_beta = int(sum(charge))//2 try: num_alpha = trexio.read_electron_up_num(trexio_file) except: - num_alpha = sum(charge) - num_beta + num_alpha = int(sum(charge)) - num_beta if num_alpha == 0: print("\n\nError: There are zero electrons in the TREXIO file.\n\n") @@ -123,7 +124,7 @@ def write_ezfio(trexio_filename, filename): ezfio.set_electrons_elec_alpha_num(num_alpha) ezfio.set_electrons_elec_beta_num(num_beta) - print("OK") + print(f"{num_alpha} {num_beta}") print("Basis\t\t...\t", end=' ') @@ -263,7 +264,10 @@ def write_ezfio(trexio_filename, filename): ezfio.set_ao_basis_ao_expo(expo) ezfio.set_ao_basis_ao_basis("Read from TREXIO") - print("OK") + print("OK") + + else: + print("None") # _ @@ -308,10 +312,10 @@ def write_ezfio(trexio_filename, filename): for i in range(num_beta): mo_occ[i] += 1. ezfio.set_mo_basis_mo_occ(mo_occ) + print("OK") except: - pass + print("None") - print("OK") print("Pseudos\t\t...\t", end=' ') @@ -391,9 +395,10 @@ def write_ezfio(trexio_filename, filename): ezfio.set_pseudo_pseudo_n_kl(pseudo_n_kl) ezfio.set_pseudo_pseudo_v_kl(pseudo_v_kl) ezfio.set_pseudo_pseudo_dz_kl(pseudo_dz_kl) + print("OK") - - print("OK") + else: + print("None") diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index b63375cf..acd14034 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -169,7 +169,7 @@ subroutine run_ccsd_space_orb ! New print*,'Computing (T) correction...' call wall_time(ta) - call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & + call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) call wall_time(tb) print*,'Time: ',tb-ta, ' s' diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 3b762a06..acc2aaa9 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -15,8 +15,8 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) - double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) - double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d double precision :: e,ta,tb, delta, delta_abc @@ -24,25 +24,25 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !allocate(V(nV,nV,nV,nO,nO,nO)) allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) - allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) - allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) + allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) ! Temporary arrays !$OMP PARALLEL & - !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & !$OMP PRIVATE(a,b,c,d,i,j,k,l) & !$OMP DEFAULT(NONE) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & - !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) !$OMP DO collapse(3) do i = 1, nO do a = 1, nV do b = 1, nV do d = 1, nV - X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) enddo enddo enddo @@ -54,7 +54,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do k = 1, nO do c = 1, nV do d = 1, nV - T_vvoo(d,c,k,j) = t2(k,j,c,d) + T_voov(d,k,j,c) = t2(k,j,c,d) enddo enddo enddo @@ -62,14 +62,14 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END DO nowait !v_vooo(c,j,k,l) * t2(i,l,a,b) & - !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & !$OMP DO collapse(3) - do k = 1, nO - do j = 1, nO - do c = 1, nV + do c = 1, nV + do k = 1, nO + do j = 1, nO do l = 1, nO - X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + X_ooov(l,j,k,c) = v_vooo(c,j,k,l) enddo enddo enddo @@ -81,35 +81,27 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do b = 1, nV do a = 1, nV do l = 1, nO - T_ovvo(l,a,b,i) = t2(i,l,a,b) + T_oovv(l,i,a,b) = t2(i,l,a,b) enddo enddo enddo enddo !$OMP END DO nowait - !v_vvoo(b,c,j,k) * t1(i,a) & - !X_vvoo(b,c,k,j) * T1_vo(a,i) & + !X_oovv(j,k,b,c) * T1_vo(a,i) & !$OMP DO collapse(3) - do j = 1, nO - do k = 1, nO - do c = 1, nV - do b = 1, nV - X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + do c = 1, nV + do b = 1, nV + do j = 1, nO + do k = 1, nO + X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) enddo enddo enddo enddo !$OMP END DO nowait - !$OMP DO collapse(1) - do i = 1, nO - do a = 1, nV - T_vo(a,i) = t1(i,a) - enddo - enddo - !$OMP END DO !$OMP END PARALLEL call wall_time(ta) @@ -118,13 +110,13 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do b = 1, nV do a = 1, nV delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) - call form_w_abc(nO,nV,b,c,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_bca) - call form_w_abc(nO,nV,c,a,b,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cab) - call form_w_abc(nO,nV,c,b,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cba) + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) + call form_w_abc(nO,nV,b,c,a,T_voov,T_oovv,X_vovv,X_ooov,W_bca) + call form_w_abc(nO,nV,c,a,b,T_voov,T_oovv,X_vovv,X_ooov,W_cab) + call form_w_abc(nO,nV,c,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_cba) - call form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W_abc,V_abc) - call form_v_abc(nO,nV,c,b,a,T_vo,X_vvoo,W_cba,V_cba) + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc) + call form_v_abc(nO,nV,c,b,a,t1,X_oovv,W_cba,V_cba) !$OMP PARALLEL & !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& !$OMP PRIVATE(i,j,k,e,delta) & @@ -154,26 +146,26 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) energy = energy / 3d0 - deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vovv,X_ooov,T_voov,T_oovv) !deallocate(V,W) end -subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) +subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) implicit none integer, intent(in) :: nO,nV,a,b,c !double precision, intent(in) :: t2(nO,nO,nV,nV) - double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) - double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision, intent(out) :: W_abc(nO,nO,nO) integer :: l,i,j,k,d !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) & + !$OMP SHARED(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) & !$OMP PRIVATE(i,j,k,d,l) & !$OMP DEFAULT(NONE) @@ -185,23 +177,23 @@ subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) do d = 1, nV W_abc(i,j,k) = W_abc(i,j,k) & - + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & - + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & - + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & - + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & - + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & - + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) + + X_vovv(d,i,b,a) * T_voov(d,k,j,c) & + + X_vovv(d,i,c,a) * T_voov(d,j,k,b) & + + X_vovv(d,k,a,c) * T_voov(d,j,i,b) & + + X_vovv(d,k,b,c) * T_voov(d,i,j,a) & + + X_vovv(d,j,c,b) * T_voov(d,i,k,a) & + + X_vovv(d,j,a,b) * T_voov(d,k,i,c) enddo do l = 1, nO W_abc(i,j,k) = W_abc(i,j,k) & - - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & - - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj - - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik - - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij - - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj - - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik + - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) & + - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) & ! bc kj + - T_oovv(l,k,c,a) * X_ooov(l,i,j,b) & ! prev ac ik + - T_oovv(l,k,c,b) * X_ooov(l,j,i,a) & ! prev ab ij + - T_oovv(l,j,b,c) * X_ooov(l,k,i,a) & ! prev bc kj + - T_oovv(l,j,b,a) * X_ooov(l,i,k,c) ! prev ac ik enddo enddo @@ -216,21 +208,21 @@ end ! V_abc -subroutine form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W,V) +subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W,V) implicit none integer, intent(in) :: nO,nV,a,b,c !double precision, intent(in) :: t1(nO,nV) - double precision, intent(in) :: T_vo(nV,nO) - double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: T_ov(nO,nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) double precision, intent(in) :: W(nO,nO,nO) double precision, intent(out) :: V(nO,nO,nO) integer :: i,j,k !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_vo,X_vvoo,W,V) & + !$OMP SHARED(nO,nV,a,b,c,T_ov,X_oovv,W,V) & !$OMP PRIVATE(i,j,k) & !$OMP DEFAULT(NONE) !$OMP DO collapse(2) @@ -239,9 +231,9 @@ implicit none do i = 1, nO !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & V(i,j,k) = W(i,j,k) & - + X_vvoo(b,c,k,j) * T_vo(a,i) & - + X_vvoo(a,c,k,i) * T_vo(b,j) & - + X_vvoo(a,b,j,i) * T_vo(c,k) + + X_oovv(j,k,b,c) * T_ov(i,a) & + + X_oovv(i,k,a,c) * T_ov(j,b) & + + X_oovv(i,j,a,b) * T_ov(k,c) enddo enddo enddo diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 3b43d607..69873bc0 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1823,41 +1823,39 @@ subroutine pivoted_cholesky( A, rank, tol, ndim, U) ! U is allocated inside this subroutine ! rank is the number of Cholesky vectors depending on tol ! -integer :: ndim -integer, intent(inout) :: rank -double precision, dimension(ndim, ndim), intent(inout) :: A -double precision, dimension(ndim, rank), intent(out) :: U -double precision, intent(in) :: tol +integer :: ndim +integer, intent(inout) :: rank +double precision, intent(inout) :: A(ndim, ndim) +double precision, intent(out) :: U(ndim, rank) +double precision, intent(in) :: tol integer, dimension(:), allocatable :: piv double precision, dimension(:), allocatable :: work character, parameter :: uplo = "U" -integer :: N, LDA +integer :: LDA integer :: info integer :: k, l, rank0 -external :: dpstrf rank0 = rank -N = size(A, dim=1) -LDA = N -allocate(piv(N)) -allocate(work(2*N)) -call dpstrf(uplo, N, A, LDA, piv, rank, tol, work, info) +LDA = ndim +allocate(piv(ndim)) +allocate(work(2*ndim)) +call dpstrf(uplo, ndim, A, LDA, piv, rank, tol, work, info) if (rank > rank0) then print *, 'Bug: rank > rank0 in pivoted cholesky. Increase rank before calling' stop end if -do k = 1, N - A(k+1:, k) = 0.00D+0 +do k = 1, ndim + A(k+1:ndim, k) = 0.00D+0 end do ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer ! to do the swapping in-place U(:,:) = 0.00D+0 -do k = 1, N +do k = 1, ndim l = piv(k) - U(l, :) = A(1:rank, k) + U(l, 1:rank) = A(1:rank, k) end do end subroutine pivoted_cholesky From ca5857ac3630a452199bb25b29eed04e8674e6b3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 15:34:16 +0200 Subject: [PATCH 04/79] Added dgemm in ccsd_t_space_orb_abc.irp.f --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 92 +++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 13 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index acc2aaa9..e960d47d 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -162,7 +162,86 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) double precision, intent(out) :: W_abc(nO,nO,nO) integer :: l,i,j,k,d + double precision, allocatable, dimension(:,:,:) :: W_ikj, X + allocate(W_ikj(nO,nO,nO)) + allocate(X(nV,nO,nO)) + + W_abc = 0.d0 + W_ikj = 0.d0 + +! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk + call dgemm('T','N', nO, nO*nO, nV, 1.d0, & + X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + +! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & + T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + +! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j + !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) + do k=1,nO + do i=1,nO + do d=1,nV + X(d,i,k) = T_voov(d,k,i,c) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & + X(1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj, nO*nO) + +! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji + !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) + do j=1,nO + do i=1,nO + do d=1,nV + X(d,i,j) = T_voov(d,j,i,b) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & + X(1,1,1), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + +! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & + T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj, nO*nO) + +! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj + !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) + do k=1,nO + do j=1,nO + do d=1,nV + X(d,j,k) = T_voov(d,k,j,c) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', nO, nO*nO, nV, 1.d0, & + X_vovv(1,1,b,a), nV, X(1,1,1), nV, 1.d0, W_abc, nO) + + + +! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk +! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj +! - T_oovv(l,k,c,a) * X_ooov(l,i,j,b) : k ij +! - T_oovv(l,k,c,b) * X_ooov(l,j,i,a) : k ji +! - T_oovv(l,j,b,c) * X_ooov(l,k,i,a) : j ki +! - T_oovv(l,j,b,a) * X_ooov(l,i,k,c) : j ik + + !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,j,k) + do k=1,nO + do j=1,nO + do i=1,nO + W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO !$OMP PARALLEL & !$OMP SHARED(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) & @@ -173,18 +252,6 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) do k = 1, nO do j = 1, nO do i = 1, nO - W_abc(i,j,k) = 0.d0 - - do d = 1, nV - W_abc(i,j,k) = W_abc(i,j,k) & - + X_vovv(d,i,b,a) * T_voov(d,k,j,c) & - + X_vovv(d,i,c,a) * T_voov(d,j,k,b) & - + X_vovv(d,k,a,c) * T_voov(d,j,i,b) & - + X_vovv(d,k,b,c) * T_voov(d,i,j,a) & - + X_vovv(d,j,c,b) * T_voov(d,i,k,a) & - + X_vovv(d,j,a,b) * T_voov(d,k,i,c) - - enddo do l = 1, nO W_abc(i,j,k) = W_abc(i,j,k) & @@ -202,7 +269,6 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) !$OMP END DO !$OMP END PARALLEL - end From 1c0141d9a2be1b8025c76a178c81559b63432121 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 21:25:49 +0200 Subject: [PATCH 05/79] Full dgemm in ccsd_t_space_orb_abc.irp.f --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 140 ++++++++++++---------------- 1 file changed, 62 insertions(+), 78 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index e960d47d..c5c15fb3 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -162,78 +162,97 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) double precision, intent(out) :: W_abc(nO,nO,nO) integer :: l,i,j,k,d - double precision, allocatable, dimension(:,:,:) :: W_ikj, X + double precision, allocatable, dimension(:,:,:) :: W_ikj + double precision, allocatable :: X(:,:,:,:) allocate(W_ikj(nO,nO,nO)) - allocate(X(nV,nO,nO)) + allocate(X(nV,nO,nO,2)) - W_abc = 0.d0 - W_ikj = 0.d0 + do k=1,nO + do i=1,nO + do d=1,nV + X(d,i,k,1) = T_voov(d,k,i,c) +! X(d,i,j,2) = T_voov(d,j,i,b) + X(d,i,k,2) = T_voov(d,k,i,b) +! X(d,j,k,1) = T_voov(d,k,j,c) + enddo + enddo + enddo ! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk + call dgemm('T','N', nO, nO*nO, nV, 1.d0, & X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) ! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) -! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j - !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) - do k=1,nO - do i=1,nO - do d=1,nV - X(d,i,k) = T_voov(d,k,i,c) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - X(1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj, nO*nO) - ! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji - !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) - do j=1,nO - do i=1,nO - do d=1,nV - X(d,i,j) = T_voov(d,j,i,b) - enddo - enddo - enddo - !$OMP END PARALLEL DO call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - X(1,1,1), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + +! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj + + call dgemm('T','N', nO, nO*nO, nV, 1.d0, & + X_vovv(1,1,b,a), nV, X(1,1,1,1), nV, 1.d0, W_abc, nO) + +! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & + X(1,1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj, nO*nO) ! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j call dgemm('T','N', nO*nO, nO, nV, 1.d0, & T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj, nO*nO) -! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj - !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) + deallocate(X) + + allocate(X(nO,nO,nO,2)) + do k=1,nO do j=1,nO - do d=1,nV - X(d,j,k) = T_voov(d,k,j,c) + do l=1,nO + X(l,j,k,1) = X_ooov(l,k,j,b) +! X(l,i,j,2) = X_ooov(l,j,i,a) + X(l,j,k,2) = X_ooov(l,k,j,a) +! X(l,i,k,2) = X_ooov(l,k,i,a) enddo enddo enddo - !$OMP END PARALLEL DO - - call dgemm('T','N', nO, nO*nO, nV, 1.d0, & - X_vovv(1,1,b,a), nV, X(1,1,1), nV, 1.d0, W_abc, nO) - ! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk -! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj -! - T_oovv(l,k,c,a) * X_ooov(l,i,j,b) : k ij -! - T_oovv(l,k,c,b) * X_ooov(l,j,i,a) : k ji -! - T_oovv(l,j,b,c) * X_ooov(l,k,i,a) : j ki -! - T_oovv(l,j,b,a) * X_ooov(l,i,k,c) : j ik + call dgemm('T','N', nO, nO*nO, nO, -1.d0, & + T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + +! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj + + call dgemm('T','N', nO, nO*nO, nO, -1.d0, & + T_oovv(1,1,a,c), nO, X(1,1,1,1), nO, 1.d0, W_abc, nO) + +! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k + + call dgemm('T','N', nO*nO, nO, nO, -1.d0, & + X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + +! - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k + + call dgemm('T','N', nO*nO, nO, nO, -1.d0, & + X(1,1,1,2), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + +! - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j + + call dgemm('T','N', nO*nO, nO, nO, -1.d0, & + X(1,1,1,2), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj, nO*nO) + +! - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j + + call dgemm('T','N', nO*nO, nO, nO, -1.d0, & + X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj, nO*nO) - !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,j,k) do k=1,nO do j=1,nO do i=1,nO @@ -241,33 +260,6 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) enddo enddo enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) & - !$OMP PRIVATE(i,j,k,d,l) & - !$OMP DEFAULT(NONE) - - !$OMP DO collapse(3) - do k = 1, nO - do j = 1, nO - do i = 1, nO - - do l = 1, nO - W_abc(i,j,k) = W_abc(i,j,k) & - - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) & - - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) & ! bc kj - - T_oovv(l,k,c,a) * X_ooov(l,i,j,b) & ! prev ac ik - - T_oovv(l,k,c,b) * X_ooov(l,j,i,a) & ! prev ab ij - - T_oovv(l,j,b,c) * X_ooov(l,k,i,a) & ! prev bc kj - - T_oovv(l,j,b,a) * X_ooov(l,i,k,c) ! prev ac ik - enddo - - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL end @@ -287,15 +279,9 @@ implicit none integer :: i,j,k - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_ov,X_oovv,W,V) & - !$OMP PRIVATE(i,j,k) & - !$OMP DEFAULT(NONE) - !$OMP DO collapse(2) do k = 1, nO do j = 1, nO do i = 1, nO - !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & V(i,j,k) = W(i,j,k) & + X_oovv(j,k,b,c) * T_ov(i,a) & + X_oovv(i,k,a,c) * T_ov(j,b) & @@ -303,8 +289,6 @@ implicit none enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL end From 2ff4e61c9e283890d5c1819c034b788487f08405 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 21:48:04 +0200 Subject: [PATCH 06/79] Better parallelism in (T) --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 61 ++++++++++++++--------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index c5c15fb3..8b6db915 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -14,19 +14,17 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: V(:,:,:,:,:,:) double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) - double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) + double precision, allocatable :: W_bca(:,:,:) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d double precision :: e,ta,tb, delta, delta_abc - !allocate(W(nV,nV,nV,nO,nO,nO)) - !allocate(V(nV,nV,nV,nO,nO,nO)) - allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) - allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) + call set_multiple_levels_omp(.False.) + ! Temporary arrays !$OMP PARALLEL & !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & @@ -104,50 +102,48 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END PARALLEL - call wall_time(ta) energy = 0d0 + !$OMP PARALLEL & + !$OMP PRIVATE(a,b,c,W_abc,W_cab,W_bca,W_cba,V_abc) & + !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & + !$OMP DEFAULT(SHARED) + allocate(W_abc(nO,nO,nO), W_cab(nO,nO,nO), V_abc(nO,nO,nO), & + W_bca(nO,nO,nO), W_cba(nO,nO,nO) ) + !$OMP DO do c = 1, nV do b = 1, nV do a = 1, nV + e = 0d0 delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) + call form_w_abc(nO,nV,c,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_cba) call form_w_abc(nO,nV,b,c,a,T_voov,T_oovv,X_vovv,X_ooov,W_bca) call form_w_abc(nO,nV,c,a,b,T_voov,T_oovv,X_vovv,X_ooov,W_cab) - call form_w_abc(nO,nV,c,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_cba) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc) - call form_v_abc(nO,nV,c,b,a,t1,X_oovv,W_cba,V_cba) - !$OMP PARALLEL & - !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& - !$OMP PRIVATE(i,j,k,e,delta) & - !$OMP DEFAULT(NONE) - e = 0d0 - !$OMP DO + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba) do i = 1, nO do j = 1, nO do k = 1, nO delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& - * (V_abc(i,j,k) - V_cba(i,j,k)) * delta + * V_abc(i,j,k) * delta enddo enddo enddo - !$OMP END DO NOWAIT !$OMP CRITICAL energy = energy + e !$OMP END CRITICAL - !$OMP END PARALLEL enddo enddo - call wall_time(tb) - write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' enddo + !$OMP END DO - energy = energy / 3d0 + deallocate(W_abc,V_abc,W_cab,W_bca,W_cba) + !$OMP END PARALLEL - deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vovv,X_ooov,T_voov,T_oovv) - !deallocate(V,W) + energy = energy / 3.d0 + + deallocate(X_vovv,X_ooov,T_voov,T_oovv) end @@ -233,7 +229,7 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) call dgemm('T','N', nO, nO*nO, nO, -1.d0, & T_oovv(1,1,a,c), nO, X(1,1,1,1), nO, 1.d0, W_abc, nO) -! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k +! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k call dgemm('T','N', nO*nO, nO, nO, -1.d0, & X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) @@ -261,31 +257,34 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) enddo enddo + deallocate(X,W_ikj) end ! V_abc -subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W,V) +subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba) implicit none integer, intent(in) :: nO,nV,a,b,c - !double precision, intent(in) :: t1(nO,nV) double precision, intent(in) :: T_ov(nO,nV) double precision, intent(in) :: X_oovv(nO,nO,nV,nV) - double precision, intent(in) :: W(nO,nO,nO) - double precision, intent(out) :: V(nO,nO,nO) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cba(nO,nO,nO) + double precision, intent(out) :: V_abc(nO,nO,nO) integer :: i,j,k do k = 1, nO do j = 1, nO do i = 1, nO - V(i,j,k) = W(i,j,k) & + V_abc(i,j,k) = W_abc(i,j,k) - W_cba(i,j,k) & + X_oovv(j,k,b,c) * T_ov(i,a) & + X_oovv(i,k,a,c) * T_ov(j,b) & - + X_oovv(i,j,a,b) * T_ov(k,c) + + X_oovv(i,j,a,b) * T_ov(k,c) & + - X_oovv(j,k,b,a) * T_ov(i,c) & + - X_oovv(i,k,c,a) * T_ov(j,b) & + - X_oovv(i,j,c,b) * T_ov(k,a) enddo enddo enddo From c18bea7e817af0142e2fd76577c9f7d90a39e533 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 22:23:08 +0200 Subject: [PATCH 07/79] Merged 4 calls --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 109 ++++++++++++++++------------ 1 file changed, 64 insertions(+), 45 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 8b6db915..7f334a37 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -115,11 +115,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do a = 1, nV e = 0d0 delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) - call form_w_abc(nO,nV,c,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_cba) - call form_w_abc(nO,nV,b,c,a,T_voov,T_oovv,X_vovv,X_ooov,W_bca) - call form_w_abc(nO,nV,c,a,b,T_voov,T_oovv,X_vovv,X_ooov,W_cab) - + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba) do i = 1, nO do j = 1, nO @@ -147,112 +143,135 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) end -subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) +subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab) implicit none integer, intent(in) :: nO,nV,a,b,c - !double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision, intent(out) :: W_abc(nO,nO,nO) + double precision, intent(out) :: W_cba(nO,nO,nO) + double precision, intent(out) :: W_bca(nO,nO,nO) + double precision, intent(out) :: W_cab(nO,nO,nO) integer :: l,i,j,k,d - double precision, allocatable, dimension(:,:,:) :: W_ikj + double precision, allocatable, dimension(:,:,:,:) :: W_ikj double precision, allocatable :: X(:,:,:,:) - allocate(W_ikj(nO,nO,nO)) - allocate(X(nV,nO,nO,2)) + allocate(W_ikj(nO,nO,nO,4)) + allocate(X(nV,nO,nO,3)) do k=1,nO do i=1,nO do d=1,nV X(d,i,k,1) = T_voov(d,k,i,c) -! X(d,i,j,2) = T_voov(d,j,i,b) X(d,i,k,2) = T_voov(d,k,i,b) -! X(d,j,k,1) = T_voov(d,k,j,c) + X(d,i,k,3) = T_voov(d,k,i,a) enddo enddo enddo ! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk - call dgemm('T','N', nO, nO*nO, nV, 1.d0, & - X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO) ! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k - call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_cab, nO*nO) + ! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji - call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO) ! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj - call dgemm('T','N', nO, nO*nO, nV, 1.d0, & - X_vovv(1,1,b,a), nV, X(1,1,1,1), nV, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,1), nV, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,3), nV, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,3), nV, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, X(1,1,1,2), nV, 1.d0, W_cab, nO) ! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j - call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - X(1,1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,4), nO*nO) ! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j - call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,4), nO*nO) deallocate(X) - allocate(X(nO,nO,nO,2)) + allocate(X(nO,nO,nO,3)) do k=1,nO do j=1,nO do l=1,nO X(l,j,k,1) = X_ooov(l,k,j,b) -! X(l,i,j,2) = X_ooov(l,j,i,a) X(l,j,k,2) = X_ooov(l,k,j,a) -! X(l,i,k,2) = X_ooov(l,k,i,a) + X(l,j,k,3) = X_ooov(l,k,j,c) enddo enddo enddo ! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk - call dgemm('T','N', nO, nO*nO, nO, -1.d0, & - T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X_ooov(1,1,1,a), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X_ooov(1,1,1,a), nO, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X_ooov(1,1,1,b), nO, 1.d0, W_cab, nO) ! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj - - call dgemm('T','N', nO, nO*nO, nO, -1.d0, & - T_oovv(1,1,a,c), nO, X(1,1,1,1), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,1), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,1), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X(1,1,1,3), nO, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,2), nO, 1.d0, W_cab, nO) ! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k - - call dgemm('T','N', nO*nO, nO, nO, -1.d0, & - X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_cab, nO*nO) ! - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k - - call dgemm('T','N', nO*nO, nO, nO, -1.d0, & - X(1,1,1,2), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_cab, nO*nO) ! - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j - - call dgemm('T','N', nO*nO, nO, nO, -1.d0, & - X(1,1,1,2), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) ! - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j - - call dgemm('T','N', nO*nO, nO, nO, -1.d0, & - X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) do k=1,nO do j=1,nO do i=1,nO - W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j) + W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j,1) + W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,2) + W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,3) + W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,4) enddo enddo enddo From cad1da1768b7ab3d9a93b6d6439a0bb414fb8ab7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 23:29:58 +0200 Subject: [PATCH 08/79] All permutations in ccsd_t_space_orb_abc.irp.f --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 168 +++++++++++++++++++--------- 1 file changed, 114 insertions(+), 54 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 7f334a37..65a04549 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -12,9 +12,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: W(:,:,:,:,:,:) double precision, allocatable :: V(:,:,:,:,:,:) - double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) - double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) - double precision, allocatable :: W_bca(:,:,:) + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d @@ -103,26 +104,30 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END PARALLEL energy = 0d0 - !$OMP PARALLEL & - !$OMP PRIVATE(a,b,c,W_abc,W_cab,W_bca,W_cba,V_abc) & - !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & - !$OMP DEFAULT(SHARED) - allocate(W_abc(nO,nO,nO), W_cab(nO,nO,nO), V_abc(nO,nO,nO), & - W_bca(nO,nO,nO), W_cba(nO,nO,nO) ) + !$OMP PARALLEL & + !$OMP PRIVATE(a,b,c) & + !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & + !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & + !$OMP DEFAULT(SHARED) + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) !$OMP DO do c = 1, nV do b = 1, nV do a = 1, nV e = 0d0 delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba) + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) do i = 1, nO do j = 1, nO do k = 1, nO delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& - * V_abc(i,j,k) * delta + * (V_abc(i,j,k) - V_cba(i,j,k)) * delta enddo enddo enddo @@ -134,7 +139,9 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO - deallocate(W_abc,V_abc,W_cab,W_bca,W_cba) + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + !$OMP END PARALLEL energy = energy / 3.d0 @@ -143,7 +150,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) end -subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab) +subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) implicit none @@ -154,20 +161,22 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca, double precision, intent(out) :: W_cba(nO,nO,nO) double precision, intent(out) :: W_bca(nO,nO,nO) double precision, intent(out) :: W_cab(nO,nO,nO) + double precision, intent(out) :: W_bac(nO,nO,nO) + double precision, intent(out) :: W_acb(nO,nO,nO) integer :: l,i,j,k,d double precision, allocatable, dimension(:,:,:,:) :: W_ikj double precision, allocatable :: X(:,:,:,:) - allocate(W_ikj(nO,nO,nO,4)) + allocate(W_ikj(nO,nO,nO,6)) allocate(X(nV,nO,nO,3)) do k=1,nO do i=1,nO do d=1,nV - X(d,i,k,1) = T_voov(d,k,i,c) + X(d,i,k,1) = T_voov(d,k,i,a) X(d,i,k,2) = T_voov(d,k,i,b) - X(d,i,k,3) = T_voov(d,k,i,a) + X(d,i,k,3) = T_voov(d,k,i,c) enddo enddo enddo @@ -175,44 +184,56 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca, ! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,c), nV, 0.d0, W_acb, nO) ! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_bac, nO*nO) call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_cba, nO*nO) call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_bca, nO*nO) call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_acb, nO*nO) ! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 1.d0, W_bac, nO*nO) call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 1.d0, W_cba, nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 1.d0, W_acb, nO*nO) ! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj - call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,1), nV, 1.d0, W_abc, nO) - call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,3), nV, 1.d0, W_cba, nO) - call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,3), nV, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,3), nV, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, X(1,1,1,3), nV, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,1), nV, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,1), nV, 1.d0, W_bca, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, X(1,1,1,2), nV, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, X(1,1,1,2), nV, 1.d0, W_acb, nO) ! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,2), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,3), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 0.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 0.d0, W_ikj(1,1,1,6), nO*nO) ! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj(1,1,1,1), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,2), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,3), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_ikj(1,1,1,6), nO*nO) deallocate(X) @@ -221,8 +242,8 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca, do k=1,nO do j=1,nO do l=1,nO - X(l,j,k,1) = X_ooov(l,k,j,b) - X(l,j,k,2) = X_ooov(l,k,j,a) + X(l,j,k,1) = X_ooov(l,k,j,a) + X(l,j,k,2) = X_ooov(l,k,j,b) X(l,j,k,3) = X_ooov(l,k,j,c) enddo enddo @@ -231,47 +252,61 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca, ! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X_ooov(1,1,1,c), nO, 1.d0, W_bac, nO) call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X_ooov(1,1,1,a), nO, 1.d0, W_cba, nO) call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X_ooov(1,1,1,a), nO, 1.d0, W_bca, nO) call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X_ooov(1,1,1,b), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X_ooov(1,1,1,b), nO, 1.d0, W_acb, nO) ! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj - call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,1), nO, 1.d0, W_abc, nO) - call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,1), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,2), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X(1,1,1,1), nO, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,2), nO, 1.d0, W_cba, nO) call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X(1,1,1,3), nO, 1.d0, W_bca, nO) - call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,2), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,1), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X(1,1,1,3), nO, 1.d0, W_acb, nO) ! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_bac, nO*nO) call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_cba, nO*nO) call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_bca, nO*nO) call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_acb, nO*nO) ! - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_bac, nO*nO) call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_cba, nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO) call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_acb, nO*nO) ! - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,6), nO*nO) ! - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,6), nO*nO) do k=1,nO do j=1,nO do i=1,nO W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j,1) - W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,2) - W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,3) - W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,4) + W_bac(i,j,k) = W_bac(i,j,k) + W_ikj(i,k,j,2) + W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,3) + W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,4) + W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,5) + W_acb(i,j,k) = W_acb(i,j,k) + W_ikj(i,k,j,6) enddo enddo enddo @@ -282,28 +317,53 @@ end ! V_abc -subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba) +subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) implicit none integer, intent(in) :: nO,nV,a,b,c double precision, intent(in) :: T_ov(nO,nV) double precision, intent(in) :: X_oovv(nO,nO,nV,nV) - double precision, intent(in) :: W_abc(nO,nO,nO), W_cba(nO,nO,nO) - double precision, intent(out) :: V_abc(nO,nO,nO) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(out) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(out) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) integer :: i,j,k do k = 1, nO do j = 1, nO do i = 1, nO - V_abc(i,j,k) = W_abc(i,j,k) - W_cba(i,j,k) & + V_abc(i,j,k) = W_abc(i,j,k) & + X_oovv(j,k,b,c) * T_ov(i,a) & + X_oovv(i,k,a,c) * T_ov(j,b) & - + X_oovv(i,j,a,b) * T_ov(k,c) & - - X_oovv(j,k,b,a) * T_ov(i,c) & - - X_oovv(i,k,c,a) * T_ov(j,b) & - - X_oovv(i,j,c,b) * T_ov(k,a) + + X_oovv(i,j,a,b) * T_ov(k,c) + + V_cba(i,j,k) = W_cba(i,j,k) & + + X_oovv(j,k,b,a) * T_ov(i,c) & + + X_oovv(i,k,c,a) * T_ov(j,b) & + + X_oovv(i,j,c,b) * T_ov(k,a) + + V_bca(i,j,k) = W_bca(i,j,k) & + + X_oovv(j,k,c,a) * T_ov(i,b) & + + X_oovv(i,k,b,a) * T_ov(j,c) & + + X_oovv(i,j,b,c) * T_ov(k,a) + + V_cab(i,j,k) = W_cab(i,j,k) & + + X_oovv(j,k,a,b) * T_ov(i,c) & + + X_oovv(i,k,c,b) * T_ov(j,a) & + + X_oovv(i,j,c,a) * T_ov(k,b) + + V_bac(i,j,k) = W_bac(i,j,k) & + + X_oovv(j,k,a,c) * T_ov(i,b) & + + X_oovv(i,k,b,c) * T_ov(j,a) & + + X_oovv(i,j,b,a) * T_ov(k,c) + + V_acb(i,j,k) = W_acb(i,j,k) & + + X_oovv(j,k,c,b) * T_ov(i,a) & + + X_oovv(i,k,a,b) * T_ov(j,c) & + + X_oovv(i,j,a,c) * T_ov(k,b) + enddo enddo enddo From d4ba229e6fdb6d567dd0c0258cb14aa14fa6524d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 14 May 2023 02:13:55 +0200 Subject: [PATCH 09/79] Symmetries in (T) --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 51 +++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 65a04549..a2e4ec7b 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -19,7 +19,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb, delta, delta_abc + double precision :: e,ta,tb, delta, delta_abc, x1, x2, x3 allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) @@ -105,7 +105,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) energy = 0d0 !$OMP PARALLEL & - !$OMP PRIVATE(a,b,c) & + !$OMP PRIVATE(a,b,c,x1) & !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & @@ -114,30 +114,55 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + e = 0d0 !$OMP DO - do c = 1, nV - do b = 1, nV - do a = 1, nV - e = 0d0 + do a = 1, nV + do b = 1, a-1 + do c = 1, b-1 delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) do i = 1, nO do j = 1, nO do k = 1, nO - delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& - * (V_abc(i,j,k) - V_cba(i,j,k)) * delta + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) + & + (4d0 * W_bca(i,j,k) + W_cab(i,j,k) + W_abc(i,j,k)) * (V_bca(i,j,k) - V_acb(i,j,k)) + & + (4d0 * W_cba(i,j,k) + W_bac(i,j,k) + W_acb(i,j,k)) * (V_cba(i,j,k) - V_abc(i,j,k)) + & + (4d0 * W_cab(i,j,k) + W_abc(i,j,k) + W_bca(i,j,k)) * (V_cab(i,j,k) - V_bac(i,j,k)) + & + 0.d0) enddo enddo enddo - !$OMP CRITICAL - energy = energy + e - !$OMP END CRITICAL + enddo + enddo + + c = a + do b = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) + & + 0.d0) + enddo + enddo enddo enddo enddo !$OMP END DO + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) @@ -184,7 +209,7 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca, ! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) - call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO) From 2e54537f1547861586c3c078e8ce5b3e1a9df652 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 14 May 2023 02:41:34 +0200 Subject: [PATCH 10/79] v3 of (T) is fast! --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index a2e4ec7b..462d4adf 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -115,7 +115,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) e = 0d0 - !$OMP DO + !$OMP DO SCHEDULE(dynamic) do a = 1, nV do b = 1, a-1 do c = 1, b-1 @@ -142,6 +142,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) c = a do b = 1, nV + if (b == c) cycle delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) @@ -159,7 +160,8 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo enddo - !$OMP END DO + !$OMP END DO NOWAIT + !$OMP CRITICAL energy = energy + e !$OMP END CRITICAL From df07c65980affa277b304a17d35f1636f598171a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 14 May 2023 10:07:50 +0200 Subject: [PATCH 11/79] Fixed trexio installation --- configure | 2 -- 1 file changed, 2 deletions(-) diff --git a/configure b/configure index 66bc9419..48e6fd12 100755 --- a/configure +++ b/configure @@ -215,7 +215,6 @@ EOF cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} --without-hdf5 make -j 8 && make -j 8 check && make -j 8 install - cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz mv ninja "\${QP_ROOT}"/bin/ EOF @@ -229,7 +228,6 @@ EOF cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} make -j 8 && make -j 8 check && make -j 8 install - cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files EOF From 873d978348018e6e9774444c3532ffb45d323fb2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 15 May 2023 13:06:06 +0200 Subject: [PATCH 12/79] Less multiplications in (T) --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 55 +++++++++++++++-------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 462d4adf..7c0ed929 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -36,10 +36,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO collapse(3) - do i = 1, nO - do a = 1, nV - do b = 1, nV + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO do d = 1, nV X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) enddo @@ -48,10 +48,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do j = 1, nO - do k = 1, nO - do c = 1, nV + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO do d = 1, nV T_voov(d,k,j,c) = t2(k,j,c,d) enddo @@ -63,7 +63,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO collapse(3) + !$OMP DO do c = 1, nV do k = 1, nO do j = 1, nO @@ -75,10 +75,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do i = 1, nO + !$OMP DO + do a = 1, nV do b = 1, nV - do a = 1, nV + do i = 1, nO do l = 1, nO T_oovv(l,i,a,b) = t2(i,l,a,b) enddo @@ -89,7 +89,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !X_oovv(j,k,b,c) * T1_vo(a,i) & - !$OMP DO collapse(3) + !$OMP DO do c = 1, nV do b = 1, nV do j = 1, nO @@ -122,18 +122,20 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do i = 1, nO + do k = 1, nO do j = 1, nO - do k = 1, nO + do i = 1, nO delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) e = e + delta * ( & - (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & - (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) + & - (4d0 * W_bca(i,j,k) + W_cab(i,j,k) + W_abc(i,j,k)) * (V_bca(i,j,k) - V_acb(i,j,k)) + & - (4d0 * W_cba(i,j,k) + W_bac(i,j,k) + W_acb(i,j,k)) * (V_cba(i,j,k) - V_abc(i,j,k)) + & - (4d0 * W_cab(i,j,k) + W_abc(i,j,k) + W_bca(i,j,k)) * (V_cab(i,j,k) - V_bac(i,j,k)) + & - 0.d0) + (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & + W_bca(i,j,k) - W_bac(i,j,k) + & + W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & + W_cba(i,j,k) - W_cab(i,j,k) + & + W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & + W_acb(i,j,k) - W_abc(i,j,k) + & + W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) enddo enddo enddo @@ -146,15 +148,14 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do i = 1, nO + do k = 1, nO do j = 1, nO - do k = 1, nO - delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + do i = 1, nO + delta = 1.0d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) e = e + delta * ( & (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) + & - 0.d0) + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) enddo enddo enddo From 738140547974f4e1ec9cac4cb25fa24edc963cc1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 15 May 2023 19:37:34 +0200 Subject: [PATCH 13/79] Removed collapse in ccsd --- src/ccsd/ccsd_space_orb_sub.irp.f | 150 ++++++++++++++-------------- src/ccsd/ccsd_t_space_orb_abc.irp.f | 36 +++---- src/utils_cc/update_t.irp.f | 4 +- 3 files changed, 93 insertions(+), 97 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index acd14034..75752f5c 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -109,7 +109,7 @@ subroutine run_ccsd_space_orb call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) else - print*,'Unkonw cc_method_method: '//cc_update_method + print*,'Unkown cc_method_method: '//cc_update_method endif call update_tau_space(nO,nV,t1,t2,tau) @@ -211,8 +211,8 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) !$omp default(none) e = 0d0 !$omp do - do i = 1, nO - do a = 1, nV + do a = 1, nV + do i = 1, nO e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) enddo enddo @@ -255,7 +255,7 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) !$OMP SHARED(nO,nV,tau,t2,t1) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO @@ -373,7 +373,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,X_voov,t2,t1) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do i = 1, nO @@ -412,7 +412,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nv @@ -452,7 +452,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV @@ -464,11 +464,11 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do nowait - !$omp do collapse(3) - do i = 1, nO - do b = 1, nV - do a = 1, nV - do u = 1, nO + !$omp do + do u = 1, nO + do i = 1, nO + do b = 1, nV + do a = 1, nV T_vvoo(a,b,i,u) = tau(i,u,a,b) enddo enddo @@ -504,8 +504,8 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & !$omp private(u,a,i,j) & !$omp default(none) - !$omp do collapse(3) do u = 1, nO + !$omp do do a = 1, nV do j = 1, nO do i = 1, nO @@ -513,8 +513,8 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('T','N', nO, nV, nO*nO*nV, & @@ -527,9 +527,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) max_r1 = 0d0 do a = 1, nV do i = 1, nO - if (dabs(r1(i,a)) > max_r1) then - max_r1 = dabs(r1(i,a)) - endif + max_r1 = max(dabs(r1(i,a)), max_r1) enddo enddo @@ -657,7 +655,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) - !$omp do collapse(3) + !$omp do do beta = 1, nV do j = 1, nO do i = 1, nO @@ -727,7 +725,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -787,7 +785,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,cc_space_v_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -863,7 +861,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,t2,X_oovv) & !$omp private(u,v,gam,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do a = 1, nV do gam = 1, nV do v = 1, nO @@ -885,7 +883,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -921,7 +919,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -957,7 +955,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & !$omp private(u,a,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do u = 1, nO @@ -979,7 +977,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1014,8 +1012,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & !$omp private(u,v,gam,i) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do u = 1, nO do a = 1, nV @@ -1023,8 +1021,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','N',nV*nO*nV,nV,nO, & @@ -1041,7 +1039,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1079,7 +1077,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1116,8 +1114,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & !$omp private(a,v,gam,i) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1125,8 +1123,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','N',nO,nO*nV*nO,nV, & @@ -1143,7 +1141,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1182,19 +1180,19 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do a = 1, nV do beta = 1, nV do u = 1, nO - X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) + X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) + !$omp do do gam = 1, nV do v = 1, nO do i = 1, nO @@ -1216,7 +1214,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1252,7 +1250,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nV @@ -1264,7 +1262,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1286,7 +1284,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1319,7 +1317,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do a = 1, nV do i = 1, nO do gam = 1, nV @@ -1331,7 +1329,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do beta = 1, nV do v = 1, nO do a = 1, nV @@ -1353,7 +1351,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1373,7 +1371,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2) & !$omp private(i,j,a,b) & !$omp default(none) - !$omp do collapse(3) + !$omp do do b = 1, nV do a = 1, nV do j = 1, nO @@ -1391,9 +1389,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do a = 1, nV do j = 1, nO do i = 1, nO - if (dabs(r2(i,j,a,b)) > max_r2) then - max_r2 = dabs(r2(i,j,a,b)) - endif + max_r2 = max(r2(i,j,a,b), max_r2) enddo enddo enddo @@ -1448,7 +1444,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & !$omp private(u,v,i,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO @@ -1462,7 +1458,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do u = 1, nO @@ -1484,7 +1480,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) !$omp shared(nO,nV,A1,Y_oooo) & !$omp private(u,v,i,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO @@ -1553,7 +1549,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1564,8 +1560,8 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo enddo !$omp end do nowait - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do b = 1, nV do a = 1, nV @@ -1573,8 +1569,8 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & @@ -1594,7 +1590,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp shared(nV,B1,Y_vvvv) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1658,7 +1654,7 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo !$omp end do - !$omp do collapse(1) + !$omp do do i = 1, nO do j = 1, nO do a = 1, nV @@ -1720,7 +1716,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do - !$omp do collapse(1) + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV @@ -1788,8 +1784,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & !$omp private(i,j,a,u,beta) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1797,10 +1793,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do a = 1, nV @@ -1822,8 +1818,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Y_ovov) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1831,8 +1827,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel deallocate(X_ovoo) @@ -1849,7 +1845,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & !$omp private(i,beta,a,u,b,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do beta = 1, nV @@ -1861,7 +1857,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -1886,8 +1882,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1895,12 +1891,12 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do i = 1, nO do a = 1, nV @@ -1908,11 +1904,11 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -1920,8 +1916,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','T',nO*nV,nV*nO,nV*nO, & @@ -1933,8 +1929,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Z_ovvo) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1942,8 +1938,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel deallocate(X_ovvo,Z_ovvo,Y_ovov) @@ -2003,7 +1999,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV @@ -2015,8 +2011,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo !$omp end do nowait - !$omp do collapse(3) do i = 1, nO + !$omp do do a = 1, nV do j = 1, nO do b = 1, nV @@ -2024,11 +2020,11 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -2036,8 +2032,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo enddo enddo + !$omp end do enddo - !$omp end do !$omp end parallel call dgemm('N','N',nO*nV*nO,nV,nO, & @@ -2060,7 +2056,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp shared(nO,nV,K1,Z) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 462d4adf..5cf27568 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -36,10 +36,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO collapse(3) - do i = 1, nO - do a = 1, nV - do b = 1, nV + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO do d = 1, nV X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) enddo @@ -48,10 +48,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do j = 1, nO - do k = 1, nO - do c = 1, nV + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO do d = 1, nV T_voov(d,k,j,c) = t2(k,j,c,d) enddo @@ -63,7 +63,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO collapse(3) + !$OMP DO do c = 1, nV do k = 1, nO do j = 1, nO @@ -75,10 +75,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do i = 1, nO - do b = 1, nV - do a = 1, nV + !$OMP DO + do b = 1, nV + do a = 1, nV + do i = 1, nO do l = 1, nO T_oovv(l,i,a,b) = t2(i,l,a,b) enddo @@ -89,11 +89,11 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !X_oovv(j,k,b,c) * T1_vo(a,i) & - !$OMP DO collapse(3) + !$OMP DO do c = 1, nV do b = 1, nV - do j = 1, nO - do k = 1, nO + do k = 1, nO + do j = 1, nO X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) enddo enddo @@ -117,8 +117,8 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) e = 0d0 !$OMP DO SCHEDULE(dynamic) do a = 1, nV - do b = 1, a-1 - do c = 1, b-1 + do b = a+1, nV + do c = b+1, nV delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) diff --git a/src/utils_cc/update_t.irp.f b/src/utils_cc/update_t.irp.f index dbd4f4bd..0cf8626c 100644 --- a/src/utils_cc/update_t.irp.f +++ b/src/utils_cc/update_t.irp.f @@ -22,7 +22,7 @@ subroutine update_t1(nO,nV,f_o,f_v,r1,t1) !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) & !$OMP PRIVATE(i,a) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(1) + !$OMP DO do a = 1, nV do i = 1, nO t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) @@ -57,7 +57,7 @@ subroutine update_t2(nO,nV,f_o,f_v,r2,t2) !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO From 5b427641a66047513227fc1ed9912f8784a17630 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 15 May 2023 19:46:06 +0200 Subject: [PATCH 14/79] Inlined multiply_poly --- src/ao_two_e_ints/two_e_integrals.irp.f | 232 +++++++++++++++++++++--- src/utils/integration.irp.f | 129 +++++++++++-- 2 files changed, 317 insertions(+), 44 deletions(-) diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 83fbadfd..4c3c6190 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -563,8 +563,20 @@ double precision function general_primitive_integral(dim, & d_poly(i)=0.d0 enddo - !DIR$ FORCEINLINE - call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) +! call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) + integer :: ib, ic + if (ior(n_Ix,n_Iy) >= 0) then + do ib=0,n_Ix + do ic = 0,n_Iy + d_poly(ib+ic) = d_poly(ib+ic) + Iy_pol(ic) * Ix_pol(ib) + enddo + enddo + + do n_pt_tmp = n_Ix+n_Iy, 0, -1 + if (d_poly(n_pt_tmp) /= 0.d0) exit + enddo + endif + if (n_pt_tmp == -1) then return endif @@ -573,8 +585,21 @@ double precision function general_primitive_integral(dim, & d1(i)=0.d0 enddo - !DIR$ FORCEINLINE - call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) +! call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) + if (ior(n_pt_tmp,n_Iz) >= 0) then + ! Bottleneck here + do ib=0,n_pt_tmp + do ic = 0,n_Iz + d1(ib+ic) = d1(ib+ic) + Iz_pol(ic) * d_poly(ib) + enddo + enddo + + do n_pt_out = n_pt_tmp+n_Iz, 0, -1 + if (d1(n_pt_out) /= 0.d0) exit + enddo + endif + + double precision :: rint_sum accu = accu + rint_sum(n_pt_out,const,d1) @@ -921,8 +946,20 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= dble(a-1) enddo - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_10,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_10,2,d,nd) + if (nx >= 0) then + integer :: ib + do ib=0,nx + d(ib ) = d(ib ) + B_10(0) * X(ib) + d(ib+1) = d(ib+1) + B_10(1) * X(ib) + d(ib+2) = d(ib+2) + B_10(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif nx = nd !DIR$ LOOP COUNT(8) @@ -943,8 +980,19 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= c enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + if (nx >= 0) then + do ib=0,nx + d(ib ) = d(ib ) + B_00(0) * X(ib) + d(ib+1) = d(ib+1) + B_00(1) * X(ib) + d(ib+2) = d(ib+2) + B_00(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif endif ny=0 @@ -961,9 +1009,19 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt call I_x1_pol_mult_recurs(a-1,c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) endif - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + C_00(0) * Y(ib) + d(ib+1) = d(ib+1) + C_00(1) * Y(ib) + d(ib+2) = d(ib+2) + C_00(2) * Y(ib) + enddo + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1001,8 +1059,20 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + if (nx >= 0) then + integer :: ib + do ib=0,nx + d(ib ) = d(ib ) + B_00(0) * X(ib) + d(ib+1) = d(ib+1) + B_00(1) * X(ib) + d(ib+2) = d(ib+2) + B_00(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif ny=0 @@ -1012,8 +1082,19 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + C_00(0) * Y(ib) + d(ib+1) = d(ib+1) + C_00(1) * Y(ib) + d(ib+2) = d(ib+2) + C_00(2) * Y(ib) + enddo + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end @@ -1040,8 +1121,20 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) nx = 0 call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_10,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_10,2,d,nd) + if (nx >= 0) then + integer :: ib + do ib=0,nx + d(ib ) = d(ib ) + B_10(0) * X(ib) + d(ib+1) = d(ib+1) + B_10(1) * X(ib) + d(ib+2) = d(ib+2) + B_10(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif nx = nd !DIR$ LOOP COUNT(8) @@ -1059,8 +1152,19 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + if (nx >= 0) then + do ib=0,nx + d(ib ) = d(ib ) + B_00(0) * X(ib) + d(ib+1) = d(ib+1) + B_00(1) * X(ib) + d(ib+2) = d(ib+2) + B_00(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif ny=0 !DIR$ LOOP COUNT(8) @@ -1070,9 +1174,19 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) !DIR$ FORCEINLINE call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + C_00(0) * Y(ib) + d(ib+1) = d(ib+1) + C_00(1) * Y(ib) + d(ib+2) = d(ib+2) + C_00(2) * Y(ib) + enddo + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1119,8 +1233,21 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(1) = D_00(1) Y(2) = D_00(2) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,D_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,D_00,2,d,nd) + if (ny >= 0) then + integer :: ib + do ib=0,ny + d(ib ) = d(ib ) + D_00(0) * Y(ib) + d(ib+1) = d(ib+1) + D_00(1) * Y(ib) + d(ib+2) = d(ib+2) + D_00(2) * Y(ib) + enddo + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif + return case default @@ -1137,8 +1264,19 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) X(ix) *= dble(c-1) enddo - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_01,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_01,2,d,nd) + if (nx >= 0) then + do ib=0,nx + d(ib ) = d(ib ) + B_01(0) * X(ib) + d(ib+1) = d(ib+1) + B_01(1) * X(ib) + d(ib+2) = d(ib+2) + B_01(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif ny = 0 !DIR$ LOOP COUNT(6) @@ -1147,8 +1285,19 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,D_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,D_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + D_00(0) * Y(ib) + d(ib+1) = d(ib+1) + D_00(1) * Y(ib) + d(ib+2) = d(ib+2) + D_00(2) * Y(ib) + enddo + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end select end @@ -1206,3 +1355,34 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) enddo end + + +subroutine multiply_poly_local(b,nb,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb, nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:nc) + double precision, intent(inout) :: d(0:nb+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 + + do ib=0,nb + do ic = 0,nc + d(ib+ic) = d(ib+ic) + c(ic) * b(ib) + enddo + enddo + + do nd = nb+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index 15d79622..c8a36775 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -428,6 +428,112 @@ end subroutine +subroutine multiply_poly_0c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:0), c(0:nc) + double precision, intent(inout) :: d(0:0+nc) + + integer :: ic + + do ic = 0,nc + d(ic) = d(ic) + c(ic) * b(0) + enddo + + do nd = nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + +subroutine multiply_poly_1c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:1), c(0:nc) + double precision, intent(inout) :: d(0:1+nc) + + integer :: ic, id + if(nc < 0) return + + do ic = 0,nc + d( ic) = d( ic) + c(ic) * b(0) + d(1+ic) = d(1+ic) + c(ic) * b(1) + enddo + + do nd = nc+1,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_2c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:2), c(0:nc) + double precision, intent(inout) :: d(0:2+nc) + + integer :: ic, id, k + if (nc <0) return + + do ic = 0,nc + d( ic) = d( ic) + c(ic) * b(0) + d(1+ic) = d(1+ic) + c(ic) * b(1) + d(2+ic) = d(2+ic) + c(ic) * b(2) + enddo + + do nd = nc+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + +subroutine multiply_poly_3c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:3), c(0:nc) + double precision, intent(inout) :: d(0:3+nc) + + integer :: ic, id + if (nc <0) return + + do ic = 1,nc + d( ic) = d(1+ic) + c(ic) * b(0) + d(1+ic) = d(1+ic) + c(ic) * b(1) + d(2+ic) = d(1+ic) + c(ic) * b(2) + d(3+ic) = d(1+ic) + c(ic) * b(3) + enddo + + do nd = nc+3,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + subroutine multiply_poly(b,nb,c,nc,d,nd) @@ -444,29 +550,16 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) integer :: ndtmp integer :: ib, ic, id, k - if(ior(nc,nb) >= 0) then ! True if nc>=0 and nb>=0 - continue - else - return - endif - ndtmp = nb+nc + if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 - do ic = 0,nc - d(ic) = d(ic) + c(ic) * b(0) - enddo - - do ib=1,nb - d(ib) = d(ib) + c(0) * b(ib) - do ic = 1,nc + do ib=0,nb + do ic = 0,nc d(ib+ic) = d(ib+ic) + c(ic) * b(ib) enddo enddo - do nd = ndtmp,0,-1 - if (d(nd) == 0.d0) then - cycle - endif - exit + do nd = nb+nc,0,-1 + if (d(nd) /= 0.d0) exit enddo end From e3c0df574ee9bed8f2de3c21dc4506fd34fc7b7b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 16 May 2023 01:40:40 +0200 Subject: [PATCH 15/79] Implementing stochastic (T) --- src/ccsd/ccsd_space_orb_sub.irp.f | 4 +- src/ccsd/ccsd_t_space_orb_abc.irp.f | 153 ++++++++---- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 320 ++++++++++++++++++++++++++ 3 files changed, 428 insertions(+), 49 deletions(-) create mode 100644 src/ccsd/ccsd_t_space_orb_stoch.irp.f diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 75752f5c..29ecca1c 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -169,7 +169,9 @@ subroutine run_ccsd_space_orb ! New print*,'Computing (T) correction...' call wall_time(ta) - call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & +! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & +! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) call wall_time(tb) print*,'Time: ',tb-ta, ' s' diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 70900738..294296bf 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -19,14 +19,13 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb, delta, delta_abc, x1, x2, x3 + double precision :: e,ta,tb + + call set_multiple_levels_omp(.False.) allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) - call set_multiple_levels_omp(.False.) - - ! Temporary arrays !$OMP PARALLEL & !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & @@ -36,7 +35,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO + !$OMP DO do a = 1, nV do b = 1, nV do i = 1, nO @@ -48,7 +47,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO + !$OMP DO do c = 1, nV do j = 1, nO do k = 1, nO @@ -63,7 +62,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO + !$OMP DO do c = 1, nV do k = 1, nO do j = 1, nO @@ -103,12 +102,13 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END PARALLEL - energy = 0d0 + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc + !$OMP PARALLEL & - !$OMP PRIVATE(a,b,c,x1) & + !$OMP PRIVATE(a,b,c,e) & !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & - !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & !$OMP DEFAULT(SHARED) allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & @@ -119,46 +119,18 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do a = 1, nV do b = a+1, nV do c = b+1, nV - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do k = 1, nO - do j = 1, nO - do i = 1, nO - delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - e = e + delta * ( & - (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & - W_bca(i,j,k) - W_bac(i,j,k) + & - W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) + & - (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & - W_cba(i,j,k) - W_cab(i,j,k) + & - W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & - W_acb(i,j,k) - W_abc(i,j,k) + & - W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) - enddo - enddo - enddo + e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) enddo - enddo - c = a - do b = 1, nV - if (b == c) cycle - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do k = 1, nO - do j = 1, nO - do i = 1, nO - delta = 1.0d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - e = e + delta * ( & - (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & - (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) - enddo - enddo - enddo + e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + + e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) enddo enddo !$OMP END DO NOWAIT @@ -178,6 +150,91 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) end +double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& + V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & + W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b,c + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + + double precision :: delta, delta_abc + integer :: i,j,k + + delta_abc = f_v(a) + f_v(b) + f_v(c) + e = 0.d0 + + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & + W_bca(i,j,k) - W_bac(i,j,k) + & + W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) +& + (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & + W_cba(i,j,k) - W_cab(i,j,k) + & + W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) +& + (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & + W_acb(i,j,k) - W_abc(i,j,k) + & + W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + enddo + enddo + enddo + +end + +double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& + V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & + W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + + double precision :: delta, delta_abc + integer :: i,j,k + + delta_abc = f_v(a) + f_v(b) + f_v(a) + e = 0.d0 + + call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + + enddo + enddo + enddo + +end + subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) implicit none diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f new file mode 100644 index 00000000..e8fae5cd --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -0,0 +1,320 @@ +! Main + +subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb + + call set_multiple_levels_omp(.False.) + + allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) + allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) + + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO + do d = 1, nV + X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO + do d = 1, nV + T_voov(d,k,j,c) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & + + !$OMP DO + do c = 1, nV + do k = 1, nO + do j = 1, nO + do l = 1, nO + X_ooov(l,j,k,c) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO + do b = 1, nV + do a = 1, nV + do i = 1, nO + do l = 1, nO + T_oovv(l,i,a,b) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !X_oovv(j,k,b,c) * T1_vo(a,i) & + + !$OMP DO + do c = 1, nV + do b = 1, nV + do k = 1, nO + do j = 1, nO + X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP END PARALLEL + + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc + + double precision, allocatable :: memo(:), Pabc(:), waccu(:) + logical , allocatable :: computed(:) + integer*2 , allocatable :: abc(:,:) + integer*8 :: Nabc, i8 + integer*8, allocatable :: iorder(:) + double precision :: eocc + double precision :: Pabc_norm, sum_w + + + ! Prepare table of triplets (a,b,c) + + Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV + allocate (memo(Nabc), computed(Nabc), Pabc(Nabc), waccu(0:Nabc)) + allocate (abc(4,Nabc), iorder(Nabc)) + +! eocc = 3.d0/dble(nO) * sum(f_o(1:nO)) + memo(:) = 0.d0 + computed(:) = .False. + Nabc = 0_8 + do a = 1, nV + do b = a+1, nV + do c = b+1, nV + Nabc = Nabc + 1_8 +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(c))*(f_v(a)*f_v(b)*f_v(c))**(1.d0/2.d0)) +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(c))**2) + Pabc(Nabc) = 1.d0/(f_v(a) + f_v(b) + f_v(c)) + abc(1,Nabc) = a + abc(2,Nabc) = b + abc(3,Nabc) = c + enddo + + Nabc = Nabc + 1_8 + abc(1,Nabc) = a + abc(2,Nabc) = b + abc(3,Nabc) = a +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(a))*(f_v(a)*f_v(b)*f_v(a))**(1.d0/2.d0)) +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(a))**2) + Pabc(Nabc) = 1.d0/(2.d0*f_v(a) + f_v(b)) + + Nabc = Nabc + 1_8 + abc(1,Nabc) = b + abc(2,Nabc) = a + abc(3,Nabc) = b +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(b))*(f_v(b)*f_v(a)*f_v(b))**(1.d0/2.d0)) +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(b))**2) + Pabc(Nabc) = 1.d0/(f_v(a) + 2.d0*f_v(b)) + enddo + enddo + + do i8=1,Nabc + iorder(i8) = i8 + enddo + + ! Sort triplets in decreasing Pabc + call dsort_big(Pabc, iorder, Nabc) + + ! Normalize + Pabc_norm = 0.d0 + do i8=Nabc,1,-1 + Pabc_norm = Pabc_norm + Pabc(i8) + enddo + Pabc_norm = 1.d0/Pabc_norm + do i8=Nabc,1,-1 + Pabc(i8) = Pabc(i8) * Pabc_norm + enddo + + call i8set_order_big(abc, iorder, Nabc) + + + ! Cumulative distribution for sampling + waccu(Nabc) = 0.d0 + sum_w = 0.d0 + do i8=Nabc-1,1,-1 + waccu(i8) = waccu(i8+1) - Pabc(i8) + enddo + waccu(:) = waccu(:) + 1.d0 + waccu(0) = 0.d0 + + Pabc(:) = 1.d0/Pabc(:) * (1.d0/3.d0) + + logical :: converged + double precision :: ET, ET2, eta, variance, average, error, sample + integer*8 :: isample, ieta, Ncomputed + integer*8, external :: find_sample + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + + converged = .False. + ET = 0.d0 + ET2 = 0.d0 + Ncomputed = 0_8 + isample = 0_8 + + average = 0.d0 + variance = 0.d0 + double precision :: t00, t01 + call wall_time(t00) +! do ieta=1,Nabc + do while (.not.converged) + call random_number(eta) +! eta = eta/dble(1000) +! do k=0,1000-1 +! ieta = find_sample(eta+dble(k)/dble(1000),waccu,Nabc) + ieta = find_sample(eta,waccu,Nabc) + isample = isample+1_8 + + if (.not.computed(ieta)) then + a = abc(1,ieta) + b = abc(2,ieta) + c = abc(3,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + endif + computed(ieta) = .True. + Ncomputed += 1_8 + call wall_time(t01) + if (t01-t00 > 1.d0) then + t00 = t01 + print *, average, dsqrt(variance/dble(isample)), real(Ncomputed)/real(Nabc), real(isample)/real(Nabc) + endif +! print *, memo(ieta), Pabc(ieta), memo(ieta) * Pabc(ieta) + endif + sample = memo(ieta) * Pabc(ieta) + ET = ET + sample + ET2 = ET2 + sample*sample + average = ET/dble(isample) + variance = ET2/dble(isample) - average*average + converged = (Ncomputed >= (Nabc*90_8)/100_8) .or. (isample>=1000*Nabc) +! enddo + enddo + print *, average, dsqrt(variance/dble(isample)), real(Ncomputed)/real(Nabc), real(isample)/real(Nabc) + energy = average + +! !$OMP PARALLEL & +! !$OMP PRIVATE(a,b,c,e) & +! !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & +! !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & +! !$OMP DEFAULT(SHARED) +! allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & +! W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & +! V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & +! V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) +! e = 0d0 +! !$OMP DO SCHEDULE(dynamic) +! do a = 1, nV +! do b = a+1, nV +! do c = b+1, nV +! e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & +! V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & +! W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) +! enddo +! enddo +! +! do b = 1, nV +! if (b == a) cycle +! e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & +! V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & +! W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) +! enddo +! enddo +! !$OMP END DO NOWAIT +! +! !$OMP CRITICAL +! energy = energy + e +! !$OMP END CRITICAL +! +! deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & +! V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) +! +! !$OMP END PARALLEL + + deallocate(X_vovv,X_ooov,T_voov,T_oovv) +end + + +integer*8 function find_sample(v, w, n) + implicit none + BEGIN_DOC +! Finds sample v in weights w + END_DOC + integer*8, intent(in) :: n + double precision, intent(in) :: v, w(0:n) + integer*8 :: i,l,r + + l=0 + r=n + + do while(r-l > 1) + i = shiftr(r+l,1) + if(w(i) < v) then + l = i + else + r = i + end if + end do + i = r + do r=i+1,n + if (w(r) /= w(i)) then + exit + endif + enddo + find_sample = r-1 +end function + From 134b6d016301d41ca78dfed2443118616d849ec2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 16 May 2023 01:43:32 +0200 Subject: [PATCH 16/79] Adding tasks --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 153 +++++++++++++++++++--------- 1 file changed, 105 insertions(+), 48 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 70900738..294296bf 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -19,14 +19,13 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb, delta, delta_abc, x1, x2, x3 + double precision :: e,ta,tb + + call set_multiple_levels_omp(.False.) allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) - call set_multiple_levels_omp(.False.) - - ! Temporary arrays !$OMP PARALLEL & !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & @@ -36,7 +35,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO + !$OMP DO do a = 1, nV do b = 1, nV do i = 1, nO @@ -48,7 +47,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO + !$OMP DO do c = 1, nV do j = 1, nO do k = 1, nO @@ -63,7 +62,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO + !$OMP DO do c = 1, nV do k = 1, nO do j = 1, nO @@ -103,12 +102,13 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END PARALLEL - energy = 0d0 + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc + !$OMP PARALLEL & - !$OMP PRIVATE(a,b,c,x1) & + !$OMP PRIVATE(a,b,c,e) & !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & - !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & !$OMP DEFAULT(SHARED) allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & @@ -119,46 +119,18 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do a = 1, nV do b = a+1, nV do c = b+1, nV - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do k = 1, nO - do j = 1, nO - do i = 1, nO - delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - e = e + delta * ( & - (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & - W_bca(i,j,k) - W_bac(i,j,k) + & - W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) + & - (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & - W_cba(i,j,k) - W_cab(i,j,k) + & - W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & - W_acb(i,j,k) - W_abc(i,j,k) + & - W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) - enddo - enddo - enddo + e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) enddo - enddo - c = a - do b = 1, nV - if (b == c) cycle - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do k = 1, nO - do j = 1, nO - do i = 1, nO - delta = 1.0d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - e = e + delta * ( & - (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & - (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) - enddo - enddo - enddo + e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + + e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) enddo enddo !$OMP END DO NOWAIT @@ -178,6 +150,91 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) end +double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& + V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & + W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b,c + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + + double precision :: delta, delta_abc + integer :: i,j,k + + delta_abc = f_v(a) + f_v(b) + f_v(c) + e = 0.d0 + + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & + W_bca(i,j,k) - W_bac(i,j,k) + & + W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) +& + (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & + W_cba(i,j,k) - W_cab(i,j,k) + & + W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) +& + (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & + W_acb(i,j,k) - W_abc(i,j,k) + & + W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + enddo + enddo + enddo + +end + +double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& + V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & + W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + + double precision :: delta, delta_abc + integer :: i,j,k + + delta_abc = f_v(a) + f_v(b) + f_v(a) + e = 0.d0 + + call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + + enddo + enddo + enddo + +end + subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) implicit none From de07f73ed9da98850002c459015ffc9e1868ed16 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 16 May 2023 18:32:15 +0200 Subject: [PATCH 17/79] Semi-stochastic (T) OK --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 101 ++++---- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 318 ++++++++++++++------------ 2 files changed, 224 insertions(+), 195 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 294296bf..1aab6bd7 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -10,12 +10,6 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) double precision, intent(out) :: energy - double precision, allocatable :: W(:,:,:,:,:,:) - double precision, allocatable :: V(:,:,:,:,:,:) - double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) - double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) - double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) - double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d @@ -105,32 +99,22 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, external :: ccsd_t_task_aba double precision, external :: ccsd_t_task_abc - !$OMP PARALLEL & - !$OMP PRIVATE(a,b,c,e) & - !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & - !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & - !$OMP DEFAULT(SHARED) - allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & - W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & - V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & - V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + !$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED) e = 0d0 !$OMP DO SCHEDULE(dynamic) do a = 1, nV do b = a+1, nV do c = b+1, nV - e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & - V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & - W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) enddo - e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & - V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & - W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + + e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) - e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov,V_abc, & - V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & - W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) enddo enddo !$OMP END DO NOWAIT @@ -139,9 +123,6 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) energy = energy + e !$OMP END CRITICAL - deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & - V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) - !$OMP END PARALLEL energy = energy / 3.d0 @@ -151,30 +132,34 @@ end double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& - V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & - W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) implicit none - integer, intent(in) :: nO,nV,a,b,c - double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) - double precision, intent(in) :: X_oovv(nO,nO,nV,nV) - double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) - double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) - double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) - double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) - double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) - double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + integer, intent(in) :: nO,nV,a,b,c + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision :: delta, delta_abc integer :: i,j,k - delta_abc = f_v(a) + f_v(b) + f_v(c) - e = 0.d0 + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + delta_abc = f_v(a) + f_v(b) + f_v(c) + e = 0.d0 + do k = 1, nO do j = 1, nO do i = 1, nO @@ -193,33 +178,40 @@ double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& enddo enddo + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + end double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& - V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & - W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) implicit none - integer, intent(in) :: nO,nV,a,b - double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) - double precision, intent(in) :: X_oovv(nO,nO,nV,nV) - double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) - double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) - double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) - double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) - double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) - double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + integer, intent(in) :: nO,nV,a,b + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision :: delta, delta_abc integer :: i,j,k - delta_abc = f_v(a) + f_v(b) + f_v(a) - e = 0.d0 + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + delta_abc = f_v(a) + f_v(b) + f_v(a) + e = 0.d0 + do k = 1, nO do j = 1, nO do i = 1, nO @@ -233,6 +225,9 @@ double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& enddo enddo + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + end subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index e8fae5cd..0081e9e7 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -1,5 +1,4 @@ ! Main - subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) implicit none @@ -10,12 +9,6 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) double precision, intent(out) :: energy - double precision, allocatable :: W(:,:,:,:,:,:) - double precision, allocatable :: V(:,:,:,:,:,:) - double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) - double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) - double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) - double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d @@ -104,33 +97,32 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ double precision, external :: ccsd_t_task_aba double precision, external :: ccsd_t_task_abc +! logical, external :: omp_test_lock double precision, allocatable :: memo(:), Pabc(:), waccu(:) - logical , allocatable :: computed(:) + integer*8, allocatable :: sampled(:) +! integer(omp_lock_kind), allocatable :: lock(:) integer*2 , allocatable :: abc(:,:) integer*8 :: Nabc, i8 integer*8, allocatable :: iorder(:) double precision :: eocc - double precision :: Pabc_norm, sum_w + double precision :: norm + integer :: kiter, isample ! Prepare table of triplets (a,b,c) Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV - allocate (memo(Nabc), computed(Nabc), Pabc(Nabc), waccu(0:Nabc)) - allocate (abc(4,Nabc), iorder(Nabc)) + allocate (memo(Nabc), sampled(Nabc), Pabc(Nabc), waccu(Nabc)) + allocate (abc(4,Nabc), iorder(Nabc)) !, lock(Nabc)) ! eocc = 3.d0/dble(nO) * sum(f_o(1:nO)) - memo(:) = 0.d0 - computed(:) = .False. Nabc = 0_8 do a = 1, nV do b = a+1, nV do c = b+1, nV Nabc = Nabc + 1_8 -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(c))*(f_v(a)*f_v(b)*f_v(c))**(1.d0/2.d0)) -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(c))**2) - Pabc(Nabc) = 1.d0/(f_v(a) + f_v(b) + f_v(c)) + Pabc(Nabc) = -1.d0/(f_v(a) + f_v(b) + f_v(c)) abc(1,Nabc) = a abc(2,Nabc) = b abc(3,Nabc) = c @@ -140,17 +132,13 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ abc(1,Nabc) = a abc(2,Nabc) = b abc(3,Nabc) = a -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(a))*(f_v(a)*f_v(b)*f_v(a))**(1.d0/2.d0)) -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(a))**2) - Pabc(Nabc) = 1.d0/(2.d0*f_v(a) + f_v(b)) + Pabc(Nabc) = -1.d0/(2.d0*f_v(a) + f_v(b)) Nabc = Nabc + 1_8 abc(1,Nabc) = b abc(2,Nabc) = a abc(3,Nabc) = b -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(b))*(f_v(b)*f_v(a)*f_v(b))**(1.d0/2.d0)) -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(b))**2) - Pabc(Nabc) = 1.d0/(f_v(a) + 2.d0*f_v(b)) + Pabc(Nabc) = -1.d0/(f_v(a) + 2.d0*f_v(b)) enddo enddo @@ -162,13 +150,13 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ call dsort_big(Pabc, iorder, Nabc) ! Normalize - Pabc_norm = 0.d0 + norm = 0.d0 do i8=Nabc,1,-1 - Pabc_norm = Pabc_norm + Pabc(i8) + norm = norm + Pabc(i8) enddo - Pabc_norm = 1.d0/Pabc_norm - do i8=Nabc,1,-1 - Pabc(i8) = Pabc(i8) * Pabc_norm + norm = 1.d0/norm + do i8=1,Nabc + Pabc(i8) = Pabc(i8) * norm enddo call i8set_order_big(abc, iorder, Nabc) @@ -176,145 +164,191 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ ! Cumulative distribution for sampling waccu(Nabc) = 0.d0 - sum_w = 0.d0 do i8=Nabc-1,1,-1 - waccu(i8) = waccu(i8+1) - Pabc(i8) + waccu(i8) = waccu(i8+1) - Pabc(i8+1) enddo waccu(:) = waccu(:) + 1.d0 - waccu(0) = 0.d0 - Pabc(:) = 1.d0/Pabc(:) * (1.d0/3.d0) + logical :: converged, do_comp + double precision :: eta, variance, error, sample + double precision :: t00, t01 + integer*8 :: ieta, Ncomputed + integer*8, external :: binary_search - logical :: converged - double precision :: ET, ET2, eta, variance, average, error, sample - integer*8 :: isample, ieta, Ncomputed - integer*8, external :: find_sample + integer :: nbuckets + nbuckets = 100 - allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & - W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & - V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & - V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + double precision, allocatable :: wsum(:) + allocate(wsum(nbuckets)) converged = .False. - ET = 0.d0 - ET2 = 0.d0 Ncomputed = 0_8 - isample = 0_8 - average = 0.d0 + energy = 0.d0 variance = 0.d0 - double precision :: t00, t01 - call wall_time(t00) -! do ieta=1,Nabc - do while (.not.converged) - call random_number(eta) -! eta = eta/dble(1000) -! do k=0,1000-1 -! ieta = find_sample(eta+dble(k)/dble(1000),waccu,Nabc) - ieta = find_sample(eta,waccu,Nabc) - isample = isample+1_8 + memo(:) = 0.d0 + sampled(:) = -1_8 - if (.not.computed(ieta)) then + integer*8 :: ileft, iright, imin + ileft = 1_8 + iright = Nabc + integer*8, allocatable :: bounds(:,:) + + allocate (bounds(2,nbuckets)) + do isample=1,nbuckets + eta = 1.d0/dble(nbuckets) * dble(isample) + ieta = binary_search(waccu,eta,Nabc,ileft,iright) + bounds(1,isample) = ileft + bounds(2,isample) = ieta + ileft = ieta+1 + wsum(isample) = sum( Pabc(bounds(1,isample):bounds(2,isample) ) ) + enddo + + Pabc(:) = 1.d0/Pabc(:) + + call wall_time(t00) + imin = 1_8 + !$OMP PARALLEL & + !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & + !$OMP DEFAULT(SHARED) + + do kiter=1,Nabc + + !$OMP MASTER + do while ((imin <= Nabc).and.(sampled(imin)>-1_8)) + imin = imin+1 + enddo + + ! Deterministic part + if (imin < Nabc) then + ieta=imin + sampled(ieta) = 0_8 a = abc(1,ieta) b = abc(2,ieta) c = abc(3,ieta) - if (a/=c) then - memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & - V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & - W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) - else - memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & - V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & - W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) - endif - computed(ieta) = .True. Ncomputed += 1_8 - call wall_time(t01) - if (t01-t00 > 1.d0) then - t00 = t01 - print *, average, dsqrt(variance/dble(isample)), real(Ncomputed)/real(Nabc), real(isample)/real(Nabc) + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 endif -! print *, memo(ieta), Pabc(ieta), memo(ieta) * Pabc(ieta) + !$OMP END TASK endif - sample = memo(ieta) * Pabc(ieta) - ET = ET + sample - ET2 = ET2 + sample*sample - average = ET/dble(isample) - variance = ET2/dble(isample) - average*average - converged = (Ncomputed >= (Nabc*90_8)/100_8) .or. (isample>=1000*Nabc) -! enddo - enddo - print *, average, dsqrt(variance/dble(isample)), real(Ncomputed)/real(Nabc), real(isample)/real(Nabc) - energy = average -! !$OMP PARALLEL & -! !$OMP PRIVATE(a,b,c,e) & -! !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & -! !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & -! !$OMP DEFAULT(SHARED) -! allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & -! W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & -! V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & -! V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) -! e = 0d0 -! !$OMP DO SCHEDULE(dynamic) -! do a = 1, nV -! do b = a+1, nV -! do c = b+1, nV -! e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & -! V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & -! W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) -! enddo -! enddo -! -! do b = 1, nV -! if (b == a) cycle -! e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & -! V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & -! W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) -! enddo -! enddo -! !$OMP END DO NOWAIT -! -! !$OMP CRITICAL -! energy = energy + e -! !$OMP END CRITICAL -! -! deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & -! V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) -! -! !$OMP END PARALLEL + ! Stochastic part + call random_number(eta) + do isample=1,nbuckets + if (imin >= bounds(2,isample)) then + cycle + endif + ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc) + + if (sampled(ieta) == -1_8) then + sampled(ieta) = 0_8 + a = abc(1,ieta) + b = abc(2,ieta) + c = abc(3,ieta) + Ncomputed += 1_8 + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + endif + !$OMP END TASK + endif + sampled(ieta) = sampled(ieta)+1_8 + + enddo + + call wall_time(t01) + if (t01-t00 > 1.0d0) then + t00 = t01 + + !$OMP TASKWAIT + + double precision :: ET, ET2 + double precision :: energy_stoch, energy_det + double precision :: scale + double precision :: w + double precision :: tmp + energy_stoch = 0.d0 + energy_det = 0.d0 + norm = 0.d0 + scale = 1.d0 + ET = 0.d0 + ET2 = 0.d0 + + + do isample=1,nbuckets + if (imin >= bounds(2,isample)) then + energy_det = energy_det + sum(memo(bounds(1,isample):bounds(2,isample))) + scale = scale - wsum(isample) + else + exit + endif + enddo + + do ieta=bounds(1,isample), Nabc + w = dble(max(sampled(ieta),0_8)) + tmp = w * memo(ieta) * Pabc(ieta) + ET = ET + tmp + ET2 = ET2 + tmp * memo(ieta) * Pabc(ieta) + norm = norm + w + enddo + norm = norm/scale + if (norm > 0.d0) then + energy_stoch = ET / norm + variance = ET2 / norm - energy_stoch*energy_stoch + endif + + energy = energy_det + energy_stoch + + print *, real(energy), ' +/- ', real(sqrt(variance/(norm-1.d0))), isample, real(Ncomputed)/real(Nabc) + endif + !$OMP END MASTER + if (imin >= Nabc) exit + enddo + + !$OMP END PARALLEL deallocate(X_vovv,X_ooov,T_voov,T_oovv) end -integer*8 function find_sample(v, w, n) - implicit none - BEGIN_DOC -! Finds sample v in weights w - END_DOC - integer*8, intent(in) :: n - double precision, intent(in) :: v, w(0:n) - integer*8 :: i,l,r - l=0 - r=n +integer*8 function binary_search(arr, key, size) + implicit none + BEGIN_DOC +! Searches the key in array arr(1:size) between l_in and r_in, and returns its index + END_DOC + integer*8 :: size, i, j, mid, l_in, r_in + double precision, dimension(size) :: arr(1:size) + double precision :: key - do while(r-l > 1) - i = shiftr(r+l,1) - if(w(i) < v) then - l = i - else - r = i - end if - end do - i = r - do r=i+1,n - if (w(r) /= w(i)) then - exit - endif - enddo - find_sample = r-1 -end function + i = 1_8 + j = size + + do while (j >= i) + mid = i + (j - i) / 2 + if (arr(mid) >= key) then + if (mid > 1 .and. arr(mid - 1) < key) then + binary_search = mid + return + end if + j = mid - 1 + else if (arr(mid) < key) then + i = mid + 1 + else + binary_search = mid + 1 + return + end if + end do + binary_search = i +end function binary_search From ee790fa1d82e94724cbc34cb5c4c802ca001d2b4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 16 May 2023 19:54:30 +0200 Subject: [PATCH 18/79] Formatted output in (T) --- src/ccsd/ccsd_space_orb_sub.irp.f | 289 +++++++++++++------------- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 15 +- 2 files changed, 158 insertions(+), 146 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 29ecca1c..287d5b03 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1,5 +1,5 @@ subroutine run_ccsd_space_orb - + implicit none integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d @@ -12,12 +12,12 @@ subroutine run_ccsd_space_orb double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) double precision, allocatable :: t1(:,:), r1(:,:) double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) - + double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) - + PROVIDE mo_two_e_integrals_in_map det = psi_det(:,:,cc_ref) @@ -35,11 +35,11 @@ subroutine run_ccsd_space_orb if (cc_ref_is_open_shell) then call abort endif - + ! Number of occ/vir spatial orb nO = nOa nV = nVa - + allocate(list_occ(nO),list_vir(nV)) list_occ = cc_list_occ list_vir = cc_list_vir @@ -47,7 +47,7 @@ subroutine run_ccsd_space_orb !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) !print*,'occ',list_occ !print*,'vir',list_vir - + allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) allocate(tau(nO,nO,nV,nV)) allocate(t1(nO,nV), r1(nO,nV)) @@ -76,7 +76,7 @@ subroutine run_ccsd_space_orb print*,'Det energy', uncorr_energy call ccsd_energy_space(nO,nV,tau,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy - + nb_iter = 0 not_converged = .True. max_r1 = 0d0 @@ -86,9 +86,9 @@ subroutine run_ccsd_space_orb write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' write(*,'(A77)') ' -----------------------------------------------------------------------------' call wall_time(ta) - + do while (not_converged) - + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) call compute_H_vv(nO,nV,t1,t2,tau,H_vv) call compute_H_vo(nO,nV,t1,t2,H_vo) @@ -97,7 +97,7 @@ subroutine run_ccsd_space_orb call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) max_r = max(max_r1,max_r2) - + ! Update if (cc_update_method == 'diis') then !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) @@ -122,7 +122,7 @@ subroutine run_ccsd_space_orb if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then not_converged = .False. endif - + enddo write(*,'(A77)') ' -----------------------------------------------------------------------------' call wall_time(tb) @@ -141,18 +141,18 @@ subroutine run_ccsd_space_orb call write_t1(nO,nV,t1) call write_t2(nO,nV,t2) - + ! Deallocation if (cc_update_method == 'diis') then deallocate(all_err,all_t) endif deallocate(H_vv,H_oo,H_vo,r1,r2,tau) - + ! CCSD(T) double precision :: e_t - if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then + if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then ! Dumb way !call wall_time(ta) @@ -171,8 +171,11 @@ subroutine run_ccsd_space_orb call wall_time(ta) ! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + + e_t = uncorr_energy + energy ! For print in next call call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call wall_time(tb) print*,'Time: ',tb-ta, ' s' @@ -182,7 +185,7 @@ subroutine run_ccsd_space_orb write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' print*,'' endif - + print*,'Reference determinant:' call print_det(det,N_int) @@ -234,7 +237,7 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) energy = energy + e !$omp end critical !$omp end parallel - + end ! Tau @@ -252,12 +255,12 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) ! internal integer :: i,j,a,b - + !$OMP PARALLEL & !$OMP SHARED(nO,nV,tau,t2,t1) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO @@ -269,7 +272,7 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) enddo !$OMP END DO !$OMP END PARALLEL - + end ! R1 @@ -285,7 +288,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! out double precision, intent(out) :: r1(nO,nV), max_r1 - + ! internal integer :: u,i,j,beta,a,b @@ -306,7 +309,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO) ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV) ! cc_space_f_vo(a,i) * t1(u,a) -> X1(nO,nO), O(nO*nO*nV) - ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) + ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO @@ -326,7 +329,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call dgemm('T','N', nO, nV, nO, & 1d0, X_oo, size(X_oo,2), & t1 , size(t1,1), & - 1d0, r1 , size(r1,1)) + 1d0, r1 , size(r1,1)) deallocate(X_oo) ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) @@ -375,7 +378,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,X_voov,t2,t1) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do + !$omp do do beta = 1, nV do u = 1, nO do i = 1, nO @@ -387,16 +390,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do !$omp end parallel - + call dgemv('T', nV*nO, nO*nV, & 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & H_vo , 1, & 1d0, r1 , 1) - + deallocate(X_voov) ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) - ! <=> + ! <=> ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta) !do beta = 1, nV ! do u = 1, nO @@ -414,7 +417,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nv @@ -431,17 +434,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & t1 , 1, & 1d0, r1 , 1) - + deallocate(X_ovov) - ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) - ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO ! do a = 1, nV ! do b = 1, nV - ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) ! enddo ! enddo ! enddo @@ -454,24 +457,24 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) enddo enddo enddo enddo !$omp end do nowait - !$omp do + !$omp do do u = 1, nO do i = 1, nO do b = 1, nV do a = 1, nV - T_vvoo(a,b,i,u) = tau(i,u,a,b) + T_vvoo(a,b,i,u) = tau(i,u,a,b) enddo enddo enddo @@ -483,17 +486,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & 1d0, r1 , size(r1,1)) - + deallocate(W_vvov,T_vvoo) - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) - ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO ! do j = 1, nO ! do a = 1, nV - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) ! enddo ! enddo ! enddo @@ -507,7 +510,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp private(u,a,i,j) & !$omp default(none) do u = 1, nO - !$omp do + !$omp do do a = 1, nV do j = 1, nO do i = 1, nO @@ -523,7 +526,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & tau , size(tau,1) * size(tau,2) * size(tau,3), & 1d0, r1 , size(r1,1)) - + deallocate(W_oovo) max_r1 = 0d0 @@ -538,7 +541,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,r1) & !$omp private(a,i) & !$omp default(none) - !$omp do + !$omp do do a = 1, nV do i = 1, nO r1(i,a) = -r1(i,a) @@ -546,7 +549,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do !$omp end parallel - + end ! H_oo @@ -578,7 +581,7 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) ! enddo ! enddo ! enddo - ! + ! ! enddo !enddo @@ -601,8 +604,8 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) call dgemm('N','T', nO, nO, nO*nV*nV, & 1d0, tau , size(tau,1), & cc_space_w_oovv, size(cc_space_w_oovv,1), & - 1d0, H_oo , size(H_oo,1)) - + 1d0, H_oo , size(H_oo,1)) + end ! H_vv @@ -633,7 +636,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! enddo ! enddo ! enddo - ! + ! ! enddo !enddo @@ -656,13 +659,13 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) - - !$omp do + + !$omp do do beta = 1, nV do j = 1, nO do i = 1, nO do b = 1, nV - tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) enddo enddo enddo @@ -676,7 +679,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) 1d0, H_vv , size(H_vv,1)) deallocate(tmp_tau) - + end ! H_vo @@ -704,7 +707,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! enddo ! enddo - ! + ! ! enddo !enddo @@ -727,7 +730,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) - !$omp do + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -746,7 +749,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) 1d0, H_vo, 1) deallocate(w) - + end ! R2 @@ -771,7 +774,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) allocate(g_occ(nO,nO), g_vir(nV,nV)) allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) allocate(A1(nO,nO,nO,nO)) - + call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) call compute_A1(nO,nV,t1,t2,tau,A1) @@ -787,7 +790,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,cc_space_v_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -863,7 +866,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,t2,X_oovv) & !$omp private(u,v,gam,a) & !$omp default(none) - !$omp do + !$omp do do a = 1, nV do gam = 1, nV do v = 1, nO @@ -875,7 +878,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nO*nV,nV,nV, & 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & g_vir, size(g_vir,1), & @@ -885,7 +888,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -921,7 +924,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -957,7 +960,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & !$omp private(u,a,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do u = 1, nO @@ -979,7 +982,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -991,7 +994,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1009,13 +1012,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !enddo double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) - + !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & !$omp private(u,v,gam,i) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do gam = 1, nV do u = 1, nO do a = 1, nV @@ -1036,12 +1039,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, t1, size(t1,1), & Y_vovv, size(Y_vovv,1), & 0d0, X_oovv, size(X_oovv,1)) - + !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1055,7 +1058,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end parallel deallocate(X_vovo,Y_vovv) - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1079,7 +1082,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1092,7 +1095,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1111,13 +1114,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, allocatable :: Y_oovo(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) - + !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & !$omp private(a,v,gam,i) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1138,12 +1141,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & t1 , size(t1,1), & 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) - + !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1155,7 +1158,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + deallocate(X_vovo,Y_oovo) !do gam = 1, nV @@ -1183,7 +1186,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp private(u,v,gam,beta,i,a) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do a = 1, nV do beta = 1, nV do u = 1, nO @@ -1194,7 +1197,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do nowait enddo - !$omp do + !$omp do do gam = 1, nV do v = 1, nO do i = 1, nO @@ -1206,17 +1209,17 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_voov, size(Y_voov,1) * size(Y_voov,2), & 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1228,9 +1231,9 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + deallocate(X_ovvo,Y_voov) - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1252,7 +1255,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & !$omp default(none) - !$omp do + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nV @@ -1264,7 +1267,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1281,12 +1284,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1298,7 +1301,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1343,12 +1346,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & @@ -1367,7 +1370,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end parallel deallocate(X_ovov,Y_ovov,Z_ovov) - + ! Change the sign for consistency with the code in spin orbitals !$omp parallel & !$omp shared(nO,nV,r2) & @@ -1385,7 +1388,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + max_r2 = 0d0 do b = 1, nV do a = 1, nV @@ -1398,7 +1401,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo deallocate(g_occ,g_vir,J1,K1,A1) - + end ! A1 @@ -1427,12 +1430,12 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) ! A1(u,v,i,j) = A1(u,v,i,j) & ! + cc_space_v_ovoo(u,a,i,j) * t1(v,a) & ! + cc_space_v_vooo(a,v,i,j) * t1(u,a) - ! + ! ! do b = 1, nV ! A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) - ! enddo + ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo @@ -1440,7 +1443,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) - + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) !$omp parallel & !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & @@ -1494,7 +1497,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) enddo !$omp end do !$omp end parallel - + deallocate(X_vooo,Y_oooo) ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) @@ -1508,7 +1511,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) 1d0, tau , size(tau,1) * size(tau,2), & cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & 1d0, A1 , size(A1,1) * size(A1,2)) - + end ! B1 @@ -1530,28 +1533,28 @@ subroutine compute_B1(nO,nV,t1,t2,B1) ! do beta = 1, nV ! do b = 1, nV ! do a = 1, nV - ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) ! do i = 1, nO ! B1(a,b,beta,gam) = B1(a,b,beta,gam) & ! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & ! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) ! enddo - ! + ! ! enddo ! enddo ! enddo !enddo - + double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:) allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV)) - ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1563,7 +1566,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo !$omp end do nowait do i = 1, nO - !$omp do + !$omp do do gam = 1, nV do b = 1, nV do a = 1, nV @@ -1574,14 +1577,14 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp end do nowait enddo !$omp end parallel - + ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & call dgemm('N','N', nV*nV*nV, nV, nO, & -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & t1 , size(t1,1), & 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) - + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) call dgemm('N','N', nV*nV*nV, nV, nO, & -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), & @@ -1592,7 +1595,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp shared(nV,B1,Y_vvvv) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1604,9 +1607,9 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo !$omp end do !$omp end parallel - + deallocate(X_vvvo,Y_vvvv) - + end ! g_occ @@ -1627,14 +1630,14 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) !do i = 1, nO ! do u = 1, nO ! g_occ(u,i) = H_oo(u,i) - ! + ! ! do a = 1, nV ! g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a) - ! + ! ! do j = 1, nO ! g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) ! enddo - ! + ! ! enddo ! enddo !enddo @@ -1655,8 +1658,8 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo enddo !$omp end do - - !$omp do + + !$omp do do i = 1, nO do j = 1, nO do a = 1, nV @@ -1668,7 +1671,7 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo !$omp end do !$omp end parallel - + end ! g_vir @@ -1689,23 +1692,23 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) !do beta = 1, nV ! do a = 1, nV ! g_vir(a,beta) = H_vv(a,beta) - ! + ! ! do i = 1, nO ! g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta) - ! + ! ! do b = 1, nV ! g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) ! enddo - ! + ! ! enddo ! enddo !enddo - + call dgemm('N','N',nV,nV,nO, & -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & t1 , size(t1,1), & 0d0, g_vir, size(g_vir,1)) - + !$omp parallel & !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & !$omp private(i,b,a,beta) & @@ -1718,7 +1721,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do - !$omp do + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV @@ -1730,7 +1733,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do !$omp end parallel - + end ! J1 @@ -1763,7 +1766,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) ! do b = 1, nV ! J1(u,a,beta,i) = J1(u,a,beta,i) & - ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) + ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) ! enddo ! do j = 1, nO @@ -1773,7 +1776,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) ! + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo @@ -1781,13 +1784,13 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) - + !$omp parallel & !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & !$omp private(i,j,a,u,beta) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1810,7 +1813,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nV*nO,nV,nO, & -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & t1 , size(t1,1), & @@ -1821,7 +1824,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp private(i,beta,a,u) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1847,7 +1850,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & !$omp private(i,beta,a,u,b,j) & !$omp default(none) - !$omp do + !$omp do do b = 1, nV do j = 1, nO do beta = 1, nV @@ -1859,7 +1862,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait - !$omp do + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -1885,7 +1888,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp private(i,beta,a,u,j,b) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1895,10 +1898,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait enddo - + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) do j = 1, nO - !$omp do + !$omp do do b = 1, nV do i = 1, nO do a = 1, nV @@ -1908,9 +1911,9 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait enddo - + do j = 1, nO - !$omp do + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -1921,7 +1924,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp end do nowait enddo !$omp end parallel - + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & @@ -1944,8 +1947,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end parallel - deallocate(X_ovvo,Z_ovvo,Y_ovov) - + deallocate(X_ovvo,Z_ovvo,Y_ovov) + end ! K1 @@ -1980,7 +1983,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) ! do b = 1, nV ! K1(u,a,i,beta) = K1(u,a,i,beta) & - ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) + ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) ! enddo ! do j = 1, nO @@ -1989,19 +1992,19 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) ! - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo !enddo allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) - + !$omp parallel & !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV @@ -2072,5 +2075,5 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp end parallel deallocate(X,Y,Z) - + end diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 0081e9e7..049c57e8 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -7,13 +7,14 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) - double precision, intent(out) :: energy + double precision, intent(inout) :: energy double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb + double precision :: e,ta,tb,eccsd + eccsd = energy call set_multiple_levels_omp(.False.) allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) @@ -206,6 +207,12 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ Pabc(:) = 1.d0/Pabc(:) + print '(A)', '' + print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' | E(CCSD(T)) | Error | % |' + print '(A)', ' +----------------------+--------------+----------+' + + call wall_time(t00) imin = 1_8 !$OMP PARALLEL & @@ -309,13 +316,15 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ energy = energy_det + energy_stoch - print *, real(energy), ' +/- ', real(sqrt(variance/(norm-1.d0))), isample, real(Ncomputed)/real(Nabc) + print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) endif !$OMP END MASTER if (imin >= Nabc) exit enddo !$OMP END PARALLEL + print '(A)', ' +----------------------+--------------+----------+' + print '(A)', '' deallocate(X_vovv,X_ooov,T_voov,T_oovv) end From 46cbd80b9596a6e2c19e2db13ea800376c8cfb55 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 17 May 2023 10:44:32 +0200 Subject: [PATCH 19/79] Accelerated Cholesky --- src/ao_two_e_ints/EZFIO.cfg | 6 +++ src/ao_two_e_ints/cholesky.irp.f | 81 ++++++++++++++++--------------- src/ccsd/ccsd_space_orb_sub.irp.f | 10 ++-- 3 files changed, 51 insertions(+), 46 deletions(-) diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index 4ab080ec..9f523fca 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -11,6 +11,12 @@ interface: ezfio,provider,ocaml default: 1.e-15 ezfio_name: threshold_ao +[ao_cholesky_threshold] +type: Threshold +doc: If | (ii|jj) | < `ao_cholesky_threshold` then (ii|jj) is zero +interface: ezfio,provider,ocaml +default: 1.e-12 + [do_direct_integrals] type: logical doc: Compute integrals on the fly (very slow, only for debugging) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d4c201aa..3da827e1 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -4,29 +4,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] ! Number of Cholesky vectors in AO basis END_DOC - integer :: i,j,k,l - double precision :: xnorm0, x, integral - double precision, external :: ao_two_e_integral - - cholesky_ao_num_guess = 0 - xnorm0 = 0.d0 - x = 0.d0 - do j=1,ao_num - do i=1,ao_num - integral = ao_two_e_integral(i,i,j,j) - if (integral > ao_integrals_threshold) then - cholesky_ao_num_guess += 1 - else - x += integral - endif - enddo - enddo - print *, 'Cholesky decomposition of AO integrals' - print *, '--------------------------------------' - print *, '' - print *, 'Estimated Error: ', x - print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)' - + cholesky_ao_num_guess = ao_num*ao_num / 2 END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -39,7 +17,7 @@ END_PROVIDER END_DOC type(c_ptr) :: ptr - integer :: fd, i,j,k,l, rank + integer :: fd, i,j,k,l,m,rank double precision, pointer :: ao_integrals(:,:,:,:) double precision, external :: ao_two_e_integral @@ -49,24 +27,49 @@ END_PROVIDER 8, fd, .False., ptr) call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) - double precision :: integral + print*, 'Providing the AO integrals (Cholesky)' + call wall_time(wall_1) + call cpu_time(cpu_1) + + ao_integrals = 0.d0 + + double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 logical, external :: ao_two_e_integral_zero - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic) - do l=1,ao_num - do j=1,l - do k=1,ao_num - do i=1,k - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - enddo + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT enddo - enddo + !$OMP MASTER + call wall_time(wall_2) + print '(F10.2,'' % in'', 4X, I10, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL + + call wall_time(wall_2) + call cpu_time(cpu_2) + print*, 'AO integrals provided:' + print*, ' cpu time :',cpu_2 - cpu_1, 's' + print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 287d5b03..2e0ccd8f 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -16,7 +16,7 @@ subroutine run_ccsd_space_orb double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) integer(bit_kind) :: det(N_int,2) - integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) + integer :: nO, nV, nOa, nVa PROVIDE mo_two_e_integrals_in_map @@ -24,12 +24,8 @@ subroutine run_ccsd_space_orb print*,'Reference determinant:' call print_det(det,N_int) - ! Extract number of occ/vir alpha/beta spin orbitals - !call extract_n_spin(det,n_spin) - nOa = cc_nOa !n_spin(1) - nOb = cc_nOb !n_spin(2) - nVa = cc_nVa !n_spin(3) - nVb = cc_nVb !n_spin(4) + nOa = cc_nOa + nVa = cc_nVa ! Check that the reference is a closed shell determinant if (cc_ref_is_open_shell) then From a8948d091667801acb7800a7510e45b136b59fd3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 17 May 2023 16:55:29 +0200 Subject: [PATCH 20/79] cholesky in big_array --- src/ao_two_e_ints/cholesky.irp.f | 97 +++++++---- src/ccsd/ccsd_space_orb_sub.irp.f | 2 +- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 2 +- src/mo_two_e_ints/cholesky.irp.f | 30 ++++ src/mo_two_e_ints/integrals_3_index.irp.f | 70 ++++++-- src/mo_two_e_ints/mo_bi_integrals.irp.f | 27 ++- src/utils_cc/energy.irp.f | 5 +- src/utils_cc/mo_integrals_cc.irp.f | 197 +++++++++++----------- 8 files changed, 281 insertions(+), 149 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 3da827e1..bb81b141 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -35,45 +35,82 @@ END_PROVIDER double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 logical, external :: ao_two_e_integral_zero + double precision, external :: get_ao_two_e_integral - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) - do m=0,9 - do l=1+m,ao_num,10 - !$OMP DO SCHEDULE(dynamic) - do j=1,l - do k=1,ao_num - do i=1,min(k,j) - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - ao_integrals(j,l,i,k) = integral - ao_integrals(j,l,k,i) = integral - ao_integrals(l,j,i,k) = integral - ao_integrals(l,j,k,i) = integral + if (read_ao_two_e_integrals) then + PROVIDE ao_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo enddo enddo + !$OMP END DO NOWAIT enddo - !$OMP END DO NOWAIT + !$OMP MASTER + call wall_time(wall_2) + print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER enddo - !$OMP MASTER - call wall_time(wall_2) - print '(F10.2,'' % in'', 4X, I10, '' s.'')', (m+1) * 10, wall_2-wall_1 - !$OMP END MASTER - enddo - !$OMP END PARALLEL + !$OMP END PARALLEL - call wall_time(wall_2) - call cpu_time(cpu_2) - print*, 'AO integrals provided:' - print*, ' cpu time :',cpu_2 - cpu_1, 's' - print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' + else + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP MASTER + call wall_time(wall_2) + print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + call wall_time(wall_2) + call cpu_time(cpu_2) + print*, 'AO integrals provided:' + print*, ' cpu time :',cpu_2 - cpu_1, 's' + print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' + + endif ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess - call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao) + call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' ! Remove mmap diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 2e0ccd8f..256117d6 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -18,7 +18,7 @@ subroutine run_ccsd_space_orb integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa - PROVIDE mo_two_e_integrals_in_map +! PROVIDE mo_two_e_integrals_in_map det = psi_det(:,:,cc_ref) print*,'Reference determinant:' diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 049c57e8..1f3bebc2 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -274,7 +274,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ enddo call wall_time(t01) - if (t01-t00 > 1.0d0) then + if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then t00 = t01 !$OMP TASKWAIT diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 14d3c696..b5b39b3b 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -6,11 +6,41 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num integer :: k + print *, 'AO->MO Transformation of Cholesky vectors' !$OMP PARALLEL DO PRIVATE(k) do k=1,cholesky_ao_num call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) enddo !$OMP END PARALLEL DO + print *, '' + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + integer :: i,j,k + double precision, allocatable :: buffer(:,:) + + print *, 'AO->MO Transformation of Cholesky vectors .' + !$OMP PARALLEL PRIVATE(i,j,k,buffer) + allocate(buffer(mo_num,mo_num)) + !$OMP DO SCHEDULE(static) + do k=1,cholesky_ao_num + call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num) + do j=1,mo_num + do i=1,mo_num + cholesky_mo_transp(k,i,j) = buffer(i,j) + enddo + enddo + enddo + !$OMP END DO + deallocate(buffer) + !$OMP END PARALLEL + print *, '' END_PROVIDER diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index 4ffb0134..d807f619 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -4,24 +4,68 @@ BEGIN_DOC ! big_array_coulomb_integrals(j,i,k) = = (ik|jj) ! - ! big_array_exchange_integrals(i,j,k) = = (ij|kj) + ! big_array_exchange_integrals(j,i,k) = = (ij|kj) END_DOC - integer :: i,j,k,l + integer :: i,j,k,l,a double precision :: get_two_e_integral double precision :: integral - do k = 1, mo_num - do i = 1, mo_num - do j = 1, mo_num - l = j - integral = get_two_e_integral(i,j,k,l,mo_integrals_map) - big_array_coulomb_integrals(j,i,k) = integral - l = j - integral = get_two_e_integral(i,j,l,k,mo_integrals_map) - big_array_exchange_integrals(j,i,k) = integral + if (do_ao_cholesky) then + + double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) + allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num)) + do j=1,mo_num + buffer_jj(:,j) = cholesky_mo_transp(:,j,j) + enddo + + call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + buffer_jj, cholesky_ao_num, 0.d0, & + buffer, mo_num*mo_num) + + do k = 1, mo_num + do i = 1, mo_num + do j = 1, mo_num + big_array_coulomb_integrals(j,i,k) = buffer(i,k,j) + enddo + enddo + enddo + deallocate(buffer_jj) + + allocate(buffer_jj(mo_num,mo_num)) + + do j = 1, mo_num + + call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, & + cholesky_mo_transp(1,1,j), cholesky_ao_num, & + cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, & + buffer_jj, mo_num) + + do k=1,mo_num + do i=1,mo_num + big_array_exchange_integrals(j,i,k) = buffer_jj(i,k) + enddo + enddo + enddo + + deallocate(buffer_jj) + + else + + do k = 1, mo_num + do i = 1, mo_num + do j = 1, mo_num + l = j + integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + big_array_coulomb_integrals(j,i,k) = integral + l = j + integral = get_two_e_integral(i,j,l,k,mo_integrals_map) + big_array_exchange_integrals(j,i,k) = integral + enddo + enddo enddo - enddo - enddo + + endif END_PROVIDER diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index b7ef901d..a461504e 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -1353,15 +1353,30 @@ END_PROVIDER integer :: i,j double precision :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map - mo_two_e_integrals_jj = 0.d0 - mo_two_e_integrals_jj_exchange = 0.d0 + + if (do_ao_cholesky) then + do j=1,mo_num + do i=1,mo_num + !TODO: use dgemm + mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j)) + mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i)) + enddo + enddo + + else + + do j=1,mo_num + do i=1,mo_num + mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map) + mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map) + enddo + enddo + + endif do j=1,mo_num do i=1,mo_num - mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map) - mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map) - mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j) + mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j) enddo enddo diff --git a/src/utils_cc/energy.irp.f b/src/utils_cc/energy.irp.f index 33e0cbae..fc1451ba 100644 --- a/src/utils_cc/energy.irp.f +++ b/src/utils_cc/energy.irp.f @@ -5,9 +5,8 @@ subroutine det_energy(det,energy) integer(bit_kind), intent(in) :: det double precision, intent(out) :: energy + double precision, external :: diag_H_mat_elem - call i_H_j(det,det,N_int,energy) + energy = diag_H_mat_elem(det,N_int) + nuclear_repulsion - energy = energy + nuclear_repulsion - end diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 9e244d82..485d7002 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -13,7 +13,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f) integer :: i1,i2,idx1,idx2 allocate(tmp_F(mo_num,mo_num)) - + call get_fock_matrix_spin(det,1,tmp_F) !$OMP PARALLEL & @@ -32,7 +32,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f) !$OMP END PARALLEL deallocate(tmp_F) - + end ! V @@ -45,63 +45,66 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer, intent(in) :: list1(n1),list2(n2),list3(n3),list4(n4) double precision, intent(out) :: v(n1,n2,n3,n4) - integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4 - double precision :: get_two_e_integral - - PROVIDE mo_two_e_integrals_in_map + integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k + double precision, allocatable :: buffer(:,:,:) !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)& + !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO do i4 = 1, n4 - do i3 = 1, n3 - do i2 = 1, n2 + idx4 = list4(i4) + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, n2 + idx2 = list2(i2) + do i3 = 1, n3 + idx3 = list3(i3) do i1 = 1, n1 - idx4 = list4(i4) - idx3 = list3(i3) - idx2 = list2(i2) idx1 = list1(i1) - v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map) + v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) enddo enddo enddo enddo !$OMP END DO + deallocate(buffer) !$OMP END PARALLEL - + + end ! full BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] - implicit none - - integer :: i,j,k,l - double precision :: get_two_e_integral - - PROVIDE mo_two_e_integrals_in_map - + integer :: i1,i2,i3,i4,k + double precision, allocatable :: buffer(:,:,:) !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) & - !$OMP PRIVATE(i,j,k,l) & + !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& !$OMP DEFAULT(NONE) - - !$OMP DO collapse(3) - do l = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do i = 1, mo_num - cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO + do i4 = 1, mo_num + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, mo_num + do i3 = 1, mo_num + do i1 = 1, mo_num + cc_space_v(i1,i2,i3,i4) = buffer(i1,i3,i2) enddo enddo enddo enddo !$OMP END DO + deallocate(buffer) !$OMP END PARALLEL - + END_PROVIDER ! oooo @@ -280,7 +283,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo)) call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v) - + do q = 1, cc_n_mo do p = 1, cc_n_mo cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q) @@ -382,7 +385,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)] enddo FREE cc_space_v_vvvv - + END_PROVIDER ! iaia @@ -467,7 +470,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n integer :: i,j,a,b allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa)) - + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v) !$OMP PARALLEL & @@ -501,7 +504,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n integer :: i,j,a,b allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa)) - + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v) !$OMP PARALLEL & @@ -613,7 +616,7 @@ subroutine shift_idx_spin(s,n_S,shift) else shift = n_S(1) endif - + end ! F @@ -626,21 +629,22 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) ! Compute the Fock matrix corresponding to two lists of spin orbitals. ! Ex: occ/occ, occ/vir,... END_DOC - + integer(bit_kind), intent(in) :: det(N_int,2) integer, intent(in) :: n1,n2, n1_S(2), n2_S(2) integer, intent(in) :: list1(n1,2), list2(n2,2) integer, intent(in) :: dim1, dim2 - + double precision, intent(out) :: f(dim1, dim2) double precision, allocatable :: tmp_F(:,:) integer :: i,j, idx_i,idx_j,i_shift,j_shift integer :: tmp_i,tmp_j integer :: si,sj,s + PROVIDE big_array_exchange_integrals big_array_coulomb_integrals allocate(tmp_F(mo_num,mo_num)) - + do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 @@ -669,9 +673,9 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) enddo enddo - + deallocate(tmp_F) - + end ! Get F @@ -683,12 +687,12 @@ subroutine get_fock_matrix_spin(det,s,f) BEGIN_DOC ! Fock matrix alpha or beta of an arbitrary det END_DOC - + integer(bit_kind), intent(in) :: det(N_int,2) integer, intent(in) :: s - + double precision, intent(out) :: f(mo_num,mo_num) - + integer :: p,q,i,s1,s2 integer(bit_kind) :: res(N_int,2) logical :: ok @@ -701,9 +705,11 @@ subroutine get_fock_matrix_spin(det,s,f) s1 = 2 s2 = 1 endif - + + PROVIDE big_array_coulomb_integrals big_array_exchange_integrals + !$OMP PARALLEL & - !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) & + !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals,big_array_coulomb_integrals,big_array_exchange_integrals) & !$OMP PRIVATE(p,q,ok,i,res)& !$OMP DEFAULT(NONE) !$OMP DO collapse(1) @@ -713,20 +719,21 @@ subroutine get_fock_matrix_spin(det,s,f) do i = 1, mo_num call apply_hole(det, s1, i, res, ok, N_int) if (ok) then - f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) +! f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) + f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) - big_array_exchange_integrals(i,p,q) endif enddo do i = 1, mo_num call apply_hole(det, s2, i, res, ok, N_int) if (ok) then - f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) + f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) endif enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - + end ! V @@ -752,14 +759,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, integer :: si,sj,sk,sl,s PROVIDE cc_space_v - + !$OMP PARALLEL & !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sk = 1, 2 @@ -768,7 +775,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then @@ -776,7 +783,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -792,14 +799,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(3) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -815,14 +822,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(3) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -843,7 +850,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -859,13 +866,13 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo !$OMP END DO endif - + enddo enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx @@ -900,28 +907,28 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, call shift_idx_spin(sl,n4_S,l_shift) tmp_l = idx_l - l_shift l = list4(tmp_l,sl) - + !$OMP PARALLEL & !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & !$OMP i,j,k,idx_i,idx_j,idx_k,& !$OMP tmp_i,tmp_j,tmp_k)& !$OMP DEFAULT(NONE) - + do sk = 1, 2 call shift_idx_spin(sk,n3_S,k_shift) do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -934,13 +941,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -953,13 +960,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -976,7 +983,7 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -989,12 +996,12 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx_ij_l @@ -1029,28 +1036,28 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l call shift_idx_spin(sk,n3_S,k_shift) tmp_k = idx_k - k_shift k = list3(tmp_k,sk) - + !$OMP PARALLEL & !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & !$OMP i,j,l,idx_i,idx_j,idx_l,& !$OMP tmp_i,tmp_j,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1063,13 +1070,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1082,13 +1089,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1105,7 +1112,7 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1118,12 +1125,12 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx_i_kl @@ -1158,28 +1165,28 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l call shift_idx_spin(sj,n2_S,j_shift) tmp_j = idx_j - j_shift j = list2(tmp_j,sj) - + !$OMP PARALLEL & !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & !$OMP i,k,l,idx_i,idx_k,idx_l,& !$OMP tmp_i,tmp_k,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sk = 1, 2 call shift_idx_spin(sk,n3_S,k_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1192,13 +1199,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1211,13 +1218,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1234,7 +1241,7 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1247,10 +1254,10 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end From 5817bbf573c5074ae4c31562cb03c47c69e148f7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 17 May 2023 17:50:35 +0200 Subject: [PATCH 21/79] Reduced memory in CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 105 +++++++++++++++++++++++++++--- 1 file changed, 97 insertions(+), 8 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 256117d6..1467d9a4 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -764,7 +764,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! internal double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) - double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:), B1_gam(:,:,:) integer :: u,v,i,j,beta,gam,a,b allocate(g_occ(nO,nO), g_vir(nV,nV)) @@ -834,13 +834,18 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! enddo !enddo - allocate(B1(nV,nV,nV,nV)) - call compute_B1(nO,nV,t1,t2,B1) - call dgemm('N','N',nO*nO,nV*nV,nV*nV, & - 1d0, tau, size(tau,1) * size(tau,2), & - B1 , size(B1,1) * size(B1,2), & - 1d0, r2, size(r2,1) * size(r2,2)) - deallocate(B1) +! allocate(B1(nV,nV,nV,nV)) +! call compute_B1(nO,nV,t1,t2,B1) + allocate(B1_gam(nV,nV,nV)) + do gam=1,nV + call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam) + call dgemm('N','N',nO*nO,nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1_gam , size(B1_gam,1) * size(B1_gam,2), & + 1d0, r2(1,1,1,gam), size(r2,1) * size(r2,2)) + enddo + deallocate(B1_gam) + !do gam = 1, nV ! do beta = 1, nV @@ -1512,6 +1517,90 @@ end ! B1 +subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) + + implicit none + + integer, intent(in) :: nO,nV,gam + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: B1(nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + +! do beta = 1, nV +! do b = 1, nV +! do a = 1, nV +! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) +! +! do i = 1, nO +! B1(a,b,beta) = B1(a,b,beta) & +! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & +! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) +! enddo +! +! enddo +! enddo +! enddo + + double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) + allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) +! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + !$omp parallel & + !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & + !$omp private(a,b,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) + enddo + enddo + enddo + !$omp end do nowait + do i = 1, nO + !$omp do + do b = 1, nV + do a = 1, nV + X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + +! ! B1(a,b,beta) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + call dgemm('N','N', nV*nV*nV, 1, nO, & + -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & + t1(1,gam), size(t1,1), & + 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) + + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + call dgemm('N','N', nV*nV, nV, nO, & + -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2), & + t1 , size(t1,1), & + 0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2)) + + !$omp parallel & + !$omp shared(nV,B1,Y_vvvv,gam) & + !$omp private(a,b,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta) = B1(a,b,beta) + Y_vvvv(a,b,beta) + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vvvo,Y_vvvv) + +end + subroutine compute_B1(nO,nV,t1,t2,B1) implicit none From 1d5ff0df6629c3374829327df63c912dedd72e00 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 22 May 2023 11:52:16 +0200 Subject: [PATCH 22/79] added the possibility to select 3idx, 4-idx and 5idx --- src/tc_bi_ortho/slater_tc_3e.irp.f | 24 +++++++++++++--------- src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 4 ++-- src/tc_bi_ortho/slater_tc_opt_double.irp.f | 8 ++++---- src/tc_bi_ortho/slater_tc_opt_single.irp.f | 2 +- src/tc_bi_ortho/tc_hmat.irp.f | 16 ++++----------- src/tc_keywords/EZFIO.cfg | 18 ++++++++++++++++ 6 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f index 7b73d5f2..f95be64b 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e.irp.f @@ -4,17 +4,21 @@ subroutine provide_all_three_ints_bi_ortho ! routine that provides all necessary three-electron integrals END_DOC if(three_body_h_tc)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + if(three_e_3_idx_term)then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + if(three_e_4_idx_term)then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + endif + if(.not.double_normal_ord.and.three_e_5_idx_term)then + PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort + PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort + elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then + PROVIDE normal_two_body_bi_orth + endif endif -if(.not.double_normal_ord)then - PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort - PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort -else - PROVIDE normal_two_body_bi_orth -endif end subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 5a3f9935..1745e362 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -156,7 +156,7 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc.and.elec_num.gt.2)then + if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na @@ -243,7 +243,7 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc.and.elec_num.gt.2)then + if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index 1b0e43bb..2d6bfb27 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -42,13 +42,13 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, ! opposite spin two-body htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then - if(.not.double_normal_ord)then + if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num.gt.2)then + elseif(double_normal_ord)then htwoe += normal_two_body_bi_orth(p2,h2,p1,h1) endif endif @@ -59,13 +59,13 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, ! exchange terms htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then - if(.not.double_normal_ord)then + if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num.gt.2)then + elseif(double_normal_ord)then htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2) htwoe += normal_two_body_bi_orth(h1,p1,h2,p2) endif diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 2f9d83bf..7178d6d9 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -106,7 +106,7 @@ subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,h htwoe -= buffer_x(i) enddo hthree = 0.d0 - if (three_body_h_tc.and.elec_num.gt.2)then + if (three_body_h_tc.and.elec_num.gt.2.and.three_e_4_idx_term)then call three_comp_fock_elem(key_i,h,p,spin,hthree) endif diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index 3353d3e7..ec072531 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -9,33 +9,25 @@ implicit none integer :: i, j - double precision :: hmono,htwoe,hthree,htot + double precision :: htot PROVIDE N_int i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j,hmono, htwoe, hthree, htot) & + !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j, htot) & !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho) do i = 1, N_det do j = 1, N_det ! < J | Htilde | I > - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - !print *, ' hmono = ', hmono - !print *, ' htwoe = ', htwoe - !print *, ' hthree = ', hthree htilde_matrix_elmt_bi_ortho(j,i) = htot enddo enddo !$OMP END PARALLEL DO -! print*,'htilde_matrix_elmt_bi_ortho = ' -! do i = 1, min(100,N_det) -! write(*,'(100(F16.10,X))')htilde_matrix_elmt_bi_ortho(1:min(100,N_det),i) -! enddo - END_PROVIDER diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 3a26a6eb..484bd1f0 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -16,6 +16,24 @@ doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml default: True +[three_e_3_idx_term] +type: logical +doc: If |true|, the diagonal 3-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + +[three_e_4_idx_term] +type: logical +doc: If |true|, the off-diagonal 4-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + +[three_e_5_idx_term] +type: logical +doc: If |true|, the off-diagonal 5-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + [pure_three_body_h_tc] type: logical doc: If |true|, pure triple excitation three-body terms are included From daf8b1c3dcef5f066d1add8b2cc751f03544ba98 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 22 May 2023 18:17:17 +0200 Subject: [PATCH 23/79] renaming the routines in tc slater rules in _slow when they are naively built --- src/tc_bi_ortho/dressing_vectors_lr.irp.f | 8 ++-- src/tc_bi_ortho/e_corr_bi_ortho.irp.f | 18 +++---- src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f | 12 ++--- src/tc_bi_ortho/print_tc_wf.irp.f | 6 +-- src/tc_bi_ortho/pt2_tc_cisd.irp.f | 8 ++-- ...er_tc_3e.irp.f => slater_tc_3e_slow.irp.f} | 28 ++--------- src/tc_bi_ortho/slater_tc_opt.irp.f | 23 +++++++++ src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 4 +- .../{slater_tc.irp.f => slater_tc_slow.irp.f} | 47 +++++++------------ src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f | 12 ++--- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 5 +- src/tc_bi_ortho/tc_som.irp.f | 4 +- src/tc_bi_ortho/tc_utils.irp.f | 4 +- src/tc_bi_ortho/test_normal_order.irp.f | 6 +-- src/tc_bi_ortho/test_s2_tc.irp.f | 2 +- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 14 +++--- src/tc_bi_ortho/test_tc_fock.irp.f | 7 ++- 17 files changed, 97 insertions(+), 111 deletions(-) rename src/tc_bi_ortho/{slater_tc_3e.irp.f => slater_tc_3e_slow.irp.f} (87%) rename src/tc_bi_ortho/{slater_tc.irp.f => slater_tc_slow.irp.f} (85%) diff --git a/src/tc_bi_ortho/dressing_vectors_lr.irp.f b/src/tc_bi_ortho/dressing_vectors_lr.irp.f index 08913bab..ed663f02 100644 --- a/src/tc_bi_ortho/dressing_vectors_lr.irp.f +++ b/src/tc_bi_ortho/dressing_vectors_lr.irp.f @@ -27,7 +27,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) call hmat_bi_ortho (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta = 0.d0 @@ -39,7 +39,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I | Htilde | J > - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) ! < I | H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) @@ -78,7 +78,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -88,7 +88,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I | Htilde | J > - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta(i) = delta(i) + psicoef(j) * htc_tot enddo diff --git a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f index 3a715b44..6d5c3b21 100644 --- a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f +++ b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f @@ -2,7 +2,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_00] implicit none double precision :: hmono,htwoe,hthree,htot - call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) e_tilde_00 = htot END_PROVIDER @@ -18,11 +18,11 @@ do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_pt2_tc_bi_orth += coef_pt1 * htilde_ij if(degree == 1)then e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij @@ -37,7 +37,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_bi_orth_00] implicit none double precision :: hmono,htwoe,hthree,htilde_ij - call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) e_tilde_bi_orth_00 += nuclear_repulsion END_PROVIDER @@ -57,7 +57,7 @@ e_corr_double_bi_orth = 0.d0 do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) if(degree == 1)then e_corr_single_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) e_corr_single_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)) @@ -80,7 +80,7 @@ do i = 1, N_det accu += reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(i,1) do j = 1, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_tc_left_right += htilde_ij * reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(j,1) enddo enddo @@ -99,8 +99,8 @@ BEGIN_PROVIDER [ double precision, coef_pt1_bi_ortho, (N_det)] if(degree==0)then coef_pt1_bi_ortho(i) = 1.d0 else - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e coef_pt1_bi_ortho(i)= coef_pt1 diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index b7129d36..1d1b26cc 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -1,4 +1,4 @@ -subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) +subroutine htc_bi_ortho_calc_tdav_slow(v, u, N_st, sze) use bitmasks @@ -27,7 +27,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -36,7 +36,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) do istate = 1, N_st do i = 1, sze do j = 1, sze - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v(i,istate) = v(i,istate) + htot * u(j,istate) enddo enddo @@ -45,7 +45,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) end -subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) +subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) use bitmasks @@ -71,7 +71,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v = 0.d0 @@ -81,7 +81,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) do istate = 1, N_st do i = 1, sze do j = 1, sze - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) v(i,istate) = v(i,istate) + htot * u(j,istate) enddo enddo diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f index 0cf3ca87..0c4198a9 100644 --- a/src/tc_bi_ortho/print_tc_wf.irp.f +++ b/src/tc_bi_ortho/print_tc_wf.irp.f @@ -49,12 +49,12 @@ subroutine routine do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) contrib_pt = coef_pt1 * htilde_ij e_pt2 += contrib_pt diff --git a/src/tc_bi_ortho/pt2_tc_cisd.irp.f b/src/tc_bi_ortho/pt2_tc_cisd.irp.f index 50d9dd45..9cb9a600 100644 --- a/src/tc_bi_ortho/pt2_tc_cisd.irp.f +++ b/src/tc_bi_ortho/pt2_tc_cisd.irp.f @@ -36,11 +36,11 @@ subroutine routine e_corr_abs = 0.d0 e_corr_pos = 0.d0 e_corr_neg = 0.d0 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) do i = 2, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i),degree,N_int) call get_excitation(psi_det(1,1,1), psi_det(1,1,i),exc,degree,phase,N_int) call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f similarity index 87% rename from src/tc_bi_ortho/slater_tc_3e.irp.f rename to src/tc_bi_ortho/slater_tc_3e_slow.irp.f index f95be64b..6abb6b78 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -1,27 +1,5 @@ -subroutine provide_all_three_ints_bi_ortho - implicit none - BEGIN_DOC -! routine that provides all necessary three-electron integrals - END_DOC - if(three_body_h_tc)then - if(three_e_3_idx_term)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - endif - if(three_e_4_idx_term)then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort - endif - if(.not.double_normal_ord.and.three_e_5_idx_term)then - PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort - PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort - elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then - PROVIDE normal_two_body_bi_orth - endif - endif -end -subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) +subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) BEGIN_DOC ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS @@ -112,7 +90,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) end -subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) +subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC ! for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS @@ -207,7 +185,7 @@ end ! --- -subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) +subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC ! for double excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index a19d4688..3fd2576a 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,3 +1,26 @@ +subroutine provide_all_three_ints_bi_ortho + implicit none + BEGIN_DOC +! routine that provides all necessary three-electron integrals + END_DOC + if(three_body_h_tc)then + if(three_e_3_idx_term)then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + if(three_e_4_idx_term)then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + endif + if(.not.double_normal_ord.and.three_e_5_idx_term)then + PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort + PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort + elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then + PROVIDE normal_two_body_bi_orth + endif + endif +end + subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 1745e362..531f0141 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -7,11 +7,11 @@ ! Various component of the TC energy for the reference "HF" Slater determinant END_DOC double precision :: hmono, htwoe, htot, hthree - call diag_htilde_mu_mat_bi_ortho(N_int,HF_bitmask , hmono, htwoe, htot) + call diag_htilde_mu_mat_bi_ortho_slow(N_int,HF_bitmask , hmono, htwoe, htot) ref_tc_energy_1e = hmono ref_tc_energy_2e = htwoe if(three_body_h_tc)then - call diag_htilde_three_body_ints_bi_ort(N_int, HF_bitmask, hthree) + call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) ref_tc_energy_3e = hthree else ref_tc_energy_3e = 0.d0 diff --git a/src/tc_bi_ortho/slater_tc.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f similarity index 85% rename from src/tc_bi_ortho/slater_tc.irp.f rename to src/tc_bi_ortho/slater_tc_slow.irp.f index 2c0ae2ca..1833d20f 100644 --- a/src/tc_bi_ortho/slater_tc.irp.f +++ b/src/tc_bi_ortho/slater_tc_slow.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot) +subroutine htilde_mu_mat_bi_ortho_tot_slow(key_j, key_i, Nint, htot) BEGIN_DOC ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis @@ -24,14 +24,14 @@ subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot) if(degree.gt.2)then htot = 0.d0 else - call htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) endif -end subroutine htilde_mu_mat_bi_ortho_tot +end subroutine htilde_mu_mat_bi_ortho_tot_slow ! -- -subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) +subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) BEGIN_DOC ! @@ -61,22 +61,22 @@ subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot if(degree.gt.2) return if(degree == 0)then - call diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot) + call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) else if (degree == 1)then - call single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) + call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) else if(degree == 2)then - call double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) + call double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) endif if(three_body_h_tc) then if(degree == 2) then - if(.not.double_normal_ord) then - call double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) + if(.not.double_normal_ord.and.elec_num.gt.2.and.three_e_5_idx_term) then + call double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) endif - else if(degree == 1) then - call single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) - else if(degree == 0) then - call diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) + else if(degree == 1.and.elec_num.gt.2.and.three_e_4_idx_term) then + call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) + else if(degree == 0.and.elec_num.gt.2.and.three_e_3_idx_term) then + call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) endif endif @@ -89,7 +89,7 @@ end ! --- -subroutine diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot) +subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) BEGIN_DOC ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS @@ -188,7 +188,7 @@ end -subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) +subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS @@ -227,18 +227,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) return endif -! if(core_tc_op)then -! print*,'core_tc_op not already taken into account for bi ortho' -! print*,'stopping ...' -! stop -! do i = 1, Nint -! key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1)) -! key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2)) -! enddo -! call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) -! else call bitstring_to_list_ab(key_i, occ, Ne, Nint) -! endif call get_double_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) @@ -246,7 +235,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) ! opposite spin two-body ! key_j, key_i htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - if(double_normal_ord.and.+Ne(1).gt.2)then + if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ??? endif else @@ -255,7 +244,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) ! exchange terms htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) - if(double_normal_ord.and.+Ne(1).gt.2)then + if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ??? htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ??? endif @@ -266,7 +255,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) end -subroutine single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) +subroutine single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS diff --git a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f index 4ae44148..4c3c0788 100644 --- a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f @@ -11,10 +11,10 @@ allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states)) dressing_dets = 0.d0 do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) endif enddo reigvec_tc_bi_orth_tmp = 0.d0 @@ -29,7 +29,7 @@ vec_tmp(istate,istate) = 1.d0 enddo print*,'Diagonalizing the TC CISD ' - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) do i = 1, N_det e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) enddo @@ -41,8 +41,8 @@ it = 0 dressing_dets = 0.d0 double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:) - external htc_bi_ortho_calc_tdav - external htcdag_bi_ortho_calc_tdav + external htc_bi_ortho_calc_tdav_slow + external htcdag_bi_ortho_calc_tdav_slow logical :: converged do while (dabs(E_before-E_current).gt.thr) it += 1 @@ -66,7 +66,7 @@ do istate = N_states+1, n_states_diag vec_tmp(istate,istate) = 1.d0 enddo - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) print*,'outside Davidson' print*,'eigval_tmp(1) = ',eigval_tmp(1) do i = 1, N_det diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index a83d6cd0..db4c5e28 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -207,8 +207,6 @@ end else ! n_det > N_det_max_full double precision, allocatable :: H_jj(:),vec_tmp(:,:) - external htc_bi_ortho_calc_tdav - external htcdag_bi_ortho_calc_tdav external H_tc_u_0_opt external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt @@ -217,7 +215,7 @@ end allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo print*,'---------------------------------' @@ -259,7 +257,6 @@ end do istate = N_states+1, n_states_diag vec_tmp(istate,istate) = 1.d0 enddo - !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) converged = .False. i_it = 0 diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f index 291c52ef..a7e4d09e 100644 --- a/src/tc_bi_ortho/tc_som.irp.f +++ b/src/tc_bi_ortho/tc_som.irp.f @@ -56,8 +56,8 @@ subroutine main() U_SOM = 0.d0 do i = 1, N_det if(i == i_HF) cycle - call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) U_SOM += htot_1 * htot_2 enddo U_SOM = 0.5d0 * U_SOM diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index f8f648e8..24bb7017 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -12,7 +12,7 @@ subroutine write_tc_energy() do i = 1, N_det do j = 1, N_det !htot = htilde_matrix_elmt_bi_ortho(i,j) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot enddo @@ -45,7 +45,7 @@ subroutine write_tc_var() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) SIGMA_TC = SIGMA_TC + htot * htot enddo diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f index 118e481a..cb0c355c 100644 --- a/src/tc_bi_ortho/test_normal_order.irp.f +++ b/src/tc_bi_ortho/test_normal_order.irp.f @@ -35,7 +35,7 @@ subroutine test det_i = ref_bitmask call do_single_excitation(det_i,h1,p1,s1,i_ok) call do_single_excitation(det_i,h2,p2,s2,i_ok) - call htilde_mu_mat_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) hthree *= phase @@ -67,7 +67,7 @@ do h1 = 1, elec_alpha_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) integer :: hh1, pp1, hh2, pp2, ss1, ss2 @@ -103,7 +103,7 @@ do h1 = 1, elec_beta_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4debe2e2..1f7bdfda 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -91,7 +91,7 @@ subroutine routine_test_s2_davidson external H_tc_s2_u_0_opt allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo ! Preparing the left-eigenvector print*,'Computing the left-eigenvector ' diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 6721c285..df86ea65 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -31,7 +31,7 @@ subroutine test_h_u0 u_0(i) = psi_r_coef_bi_ortho(i,1) enddo call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right) - call htc_bi_ortho_calc_tdav (v_0_ref,u_0,N_states,N_det) + call htc_bi_ortho_calc_tdav_slow (v_0_ref,u_0,N_states,N_det) print*,'difference right ' accu = 0.d0 do i = 1, N_det @@ -42,7 +42,7 @@ subroutine test_h_u0 do_right = .False. v_0_new = 0.d0 call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right) - call htcdag_bi_ortho_calc_tdav(v_0_ref_dagger,u_0,N_states,N_det, do_right) + call htcdag_bi_ortho_calc_tdav_slow(v_0_ref_dagger,u_0,N_states,N_det, do_right) print*,'difference left' accu = 0.d0 do i = 1, N_det @@ -63,7 +63,7 @@ subroutine test_slater_tc_opt i_count = 0.d0 do i = 1, N_det do j = 1,N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot) if(dabs(htot).gt.1.d-15)then i_count += 1.D0 @@ -99,7 +99,7 @@ subroutine timing_tot do j = 1, N_det ! call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int) i_count += 1.d0 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -146,7 +146,7 @@ subroutine timing_diag do i = 1, N_det do j = i,i i_count += 1.d0 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -183,7 +183,7 @@ subroutine timing_single if(degree.ne.1)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo @@ -225,7 +225,7 @@ subroutine timing_double if(degree.ne.2)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index ebd43a7a..b7de067f 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -25,8 +25,7 @@ subroutine test_3e implicit none double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu double precision :: hmono, htwoe, hthree, htot - call htilde_mu_mat_bi_ortho(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) -! call diag_htilde_three_body_ints_bi_ort(N_int, ref_bitmask, hthree) + call htilde_mu_mat_bi_ortho_slow(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) print*,'hmono = ',hmono print*,'htwoe = ',htwoe print*,'hthree= ',hthree @@ -88,7 +87,7 @@ subroutine routine_3() print*, ' excited det' call debug_det(det_i, N_int) - call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) if(dabs(hthree).lt.1.d-10)cycle ref = hthree if(s1 == 1)then @@ -156,7 +155,7 @@ subroutine routine_tot() stop endif - call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' From fd051ae020927be5e495dc3da3fa661ba55cd6ee Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 22 May 2023 18:39:48 +0200 Subject: [PATCH 24/79] some cleaning in slow and no slow tc routines --- src/cipsi_tc_bi_ortho/selection.irp.f | 5 ++--- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 4c271a4b..77377554 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -868,7 +868,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! and transpose ! ------------------------------------------- -! call htilde_mu_mat_bi_ortho_tot(det, det, N_int, Hii) double precision :: hmono, htwoe, hthree call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii) do istate = 1,N_states @@ -878,8 +877,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 do iii = 1, N_det_selectors - call htilde_mu_mat_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + call htilde_mu_mat_bi_ortho_tot_slow(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_bi_ortho_tot_slow(det, psi_selectors(1,1,iii), N_int, alpha_h_i) call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) if(degree == 0)then print*,'problem !!!' diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index db4c5e28..fa946d6a 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -215,7 +215,7 @@ end allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo print*,'---------------------------------' From 4d9cdf9df1d8e0d61006c1d348f28e96c0946464 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 24 May 2023 11:06:00 +0200 Subject: [PATCH 25/79] added new mu(r) jastrow --- src/non_h_ints_mu/jast_deriv.irp.f | 167 ++++++++++++++++++++++++++++- src/tc_keywords/EZFIO.cfg | 6 ++ 2 files changed, 172 insertions(+), 1 deletion(-) diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index cbd0b406..5e99600e 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -187,6 +187,19 @@ end function j12_mu subroutine grad1_j12_mu(r1, r2, grad) + BEGIN_DOC +! gradient of j(mu(r1,r2),r12) form of jastrow. +! +! if mu(r1,r2) = cst ---> j1b_type < 200 and +! +! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) +! +! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and +! +! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) +! +! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + END_DOC include 'constants.include.F' implicit none @@ -515,6 +528,9 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) double precision :: r(3) double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) double precision :: dm_tot, tmp1, tmp2, tmp3 + double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot + double precision :: f_rho1, f_rho2, d_drho_f_rho1 + double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume if(j1b_type .eq. 200) then @@ -578,8 +594,84 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) - else + elseif(j1b_type .eq. 202) then + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 exp(-rho) + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) - mu0 exp(-rho(r1))] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf * exp(-rho) + call get_all_f_rho(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 203) then + + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 204) then + + ! mu(r1,r2) = 1/2 * (f[rho(r1)] + f[rho(r2)]} + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/2 * d/dx1 (rho(r1) f[rho(r1)]) + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + mu_val = 0.5d0 * ( f_rho1 + f_rho2) + mu_der(1:3) = d_dx_rho_f_rho(1:3) + else print *, ' j1b_type = ', j1b_type, 'not implemented yet' stop @@ -684,3 +776,76 @@ end function j12_mu_square ! --- +subroutine f_mu_and_deriv_mu(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 * exp(-rho) +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 * dexp(-rho) + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - mu0 * dexp(-rho) + +end + + +subroutine get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + implicit none + BEGIN_DOC +! returns the density in r1,r2 and grad_rho at r1 + END_DOC + double precision, intent(in) :: r1(3),r2(3) + double precision, intent(out):: grad_rho1(3),rho1,rho2 + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + call density_and_grad_alpha_beta(r1, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho1 = dm_a(1) + dm_b(1) + grad_rho1(1:3) = grad_dm_a(1:3,1) + grad_dm_b(1:3,1) + call density_and_grad_alpha_beta(r2, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho2 = dm_a(1) + dm_b(1) +end + +subroutine get_all_f_rho(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu(rho2,alpha,mu0,beta,f_rho2,tmp) +end + + +subroutine get_all_f_rho_simple(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu_simple(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu_simple(rho2,alpha,mu0,beta,f_rho2,tmp) +end + +subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) + +end + diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 484bd1f0..62adb068 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -148,6 +148,12 @@ doc: a parameter used to define mu(r) interface: ezfio, provider, ocaml default: 6.203504908994001e-1 +[beta_rho_power] +type: double precision +doc: a parameter used to define mu(r) +interface: ezfio, provider, ocaml +default: 0.5 + [thr_degen_tc] type: Threshold doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue From 7e5f1ffc0c8fb9edc23e33f5be163b3d93ff124f Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 25 May 2023 12:57:58 +0200 Subject: [PATCH 26/79] added plot_mu_of_r.irp.f --- src/non_h_ints_mu/plot_mu_of_r.irp.f | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/non_h_ints_mu/plot_mu_of_r.irp.f diff --git a/src/non_h_ints_mu/plot_mu_of_r.irp.f b/src/non_h_ints_mu/plot_mu_of_r.irp.f new file mode 100644 index 00000000..1100cd7c --- /dev/null +++ b/src/non_h_ints_mu/plot_mu_of_r.irp.f @@ -0,0 +1,33 @@ +program plot_mu_of_r + implicit none + read_wf = .False. + touch read_wf + call routine_print + +end + + +subroutine routine_print + implicit none + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.mu_of_r' + i_unit_output = getUnitAndOpen(output,'w') + integer :: ipoint,nx + double precision :: xmax,xmin,r(3),dx + double precision :: mu_val, mu_der(3),dm_a,dm_b,grad + xmax = 5.D0 + xmin = -5.D0 + nx = 10000 + dx = (xmax - xmin)/dble(nx) + r = 0.d0 + r(1) = xmin + do ipoint = 1, nx + call mu_r_val_and_grad(r, r, mu_val, mu_der) + call dm_dft_alpha_beta_at_r(r,dm_a,dm_b) + grad = mu_der(1)**2 + mu_der(2)**2 + mu_der(3)**2 + grad = dsqrt(grad) + write(i_unit_output,'(100(F16.7,X))')r(1),mu_val,dm_a+dm_b,grad + r(1) += dx + enddo +end From f2ca86ef604b906c3c10032691ac5e0e0ff83b53 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 26 May 2023 11:48:08 +0200 Subject: [PATCH 27/79] Improved cholesky --- src/ao_two_e_ints/cholesky.irp.f | 25 +++++++++++++++++++++++-- src/cipsi/selection.irp.f | 3 +++ src/mo_two_e_ints/cholesky.irp.f | 1 + 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d4c201aa..5aab12d9 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -51,8 +51,9 @@ END_PROVIDER double precision :: integral logical, external :: ao_two_e_integral_zero - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) do l=1,ao_num + !$OMP DO SCHEDULE(dynamic) do j=1,l do k=1,ao_num do i=1,k @@ -65,8 +66,28 @@ END_PROVIDER enddo enddo enddo + !$OMP END DO NOWAIT enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) + do l=1,ao_num + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,k + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP END PARALLEL ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 6f40a809..0705d103 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -76,6 +76,8 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) double precision, allocatable :: fock_diag_tmp(:,:) + if (csubset == 0) return + allocate(fock_diag_tmp(2,mo_num+1)) call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int) @@ -177,6 +179,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d monoAdo = .true. monoBdo = .true. + if (csubset == 0) return do k=1,N_int hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 14d3c696..1706b2ec 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -6,6 +6,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num integer :: k + call set_multiple_levels_omp(.False.) !$OMP PARALLEL DO PRIVATE(k) do k=1,cholesky_ao_num call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) From b8bfab1d7cd8576c9597d92f70822d903628a6a6 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 27 May 2023 22:34:40 +0200 Subject: [PATCH 28/79] start working on NO --- src/tc_bi_ortho/normal_ordered.irp.f | 252 ++++++++++++++++----------- 1 file changed, 150 insertions(+), 102 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 8adc7a63..c30cd1ef 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -1,3 +1,6 @@ + +! --- + BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC @@ -8,13 +11,16 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ implicit none - integer :: i,h1,p1,h2,p2 - integer :: hh1,hh2,pp1,pp2 + integer :: i, h1, p1, h2, p2 + integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) + double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: wall0, wall1 integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree_aba,hthree_aaa,hthree_aab - double precision :: wall0,wall1 + + print*,' Providing normal_two_body_bi_orth ...' + call wall_time(wall0) PROVIDE N_int @@ -23,23 +29,21 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ if(core_tc_op) then do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - normal_two_body_bi_orth = 0.d0 - print*,'Providing normal_two_body_bi_orth ...' - call wall_time(wall0) + normal_two_body_bi_orth(1:mo_num,1:mo_num,1:mo_num,1:mo_num) = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) - !$OMP DO SCHEDULE (static) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) + !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) do pp1 = 1, n_act_orb @@ -48,50 +52,57 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ h2 = list_act(hh2) do pp2 = 1, n_act_orb p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - ! opposite spin double excitations : s1 /= s2 call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) ! same spin double excitations : s1 == s2 - if(h1h2 - ! same spin double excitations with same spin contributions - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then + if((h1 .lt. h2) .and. (p1 .gt. p2)) then + + ! with opposite spin contributions + call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2 + ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif + if(Ne(2) .ge. 3) then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + + else + + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + + if(Ne(2) .ge. 3) then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + + normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0 * (hthree_aba + hthree_aab + hthree_aaa) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print*,'Wall time for normal_two_body_bi_orth ',wall1-wall0 + !$OMP END DO + !$OMP END PARALLEL deallocate( occ ) deallocate( key_i_core ) + call wall_time(wall1) + print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 + END_PROVIDER - +! --- subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) @@ -106,30 +117,41 @@ subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) !!!! double alpha/beta hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part i = occ(ii,2) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) int_exc_13 = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2, i,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12 = -1.d0 * integral - hthree += 2.d0 * int_direct - 1.d0 * ( int_exc_13 + int_exc_12) + + hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) enddo + do ii = Ne(2) + 1, Ne(1) ! purely open-shell part - i = occ(ii,1) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) int_exc_13 = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2, i,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12 = -1.d0 * integral - hthree += 1.d0 * int_direct - 0.5d0* ( int_exc_13 + int_exc_12) + + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) enddo -end subroutine give_aba_contraction - + return +end +! --- BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] @@ -152,29 +174,31 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, allocate( key_i_core(N_int,2) ) allocate( occ(N_int*bit_kind_size,2) ) - if(core_tc_op)then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) endif + normal_two_body_bi_orth_ab = 0.d0 do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) - normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree - enddo + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) + + normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree + enddo + enddo enddo - enddo enddo deallocate( key_i_core ) @@ -182,7 +206,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, END_PROVIDER - +! --- BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] @@ -250,13 +274,14 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_ END_PROVIDER - +! --- subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) BEGIN_DOC -! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 + ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 END_DOC + use bitmasks ! you need to include the bitmasks_module.f90 features implicit none @@ -270,48 +295,64 @@ subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) hthree = 0.d0 do ii = 1, Ne(2) ! purely closed shell part i = occ(ii,2) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) int_exc_l = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) int_exc_ll= -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12= -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) int_exc_13= -1.d0 * integral - call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) int_exc_23= -1.d0 * integral - hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23 ) + hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) enddo + do ii = Ne(2)+1,Ne(1) ! purely open-shell part i = occ(ii,1) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) - int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral) - int_exc_l = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral) - int_exc_ll= -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral) - int_exc_12= -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) - int_exc_13= -1.d0 * integral - call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral) - int_exc_23= -1.d0 * integral - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23 )) + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23 = -1.d0 * integral + + hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) enddo -end subroutine give_aaa_contraction - + return +end +! --- subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - implicit none + use bitmasks ! you need to include the bitmasks_module.f90 features - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) double precision, intent(out) :: hthree integer :: ii, i double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 @@ -320,11 +361,18 @@ subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) hthree = 0.d0 do ii = 1, Ne(2) ! purely closed shell part i = occ(ii,2) - call give_integrals_3_body_bi_ort(p2,p1,i,h2,h1,i,integral) + + call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2,i,h2,h1,i,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) int_exc_23= -1.d0 * integral - hthree += 1.d0 * int_direct - int_exc_23 + + hthree += 1.d0 * int_direct - int_exc_23 enddo -end subroutine give_aab_contraction + return +end + +! --- + From f0ad63966adf94b1bbe794186a0f35d07c744013 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 30 May 2023 13:48:34 +0200 Subject: [PATCH 29/79] Fixes for numerical orbitals in qp_import --- ocaml/Input_ao_basis.ml | 47 ++++---- ocaml/Input_mo_basis.ml | 5 +- scripts/qp_import_trexio.py | 146 +++++++++++++++-------- src/trexio/import_trexio_integrals.irp.f | 136 ++++++++++++++++----- 4 files changed, 236 insertions(+), 98 deletions(-) diff --git a/ocaml/Input_ao_basis.ml b/ocaml/Input_ao_basis.ml index 841089ea..506cf069 100644 --- a/ocaml/Input_ao_basis.ml +++ b/ocaml/Input_ao_basis.ml @@ -44,8 +44,12 @@ end = struct let get_default = Qpackage.get_ezfio_default "ao_basis";; let read_ao_basis () = - Ezfio.get_ao_basis_ao_basis () - |> AO_basis_name.of_string + let result = + Ezfio.get_ao_basis_ao_basis () + in + if result <> "None" then + AO_basis_name.of_string result + else failwith "No basis" ;; let read_ao_num () = @@ -192,7 +196,7 @@ end = struct ao_expo ; ao_cartesian ; ao_normalized ; - primitives_normalized ; + primitives_normalized ; } = b in write_md5 b ; @@ -207,7 +211,7 @@ end = struct Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; - let ao_nucl = + let ao_nucl = Array.to_list ao_nucl |> list_map Nucl_number.to_int in @@ -215,7 +219,7 @@ end = struct ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; let ao_power = - let l = Array.to_list ao_power in + let l = Array.to_list ao_power in List.concat [ (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.x) l) ; (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.y) l) ; @@ -227,7 +231,7 @@ end = struct Ezfio.set_ao_basis_ao_cartesian(ao_cartesian); Ezfio.set_ao_basis_ao_normalized(ao_normalized); Ezfio.set_ao_basis_primitives_normalized(primitives_normalized); - + let ao_coef = Array.to_list ao_coef |> list_map AO_coef.to_float @@ -267,7 +271,10 @@ end = struct |> Ezfio.set_ao_basis_ao_md5 ; Some result with - | _ -> (Ezfio.set_ao_basis_ao_md5 "None" ; None) + | _ -> ( "None" + |> Digest.string + |> Digest.to_hex + |> Ezfio.set_ao_basis_ao_md5 ; None) ;; @@ -276,7 +283,7 @@ end = struct to_basis b |> Long_basis.of_basis |> Array.of_list - and unordered_basis = + and unordered_basis = to_long_basis b |> Array.of_list in @@ -289,15 +296,15 @@ end = struct (a.(i) <- None ; i) else find x a (i+1) - and find2 (s,g,n) a i = + and find2 (s,g,n) a i = if i = Array.length a then -1 else - match a.(i) with + match a.(i) with | None -> find2 (s,g,n) a (i+1) | Some (s', g', n') -> if s <> s' || n <> n' then find2 (s,g,n) a (i+1) else - let lc = list_map (fun (prim, _) -> prim) g.Gto.lc + let lc = list_map (fun (prim, _) -> prim) g.Gto.lc and lc' = list_map (fun (prim, _) -> prim) g'.Gto.lc in if lc <> lc' then find2 (s,g,n) a (i+1) else (a.(i) <- None ; i) @@ -313,13 +320,13 @@ end = struct let ao_num = List.length long_basis |> AO_number.of_int in let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc - |> AO_prim_number.of_int ) long_basis + |> AO_prim_number.of_int ) long_basis |> Array.of_list and ao_nucl = - list_map (fun (_,_,n) -> n) long_basis + list_map (fun (_,_,n) -> n) long_basis |> Array.of_list and ao_power = - list_map (fun (x,_,_) -> x) long_basis + list_map (fun (x,_,_) -> x) long_basis |> Array.of_list in let ao_prim_num_max = Array.fold_left (fun s x -> @@ -329,16 +336,16 @@ end = struct in let gtos = - list_map (fun (_,x,_) -> x) long_basis + list_map (fun (_,x,_) -> x) long_basis in let create_expo_coef ec = let coefs = begin match ec with | `Coefs -> list_map (fun x-> - list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos + list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos | `Expos -> list_map (fun x-> list_map (fun (prim,_) -> AO_expo.to_float - prim.GaussianPrimitive.expo) x.Gto.lc ) gtos + prim.GaussianPrimitive.expo) x.Gto.lc ) gtos end in let rec get_n n accu = function @@ -360,7 +367,7 @@ end = struct let ao_coef = create_expo_coef `Coefs |> Array.of_list |> Array.map AO_coef.of_float - and ao_expo = create_expo_coef `Expos + and ao_expo = create_expo_coef `Expos |> Array.of_list |> Array.map AO_expo.of_float in @@ -372,7 +379,7 @@ end = struct } ;; - let reorder b = + let reorder b = let order = ordering b in let f a = Array.init (Array.length a) (fun i -> a.(order.(i))) in let ao_prim_num_max = AO_prim_number.to_int b.ao_prim_num_max @@ -464,7 +471,7 @@ Basis set (read-only) :: | line :: tail -> let line = String.trim line in if line = "Basis set (read-only) ::" then - String.concat "\n" tail + String.concat "\n" tail else extract_basis tail in diff --git a/ocaml/Input_mo_basis.ml b/ocaml/Input_mo_basis.ml index a4e6176a..832b464e 100644 --- a/ocaml/Input_mo_basis.ml +++ b/ocaml/Input_mo_basis.ml @@ -56,7 +56,10 @@ end = struct let read_ao_md5 () = let ao_md5 = match (Input_ao_basis.Ao_basis.read ()) with - | None -> failwith "Unable to read AO basis" + | None -> ("None" + |> Digest.string + |> Digest.to_hex + |> MD5.of_string) | Some result -> Input_ao_basis.Ao_basis.to_md5 result in let result = diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index eb19e16b..e7bc0f78 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -132,60 +132,113 @@ def write_ezfio(trexio_filename, filename): try: basis_type = trexio.read_basis_type(trexio_file) - if basis_type.lower() not in ["gaussian", "slater"]: - raise TypeError + if basis_type.lower() in ["gaussian", "slater"]: + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = trexio.read_basis_prim_num(trexio_file) + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = trexio.read_basis_exponent(trexio_file) + coefficient = trexio.read_basis_coefficient(trexio_file) + shell_index = trexio.read_basis_shell_index(trexio_file) + ao_shell = trexio.read_ao_shell(trexio_file) - shell_num = trexio.read_basis_shell_num(trexio_file) - prim_num = trexio.read_basis_prim_num(trexio_file) - ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) - nucl_index = trexio.read_basis_nucleus_index(trexio_file) - exponent = trexio.read_basis_exponent(trexio_file) - coefficient = trexio.read_basis_coefficient(trexio_file) - shell_index = trexio.read_basis_shell_index(trexio_file) - ao_shell = trexio.read_ao_shell(trexio_file) + ezfio.set_basis_basis("Read from TREXIO") + ezfio.set_ao_basis_ao_basis("Read from TREXIO") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) - ezfio.set_basis_basis("Read from TREXIO") - ezfio.set_basis_shell_num(shell_num) - ezfio.set_basis_prim_num(prim_num) - ezfio.set_basis_shell_ang_mom(ang_mom) - ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) - ezfio.set_basis_prim_expo(exponent) - ezfio.set_basis_prim_coef(coefficient) + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) - nucl_shell_num = [] - prev = None - m = 0 - for i in ao_shell: - if i != prev: - m += 1 - if prev is None or nucl_index[i] != nucl_index[prev]: - nucl_shell_num.append(m) - m = 0 - prev = i - assert (len(nucl_shell_num) == nucl_num) + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) - shell_prim_num = [] - prev = shell_index[0] - count = 0 - for i in shell_index: - if i != prev: - shell_prim_num.append(count) - count = 0 - count += 1 - prev = i - shell_prim_num.append(count) + assert (len(shell_prim_num) == shell_num) - assert (len(shell_prim_num) == shell_num) - - ezfio.set_basis_shell_prim_num(shell_prim_num) - ezfio.set_basis_shell_index([x+1 for x in shell_index]) - ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) - shell_factor = trexio.read_basis_shell_factor(trexio_file) - prim_factor = trexio.read_basis_prim_factor(trexio_file) + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = trexio.read_basis_prim_factor(trexio_file) - print("OK") + elif basis_type.lower() == "numerical": + + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = shell_num + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = [1.]*prim_num + coefficient = [1.]*prim_num + shell_index = [i for i in range(shell_num)] + ao_shell = trexio.read_ao_shell(trexio_file) + + ezfio.set_basis_basis("None") + ezfio.set_ao_basis_ao_basis("None") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) + + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) + + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) + + assert (len(shell_prim_num) == shell_num) + + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = [1.]*prim_num + else: + raise TypeError + + print(basis_type) except: print("None") ezfio.set_ao_basis_ao_cartesian(True) @@ -262,7 +315,6 @@ def write_ezfio(trexio_filename, filename): # ezfio.set_ao_basis_ao_prim_num_max(prim_num_max) ezfio.set_ao_basis_ao_coef(coef) ezfio.set_ao_basis_ao_expo(expo) - ezfio.set_ao_basis_ao_basis("Read from TREXIO") print("OK") diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f index 9f9ad9d6..8c6b79d7 100644 --- a/src/trexio/import_trexio_integrals.irp.f +++ b/src/trexio/import_trexio_integrals.irp.f @@ -3,6 +3,7 @@ program import_integrals_ao implicit none integer(trexio_t) :: f ! TREXIO file handle integer(trexio_exit_code) :: rc + PROVIDE mo_num f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) if (f == 0_8) then @@ -42,10 +43,10 @@ subroutine run(f) if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then rc = trexio_read_nucleus_repulsion(f, s) - call trexio_assert(rc, TREXIO_SUCCESS) if (rc /= TREXIO_SUCCESS) then print *, irp_here, rc print *, 'Error reading nuclear repulsion' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_nuclei_nuclear_repulsion(s) @@ -63,6 +64,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO overlap' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A) @@ -74,6 +76,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO kinetic integrals' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A) @@ -85,6 +88,7 @@ subroutine run(f) ! if (rc /= TREXIO_SUCCESS) then ! print *, irp_here ! print *, 'Error reading AO ECP local integrals' +! call trexio_assert(rc, TREXIO_SUCCESS) ! stop -1 ! endif ! call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A) @@ -96,6 +100,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO potential N-e integrals' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A) @@ -106,41 +111,112 @@ subroutine run(f) ! AO 2e integrals ! --------------- - PROVIDE ao_integrals_map - integer*4 :: BUFSIZE - BUFSIZE=ao_num**2 - allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) - allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + rc = trexio_has_ao_2e_int(f) + PROVIDE ao_num + if (rc /= TREXIO_HAS_NOT) then + PROVIDE ao_integrals_map - integer*8 :: offset, icount + integer*4 :: BUFSIZE + BUFSIZE=ao_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) - offset = 0_8 - icount = BUFSIZE - rc = TREXIO_SUCCESS - do while (icount == size(V)) - rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) - do m=1,icount - i = Vi(1,m) - j = Vi(2,m) - k = Vi(3,m) - l = Vi(4,m) - integral = V(m) - call two_e_integrals_index(i, j, k, l, buffer_i(m) ) - buffer_values(m) = integral - enddo - call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) - offset = offset + icount + integer*8 :: offset, icount + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'AO integrals read from TREXIO file' + else + print *, 'AO integrals not found in TREXIO file' + endif + + ! MO integrals + ! ------------ + + allocate(A(mo_num, mo_num)) + if (trexio_has_mo_1e_int_core_hamiltonian(f) == TREXIO_SUCCESS) then + rc = trexio_read_mo_1e_int_core_hamiltonian(f, A) if (rc /= TREXIO_SUCCESS) then - exit + print *, irp_here + print *, 'Error reading MO 1e integrals' + call trexio_assert(rc, TREXIO_SUCCESS) + stop -1 endif - end do - n_integrals = offset + call ezfio_set_mo_one_e_ints_mo_one_e_integrals(A) + call ezfio_set_mo_one_e_ints_io_mo_one_e_integrals('Read') + endif + deallocate(A) - call map_sort(ao_integrals_map) - call map_unique(ao_integrals_map) + ! MO 2e integrals + ! --------------- - call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) - call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + rc = trexio_has_mo_2e_int(f) + if (rc /= TREXIO_HAS_NOT) then + + BUFSIZE=mo_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_mo_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call map_append(mo_integrals_map, buffer_i, buffer_values, int(icount,4)) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) + call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read') + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'MO integrals read from TREXIO file' + else + print *, 'MO integrals not found in TREXIO file' + endif end From ff5d62f840ee0c685120df72724d6ba049a07037 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 31 May 2023 11:01:51 +0200 Subject: [PATCH 30/79] Fix normalization factor in trexio --- src/trexio/EZFIO.cfg | 30 ++-- src/trexio/export_trexio.irp.f | 2 +- src/trexio/export_trexio_routines.irp.f | 176 +++++++++++++----------- 3 files changed, 119 insertions(+), 89 deletions(-) diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg index 8606e908..8c11478e 100644 --- a/src/trexio/EZFIO.cfg +++ b/src/trexio/EZFIO.cfg @@ -10,11 +10,17 @@ doc: Name of the exported TREXIO file interface: ezfio, ocaml, provider default: None -[export_rdm] +[export_basis] type: logical -doc: If True, export two-body reduced density matrix +doc: If True, export basis set and AOs interface: ezfio, ocaml, provider -default: False +default: True + +[export_mos] +type: logical +doc: If True, export basis set and AOs +interface: ezfio, ocaml, provider +default: True [export_ao_one_e_ints] type: logical @@ -22,12 +28,6 @@ doc: If True, export one-electron integrals in AO basis interface: ezfio, ocaml, provider default: False -[export_mo_one_e_ints] -type: logical -doc: If True, export one-electron integrals in MO basis -interface: ezfio, ocaml, provider -default: False - [export_ao_two_e_ints] type: logical doc: If True, export two-electron integrals in AO basis @@ -40,6 +40,12 @@ doc: If True, export Cholesky-decomposed two-electron integrals in AO basis interface: ezfio, ocaml, provider default: False +[export_mo_one_e_ints] +type: logical +doc: If True, export one-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + [export_mo_two_e_ints] type: logical doc: If True, export two-electron integrals in MO basis @@ -52,3 +58,9 @@ doc: If True, export Cholesky-decomposed two-electron integrals in MO basis interface: ezfio, ocaml, provider default: False +[export_rdm] +type: logical +doc: If True, export two-body reduced density matrix +interface: ezfio, ocaml, provider +default: False + diff --git a/src/trexio/export_trexio.irp.f b/src/trexio/export_trexio.irp.f index 3ae0dcb4..f9ecc17f 100644 --- a/src/trexio/export_trexio.irp.f +++ b/src/trexio/export_trexio.irp.f @@ -2,6 +2,6 @@ program export_trexio_prog implicit none read_wf = .True. SOFT_TOUCH read_wf - call export_trexio + call export_trexio(.False.) end diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index c55ddc5e..f25ae370 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f @@ -1,15 +1,17 @@ -subroutine export_trexio +subroutine export_trexio(update) use trexio implicit none BEGIN_DOC ! Exports the wave function in TREXIO format END_DOC + logical, intent(in) :: update integer(trexio_t) :: f(N_states) ! TREXIO file handle integer(trexio_exit_code) :: rc integer :: k double precision, allocatable :: factor(:) character*(256) :: filenames(N_states) + character :: rw filenames(1) = trexio_filename do k=2,N_states @@ -18,15 +20,26 @@ subroutine export_trexio do k=1,N_states print *, 'TREXIO file : ', trim(filenames(k)) - call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + if (update) then + call system('test -f '//trim(filenames(k))//' && cp -r '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + else + call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + endif enddo print *, '' + if (update) then + rw = 'u' + else + rw = 'w' + endif + + do k=1,N_states if (backend == 0) then - f(k) = trexio_open(filenames(k), 'u', TREXIO_HDF5, rc) + f(k) = trexio_open(filenames(k), rw, TREXIO_HDF5, rc) else if (backend == 1) then - f(k) = trexio_open(filenames(k), 'u', TREXIO_TEXT, rc) + f(k) = trexio_open(filenames(k), rw, TREXIO_TEXT, rc) endif if (f(k) == 0_8) then print *, 'Unable to open TREXIO file for writing' @@ -171,92 +184,95 @@ subroutine export_trexio endif + if (export_basis) then + ! Basis ! ----- - print *, 'Basis' + print *, 'Basis' + rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_prim_num(f(1), prim_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_prim_num(f(1), prim_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_shell_num(f(1), shell_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_num(f(1), shell_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) - call trexio_assert(rc, TREXIO_SUCCESS) + allocate(factor(shell_num)) +! if (ao_normalized) then +! factor(1:shell_num) = shell_normalization_factor(1:shell_num) +! else + factor(1:shell_num) = 1.d0 +! endif + rc = trexio_write_basis_shell_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) - allocate(factor(shell_num)) - if (ao_normalized) then - factor(1:shell_num) = shell_normalization_factor(1:shell_num) - else - factor(1:shell_num) = 1.d0 - endif - rc = trexio_write_basis_shell_factor(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) - deallocate(factor) + rc = trexio_write_basis_shell_index(f(1), shell_index) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_index(f(1), shell_index) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_exponent(f(1), prim_expo) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_exponent(f(1), prim_expo) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_coefficient(f(1), prim_coef) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_coefficient(f(1), prim_coef) - call trexio_assert(rc, TREXIO_SUCCESS) - - allocate(factor(prim_num)) - if (primitives_normalized) then - factor(1:prim_num) = prim_normalization_factor(1:prim_num) - else - factor(1:prim_num) = 1.d0 - endif - rc = trexio_write_basis_prim_factor(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) + allocate(factor(prim_num)) + if (primitives_normalized) then + factor(1:prim_num) = prim_normalization_factor(1:prim_num) + else + factor(1:prim_num) = 1.d0 + endif + rc = trexio_write_basis_prim_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) ! Atomic orbitals ! --------------- - print *, 'AOs' + print *, 'AOs' - rc = trexio_write_ao_num(f(1), ao_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_num(f(1), ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_cartesian(f(1), 1) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_cartesian(f(1), 1) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_shell(f(1), ao_shell) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_shell(f(1), ao_shell) + call trexio_assert(rc, TREXIO_SUCCESS) - integer :: i, pow0(3), powA(3), j, l, nz - double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c - nz=100 + integer :: i, pow0(3), powA(3), j, l, nz + double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c + nz=100 - C_A(1) = 0.d0 - C_A(2) = 0.d0 - C_A(3) = 0.d0 + C_A(1) = 0.d0 + C_A(2) = 0.d0 + C_A(3) = 0.d0 + + allocate(factor(ao_num)) + if (ao_normalized) then + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + else + factor(:) = 1.d0 + endif + rc = trexio_write_ao_normalization(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) - allocate(factor(ao_num)) - if (ao_normalized) then - do i=1,ao_num - l = ao_first_of_shell(ao_shell(i)) - factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) - enddo - else - factor(:) = 1.d0 endif - rc = trexio_write_ao_normalization(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) ! One-e AO integrals ! ------------------ @@ -375,28 +391,30 @@ subroutine export_trexio ! Molecular orbitals ! ------------------ - print *, 'MOs' + if (export_mos) then + print *, 'MOs' - rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) - call trexio_assert(rc, TREXIO_SUCCESS) - - do k=1,N_states - rc = trexio_write_mo_num(f(k), mo_num) + rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) call trexio_assert(rc, TREXIO_SUCCESS) - enddo - rc = trexio_write_mo_coefficient(f(1), mo_coef) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_mo_num(f(k), mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - if ( (trim(mo_label) == 'Canonical').and. & - (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then - rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + rc = trexio_write_mo_coefficient(f(1), mo_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + if ( (trim(mo_label) == 'Canonical').and. & + (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then + rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) - call trexio_assert(rc, TREXIO_SUCCESS) - ! One-e MO integrals ! ------------------ From 3306d26e0e0f08cd407df73de4f44388ebc6a919 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 31 May 2023 11:47:53 +0200 Subject: [PATCH 31/79] Fix import_trexio --- scripts/qp_import_trexio.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index e7bc0f78..89096387 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -340,6 +340,7 @@ def write_ezfio(trexio_filename, filename): except: label = "None" ezfio.set_mo_basis_mo_label(label) + ezfio.set_determinants_mo_label(label) try: clss = trexio.read_mo_class(trexio_file) From 87090d73978169b167e103e3fd867682d9f5b32f Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 31 May 2023 18:11:54 +0200 Subject: [PATCH 32/79] fixed nuclear repulsion in fci_tc_bi_ortho --- src/fci_tc_bi/diagonalize_ci.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f index b6ec073f..df753449 100644 --- a/src/fci_tc_bi/diagonalize_ci.irp.f +++ b/src/fci_tc_bi/diagonalize_ci.irp.f @@ -39,7 +39,7 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) write(*,'(A28,X,I10,X,100(F16.8,X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc + pt2_tmp/norm,E_tc + rpt2_tmp/norm,abs_pt2 print*,'*****' endif - psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) + psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) - nuclear_repulsion psi_s2(1:N_states) = s2_eigvec_tc_bi_orth(1:N_states) E_tc = eigval_right_tc_bi_orth(1) From 00be08932321f73b425987f99e4e5fcc685425f9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 1 Jun 2023 09:56:06 +0200 Subject: [PATCH 33/79] Removed duplicate provider in cosgto --- src/cosgtos_ao_int/EZFIO.cfg | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg index 8edeecd0..fe57b652 100644 --- a/src/cosgtos_ao_int/EZFIO.cfg +++ b/src/cosgtos_ao_int/EZFIO.cfg @@ -10,10 +10,3 @@ doc: If true, use cosgtos for AO integrals interface: ezfio,provider,ocaml default: False -[ao_integrals_threshold] -type: Threshold -doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero -interface: ezfio,provider,ocaml -default: 1.e-15 -ezfio_name: threshold_ao - From 77186e0560d7febb0cf0f0ce7c6c03f98e1b9d9c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 1 Jun 2023 11:11:29 +0200 Subject: [PATCH 34/79] Cleaned cosgtos --- src/ao_basis/EZFIO.cfg | 11 +++++++++++ src/ao_one_e_ints/NEED | 1 - .../aos_cosgtos.irp.f | 0 .../one_e_Coul_integrals_cosgtos.irp.f} | 0 .../one_e_kin_integrals_cosgtos.irp.f} | 0 .../gauss_legendre.irp.f | 0 .../two_e_Coul_integrals_cosgtos.irp.f} | 0 src/cosgtos_ao_int/EZFIO.cfg | 12 ------------ src/cosgtos_ao_int/NEED | 2 -- src/cosgtos_ao_int/README.rst | 4 ---- src/cosgtos_ao_int/cosgtos_ao_int.irp.f | 7 ------- 11 files changed, 11 insertions(+), 26 deletions(-) rename src/{cosgtos_ao_int => ao_one_e_ints}/aos_cosgtos.irp.f (100%) rename src/{cosgtos_ao_int/one_e_Coul_integrals.irp.f => ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f} (100%) rename src/{cosgtos_ao_int/one_e_kin_integrals.irp.f => ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f} (100%) rename src/{cosgtos_ao_int => ao_two_e_ints}/gauss_legendre.irp.f (100%) rename src/{cosgtos_ao_int/two_e_Coul_integrals.irp.f => ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f} (100%) delete mode 100644 src/cosgtos_ao_int/EZFIO.cfg delete mode 100644 src/cosgtos_ao_int/NEED delete mode 100644 src/cosgtos_ao_int/README.rst delete mode 100644 src/cosgtos_ao_int/cosgtos_ao_int.irp.f diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg index 51d726da..a203e3f0 100644 --- a/src/ao_basis/EZFIO.cfg +++ b/src/ao_basis/EZFIO.cfg @@ -67,3 +67,14 @@ doc: Use normalized primitive functions interface: ezfio, provider default: true +[ao_expoim_cosgtos] +type: double precision +doc: imag part for Exponents for each primitive of each cosGTOs |AO| +size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) +interface: ezfio, provider + +[use_cosgtos] +type: logical +doc: If true, use cosgtos for AO integrals +interface: ezfio,provider +default: False diff --git a/src/ao_one_e_ints/NEED b/src/ao_one_e_ints/NEED index b9caaf5d..61d23b1e 100644 --- a/src/ao_one_e_ints/NEED +++ b/src/ao_one_e_ints/NEED @@ -1,3 +1,2 @@ ao_basis pseudo -cosgtos_ao_int diff --git a/src/cosgtos_ao_int/aos_cosgtos.irp.f b/src/ao_one_e_ints/aos_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/aos_cosgtos.irp.f rename to src/ao_one_e_ints/aos_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f b/src/ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/one_e_Coul_integrals.irp.f rename to src/ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/one_e_kin_integrals.irp.f b/src/ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/one_e_kin_integrals.irp.f rename to src/ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/gauss_legendre.irp.f b/src/ao_two_e_ints/gauss_legendre.irp.f similarity index 100% rename from src/cosgtos_ao_int/gauss_legendre.irp.f rename to src/ao_two_e_ints/gauss_legendre.irp.f diff --git a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/two_e_Coul_integrals.irp.f rename to src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg deleted file mode 100644 index fe57b652..00000000 --- a/src/cosgtos_ao_int/EZFIO.cfg +++ /dev/null @@ -1,12 +0,0 @@ -[ao_expoim_cosgtos] -type: double precision -doc: imag part for Exponents for each primitive of each cosGTOs |AO| -size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) -interface: ezfio, provider - -[use_cosgtos] -type: logical -doc: If true, use cosgtos for AO integrals -interface: ezfio,provider,ocaml -default: False - diff --git a/src/cosgtos_ao_int/NEED b/src/cosgtos_ao_int/NEED deleted file mode 100644 index 932f88a3..00000000 --- a/src/cosgtos_ao_int/NEED +++ /dev/null @@ -1,2 +0,0 @@ -ezfio_files -ao_basis diff --git a/src/cosgtos_ao_int/README.rst b/src/cosgtos_ao_int/README.rst deleted file mode 100644 index 01f25d6d..00000000 --- a/src/cosgtos_ao_int/README.rst +++ /dev/null @@ -1,4 +0,0 @@ -============== -cosgtos_ao_int -============== - diff --git a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f deleted file mode 100644 index d65dfba5..00000000 --- a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f +++ /dev/null @@ -1,7 +0,0 @@ -program cosgtos_ao_int - implicit none - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' -end From d05e4ed0b310fb083bf4318e8c7dee481dda302f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 1 Jun 2023 17:46:07 +0200 Subject: [PATCH 35/79] Fix ao_basis/use_cosgtos not found in EZFIO file --- src/ao_basis/EZFIO.cfg | 3 ++- src/ao_basis/cosgtos.irp.f | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 src/ao_basis/cosgtos.irp.f diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg index a203e3f0..6ad9b998 100644 --- a/src/ao_basis/EZFIO.cfg +++ b/src/ao_basis/EZFIO.cfg @@ -76,5 +76,6 @@ interface: ezfio, provider [use_cosgtos] type: logical doc: If true, use cosgtos for AO integrals -interface: ezfio,provider +interface: ezfio default: False + diff --git a/src/ao_basis/cosgtos.irp.f b/src/ao_basis/cosgtos.irp.f new file mode 100644 index 00000000..721a3e57 --- /dev/null +++ b/src/ao_basis/cosgtos.irp.f @@ -0,0 +1,33 @@ +BEGIN_PROVIDER [ logical, use_cosgtos ] + implicit none + BEGIN_DOC +! If true, use cosgtos for AO integrals + END_DOC + + logical :: has + PROVIDE ezfio_filename + if (mpi_master) then + call ezfio_has_ao_basis_use_cosgtos(has) + if (has) then +! write(6,'(A)') '.. >>>>> [ IO READ: use_cosgtos ] <<<<< ..' + call ezfio_get_ao_basis_use_cosgtos(use_cosgtos) + else + use_cosgtos = .False. + endif + endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( use_cosgtos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read use_cosgtos with MPI' + endif + IRP_ENDIF + +! call write_time(6) + +END_PROVIDER From 5ab6a1d7fba6fbff88ac858747783bb9292b9a89 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 1 Jun 2023 19:59:25 +0200 Subject: [PATCH 36/79] few modifs --- src/bi_ort_ints/semi_num_ints_mo.irp.f | 5 +++ src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 2 +- src/non_h_ints_mu/grad_squared.irp.f | 21 ++++++++++- src/non_h_ints_mu/new_grad_tc.irp.f | 1 + src/non_h_ints_mu/tc_integ.irp.f | 6 +++ src/non_h_ints_mu/total_tc_int.irp.f | 5 +++ src/tc_scf/rh_tcscf_diis.irp.f | 39 +++++++++++++++++++- src/tc_scf/tc_scf.irp.f | 4 ++ 8 files changed, 80 insertions(+), 3 deletions(-) diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 0d727785..771d3274 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -138,10 +138,13 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo + FREE int2_grad1_u12_ao + endif call wall_time(wall1) print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -200,6 +203,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, enddo enddo + FREE int2_grad1_u12_bimo_transp + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index e8b56307..5a3730b3 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -79,7 +79,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integer, intent(in) :: n, l, k, m, j, i double precision, intent(out) :: integral integer :: ipoint - double precision :: weight + double precision :: weight, tmp PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_t diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 3f1a9bf5..44a6ae65 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -231,6 +231,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time0) PROVIDE j1b_type + PROVIDE int2_grad1u2_grad2u2_j1b2 do ipoint = 1, n_points_final_grid tmp1 = v_1b(ipoint) @@ -242,6 +243,8 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g enddo enddo + FREE int2_grad1u2_grad2u2_j1b2 + !if(j1b_type .eq. 0) then ! grad12_j12 = 0.d0 ! do ipoint = 1, n_points_final_grid @@ -262,6 +265,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 + call print_memory_usage() END_PROVIDER @@ -278,6 +282,9 @@ BEGIN_PROVIDER [double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_g print*, ' providing u12sq_j1bsq ...' call wall_time(time0) + ! do not free here + PROVIDE int2_u2_j1b2 + do ipoint = 1, n_points_final_grid tmp_x = v_1b_grad(1,ipoint) tmp_y = v_1b_grad(2,ipoint) @@ -292,6 +299,7 @@ BEGIN_PROVIDER [double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for u12sq_j1bsq = ', time1 - time0 + call print_memory_usage() END_PROVIDER @@ -310,6 +318,9 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, print*, ' providing u12_grad1_u12_j1b_grad1_j1b ...' call wall_time(time0) + PROVIDE int2_u_grad1u_j1b2 + PROVIDE int2_u_grad1u_x_j1b2 + do ipoint = 1, n_points_final_grid x = final_grid_points(1,ipoint) @@ -340,14 +351,17 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, enddo enddo + FREE int2_u_grad1u_j1b2 + FREE int2_u_grad1u_x_j1b2 + call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b = ', time1 - time0 + call print_memory_usage() END_PROVIDER ! --- - BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)] BEGIN_DOC @@ -401,6 +415,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 0.d0, tc_grad_square_ao, ao_num*ao_num) + FREE int2_grad1_u12_square_ao + ! --- if(((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) .and. use_ipp) then @@ -442,6 +458,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_u2_j1b2(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao, ao_num*ao_num) + + FREE int2_u2_j1b2 endif ! --- @@ -478,6 +496,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao call wall_time(time1) print*, ' Wall time for tc_grad_square_ao = ', time1 - time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 24e7e743..499ffe9d 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -284,6 +284,7 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, call wall_time(time1) print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index d5995ae5..8251fc71 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -176,6 +176,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f call wall_time(time1) print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + call print_memory_usage() END_PROVIDER @@ -242,6 +243,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL + FREE u12sq_j1bsq grad12_j12 + else PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 @@ -262,6 +265,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL + FREE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + endif elseif(j1b_type .ge. 100) then @@ -324,6 +329,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p call wall_time(time1) print*, ' wall time for int2_grad1_u12_square_ao =', time1-time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 450bbef0..2034872a 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -84,8 +84,13 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao enddo endif + FREE tc_grad_square_ao + FREE tc_grad_and_lapl_ao + FREE ao_two_e_coul + call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 20260a95..0504373c 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -11,6 +11,7 @@ subroutine rh_tcscf_diis() integer :: i, j, it integer :: dim_DIIS, index_dim_DIIS + logical :: converged double precision :: etc_tot, etc_1e, etc_2e, etc_3e, e_save, e_delta double precision :: tc_grad, g_save, g_delta, g_delta_th double precision :: level_shift_save, rate_th @@ -92,8 +93,9 @@ subroutine rh_tcscf_diis() PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot + converged = .false. !do while((tc_grad .gt. dsqrt(thresh_tcscf)) .and. (er_DIIS .gt. dsqrt(thresh_tcscf))) - do while(er_DIIS .gt. dsqrt(thresh_tcscf)) + do while(.not. converged) call wall_time(t0) @@ -218,21 +220,56 @@ subroutine rh_tcscf_diis() !g_delta_th = dabs(tc_grad) ! g_delta) er_delta_th = dabs(er_DIIS) !er_delta) + converged = er_DIIS .lt. dsqrt(thresh_tcscf) + call wall_time(t1) !write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & ! it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 + +! Write data in JSON file + + call lock_io + if (it == 1) then + write(json_unit, json_dict_uopen_fmt) + else + write(json_unit, json_dict_close_uopen_fmt) + endif + write(json_unit, json_int_fmt) ' iteration ', it + write(json_unit, json_real_fmt) ' SCF TC Energy ', etc_tot + write(json_unit, json_real_fmt) ' E(1e) ', etc_1e + write(json_unit, json_real_fmt) ' E(2e) ', etc_2e + write(json_unit, json_real_fmt) ' E(3e) ', etc_3e + write(json_unit, json_real_fmt) ' delta Energy ', e_delta + write(json_unit, json_real_fmt) ' DIIS error ', er_DIIS + write(json_unit, json_real_fmt) ' level_shift ', level_shift_tcscf + write(json_unit, json_real_fmt) ' DIIS ', dim_DIIS + write(json_unit, json_real_fmt) ' Wall time (min)', (t1-t0)/60.d0 + call unlock_io + if(er_delta .lt. 0.d0) then call ezfio_set_tc_scf_bitc_energy(etc_tot) call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + write(json_unit, json_true_fmt) 'saved' + else + write(json_unit, json_false_fmt) 'saved' endif + call lock_io + if (converged) then + write(json_unit, json_true_fmtx) 'converged' + else + write(json_unit, json_false_fmtx) 'converged' + endif + call unlock_io if(qp_stop()) exit enddo + write(json_unit, json_dict_close_fmtx) + ! --- print *, ' TCSCF DIIS converged !' diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 88ddd26c..04c4f92d 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -8,6 +8,8 @@ program tc_scf implicit none + write(json_unit,json_array_open_fmt) 'tc-scf' + print *, ' starting ...' my_grid_becke = .True. @@ -57,6 +59,8 @@ program tc_scf endif + write(json_unit,json_array_close_fmtx) + call json_close end From 6971bf186cf020ce66d0bac091d06ae850bd803f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 1 Jun 2023 21:42:02 +0200 Subject: [PATCH 37/79] Accelerated multiply_poly --- src/ao_one_e_ints/pot_ao_ints.irp.f | 21 +- src/ao_two_e_ints/two_e_integrals.irp.f | 136 +------ src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 52 +-- src/utils/integration.irp.f | 366 +++++++++++++------ 4 files changed, 305 insertions(+), 270 deletions(-) diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index 928053ad..446bf730 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -455,10 +455,12 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(c) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny=0 call I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,Y,ny,n_pt_in) - call multiply_poly(Y,ny,R1x,2,d,nd) +! call multiply_poly(Y,ny,R1x,2,d,nd) + call multiply_poly_c2(Y,ny,R1x,d,nd) else do ix=0,n_pt_in X(ix) = 0.d0 @@ -469,7 +471,8 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(a-1) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) nx = nd do ix=0,n_pt_in @@ -479,10 +482,12 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(c) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny=0 call I_x1_pol_mult_one_e(a-1,c,R1x,R1xp,R2x,Y,ny,n_pt_in) - call multiply_poly(Y,ny,R1x,2,d,nd) +! call multiply_poly(Y,ny,R1x,2,d,nd) + call multiply_poly_c2(Y,ny,R1x,d,nd) endif end @@ -519,7 +524,8 @@ recursive subroutine I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,d,nd,dim) do ix=0,nx X(ix) *= dble(c-1) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny = 0 do ix=0,dim Y(ix) = 0.d0 @@ -527,7 +533,8 @@ recursive subroutine I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,d,nd,dim) call I_x1_pol_mult_one_e(0,c-1,R1x,R1xp,R2x,Y,ny,dim) if(ny.ge.0)then - call multiply_poly(Y,ny,R1xp,2,d,nd) +! call multiply_poly(Y,ny,R1xp,2,d,nd) + call multiply_poly_c2(Y,ny,R1xp,d,nd) endif endif end diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 835dc89a..85ff5bcf 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -975,18 +975,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_10,2,d,nd) - if (nx >= 0) then - integer :: ib - do ib=0,nx - d(ib ) = d(ib ) + B_10(0) * X(ib) - d(ib+1) = d(ib+1) + B_10(1) * X(ib) - d(ib+2) = d(ib+2) + B_10(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -1009,17 +998,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt endif ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - if (nx >= 0) then - do ib=0,nx - d(ib ) = d(ib ) + B_00(0) * X(ib) - d(ib+1) = d(ib+1) + B_00(1) * X(ib) - d(ib+2) = d(ib+2) + B_00(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_00,d,nd) endif ny=0 @@ -1038,17 +1017,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - if (ny >= 0) then - do ib=0,ny - d(ib ) = d(ib ) + C_00(0) * Y(ib) - d(ib+1) = d(ib+1) + C_00(1) * Y(ib) - d(ib+2) = d(ib+2) + C_00(2) * Y(ib) - enddo - - do nd = ny+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(Y,ny,C_00,d,nd) end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1088,18 +1057,7 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - if (nx >= 0) then - integer :: ib - do ib=0,nx - d(ib ) = d(ib ) + B_00(0) * X(ib) - d(ib+1) = d(ib+1) + B_00(1) * X(ib) - d(ib+2) = d(ib+2) + B_00(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_00,d,nd) ny=0 @@ -1111,17 +1069,7 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - if (ny >= 0) then - do ib=0,ny - d(ib ) = d(ib ) + C_00(0) * Y(ib) - d(ib+1) = d(ib+1) + C_00(1) * Y(ib) - d(ib+2) = d(ib+2) + C_00(2) * Y(ib) - enddo - - do nd = ny+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(Y,ny,C_00,d,nd) end @@ -1150,18 +1098,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_10,2,d,nd) - if (nx >= 0) then - integer :: ib - do ib=0,nx - d(ib ) = d(ib ) + B_10(0) * X(ib) - d(ib+1) = d(ib+1) + B_10(1) * X(ib) - d(ib+2) = d(ib+2) + B_10(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -1181,17 +1118,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - if (nx >= 0) then - do ib=0,nx - d(ib ) = d(ib ) + B_00(0) * X(ib) - d(ib+1) = d(ib+1) + B_00(1) * X(ib) - d(ib+2) = d(ib+2) + B_00(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_00,d,nd) ny=0 !DIR$ LOOP COUNT(8) @@ -1203,17 +1130,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - if (ny >= 0) then - do ib=0,ny - d(ib ) = d(ib ) + C_00(0) * Y(ib) - d(ib+1) = d(ib+1) + C_00(1) * Y(ib) - d(ib+2) = d(ib+2) + C_00(2) * Y(ib) - enddo - - do nd = ny+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(Y,ny,C_00,d,nd) end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1262,18 +1179,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,D_00,2,d,nd) - if (ny >= 0) then - integer :: ib - do ib=0,ny - d(ib ) = d(ib ) + D_00(0) * Y(ib) - d(ib+1) = d(ib+1) + D_00(1) * Y(ib) - d(ib+2) = d(ib+2) + D_00(2) * Y(ib) - enddo - - do nd = ny+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(Y,ny,D_00,d,nd) return @@ -1293,17 +1199,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_01,2,d,nd) - if (nx >= 0) then - do ib=0,nx - d(ib ) = d(ib ) + B_01(0) * X(ib) - d(ib+1) = d(ib+1) + B_01(1) * X(ib) - d(ib+2) = d(ib+2) + B_01(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_01,d,nd) ny = 0 !DIR$ LOOP COUNT(6) @@ -1314,17 +1210,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,D_00,2,d,nd) - if (ny >= 0) then - do ib=0,ny - d(ib ) = d(ib ) + D_00(0) * Y(ib) - d(ib+1) = d(ib+1) + D_00(1) * Y(ib) - d(ib+2) = d(ib+2) + D_00(2) * Y(ib) - enddo - - do nd = ny+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(Y,ny,D_00,d,nd) end select end diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index e8b56307..a72cd682 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -4,7 +4,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC -! matrix element of the -L three-body operator +! matrix element of the -L three-body operator ! ! notice the -1 sign: in this way three_body_ints_bi_ort can be directly used to compute Slater rules :) END_DOC @@ -12,7 +12,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n implicit none integer :: i, j, k, l, m, n double precision :: integral, wall1, wall0 - character*(128) :: name_file + character*(128) :: name_file three_body_ints_bi_ort = 0.d0 print *, ' Providing the three_body_ints_bi_ort ...' @@ -27,12 +27,12 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n ! call read_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file) ! else - !provide x_W_ki_bi_ortho_erf_rk + !provide x_W_ki_bi_ortho_erf_rk provide mos_r_in_r_array_transp mos_l_in_r_array_transp !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,n,integral) & + !$OMP PRIVATE (i,j,k,l,m,n,integral) & !$OMP SHARED (mo_num,three_body_ints_bi_ort) !$OMP DO SCHEDULE (dynamic) do i = 1, mo_num @@ -43,7 +43,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n do n = 1, mo_num call give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) - three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral + three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral enddo enddo enddo @@ -63,7 +63,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n ! call ezfio_set_three_body_ints_bi_ort_io_three_body_ints_bi_ort("Read") ! endif -END_PROVIDER +END_PROVIDER ! --- @@ -71,7 +71,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS ! END_DOC @@ -79,28 +79,30 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integer, intent(in) :: n, l, k, m, j, i double precision, intent(out) :: integral integer :: ipoint - double precision :: weight + double precision :: weight, tmp PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_t integral = 0.d0 do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + tmp = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) & + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) ) - integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + + tmp = tmp + mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) - integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & + + tmp = tmp + mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) + integral = integral + tmp * final_weight_at_r_vector(ipoint) enddo end subroutine give_integrals_3_body_bi_ort @@ -111,7 +113,7 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS ! END_DOC @@ -123,13 +125,13 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) integral = 0.d0 do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) + weight = final_weight_at_r_vector(ipoint) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & +! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & ! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & +! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & ! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i) ) @@ -138,11 +140,11 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & +! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & ! * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,l,j,ipoint) & ! + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,l,j,ipoint) & ! + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,l,j,ipoint) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & +! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & ! * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint) & ! + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint) & ! + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint) ) @@ -151,13 +153,13 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) ! + int2_grad1_u12_bimo(2,l,j,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint) & ! + int2_grad1_u12_bimo(3,l,j,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint) ) -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(l,j,1,ipoint) & + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(l,j,2,ipoint) & + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(l,j,3,ipoint) ) - integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(k,i,1,ipoint) & + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(k,i,2,ipoint) & + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(k,i,3,ipoint) ) @@ -176,7 +178,7 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS ! END_DOC @@ -188,13 +190,13 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral) integral = 0.d0 do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) + weight = final_weight_at_r_vector(ipoint) - integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & + integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j) & + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j) & + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) ) - integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & + integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i) & + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i) & + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) ) diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index b60e3bc1..21179dac 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -56,7 +56,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) ! - ! WARNING ::: IF fact_k is too smal then: + ! WARNING ::: IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC @@ -86,7 +86,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, !DIR$ FORCEINLINE call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center) if (fact_k < thresh) then - ! IF fact_k is too smal then: + ! IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef P_center = 0.d0 @@ -468,114 +468,6 @@ end subroutine -subroutine multiply_poly_0c(b,c,nc,d,nd) - implicit none - BEGIN_DOC - ! Multiply two polynomials - ! D(t) += B(t)*C(t) - END_DOC - - integer, intent(in) :: nc - integer, intent(out) :: nd - double precision, intent(in) :: b(0:0), c(0:nc) - double precision, intent(inout) :: d(0:0+nc) - - integer :: ic - - do ic = 0,nc - d(ic) = d(ic) + c(ic) * b(0) - enddo - - do nd = nc,0,-1 - if (d(nd) /= 0.d0) exit - enddo - -end - -subroutine multiply_poly_1c(b,c,nc,d,nd) - implicit none - BEGIN_DOC - ! Multiply two polynomials - ! D(t) += B(t)*C(t) - END_DOC - - integer, intent(in) :: nc - integer, intent(out) :: nd - double precision, intent(in) :: b(0:1), c(0:nc) - double precision, intent(inout) :: d(0:1+nc) - - integer :: ic, id - if(nc < 0) return - - do ic = 0,nc - d( ic) = d( ic) + c(ic) * b(0) - d(1+ic) = d(1+ic) + c(ic) * b(1) - enddo - - do nd = nc+1,0,-1 - if (d(nd) /= 0.d0) exit - enddo - -end - - -subroutine multiply_poly_2c(b,c,nc,d,nd) - implicit none - BEGIN_DOC - ! Multiply two polynomials - ! D(t) += B(t)*C(t) - END_DOC - - integer, intent(in) :: nc - integer, intent(out) :: nd - double precision, intent(in) :: b(0:2), c(0:nc) - double precision, intent(inout) :: d(0:2+nc) - - integer :: ic, id, k - if (nc <0) return - - do ic = 0,nc - d( ic) = d( ic) + c(ic) * b(0) - d(1+ic) = d(1+ic) + c(ic) * b(1) - d(2+ic) = d(2+ic) + c(ic) * b(2) - enddo - - do nd = nc+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - -end - -subroutine multiply_poly_3c(b,c,nc,d,nd) - implicit none - BEGIN_DOC - ! Multiply two polynomials - ! D(t) += B(t)*C(t) - END_DOC - - integer, intent(in) :: nc - integer, intent(out) :: nd - double precision, intent(in) :: b(0:3), c(0:nc) - double precision, intent(inout) :: d(0:3+nc) - - integer :: ic, id - if (nc <0) return - - do ic = 1,nc - d( ic) = d(1+ic) + c(ic) * b(0) - d(1+ic) = d(1+ic) + c(ic) * b(1) - d(2+ic) = d(1+ic) + c(ic) * b(2) - d(3+ic) = d(1+ic) + c(ic) * b(3) - enddo - - do nd = nc+3,0,-1 - if (d(nd) /= 0.d0) exit - enddo - -end - - - subroutine multiply_poly(b,nb,c,nc,d,nd) implicit none BEGIN_DOC @@ -604,6 +496,254 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) end + +subroutine multiply_poly_b0(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:0), c(0:nc) + double precision, intent(inout) :: d(0:nc) + + integer :: ndtmp + integer :: ic, id, k + if(nc < 0) return !False if nc>=0 + + do ic = 0,nc + d(ic) = d(ic) + c(ic) * b(0) + enddo + + do nd = nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + +subroutine multiply_poly_b1(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:1), c(0:nc) + double precision, intent(inout) :: d(0:1+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nc < 0) return !False if nc>=0 + + + select case (nc) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + + case default + d(0) = d(0) + c(0) * b(0) + do ic = 1,nc + d(ic) = d(ic) + c(ic) * b(0) + c(ic-1) * b(1) + enddo + d(nc+1) = d(nc+1) + c(nc) * b(1) + + end select + + do nd = 1+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_b2(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:2), c(0:nc) + double precision, intent(inout) :: d(0:2+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nc < 0) return !False if nc>=0 + + select case (nc) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + d(2) = d(2) + c(0) * b(2) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + d(3) = d(3) + c(1) * b(2) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + c(1) * b(2) + d(4) = d(4) + c(2) * b(2) + + case default + + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ic = 2,nc + d(ic) = d(ic) + c(ic) * b(0) + c(ic-1) * b(1) + c(ic-2) * b(2) + enddo + d(nc+1) = d(nc+1) + c(nc) * b(1) + c(nc-1) * b(2) + d(nc+2) = d(nc+2) + c(nc) * b(2) + + end select + + do nd = 2+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c0(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:0) + double precision, intent(inout) :: d(0:nb) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + do ib=0,nb + d(ib) = d(ib) + c(0) * b(ib) + enddo + + do nd = nb,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c1(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:1) + double precision, intent(inout) :: d(0:nb+1) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + + case default + d(0) = d(0) + c(0) * b(0) + do ib=1,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + + end select + + do nd = nb+1,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c2(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:2) + double precision, intent(inout) :: d(0:nb+2) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + d(2) = d(2) + c(2) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(1) * b(2) + c(2) * b(1) + d(4) = d(4) + c(2) * b(2) + + case default + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ib=2,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1) + d(nb+2) = d(nb+2) + c(2) * b(nb) + + end select + + do nd = nb+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + + + subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points) implicit none BEGIN_DOC @@ -778,11 +918,11 @@ end subroutine recentered_poly2_v subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, n_points) BEGIN_DOC - ! + ! ! Recenter two polynomials. Special case for b=(0,0,0) - ! + ! ! (x - A)^a (x - B)^0 = (x - P + P - A)^a (x - Q + Q - B)^0 - ! = (x - P + P - A)^a + ! = (x - P + P - A)^a ! END_DOC From b9c18338960064f01bdd3e7bf6427ab3510575c9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 00:33:37 +0200 Subject: [PATCH 38/79] Optimized three_e_5_idx_exch12_bi_ort --- src/bi_ort_ints/bi_ort_ints.irp.f | 12 +- src/bi_ort_ints/three_body_ijmkl.irp.f | 162 ++++++++++++++++++++----- src/utils/integration.irp.f | 24 ++++ 3 files changed, 166 insertions(+), 32 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index ca50dd56..63b2aa8c 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -16,23 +16,27 @@ subroutine test_3e double precision :: accu, contrib,new,ref i = 1 k = 1 + n = 0 accu = 0.d0 do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num do m = 1, mo_num - do n = 1, mo_num - call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) - call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref) + new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) +! do n = 1, mo_num +! call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) +! call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref) contrib = dabs(new - ref) accu += contrib if(contrib .gt. 1.d-10)then print*,'pb !!' print*,i,k,j,l,m,n print*,ref,new,contrib + stop endif - enddo +! enddo enddo enddo enddo diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index ae4c9bd5..af2cb353 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -24,7 +24,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -33,7 +33,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -45,7 +45,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -73,7 +73,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -82,7 +82,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -94,7 +94,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -122,7 +122,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -131,7 +131,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num do m = 1, mo_num do l = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -143,7 +143,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -171,7 +171,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -180,7 +180,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -192,7 +192,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -220,7 +220,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -229,7 +229,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -241,7 +241,57 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + three_e_5_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) + three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch12_bi_ort_old', wall1 - wall0 + +END_PROVIDER ! --- @@ -259,38 +309,94 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, implicit none integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - three_e_5_idx_exch12_bi_ort = 0.d0 print *, ' Providing the three_e_5_idx_exch12_bi_ort ...' call wall_time(wall0) - provide mos_r_in_r_array_transp mos_l_in_r_array_transp + do m = 1, mo_num !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP m2grad_r, m2grad_l, grad_mli, tmp_mat, orb_mat) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + m2grad_l(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + m2grad_l(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + m2grad_l(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + m2grad_r(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + m2grad_r(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + m2grad_r(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,k,j) - tmp_mat(k,j,l,i) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + m2grad_l, 3*n_points_final_grid, & + m2grad_r, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = & + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index 21179dac..b548b18a 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -484,6 +484,30 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) integer :: ib, ic, id, k if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 + select case (nb) + case (0) + call multiply_poly_b0(b,c,nc,d,nd) + return + case (1) + call multiply_poly_b1(b,c,nc,d,nd) + return + case (2) + call multiply_poly_b2(b,c,nc,d,nd) + return + end select + + select case (nc) + case (0) + call multiply_poly_c0(b,nb,c,d,nd) + return + case (1) + call multiply_poly_c1(b,nb,c,d,nd) + return + case (2) + call multiply_poly_c2(b,nb,c,d,nd) + return + end select + do ib=0,nb do ic = 0,nc d(ib+ic) = d(ib+ic) + c(ic) * b(ib) From fb5300a8e59d4dc08c4ce118317f4deffef7daba Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 08:51:04 +0200 Subject: [PATCH 39/79] Preparing for optimization of 5idx in TC --- external/qp2-dependencies | 2 +- src/bi_ort_ints/three_body_ijmkl.irp.f | 58 +--- src/bi_ort_ints/three_body_ijmkl_old.irp.f | 295 +++++++++++++++++++++ 3 files changed, 303 insertions(+), 52 deletions(-) create mode 100644 src/bi_ort_ints/three_body_ijmkl_old.irp.f diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 6e23ebac..e0d0e02e 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a +Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index af2cb353..5220d8c7 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -245,56 +245,6 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - - three_e_5_idx_exch12_bi_ort_old = 0.d0 - print *, ' Providing the three_e_5_idx_exch12_bi_ort_old ...' - call wall_time(wall0) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort_old) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) - three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch12_bi_ort_old', wall1 - wall0 - -END_PROVIDER - -! --- - BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC @@ -305,6 +255,12 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! + ! Equivalent to: + ! + ! call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) + ! + ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + ! END_DOC implicit none @@ -314,10 +270,10 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, double precision :: weight double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp diff --git a/src/bi_ort_ints/three_body_ijmkl_old.irp.f b/src/bi_ort_ints/three_body_ijmkl_old.irp.f new file mode 100644 index 00000000..105cd179 --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmkl_old.irp.f @@ -0,0 +1,295 @@ + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_direct_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_direct_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) + three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_direct_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_cycle_1_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_cycle_1_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) + three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_1_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_cycle_2_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_cycle_2_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + do l = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) + three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_2_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_exch23_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch23_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) + three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch23_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_exch13_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch13_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) + three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch13_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + three_e_5_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) + three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch12_bi_ort_old', wall1 - wall0 + +END_PROVIDER + From c4612318ae9cce73c3cf668703827eb9c7bfd093 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 09:11:32 +0200 Subject: [PATCH 40/79] Optimized direct 5idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 62 ++- src/bi_ort_ints/three_body_ijmkl.irp.f | 500 ++++++++++++------------- 2 files changed, 297 insertions(+), 265 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 63b2aa8c..d0367f6f 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -7,7 +7,8 @@ program bi_ort_ints my_n_pt_r_grid = 10 my_n_pt_a_grid = 14 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call test_3e +! call test_3e + call test_5idx end subroutine test_3e @@ -19,15 +20,13 @@ subroutine test_3e n = 0 accu = 0.d0 do i = 1, mo_num - do k = 1, mo_num + do k = 1, mo_num do j = 1, mo_num - do l = 1, mo_num + do l = 1, mo_num do m = 1, mo_num - new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) -! do n = 1, mo_num -! call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) -! call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref) + do n = 1, mo_num + call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) + call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref) contrib = dabs(new - ref) accu += contrib if(contrib .gt. 1.d-10)then @@ -36,7 +35,7 @@ subroutine test_3e print*,ref,new,contrib stop endif -! enddo + enddo enddo enddo enddo @@ -46,3 +45,48 @@ subroutine test_3e end + +subroutine test_5idx + implicit none + integer :: i,k,j,l,m,n,ipoint + double precision :: accu, contrib,new,ref + i = 1 + k = 1 + n = 0 + accu = 0.d0 + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'direct' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + +! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch12' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif + + enddo + enddo + enddo + enddo + enddo + print*,'accu = ',accu/dble(mo_num)**5 + + +end diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index 5220d8c7..1db773f1 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -1,7 +1,8 @@ ! --- -BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -12,257 +13,6 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign END_DOC - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_direct_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_direct_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_cycle_1_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_cycle_2_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - do l = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_exch23_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_exch13_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_exch13_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - ! Equivalent to: - ! - ! call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) - ! - ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral - ! - END_DOC - implicit none integer :: i, j, k, m, l double precision :: wall1, wall0 @@ -279,7 +29,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, provide mos_r_in_r_array_transp mos_l_in_r_array_transp PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - print *, ' Providing the three_e_5_idx_exch12_bi_ort ...' + print *, ' Providing the three_e_5_idx_direct_bi_ort ...' call wall_time(wall0) do m = 1, mo_num @@ -322,6 +72,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,k,j) - tmp_mat(k,j,l,i) enddo enddo @@ -339,8 +90,8 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = & - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) enddo enddo enddo @@ -350,9 +101,246 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, enddo call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 END_PROVIDER ! --- +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + double precision :: integral + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + double precision :: integral + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + do l = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + double precision :: integral + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + print *, ' Providing the three_e_5_idx_exch23_bi_ort ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) + three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + double precision :: integral + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + print *, ' Providing the three_e_5_idx_exch13_bi_ort ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) + three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 + +END_PROVIDER + +! --- + + From 00bd8e2fcc8d435a1484af065a443efee3ca3c9f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 10:34:05 +0200 Subject: [PATCH 41/79] Optimized cyclic 5idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 65 ++++- src/bi_ort_ints/three_body_ijmkl.irp.f | 288 +++++++------------ src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 1 + 3 files changed, 152 insertions(+), 202 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index d0367f6f..eae0affe 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -59,17 +59,18 @@ subroutine test_5idx do j = 1, mo_num do l = 1, mo_num do m = 1, mo_num - new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'direct' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif +! new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'direct' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! ! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ! ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) ! contrib = dabs(new - ref) @@ -81,6 +82,50 @@ subroutine test_5idx ! stop ! endif +! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle1' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif + +! new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle2' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif + +! new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch23' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif + + new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'exch13' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + enddo enddo enddo diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index 1db773f1..9f316771 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -1,4 +1,3 @@ - ! --- BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] @@ -17,7 +16,6 @@ integer :: i, j, k, m, l double precision :: wall1, wall0 integer :: ipoint - double precision :: weight double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) @@ -45,17 +43,22 @@ do i=1,mo_num do l=1,mo_num do ipoint=1, n_points_final_grid + grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + + orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + m2grad_l(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) m2grad_l(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) m2grad_l(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + m2grad_r(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) m2grad_r(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) m2grad_r(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo enddo enddo @@ -107,240 +110,141 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - double precision :: integral - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision :: weight - double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) - allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - - print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...' - call wall_time(wall0) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - double precision :: integral - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision :: weight - double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) - allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - - print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - do l = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! END_DOC implicit none - double precision :: integral integer :: i, j, k, m, l double precision :: wall1, wall0 integer :: ipoint - double precision :: weight - double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) - allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) + double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:) + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - print *, ' Providing the three_e_5_idx_exch23_bi_ort ...' + print *, ' Providing the three_e_5_idx_cycle_bi_ort ...' call wall_time(wall0) + do m = 1, mo_num + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP rk_grad_im, rm_grad_ik, lk_grad_mi, lm_grad_ik, tmp_mat) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + + rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + + rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -tmp_mat(k,j,l,i) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -tmp_mat(l,i,k,j) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = -tmp_mat(l,j,k,i) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = -tmp_mat(k,i,l,j) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - double precision :: integral - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision :: weight - double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) - allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - - print *, ' Providing the three_e_5_idx_exch13_bi_ort ...' - call wall_time(wall0) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,j,i,k) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,i,j,l) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,j,i,l) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,i,j,k) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,i,j,k) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,j,i,l) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,i,j,l) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,j,i,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + enddo call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_5_idx_cycle_bi_ort', wall1 - wall0 END_PROVIDER ! --- + diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index a72cd682..1962c8d6 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -85,6 +85,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) PROVIDE int2_grad1_u12_bimo_t integral = 0.d0 + ! (n, l, k, m, j, i) do ipoint = 1, n_points_final_grid tmp = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & From b2c005eccb7e05eaebb59e9dcbc3c0e771c9a87f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 11:08:30 +0200 Subject: [PATCH 42/79] Finished optimizing 5idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 102 +++++++++---------- src/bi_ort_ints/three_body_ijmkl.irp.f | 135 +++++++------------------ 2 files changed, 87 insertions(+), 150 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index eae0affe..5653a2e2 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -60,60 +60,60 @@ subroutine test_5idx do l = 1, mo_num do m = 1, mo_num -! new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. 1.d-10)then -! print*,'direct' -! print*,i,k,j,l,m -! print*,ref,new,contrib -! stop -! endif + new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'direct' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + + new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'exch12' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif ! -! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. 1.d-10)then -! print*,'exch12' -! print*,i,k,j,l,m -! print*,ref,new,contrib -! stop -! endif + new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'cycle1' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif -! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. 1.d-10)then -! print*,'cycle1' -! print*,i,k,j,l,m -! print*,ref,new,contrib -! stop -! endif + new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'cycle2' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif -! new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. 1.d-10)then -! print*,'cycle2' -! print*,i,k,j,l,m -! print*,ref,new,contrib -! stop -! endif - -! new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. 1.d-10)then -! print*,'exch23' -! print*,i,k,j,l,m -! print*,ref,new,contrib -! stop -! endif + new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'exch23' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index 9f316771..c9e88ab9 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -1,7 +1,11 @@ ! --- - BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -16,18 +20,22 @@ integer :: i, j, k, m, l double precision :: wall1, wall0 integer :: ipoint - double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) - allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + double precision, allocatable :: grad_mli(:,:,:), orb_mat(:,:,:) + double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) + double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - print *, ' Providing the three_e_5_idx_direct_bi_ort ...' + print *, ' Providing the three_e_5_idx_bi_ort ...' call wall_time(wall0) do m = 1, mo_num @@ -38,7 +46,8 @@ !$OMP SHARED (m,mo_num,n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP m2grad_r, m2grad_l, grad_mli, tmp_mat, orb_mat) + !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi, & + !$OMP grad_mli, tmp_mat, orb_mat) !$OMP DO COLLAPSE(2) do i=1,mo_num do l=1,mo_num @@ -51,13 +60,21 @@ orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) - m2grad_l(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - m2grad_l(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - m2grad_l(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) - m2grad_r(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - m2grad_r(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - m2grad_r(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + + rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) enddo enddo @@ -84,8 +101,8 @@ !$OMP END PARALLEL DO call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - m2grad_l, 3*n_points_final_grid, & - m2grad_r, 3*n_points_final_grid, 0.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & tmp_mat, mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k,l) @@ -101,83 +118,6 @@ enddo !$OMP END PARALLEL DO - enddo - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - - BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) - double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:) - allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) - allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) - allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - - print *, ' Providing the three_e_5_idx_cycle_bi_ort ...' - call wall_time(wall0) - - do m = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP rk_grad_im, rm_grad_ik, lk_grad_mi, lm_grad_ik, tmp_mat) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid - lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - - lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) - - rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - - rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) - rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) - rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) - - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & lk_grad_mi, 3*n_points_final_grid, & rm_grad_ik, 3*n_points_final_grid, 0.d0, & @@ -237,14 +177,11 @@ END_PROVIDER enddo enddo !$OMP END PARALLEL DO + enddo call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0 END_PROVIDER -! --- - - - From 896ac96e7e7339c710b6325972e878d94003b9e5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 11:40:21 +0200 Subject: [PATCH 43/79] Reduced memory in 5idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 5 + src/bi_ort_ints/three_body_ijmkl.irp.f | 139 ++++++++++++++++--------- 2 files changed, 96 insertions(+), 48 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 5653a2e2..f7a42f37 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -9,6 +9,11 @@ program bi_ort_ints touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid ! call test_3e call test_5idx +! call test_5idx2 +end + +subroutine test_5idx2 + PROVIDE three_e_5_idx_cycle_2_bi_ort end subroutine test_3e diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index c9e88ab9..bd669163 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -24,12 +24,6 @@ double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) double precision, allocatable :: tmp_mat(:,:,:,:) - allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) - allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp @@ -40,14 +34,15 @@ do m = 1, mo_num + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,l,ipoint) & !$OMP SHARED (m,mo_num,n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi, & - !$OMP grad_mli, tmp_mat, orb_mat) + !$OMP grad_mli, orb_mat) !$OMP DO COLLAPSE(2) do i=1,mo_num do l=1,mo_num @@ -60,22 +55,6 @@ orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) - lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) - - rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - - lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - - rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) - rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) - rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) - enddo enddo enddo @@ -100,6 +79,41 @@ enddo !$OMP END PARALLEL DO + deallocate(orb_mat,grad_mli) + + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + + rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + + rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & lm_grad_ik, 3*n_points_final_grid, & rm_grad_ik, 3*n_points_final_grid, 0.d0, & @@ -118,6 +132,52 @@ enddo !$OMP END PARALLEL DO + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,j,k) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = - tmp_mat(k,j,i,l) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = - tmp_mat(k,i,j,l) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = - tmp_mat(l,j,i,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(lm_grad_ik) + + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & lk_grad_mi, 3*n_points_final_grid, & rm_grad_ik, 3*n_points_final_grid, 0.d0, & @@ -128,10 +188,10 @@ do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -tmp_mat(k,j,l,i) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -tmp_mat(l,i,k,j) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = -tmp_mat(l,j,k,i) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = -tmp_mat(k,i,l,j) + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l,i) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k,i) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(k,i,l,j) enddo enddo enddo @@ -158,26 +218,9 @@ enddo !$OMP END PARALLEL DO - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,i,j,k) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,j,i,l) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,i,j,l) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,j,i,k) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - + deallocate(lk_grad_mi) + deallocate(rm_grad_ik) + deallocate(rk_grad_im) enddo call wall_time(wall1) From 81b7751b00f54a988e2df30fb92edc98f0e49474 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 16:10:04 +0200 Subject: [PATCH 44/79] Fix bug in number of args --- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 1f3bebc2..b669025e 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -198,7 +198,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ allocate (bounds(2,nbuckets)) do isample=1,nbuckets eta = 1.d0/dble(nbuckets) * dble(isample) - ieta = binary_search(waccu,eta,Nabc,ileft,iright) + ieta = binary_search(waccu,eta,Nabc) bounds(1,isample) = ileft bounds(2,isample) = ieta ileft = ieta+1 From 6d01eb42ca24a4265710b20913d64c9fb3117298 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:16:39 +0200 Subject: [PATCH 45/79] print mem details --- src/bi_ort_ints/three_body_ijm.irp.f | 7 +++++++ src/bi_ort_ints/three_body_ijmk.irp.f | 6 ++++++ src/bi_ort_ints/three_body_ijmkl.irp.f | 6 ++++++ src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 1 + src/non_h_ints_mu/tc_integ.irp.f | 2 ++ src/non_h_ints_mu/total_tc_int.irp.f | 4 +--- 6 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f index 4d21cb93..b34638b8 100644 --- a/src/bi_ort_ints/three_body_ijm.irp.f +++ b/src/bi_ort_ints/three_body_ijm.irp.f @@ -49,6 +49,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_direct_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -102,6 +103,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_3_idx_cycle_1_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -155,6 +157,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_3_idx_cycle_2_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -208,6 +211,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch23_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -261,6 +265,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch13_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -306,6 +311,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch12_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -359,6 +365,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort_new, (mo_num, mo_ call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch12_bi_ort_new', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 5afd49ab..95b57e37 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -43,6 +43,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_direct_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -90,6 +91,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_4_idx_cycle_1_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -137,6 +139,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_4_idx_cycle_2_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -184,6 +187,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -230,6 +234,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_exch13_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -277,6 +282,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_exch12_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index ae4c9bd5..507408e5 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -44,6 +44,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -93,6 +94,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -142,6 +144,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -191,6 +194,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -240,6 +244,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -289,6 +294,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index 5a3730b3..f82e8725 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -57,6 +57,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n call wall_time(wall1) print *, ' wall time for three_body_ints_bi_ort', wall1 - wall0 + call print_memory_usage() ! if(write_three_body_ints_bi_ort)then ! print*,'Writing three_body_ints_bi_ort on disk ...' ! call write_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file) diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index 8251fc71..b2c0df31 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -100,6 +100,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f !$OMP END DO !$OMP END PARALLEL + FREE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + elseif(j1b_type .ge. 100) then PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 2034872a..afa10305 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -84,9 +84,7 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao enddo endif - FREE tc_grad_square_ao - FREE tc_grad_and_lapl_ao - FREE ao_two_e_coul + FREE tc_grad_square_ao tc_grad_and_lapl_ao ao_two_e_coul call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0 From 107cc3f2fbfb13fdea4bad734c8bc5d11d9d8df8 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:19:25 +0200 Subject: [PATCH 46/79] fixed bug in TC-VAR --- src/tc_bi_ortho/tc_utils.irp.f | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index f8f648e8..737c393b 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -38,15 +38,16 @@ subroutine write_tc_var() implicit none integer :: i, j, k - double precision :: hmono, htwoe, hthree, htot + double precision :: hmono, htwoe, hthree, htot_1j, htot_j1 double precision :: SIGMA_TC do k = 1, n_states SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) - SIGMA_TC = SIGMA_TC + htot * htot + call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo print *, " state : ", k From 4cc8dae42010e062f82ace4373e2d5927e9074b0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 20:32:31 +0200 Subject: [PATCH 47/79] Improve 5idx --- scripts/qp_import_trexio.py | 15 ++ scripts/utility/qp_bitmasks.py | 6 +- src/ao_basis/cosgtos.irp.f | 3 +- src/bi_ort_ints/bi_ort_ints.irp.f | 115 ++++++------- src/bi_ort_ints/three_body_ijmkl.irp.f | 10 +- src/tc_bi_ortho/31.tc_bi_ortho.bats | 34 ++-- src/tc_bi_ortho/slater_tc_3e_slow.irp.f | 89 +++++----- src/tc_bi_ortho/slater_tc_opt.irp.f | 3 +- src/tc_bi_ortho/slater_tc_opt_double.irp.f | 180 +++++++++++---------- src/tc_bi_ortho/symmetrized_3_e_int.irp.f | 3 +- 10 files changed, 247 insertions(+), 211 deletions(-) diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index 89096387..2c829f5c 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -17,6 +17,7 @@ import numpy as np from functools import reduce from ezfio import ezfio from docopt import docopt +import qp_bitmasks try: import trexio @@ -453,6 +454,20 @@ def write_ezfio(trexio_filename, filename): else: print("None") + print("Determinant\t\t...\t", end=' ') + alpha = [ i for i in range(num_alpha) ] + beta = [ i for i in range(num_beta) ] + if trexio.has_mo_spin(trexio_file): + spin = trexio.read_mo_spin(trexio_file) + beta = [ i for i in range(mo_num) if spin[i] == 1 ] + beta = [ beta[i] for i in range(num_beta) ] + + alpha = qp_bitmasks.BitMask(alpha) + beta = qp_bitmasks.BitMask(beta ) + print(alpha) + print(beta) + print("OK") + diff --git a/scripts/utility/qp_bitmasks.py b/scripts/utility/qp_bitmasks.py index 38aa48d7..11965b72 100644 --- a/scripts/utility/qp_bitmasks.py +++ b/scripts/utility/qp_bitmasks.py @@ -22,7 +22,7 @@ def int_to_string(s): assert s>=0 AssertionError """ - assert type(s) in (int, long) + assert type(s) == int assert s>=0 return '{s:0b}'.format(s=s) @@ -62,7 +62,7 @@ def int_to_bitmask(s,bit_kind_size=BIT_KIND_SIZE): ['1111111111111111111111111111111111111111111111111111111111110110'] >>> """ - assert type(s) in (int, long) + assert type(s) == int if s < 0: s = s + (1 << bit_kind_size) return ['{s:0{width}b}'.format(s=s,width=bit_kind_size)] @@ -104,7 +104,7 @@ class BitMask(object): return self._data_int[i] def __setitem__(self,i,value): - if type(value) in (int,long): + if type(value) == int : self._data_int[i] = value elif type(value) == str: s = string_to_bitmask(value,bit_kind_size=self.bit_kind_size)[0] diff --git a/src/ao_basis/cosgtos.irp.f b/src/ao_basis/cosgtos.irp.f index 721a3e57..dfa7d6b9 100644 --- a/src/ao_basis/cosgtos.irp.f +++ b/src/ao_basis/cosgtos.irp.f @@ -6,13 +6,14 @@ BEGIN_PROVIDER [ logical, use_cosgtos ] logical :: has PROVIDE ezfio_filename + use_cosgtos = .False. if (mpi_master) then call ezfio_has_ao_basis_use_cosgtos(has) if (has) then ! write(6,'(A)') '.. >>>>> [ IO READ: use_cosgtos ] <<<<< ..' call ezfio_get_ao_basis_use_cosgtos(use_cosgtos) else - use_cosgtos = .False. + call ezfio_set_ao_basis_use_cosgtos(use_cosgtos) endif endif IRP_IF MPI_DEBUG diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index f7a42f37..42bbe315 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -55,6 +55,7 @@ subroutine test_5idx implicit none integer :: i,k,j,l,m,n,ipoint double precision :: accu, contrib,new,ref + double precision, external :: three_e_5_idx_exch12_bi_ort i = 1 k = 1 n = 0 @@ -64,18 +65,21 @@ subroutine test_5idx do j = 1, mo_num do l = 1, mo_num do m = 1, mo_num +! if (dabs(three_e_5_idx_direct_bi_ort(m,l,j,k,i) - three_e_5_idx_exch12_bi_ort(m,l,i,k,j)) > 1.d-10) then +! stop +! endif - new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'direct' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - +! new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'direct' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) contrib = dabs(new - ref) @@ -86,51 +90,52 @@ subroutine test_5idx print*,ref,new,contrib stop endif + +! +! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle1' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle2' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch23' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch13' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif ! - new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'cycle1' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - - new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'cycle2' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - - new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'exch23' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - - new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'exch13' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - enddo enddo enddo diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index bd669163..7b39235b 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -1,7 +1,11 @@ ! --- +double precision function three_e_5_idx_exch12_bi_ort(m,l,i,k,j) result(integral) + implicit none + integer, intent(in) :: m,l,j,k,i + integral = three_e_5_idx_direct_bi_ort(m,l,j,k,i) +end BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] @@ -14,6 +18,8 @@ ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC implicit none @@ -72,7 +78,6 @@ do j = 1, mo_num do l = 1, mo_num three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,k,j) - tmp_mat(k,j,l,i) enddo enddo enddo @@ -125,7 +130,6 @@ do j = 1, mo_num do l = 1, mo_num three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) enddo enddo enddo diff --git a/src/tc_bi_ortho/31.tc_bi_ortho.bats b/src/tc_bi_ortho/31.tc_bi_ortho.bats index f5b9d8c0..93bed2ab 100644 --- a/src/tc_bi_ortho/31.tc_bi_ortho.bats +++ b/src/tc_bi_ortho/31.tc_bi_ortho.bats @@ -4,46 +4,50 @@ source $QP_ROOT/tests/bats/common.bats.sh source $QP_ROOT/quantum_package.rc +function get_e() { + grep "eigval_right_tc_bi_orth" $1 | cut -d '=' -f 2 | xargs +} + function run_Ne() { - qp set_file Ne_tc_scf - qp run cisd - qp run tc_bi_ortho | tee Ne_tc_scf.cisd_tc_bi_ortho.out + qp set_file Ne_tc_scf + qp run cisd + qp run tc_bi_ortho | tee Ne_tc_scf.cisd_tc_bi_ortho.out eref=-128.77020441279302 - energy="$(grep "eigval_right_tc_bi_orth =" Ne_tc_scf.cisd_tc_bi_ortho.out)" + energy=$(get_e Ne_tc_scf.cisd_tc_bi_ortho.out) eq $energy $eref 1e-6 } @test "Ne" { - run_Ne + run_Ne } function run_C() { - qp set_file C_tc_scf - qp run cisd - qp run tc_bi_ortho | tee C_tc_scf.cisd_tc_bi_ortho.out + qp set_file C_tc_scf + qp run cisd + qp run tc_bi_ortho | tee C_tc_scf.cisd_tc_bi_ortho.out eref=-37.757536149952514 - energy="$(grep "eigval_right_tc_bi_orth =" C_tc_scf.cisd_tc_bi_ortho.out)" + energy=$(get_e C_tc_scf.cisd_tc_bi_ortho.out) eq $energy $eref 1e-6 } @test "C" { - run_C + run_C } function run_O() { - qp set_file C_tc_scf - qp run cisd - qp run tc_bi_ortho | tee O_tc_scf.cisd_tc_bi_ortho.out + qp set_file C_tc_scf + qp run cisd + qp run tc_bi_ortho | tee O_tc_scf.cisd_tc_bi_ortho.out eref=-74.908518517716161 - energy="$(grep "eigval_right_tc_bi_orth =" O_tc_scf.cisd_tc_bi_ortho.out)" + energy=$(get_e O_tc_scf.cisd_tc_bi_ortho.out) eq $energy $eref 1e-6 } @test "O" { - run_O + run_O } diff --git a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 6abb6b78..49977f37 100644 --- a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -32,28 +32,28 @@ subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) if(Ne(1)+Ne(2).ge.3)then !! ! alpha/alpha/beta three-body do i = 1, Ne(1) - ii = occ(i,1) + ii = occ(i,1) do j = i+1, Ne(1) - jj = occ(j,1) + jj = occ(j,1) do m = 1, Ne(2) - mm = occ(m,2) -! direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) USES THE 6-IDX TENSOR -! exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) USES THE 6-IDX TENSOR - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR - exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR + mm = occ(m,2) +! direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) USES THE 6-IDX TENSOR +! exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) USES THE 6-IDX TENSOR + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR hthree += direct_int - exchange_int enddo enddo enddo - + ! beta/beta/alpha three-body do i = 1, Ne(2) - ii = occ(i,2) + ii = occ(i,2) do j = i+1, Ne(2) - jj = occ(j,2) + jj = occ(j,2) do m = 1, Ne(1) - mm = occ(m,1) - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) + mm = occ(m,1) + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) hthree += direct_int - exchange_int enddo @@ -64,10 +64,10 @@ subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) do i = 1, Ne(1) ii = occ(i,1) ! 1 do j = i+1, Ne(1) - jj = occ(j,1) ! 2 + jj = occ(j,1) ! 2 do m = j+1, Ne(1) - mm = occ(m,1) ! 3 -! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR + mm = occ(m,1) ! 3 +! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS enddo enddo @@ -80,7 +80,7 @@ subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) jj = occ(j,2) ! 2 do m = j+1, Ne(2) mm = occ(m,2) ! 3 -! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR +! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS enddo enddo @@ -96,7 +96,7 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ! for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC @@ -110,7 +110,7 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) integer :: Ne(2),i,j,ii,jj,ispin,jspin,k,kk integer :: degree,exc(0:2,2,2) integer :: h1, p1, h2, p2, s1, s2 - double precision :: direct_int,phase,exchange_int,three_e_single_parrallel_spin + double precision :: direct_int,phase,exchange_int,three_e_single_parrallel_spin double precision :: sym_3_e_int_from_6_idx_tensor integer :: other_spin(2) integer(bit_kind) :: key_j_core(Nint,2),key_i_core(Nint,2) @@ -142,26 +142,26 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ! alpha/alpha/beta three-body ! print*,'IN SLAT RULES' if(Ne(1)+Ne(2).ge.3)then - ! hole of spin s1 :: contribution from purely other spin + ! hole of spin s1 :: contribution from purely other spin ispin = other_spin(s1) ! ispin is the other spin than s1 - do i = 1, Ne(ispin) ! i is the orbitals of the other spin than s1 - ii = occ(i,ispin) - do j = i+1, Ne(ispin) ! j has the same spin than s1 - jj = occ(j,ispin) + do i = 1, Ne(ispin) ! i is the orbitals of the other spin than s1 + ii = occ(i,ispin) + do j = i+1, Ne(ispin) ! j has the same spin than s1 + jj = occ(j,ispin) ! is == ispin in ::: s1 is is s1 is is s1 is is s1 is is ! < h1 j i | p1 j i > - < h1 j i | p1 i j > - ! - direct_int = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1) - exchange_int = three_e_4_idx_exch23_bi_ort(jj,ii,p1,h1) + ! + direct_int = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1) + exchange_int = three_e_4_idx_exch23_bi_ort(jj,ii,p1,h1) hthree += direct_int - exchange_int enddo enddo - + ! hole of spin s1 :: contribution from mixed other spin / same spin - do i = 1, Ne(ispin) ! other spin - ii = occ(i,ispin) ! other spin - do j = 1, Ne(s1) ! same spin - jj = occ(j,s1) ! same spin + do i = 1, Ne(ispin) ! other spin + ii = occ(i,ispin) ! other spin + do j = 1, Ne(s1) ! same spin + jj = occ(j,s1) ! same spin direct_int = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1) exchange_int = three_e_4_idx_exch13_bi_ort(jj,ii,p1,h1) ! < h1 j i | p1 j i > - < h1 j i | j p1 i > @@ -174,8 +174,8 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ii = occ(i,s1) do j = i+1, Ne(s1) jj = occ(j,s1) -! ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) - hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR +! ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) + hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR enddo enddo endif @@ -191,7 +191,7 @@ subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ! for double excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC @@ -235,29 +235,30 @@ subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) call get_double_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) - + if(Ne(1)+Ne(2).ge.3)then - if(s1==s2)then ! same spin excitation + if(s1==s2)then ! same spin excitation ispin = other_spin(s1) do m = 1, Ne(ispin) ! direct(other_spin) - exchange(s1) mm = occ(m,ispin) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(mm,p2,h1,p1,h2) hthree += direct_int - exchange_int enddo - do m = 1, Ne(s1) ! pure contribution from s1 + do m = 1, Ne(s1) ! pure contribution from s1 mm = occ(m,s1) hthree += three_e_double_parrallel_spin(mm,p2,h2,p1,h1) - enddo - else ! different spin excitation + enddo + else ! different spin excitation do m = 1, Ne(s1) - mm = occ(m,s1) ! - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + mm = occ(m,s1) ! + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1) hthree += direct_int - exchange_int enddo do m = 1, Ne(s2) - mm = occ(m,s2) ! + mm = occ(m,s2) ! direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1) hthree += direct_int - exchange_int diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 3fd2576a..882470ed 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -13,8 +13,7 @@ subroutine provide_all_three_ints_bi_ortho PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort endif if(.not.double_normal_ord.and.three_e_5_idx_term)then - PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort - PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort + PROVIDE three_e_5_idx_direct_bi_ort elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then PROVIDE normal_two_body_bi_orth endif diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index 2d6bfb27..12bbbec0 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -2,17 +2,17 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) BEGIN_DOC - ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC use bitmasks implicit none - integer, intent(in) :: Nint + integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) double precision, intent(out) :: hmono, htwoe, hthree, htot integer :: occ(Nint*bit_kind_size,2) @@ -39,8 +39,8 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) if(s1.ne.s2)then - ! opposite spin two-body - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! opposite spin two-body + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then @@ -53,11 +53,11 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, endif endif else - ! same spin two-body - ! direct terms - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - ! exchange terms - htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + ! same spin two-body + ! direct terms + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! exchange terms + htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then @@ -112,72 +112,76 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) !DIR$ FORCEINLINE call bitstring_to_list_ab(particle, occ_particle, tmp, N_int) ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha - ASSERT (tmp(2) == nexc(2)) ! Number of particle beta + ASSERT (tmp(2) == nexc(2)) ! Number of particle beta !DIR$ FORCEINLINE call bitstring_to_list_ab(hole, occ_hole, tmp, N_int) ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha - ASSERT (tmp(2) == nexc(2)) ! Number of holes beta + ASSERT (tmp(2) == nexc(2)) ! Number of holes beta if(s1==s2.and.s1==1)then !!!!!!!!!!!!!!!!!!!!!!!!!! alpha/alpha double exc - hthree = eff_2_e_from_3_e_aa(p2,p1,h2,h1) - if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant - !!!!!!!! the matrix element is already exact - !!!!!!!! else you need to take care of holes and particles + hthree = eff_2_e_from_3_e_aa(p2,p1,h2,h1) + if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant + !!!!!!!! the matrix element is already exact + !!!!!!!! else you need to take care of holes and particles !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!! ispin = 1 ! i==alpha ==> pure same spin terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ipart=occ_particle(i,ispin) hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1) ihole=occ_hole(i,ispin) hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1) enddo ispin = 2 ! i==beta ==> alpha/alpha/beta terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h1,p1) and (h2,p2) ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ipart,p2,h1,p1,h2) hthree += direct_int - exchange_int ihole=occ_hole(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ihole,p2,h1,p1,h2) hthree -= direct_int - exchange_int enddo !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - elseif(s1==s2.and.s1==2)then + elseif(s1==s2.and.s1==2)then !!!!!!!!!!!!!!!!!!!!!!!!!! beta/beta double exc hthree = eff_2_e_from_3_e_bb(p2,p1,h2,h1) - if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant - !!!!!!!! the matrix element is already exact - !!!!!!!! else you need to take care of holes and particles + if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant + !!!!!!!! the matrix element is already exact + !!!!!!!! else you need to take care of holes and particles !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!! ispin = 2 ! i==beta ==> pure same spin terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ipart=occ_particle(i,ispin) hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1) ihole=occ_hole(i,ispin) hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1) enddo ispin = 1 ! i==alpha==> beta/beta/alpha terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h1,p1) and (h2,p2) ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ipart,p2,h1,p1,h2) hthree += direct_int - exchange_int ihole=occ_hole(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ihole,p2,h1,p1,h2) hthree -= direct_int - exchange_int enddo - else ! (h1,p1) == alpha/(h2,p2) == beta + else ! (h1,p1) == alpha/(h2,p2) == beta hthree = eff_2_e_from_3_e_ab(p2,p1,h2,h1) - if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant - !!!!!!!! the matrix element is already exact - !!!!!!!! else you need to take care of holes and particles + if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant + !!!!!!!! the matrix element is already exact + !!!!!!!! else you need to take care of holes and particles !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!! - ispin = 1 ! i==alpha ==> alpha/beta/alpha terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + ispin = 1 ! i==alpha ==> alpha/beta/alpha terms + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h1,p1) and i ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) @@ -188,8 +192,8 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) exchange_int = three_e_5_idx_exch13_bi_ort(ihole,p2,h2,p1,h1) hthree -= direct_int - exchange_int enddo - ispin = 2 ! i==beta ==> alpha/beta/beta terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + ispin = 2 ! i==beta ==> alpha/beta/beta terms + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h2,p2) and i ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) @@ -207,7 +211,7 @@ end BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC -! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/beta double excitations +! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/beta double excitations ! ! from contraction with HF density = a^{dagger}_p1_alpha a^{dagger}_p2_beta a_h2_beta a_h1_alpha END_DOC @@ -222,16 +226,16 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, eff_2_e_from_3_e_ab = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_ab) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb !! alpha - h1 = list_act(hh1) - do hh2 = 1, n_act_orb !! beta - h2 = list_act(hh2) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb !! alpha + h1 = list_act(hh1) + do hh2 = 1, n_act_orb !! beta + h2 = list_act(hh2) do pp1 = 1, n_act_orb !! alpha p1 = list_act(pp1) - do pp2 = 1, n_act_orb !! beta + do pp2 = 1, n_act_orb !! beta p2 = list_act(pp2) call give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib) eff_2_e_from_3_e_ab(p2,p1,h2,h1) = contrib @@ -242,25 +246,25 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib) implicit none - BEGIN_DOC + BEGIN_DOC ! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_beta ! ! on top of a determinant whose occupied orbitals is in (occ, Ne) END_DOC integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2) double precision, intent(out) :: contrib - integer :: mm,m + integer :: mm,m double precision :: direct_int, exchange_int - !! h1,p1 == alpha + !! h1,p1 == alpha !! h2,p2 == beta contrib = 0.d0 - do mm = 1, Ne(1) !! alpha + do mm = 1, Ne(1) !! alpha m = occ(mm,1) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h1,p1) and m exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1) contrib += direct_int - exchange_int @@ -268,7 +272,7 @@ subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib) do mm = 1, Ne(2) !! beta m = occ(mm,2) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h2,p2) and m exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1) contrib += direct_int - exchange_int @@ -278,11 +282,11 @@ end BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC -! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/alpha double excitations +! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/alpha double excitations ! ! from contractionelec_alpha_num with HF density = a^{dagger}_p1_alpha a^{dagger}_p2_alpha a_h2_alpha a_h1_alpha ! -! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill +! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill ! ! |||| h2>h1, p2>p1 |||| END_DOC @@ -297,13 +301,13 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, eff_2_e_from_3_e_aa = 100000000.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_aa) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb !! alpha - h1 = list_act(hh1) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb !! alpha + h1 = list_act(hh1) do hh2 = hh1+1, n_act_orb !! alpha - h2 = list_act(hh2) + h2 = list_act(hh2) do pp1 = 1, n_act_orb !! alpha p1 = list_act(pp1) do pp2 = pp1+1, n_act_orb !! alpha @@ -317,20 +321,20 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER subroutine give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib) implicit none - BEGIN_DOC + BEGIN_DOC ! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_alpha ! ! on top of a determinant whose occupied orbitals is in (occ, Ne) END_DOC integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2) double precision, intent(out) :: contrib - integer :: mm,m + integer :: mm,m double precision :: direct_int, exchange_int - !! h1,p1 == alpha + !! h1,p1 == alpha !! h2,p2 == alpha contrib = 0.d0 do mm = 1, Ne(1) !! alpha ==> pure parallele spin contribution @@ -340,9 +344,10 @@ subroutine give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib) do mm = 1, Ne(2) !! beta m = occ(mm,2) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h1,p1) and (h2,p2) - exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(mm,p2,h1,p1,h2) contrib += direct_int - exchange_int enddo end @@ -351,11 +356,11 @@ end BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC -! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for beta/beta double excitations +! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for beta/beta double excitations ! ! from contractionelec_beta_num with HF density = a^{dagger}_p1_beta a^{dagger}_p2_beta a_h2_beta a_h1_beta ! -! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill +! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill ! ! |||| h2>h1, p2>p1 |||| END_DOC @@ -370,13 +375,13 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, eff_2_e_from_3_e_bb = 100000000.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_bb) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb !! beta - h1 = list_act(hh1) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb !! beta + h1 = list_act(hh1) do hh2 = hh1+1, n_act_orb !! beta - h2 = list_act(hh2) + h2 = list_act(hh2) do pp1 = 1, n_act_orb !! beta p1 = list_act(pp1) do pp2 = pp1+1, n_act_orb !! beta @@ -390,18 +395,18 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib) implicit none - BEGIN_DOC + BEGIN_DOC ! gives the contribution for a double excitation (h1,p1)_beta (h2,p2)_beta ! ! on top of a determinant whose occupied orbitals is in (occ, Ne) END_DOC integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2) double precision, intent(out) :: contrib - integer :: mm,m + integer :: mm,m double precision :: direct_int, exchange_int !! h1,p1 == beta !! h2,p2 == beta @@ -413,9 +418,10 @@ subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib) do mm = 1, Ne(1) !! alpha m = occ(mm,1) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h1,p1) and (h2,p2) - exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(mm,p2,h1,p1,h2) contrib += direct_int - exchange_int enddo end @@ -424,17 +430,17 @@ end subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) BEGIN_DOC - ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC use bitmasks implicit none - integer, intent(in) :: Nint + integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) double precision, intent(out) :: htot double precision :: hmono, htwoe @@ -461,17 +467,17 @@ subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) if(s1.ne.s2)then - ! opposite spin two-body - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! opposite spin two-body + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) else - ! same spin two-body - ! direct terms - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - ! exchange terms - htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + ! same spin two-body + ! direct terms + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! exchange terms + htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) endif htwoe *= phase - htot = htwoe + htot = htwoe end diff --git a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f index e4f7ca93..e725d8e5 100644 --- a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f +++ b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f @@ -107,5 +107,6 @@ double precision function three_e_double_parrallel_spin(m,l,j,k,i) three_e_double_parrallel_spin = three_e_5_idx_direct_bi_ort(m,l,j,k,i) ! direct three_e_double_parrallel_spin += three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) & ! two cyclic permutations - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) & ! two first exchange - - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ! last exchange +! - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ! last exchange + - three_e_5_idx_direct_bi_ort(m,l,i,k,j) ! last exchange end From 82b2d8bd98e9f3d543b74f766553d28166486094 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:48:23 +0200 Subject: [PATCH 48/79] avoid long name in cosgtos --- .../two_e_Coul_integrals_cosgtos.irp.f | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f index 527a98d5..ea9ff009 100644 --- a/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f +++ b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f @@ -29,14 +29,14 @@ double precision function ao_two_e_integral_cosgtos(i, j, k, l) complex*16 :: integral5, integral6, integral7, integral8 complex*16 :: integral_tot - double precision :: ao_two_e_integral_cosgtos_schwartz_accel + double precision :: ao_2e_cosgtos_schwartz_accel complex*16 :: ERI_cosgtos complex*16 :: general_primitive_integral_cosgtos if(ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024) then !print *, ' with shwartz acc ' - ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + ao_two_e_integral_cosgtos = ao_2e_cosgtos_schwartz_accel(i, j, k, l) else !print *, ' without shwartz acc ' @@ -294,7 +294,7 @@ end function ao_two_e_integral_cosgtos ! --- -double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) +double precision function ao_2e_cosgtos_schwartz_accel(i, j, k, l) BEGIN_DOC ! integral of the AO basis or (ij|kl) @@ -329,7 +329,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) complex*16 :: ERI_cosgtos complex*16 :: general_primitive_integral_cosgtos - ao_two_e_integral_cosgtos_schwartz_accel = 0.d0 + ao_2e_cosgtos_schwartz_accel = 0.d0 dim1 = n_pt_max_integrals @@ -519,8 +519,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 - ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & - + coef4 * 2.d0 * real(integral_tot) + ao_2e_cosgtos_schwartz_accel = ao_2e_cosgtos_schwartz_accel + coef4 * 2.d0 * real(integral_tot) enddo ! s enddo ! r enddo ! q @@ -698,8 +697,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 - ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & - + coef4 * 2.d0 * real(integral_tot) + ao_2e_cosgtos_schwartz_accel = ao_2e_cosgtos_schwartz_accel + coef4 * 2.d0 * real(integral_tot) enddo ! s enddo ! r enddo ! q @@ -709,11 +707,11 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) deallocate(schwartz_kl) -end function ao_two_e_integral_cosgtos_schwartz_accel +end function ao_2e_cosgtos_schwartz_accel ! --- -BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_2e_cosgtos_schwartz, (ao_num,ao_num)] BEGIN_DOC ! Needed to compute Schwartz inequalities @@ -723,16 +721,16 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,a integer :: i, k double precision :: ao_two_e_integral_cosgtos - ao_two_e_integral_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) + ao_2e_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) - !$OMP PARALLEL DO PRIVATE(i,k) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(ao_num, ao_two_e_integral_cosgtos_schwartz) & + !$OMP PARALLEL DO PRIVATE(i,k) & + !$OMP DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_2e_cosgtos_schwartz) & !$OMP SCHEDULE(dynamic) do i = 1, ao_num do k = 1, i - ao_two_e_integral_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) - ao_two_e_integral_cosgtos_schwartz(k,i) = ao_two_e_integral_cosgtos_schwartz(i,k) + ao_2e_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) + ao_2e_cosgtos_schwartz(k,i) = ao_2e_cosgtos_schwartz(i,k) enddo enddo !$OMP END PARALLEL DO From cab3b12b9b397933ca438717846d28d3164d4804 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:55:51 +0200 Subject: [PATCH 49/79] minor modif in names --- src/tc_bi_ortho/tc_utils.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index e0f29eb8..9023e2f0 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -45,8 +45,8 @@ subroutine write_tc_var() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo From 072bea8041a5414da00bc8ddc001186c3c9ff269 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 3 Jun 2023 22:12:30 +0200 Subject: [PATCH 50/79] Improve 4idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 186 +++++- src/bi_ort_ints/three_body_ijm.irp.f | 14 +- src/bi_ort_ints/three_body_ijmk.irp.f | 686 +++++++++++++-------- src/bi_ort_ints/three_body_ijmk_old.irp.f | 290 +++++++++ src/bi_ort_ints/three_body_ijmkl.irp.f | 299 +++++---- src/tc_bi_ortho/slater_tc_opt.irp.f | 3 +- src/tc_bi_ortho/slater_tc_opt_single.irp.f | 7 +- src/tc_bi_ortho/symmetrized_3_e_int.irp.f | 6 +- 8 files changed, 1071 insertions(+), 420 deletions(-) create mode 100644 src/bi_ort_ints/three_body_ijmk_old.irp.f diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 42bbe315..bb0424cd 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -8,8 +8,9 @@ program bi_ort_ints my_n_pt_a_grid = 14 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid ! call test_3e - call test_5idx -! call test_5idx2 +! call test_5idx +! call test_5idx2 + call test_4idx end subroutine test_5idx2 @@ -145,3 +146,184 @@ subroutine test_5idx end + +! --- + +subroutine test_4idx() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + thr = 1d-5 + + PROVIDE three_e_4_idx_direct_bi_ort_old + PROVIDE three_e_4_idx_direct_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_direct_bi_ort (l,k,j,i) + ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_direct_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_direct_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch13_bi_ort_old + PROVIDE three_e_4_idx_exch13_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch13_bi_ort (l,k,j,i) + ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch13_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch13_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + +! PROVIDE three_e_4_idx_exch12_bi_ort_old +! PROVIDE three_e_4_idx_exch12_bi_ort +! +! accu = 0.d0 +! do i = 1, mo_num +! do j = 1, mo_num +! do k = 1, mo_num +! do l = 1, mo_num +! +! new = three_e_4_idx_exch12_bi_ort (l,k,j,i) +! ref = three_e_4_idx_exch12_bi_ort_old(l,k,j,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. thr) then +! print*, ' problem in three_e_4_idx_exch12_bi_ort' +! print*, l, k, j, i +! print*, ref, new, contrib +! stop +! endif +! +! enddo +! enddo +! enddo +! enddo +! print*, ' accu on three_e_4_idx_exch12_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_cycle_1_bi_ort_old + PROVIDE three_e_4_idx_cycle_1_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_cycle_1_bi_ort (l,k,j,i) + ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_cycle_1_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_cycle_1_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + +! PROVIDE three_e_4_idx_cycle_2_bi_ort_old +! PROVIDE three_e_4_idx_cycle_2_bi_ort +! +! accu = 0.d0 +! do i = 1, mo_num +! do j = 1, mo_num +! do k = 1, mo_num +! do l = 1, mo_num +! +! new = three_e_4_idx_cycle_2_bi_ort (l,k,j,i) +! ref = three_e_4_idx_cycle_2_bi_ort_old(l,k,j,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. thr) then +! print*, ' problem in three_e_4_idx_cycle_2_bi_ort' +! print*, l, k, j, i +! print*, ref, new, contrib +! stop +! endif +! +! enddo +! enddo +! enddo +! enddo +! print*, ' accu on three_e_4_idx_cycle_2_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch23_bi_ort_old + PROVIDE three_e_4_idx_exch23_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch23_bi_ort (l,k,j,i) + ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch23_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch23_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + return +end diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f index b34638b8..ae100fb5 100644 --- a/src/bi_ort_ints/three_body_ijm.irp.f +++ b/src/bi_ort_ints/three_body_ijm.irp.f @@ -23,11 +23,11 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, provide mos_r_in_r_array_transp mos_l_in_r_array_transp - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,integral) & - !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,integral) & + !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort) + !$OMP DO SCHEDULE (dynamic) do i = 1, mo_num do j = 1, mo_num do m = j, mo_num @@ -36,8 +36,8 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do i = 1, mo_num do j = 1, mo_num diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 95b57e37..39a31751 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -1,287 +1,467 @@ ! --- -BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_4_idx_direct_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_direct_bi_ort (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_exch13_bi_ort (j,m,k,i) + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_direct_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_direct_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral) - three_e_4_idx_direct_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_direct_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm ! END_DOC implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 + integer :: ipoint, i, j, k, l, m + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:), tmp4(:,:,:,:) + double precision, allocatable :: tmp_4d(:,:,:,:) + double precision, allocatable :: tmp5(:,:,:) + double precision, allocatable :: tmp7(:,:) + double precision, allocatable :: tmp_3d(:,:,:) - three_e_4_idx_cycle_1_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_cycle_1_bi_ort ...' + print *, ' Providing the three_e_4_idx_bi_ort ...' call wall_time(wall0) provide mos_r_in_r_array_transp mos_l_in_r_array_transp - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp4(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2, tmp3, tmp4) + !$OMP DO COLLAPSE(2) do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral) - three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + do l = 1, mo_num + do ipoint = 1, n_points_final_grid - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_cycle_1_bi_ort', wall1 - wall0 - call print_memory_usage() + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) -END_PROVIDER + tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) -! -- + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) -BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_cycle_2_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral) - three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_cycle_2_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch23_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch23_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral) - three_e_4_idx_exch23_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch13_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch13_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch13_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral) - three_e_4_idx_exch13_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch13_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch12_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch12_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch12_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral) - three_e_4_idx_exch12_bi_ort(m,j,k,i) = -1.d0 * integral - enddo + tmp4(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp4(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp4(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) enddo enddo enddo !$OMP END DO !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp4, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_4d(m,i,j,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp2) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3, 3*n_points_final_grid, tmp4, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp3) + deallocate(tmp4) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp7(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, & + !$OMP tmp7) + !$OMP DO + do i = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp7(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + + do m = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp5) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & + , tmp7, n_points_final_grid, tmp5, n_points_final_grid & + , 0.d0, tmp_3d, mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP mos_l_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5) + !$OMP DO COLLAPSE(2) + do k = 1, mo_num + do j = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp5, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & + , 0.d0, tmp_3d, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo + + deallocate(tmp7) + deallocate(tmp_3d) + + + + do i = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (m, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5) + !$OMP DO COLLAPSE(2) + do j = 1, mo_num + do m = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & + , tmp5, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) + + enddo + + deallocate(tmp5) + + +! !$OMP PARALLEL DO PRIVATE(i,j,k,m) +! do i = 1, mo_num +! do k = 1, mo_num +! do j = 1, mo_num +! do m = 1, mo_num +! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort (j,m,k,i) +! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) +! enddo +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO + + call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch12_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_4_idx_bi_ort', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki + ! + END_DOC + + implicit none + integer :: i, j, k, l, m, ipoint + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:) + double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:) + + print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + + allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp6(n_points_final_grid,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5, tmp6) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) & + + int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) & + + int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l) + + tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 & + , tmp5, n_points_final_grid, tmp6, n_points_final_grid & + , 0.d0, three_e_4_idx_exch23_bi_ort, mo_num*mo_num) + + deallocate(tmp5) + deallocate(tmp6) + + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch23_bi_ort(m,j,k,i) = three_e_4_idx_exch23_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmk_old.irp.f b/src/bi_ort_ints/three_body_ijmk_old.irp.f new file mode 100644 index 00000000..1a67f35b --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmk_old.irp.f @@ -0,0 +1,290 @@ + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_direct_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_direct_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_direct_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral) + three_e_4_idx_direct_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_direct_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_cycle_1_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_cycle_1_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_cycle_1_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral) + three_e_4_idx_cycle_1_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_cycle_1_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! -- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_cycle_2_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_cycle_2_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_cycle_2_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral) + three_e_4_idx_cycle_2_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_cycle_2_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch23_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch23_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral) + three_e_4_idx_exch23_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch23_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch13_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch13_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch13_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral) + three_e_4_idx_exch13_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch13_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch12_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral) + three_e_4_idx_exch12_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch12_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index d67e1434..3e4412a3 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -19,17 +19,17 @@ end ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! - END_DOC implicit none - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint + integer :: i, j, k, m, l + integer :: ipoint + double precision :: wall1, wall0 double precision, allocatable :: grad_mli(:,:,:), orb_mat(:,:,:) double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) double precision, allocatable :: tmp_mat(:,:,:,:) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp @@ -38,201 +38,196 @@ end print *, ' Providing the three_e_5_idx_bi_ort ...' call wall_time(wall0) - do m = 1, mo_num + do m = 1, mo_num - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP grad_mli, orb_mat) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & - int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & - int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & - int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP grad_mli, orb_mat) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid - orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) - enddo + orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + + enddo + enddo enddo - enddo + !$OMP END DO + !$OMP END PARALLEL - !$OMP END DO - !$OMP END PARALLEL + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & - orb_mat, n_points_final_grid, & - grad_mli, n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - deallocate(orb_mat,grad_mli) + deallocate(orb_mat,grad_mli) + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) - allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - - rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) - rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) - rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) - - enddo + rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + enddo + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) + !$OMP END DO + !$OMP END PARALLEL + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,j,k) three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = - tmp_mat(k,j,i,l) three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = - tmp_mat(k,i,j,l) three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = - tmp_mat(l,j,i,k) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - - deallocate(lm_grad_ik) - - allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid - - lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - - enddo + !$OMP END PARALLEL DO + + deallocate(lm_grad_ik) + + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + + enddo + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l,i) three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k,i) three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(k,i,l,j) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,j,i,k) three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,i,j,l) three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,j,i,l) three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,i,j,k) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - deallocate(lk_grad_mi) - deallocate(rm_grad_ik) - deallocate(rk_grad_im) + !$OMP END PARALLEL DO + + deallocate(lk_grad_mi) + deallocate(rm_grad_ik) + deallocate(rk_grad_im) enddo + deallocate(tmp_mat) call wall_time(wall1) print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0 diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 882470ed..a2077f0f 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -9,8 +9,7 @@ subroutine provide_all_three_ints_bi_ortho PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort endif if(three_e_4_idx_term)then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif if(.not.double_normal_ord.and.three_e_5_idx_term)then PROVIDE three_e_5_idx_direct_bi_ort diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 7178d6d9..9719a6e7 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -243,7 +243,9 @@ subroutine fock_ac_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree, do j = 1, nb jj = occ(j,other_spin) direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR - exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR + ! TODO + ! use transpose + exchange_int = three_e_4_idx_exch13_bi_ort(iorb,jj,p_fock,h_fock) ! USES 4-IDX TENSOR hthree += direct_int - exchange_int enddo else !! ispin NE to ispin_fock @@ -322,7 +324,8 @@ subroutine fock_a_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,N do j = 1, nb jj = occ(j,other_spin) direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR - exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR + ! TODO use transpose + exchange_int = three_e_4_idx_exch13_bi_ort(iorb,jj,p_fock,h_fock) ! USES 4-IDX TENSOR hthree -= direct_int - exchange_int enddo else !! ispin NE to ispin_fock diff --git a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f index e725d8e5..3180d946 100644 --- a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f +++ b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f @@ -96,9 +96,11 @@ double precision function three_e_single_parrallel_spin(m,j,k,i) implicit none integer, intent(in) :: i,k,j,m three_e_single_parrallel_spin = three_e_4_idx_direct_bi_ort(m,j,k,i) ! direct - three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_2_bi_ort(m,j,k,i) & ! two cyclic permutations + three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_1_bi_ort(j,m,k,i) & ! two cyclic permutations - three_e_4_idx_exch23_bi_ort(m,j,k,i) - three_e_4_idx_exch13_bi_ort(m,j,k,i) & ! two first exchange - - three_e_4_idx_exch12_bi_ort(m,j,k,i) ! last exchange + - three_e_4_idx_exch13_bi_ort(j,m,k,i) ! last exchange + ! TODO + ! use transpose end double precision function three_e_double_parrallel_spin(m,l,j,k,i) From a791a28523b787618d571947452fffbc4e7340c6 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 4 Jun 2023 09:19:34 +0200 Subject: [PATCH 51/79] working on memory footprint --- .../grid_becke_vector.irp.f | 18 +++- src/bi_ort_ints/semi_num_ints_mo.irp.f | 23 ++++-- src/bi_ort_ints/three_body_ijmk.irp.f | 82 +++++++++++-------- src/bi_ortho_mos/bi_ort_mos_in_r.irp.f | 6 +- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 8 +- src/tc_bi_ortho/normal_ordered.irp.f | 6 -- src/tc_bi_ortho/slater_tc_opt.irp.f | 50 +++++++---- 7 files changed, 121 insertions(+), 72 deletions(-) diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index fd185641..8982fe83 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -62,20 +62,30 @@ END_PROVIDER enddo enddo + FREE grid_points_per_atom + FREE final_weight_at_r + END_PROVIDER ! --- BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] - implicit none + BEGIN_DOC -! Transposed final_grid_points + ! Transposed final_grid_points END_DOC + implicit none integer :: i,j - do j=1,3 - do i=1,n_points_final_grid + + do j = 1, 3 + do i = 1, n_points_final_grid final_grid_points_transp(i,j) = final_grid_points(j,i) enddo enddo + END_PROVIDER + +! --- + + diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 771d3274..6354b393 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -124,6 +124,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo + FREE int2_grad1_u12_ao_test + else PROVIDE int2_grad1_u12_ao @@ -153,14 +155,14 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] implicit none - integer :: ipoint + integer :: ipoint double precision :: wall0, wall1 PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_ao_transp - !print *, ' providing int2_grad1_u12_bimo_transp' - !call wall_time(wall0) + print *, ' providing int2_grad1_u12_bimo_transp' + call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -178,8 +180,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL - !call wall_time(wall1) - !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + call wall_time(wall1) + print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -188,7 +191,11 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] implicit none - integer :: i, j, ipoint + integer :: i, j, ipoint + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -205,6 +212,10 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, FREE int2_grad1_u12_bimo_transp + call wall_time(wall1) + print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 39a31751..ee7e88ef 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -30,10 +30,10 @@ implicit none integer :: ipoint, i, j, k, l, m double precision :: wall1, wall0 - double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:), tmp4(:,:,:,:) + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) double precision, allocatable :: tmp_4d(:,:,:,:) - double precision, allocatable :: tmp5(:,:,:) - double precision, allocatable :: tmp7(:,:) + double precision, allocatable :: tmp4(:,:,:) + double precision, allocatable :: tmp5(:,:) double precision, allocatable :: tmp_3d(:,:,:) print *, ' Providing the three_e_4_idx_bi_ort ...' @@ -47,7 +47,6 @@ allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp4(n_points_final_grid,3,mo_num,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -55,7 +54,7 @@ !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1, tmp2, tmp3, tmp4) + !$OMP tmp1, tmp2, tmp3) !$OMP DO COLLAPSE(2) do i = 1, mo_num do l = 1, mo_num @@ -69,13 +68,9 @@ tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) - tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - - tmp4(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp4(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp4(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) enddo enddo enddo @@ -99,7 +94,7 @@ !$OMP END PARALLEL DO call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp4, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp1) @@ -116,8 +111,30 @@ enddo !$OMP END PARALLEL DO + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp2) @@ -135,11 +152,10 @@ !$OMP END PARALLEL DO call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3, 3*n_points_final_grid, tmp4, 3*n_points_final_grid & + , tmp1, 3*n_points_final_grid, tmp3, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp3) - deallocate(tmp4) !$OMP PARALLEL DO PRIVATE(i,j,k,m) do i = 1, mo_num @@ -155,8 +171,6 @@ - allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) - !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, l, ipoint) & @@ -199,26 +213,26 @@ allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp7(n_points_final_grid,mo_num)) + allocate(tmp5(n_points_final_grid,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, ipoint) & !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, & - !$OMP tmp7) + !$OMP final_weight_at_r_vector, & + !$OMP tmp5) !$OMP DO do i = 1, mo_num do ipoint = 1, n_points_final_grid - tmp7(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) enddo enddo !$OMP END DO !$OMP END PARALLEL - allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp4(n_points_final_grid,mo_num,mo_num)) do m = 1, mo_num @@ -227,13 +241,13 @@ !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (mo_num, n_points_final_grid, m, & !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) enddo @@ -243,7 +257,7 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & - , tmp7, n_points_final_grid, tmp5, n_points_final_grid & + , tmp5, n_points_final_grid, tmp4, n_points_final_grid & , 0.d0, tmp_3d, mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k) @@ -264,13 +278,13 @@ !$OMP SHARED (mo_num, n_points_final_grid, m, & !$OMP mos_l_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do k = 1, mo_num do j = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) @@ -281,7 +295,7 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp5, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & + , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & , 0.d0, tmp_3d, mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k) @@ -296,7 +310,7 @@ enddo - deallocate(tmp7) + deallocate(tmp5) deallocate(tmp_3d) @@ -309,13 +323,13 @@ !$OMP SHARED (mo_num, n_points_final_grid, i, & !$OMP mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do j = 1, mo_num do m = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) @@ -326,12 +340,12 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & - , tmp5, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) enddo - deallocate(tmp5) + deallocate(tmp4) ! !$OMP PARALLEL DO PRIVATE(i,j,k,m) diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f index 42130575..8667683e 100644 --- a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f +++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f @@ -46,6 +46,8 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, mos_r_in_r_array_transp(i,j) = mos_r_in_r_array(j,i) enddo enddo + + FREE mos_r_in_r_array END_PROVIDER @@ -116,7 +118,7 @@ end subroutine give_all_mos_l_at_r ! --- -BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo_num)] +BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,mo_num)] BEGIN_DOC ! mos_l_in_r_array_transp(i,j) = value of the jth mo on the ith grid point @@ -130,6 +132,8 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo mos_l_in_r_array_transp(i,j) = mos_l_in_r_array(j,i) enddo enddo + + FREE mos_l_in_r_array END_PROVIDER diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index ed75c882..f9bda058 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -54,11 +54,13 @@ subroutine run_cipsi_tc implicit none - if (.not.is_zmq_slave) then + if (.not. is_zmq_slave) then + PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e - if(elec_alpha_num+elec_beta_num.ge.3)then + + if(elec_alpha_num+elec_beta_num .ge. 3) then if(three_body_h_tc)then - call provide_all_three_ints_bi_ortho + call provide_all_three_ints_bi_ortho() endif endif ! --- diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index cc01d144..f8e310df 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -24,9 +24,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ PROVIDE N_int - print*,' Providing normal_two_body_bi_orth ...' - call wall_time(wall0) - if(read_tc_norm_ord) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") @@ -115,9 +112,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call wall_time(wall1) print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - call wall_time(wall1) - print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index a2077f0f..42c59308 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,24 +1,38 @@ -subroutine provide_all_three_ints_bi_ortho - implicit none - BEGIN_DOC -! routine that provides all necessary three-electron integrals - END_DOC - if(three_body_h_tc)then - if(three_e_3_idx_term)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - endif - if(three_e_4_idx_term)then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort - endif - if(.not.double_normal_ord.and.three_e_5_idx_term)then - PROVIDE three_e_5_idx_direct_bi_ort - elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then - PROVIDE normal_two_body_bi_orth - endif + +! --- + +subroutine provide_all_three_ints_bi_ortho() + + BEGIN_DOC + ! routine that provides all necessary three-electron integrals + END_DOC + + implicit none + + if(three_body_h_tc) then + + if(three_e_3_idx_term) then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + + if(three_e_4_idx_term) then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort + endif + + if(.not. double_normal_ord. and. three_e_5_idx_term) then + PROVIDE three_e_5_idx_direct_bi_ort + elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then + PROVIDE normal_two_body_bi_orth + endif + endif + + return end +! --- + subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC From 501b9d648702c0f5a2ba0f684ef40ce69d0cb6ce Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 4 Jun 2023 09:58:29 +0200 Subject: [PATCH 52/79] minor modifs --- .../grid_becke_vector.irp.f | 21 +++++++++++++--- src/bi_ort_ints/bi_ort_ints.irp.f | 25 ++++++++++++++----- src/bi_ort_ints/semi_num_ints_mo.irp.f | 22 ++++++++-------- src/bi_ortho_mos/bi_ort_mos_in_r.irp.f | 4 --- src/tc_bi_ortho/slater_tc_opt.irp.f | 2 +- 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 8982fe83..0386f3c6 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -1,10 +1,13 @@ BEGIN_PROVIDER [integer, n_points_final_grid] - implicit none + BEGIN_DOC ! Number of points which are non zero END_DOC - integer :: i,j,k,l + + implicit none + integer :: i, j, k, l + n_points_final_grid = 0 do j = 1, nucl_num do i = 1, n_points_radial_grid -1 @@ -16,9 +19,11 @@ BEGIN_PROVIDER [integer, n_points_final_grid] enddo enddo enddo - print*,'n_points_final_grid = ',n_points_final_grid - print*,'n max point = ',n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) + + print*,' n_points_final_grid = ', n_points_final_grid + print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) + END_PROVIDER ! --- @@ -41,6 +46,10 @@ END_PROVIDER implicit none integer :: i, j, k, l, i_count double precision :: r(3) + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing final_grid_points ...' i_count = 0 do j = 1, nucl_num @@ -65,6 +74,10 @@ END_PROVIDER FREE grid_points_per_atom FREE final_weight_at_r + call wall_time(wall1) + print *, ' wall time for final_grid_points,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index bb0424cd..e64892d7 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -1,22 +1,35 @@ +! --- + program bi_ort_ints - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + my_grid_becke = .True. - my_n_pt_r_grid = 10 - my_n_pt_a_grid = 14 - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + !my_n_pt_r_grid = 10 + !my_n_pt_a_grid = 14 + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + ! call test_3e ! call test_5idx ! call test_5idx2 - call test_4idx + !call test_4idx + call test_4idx2() end subroutine test_5idx2 PROVIDE three_e_5_idx_cycle_2_bi_ort end +subroutine test_4idx2() + PROVIDE three_e_4_idx_direct_bi_ort +end + subroutine test_3e implicit none integer :: i,k,j,l,m,n,ipoint diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 6354b393..355fa38f 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -54,7 +54,7 @@ BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_poi enddo enddo -! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu + !FREE mo_v_ki_bi_ortho_erf_rk_cst_mu END_PROVIDER @@ -161,8 +161,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_ao_transp - print *, ' providing int2_grad1_u12_bimo_transp' - call wall_time(wall0) + !print *, ' providing int2_grad1_u12_bimo_transp' + !call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -180,9 +180,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL - call wall_time(wall1) - print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 - call print_memory_usage() + !call wall_time(wall1) + !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + !call print_memory_usage() END_PROVIDER @@ -194,8 +194,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, integer :: i, j, ipoint double precision :: wall0, wall1 - call wall_time(wall0) - print *, ' Providing int2_grad1_u12_bimo_t ...' + !call wall_time(wall0) + !print *, ' Providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -212,9 +212,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, FREE int2_grad1_u12_bimo_transp - call wall_time(wall1) - print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 - call print_memory_usage() + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + !call print_memory_usage() END_PROVIDER diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f index 8667683e..25572854 100644 --- a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f +++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f @@ -47,8 +47,6 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, enddo enddo - FREE mos_r_in_r_array - END_PROVIDER ! --- @@ -133,8 +131,6 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,m enddo enddo - FREE mos_l_in_r_array - END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 42c59308..7acb0d0f 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -20,7 +20,7 @@ subroutine provide_all_three_ints_bi_ortho() PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif - if(.not. double_normal_ord. and. three_e_5_idx_term) then + if(.not. double_normal_ord .and. three_e_5_idx_term) then PROVIDE three_e_5_idx_direct_bi_ort elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then PROVIDE normal_two_body_bi_orth From b984d7a1f4a734ae459a3c91e2ca9ee2ea26bc50 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 4 Jun 2023 15:27:07 +0200 Subject: [PATCH 53/79] minor modif --- src/bi_ort_ints/bi_ort_ints.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index e64892d7..5e465d0f 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -27,7 +27,8 @@ subroutine test_5idx2 end subroutine test_4idx2() - PROVIDE three_e_4_idx_direct_bi_ort + !PROVIDE three_e_4_idx_direct_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort end subroutine test_3e From b03709020df45806e59d2dfc18303285d69696d4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 4 Jun 2023 16:45:38 +0200 Subject: [PATCH 54/79] 5idx arrays need O(N3) temp arrays --- src/bi_ort_ints/bi_ort_ints.irp.f | 34 +-- src/bi_ort_ints/three_body_ijmkl.irp.f | 330 +++++++++++++------------ 2 files changed, 187 insertions(+), 177 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 42bbe315..5618a2cd 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -8,8 +8,8 @@ program bi_ort_ints my_n_pt_a_grid = 14 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid ! call test_3e + call test_5idx2 call test_5idx -! call test_5idx2 end subroutine test_5idx2 @@ -60,6 +60,8 @@ subroutine test_5idx k = 1 n = 0 accu = 0.d0 + PROVIDE three_e_5_idx_direct_bi_ort_old + do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -69,28 +71,28 @@ subroutine test_5idx ! stop ! endif -! new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'direct' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif +! +! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) ! contrib = dabs(new - ref) ! accu += contrib ! if(contrib .gt. 1.d-10)then -! print*,'direct' +! print*,'exch12' ! print*,i,k,j,l,m ! print*,ref,new,contrib ! stop ! endif ! - new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'exch12' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - ! ! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) ! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) @@ -135,7 +137,7 @@ subroutine test_5idx ! print*,ref,new,contrib ! stop ! endif -! + enddo enddo enddo diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index 7b39235b..6e46637d 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -15,7 +15,7 @@ end ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = :: : notice that i is the RIGHT MO and k is the LEFT MO ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! @@ -23,210 +23,218 @@ end END_DOC implicit none - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision, allocatable :: grad_mli(:,:,:), orb_mat(:,:,:) - double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) - double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision, allocatable :: grad_mli(:,:), orb_mat(:,:,:) + double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:) + double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:) + double precision, allocatable :: tmp_mat(:,:,:) provide mos_r_in_r_array_transp mos_l_in_r_array_transp PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + call print_memory_usage print *, ' Providing the three_e_5_idx_bi_ort ...' call wall_time(wall0) - do m = 1, mo_num + three_e_5_idx_direct_bi_ort (:,:,:,:,:) = 0.d0 + three_e_5_idx_cycle_1_bi_ort(:,:,:,:,:) = 0.d0 + three_e_5_idx_cycle_2_bi_ort(:,:,:,:,:) = 0.d0 + three_e_5_idx_exch23_bi_ort (:,:,:,:,:) = 0.d0 + three_e_5_idx_exch13_bi_ort (:,:,:,:,:) = 0.d0 - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + call print_memory_usage + + allocate(tmp_mat(mo_num,mo_num,mo_num)) allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP grad_mli, orb_mat) - !$OMP DO COLLAPSE(2) + + !$OMP PARALLEL DO PRIVATE (i,l,ipoint) do i=1,mo_num do l=1,mo_num - do ipoint=1, n_points_final_grid + do ipoint=1, n_points_final_grid - grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & - int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & - int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & - int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + orb_mat(ipoint,l,i) = final_weight_at_r_vector(ipoint) & + * mos_l_in_r_array_transp(ipoint,l) & + * mos_r_in_r_array_transp(ipoint,i) - orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) - - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & - orb_mat, n_points_final_grid, & - grad_mli, n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) - enddo enddo enddo enddo !$OMP END PARALLEL DO - deallocate(orb_mat,grad_mli) + tmp_mat = 0.d0 + call print_memory_usage +! + do m = 1, mo_num - allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid + do i=1,mo_num + !$OMP PARALLEL DO PRIVATE (l,ipoint) + do l=1,mo_num + do ipoint=1, n_points_final_grid - lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + grad_mli(ipoint,l) = & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) +& + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) +& + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + enddo + enddo + !$OMP END PARALLEL DO - rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) - rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) - rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + call dgemm('T','N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0,& + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num + !$OMP PARALLEL PRIVATE(j,k,l) + !$OMP DO + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP DO do j = 1, mo_num do l = 1, mo_num - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) + do k = 1, mo_num + three_e_5_idx_direct_bi_ort(m,k,i,l,j) = three_e_5_idx_direct_bi_ort(m,k,i,l,j) - tmp_mat(l,j,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + + deallocate(grad_mli) + + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL DO PRIVATE (i,l,ipoint) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + enddo enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,j,k) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = - tmp_mat(k,j,i,l) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = - tmp_mat(k,i,j,l) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = - tmp_mat(l,j,i,k) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num)) + + do i=1,mo_num + !$OMP PARALLEL DO PRIVATE (l,ipoint) + do l=1,mo_num + do ipoint=1, n_points_final_grid + + rm_grad_ik(ipoint,1,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + + rk_grad_im(ipoint,1,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - deallocate(lm_grad_ik) + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lm_grad_ik, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid - - lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l,i) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k,i) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(k,i,l,j) + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k) + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,j,i,k) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,i,j,l) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,j,i,l) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,i,j,k) + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) = three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) - tmp_mat(l,k,j) + three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) = three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) - tmp_mat(k,j,l) + three_e_5_idx_exch23_bi_ort (m,i,j,k,l) = three_e_5_idx_exch23_bi_ort (m,i,j,k,l) - tmp_mat(k,l,j) + three_e_5_idx_exch13_bi_ort (m,l,j,i,k) = three_e_5_idx_exch13_bi_ort (m,l,j,i,k) - tmp_mat(l,j,k) + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + + + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lk_grad_mi, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l) + three_e_5_idx_cycle_2_bi_ort(m,l,i,k,j) = three_e_5_idx_cycle_2_bi_ort(m,l,i,k,j) - tmp_mat(l,j,k) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k) + three_e_5_idx_exch13_bi_ort (m,l,i,k,j) = three_e_5_idx_exch13_bi_ort (m,l,i,k,j) - tmp_mat(k,j,l) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lk_grad_mi, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) = three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) - tmp_mat(l,j,k) + three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) = three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) - tmp_mat(k,l,j) + three_e_5_idx_exch23_bi_ort (m,i,j,k,l) = three_e_5_idx_exch23_bi_ort (m,i,j,k,l) - tmp_mat(k,j,l) + three_e_5_idx_exch13_bi_ort (m,l,j,i,k) = three_e_5_idx_exch13_bi_ort (m,l,j,i,k) - tmp_mat(l,k,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo + deallocate(rm_grad_ik) + deallocate(rk_grad_im) + deallocate(lk_grad_mi) + deallocate(lm_grad_ik) - deallocate(lk_grad_mi) - deallocate(rm_grad_ik) - deallocate(rk_grad_im) enddo + deallocate(orb_mat) + call wall_time(wall1) print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0 From b48e6b269d624ecfabcbb4895c75bd397646c0d8 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 01:33:55 +0200 Subject: [PATCH 55/79] start optim normal ordering --- src/tc_bi_ortho/normal_ordered.irp.f | 438 ++++++++++++----------- src/tc_bi_ortho/normal_ordered_old.irp.f | 390 ++++++++++++++++++++ 2 files changed, 625 insertions(+), 203 deletions(-) create mode 100644 src/tc_bi_ortho/normal_ordered_old.irp.f diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index f8e310df..a092762b 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -14,7 +14,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ integer :: i, h1, p1, h2, p2 integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) - double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: hthree_aaa, hthree_aab double precision :: wall0, wall1 integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) @@ -39,57 +39,65 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ if(core_tc_op) then do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - normal_two_body_bi_orth = 0.d0 + ! opposite spin double excitations : s1 /= s2 + normal_two_body_bi_orth(:,:,:,:) = no_aba_contraction(:,:,:,:) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) + do pp1 = 1, n_act_orb p1 = list_act(pp1) + do hh2 = 1, n_act_orb h2 = list_act(hh2) + do pp2 = 1, n_act_orb p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - - - ! opposite spin double excitations : s1 /= s2 - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) ! same spin double excitations : s1 == s2 - if(h1h2 - ! same spin double excitations with same spin contributions - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then + if((h1 < h2) .and. (p1 > p2)) then + + ! with opposite spin contributions + call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2 + ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif + if(Ne(2) .ge. 3) then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + + else + + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + + if(Ne(2) .ge. 3) then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + + normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aab + hthree_aaa) enddo enddo enddo @@ -116,178 +124,6 @@ END_PROVIDER ! --- -subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii, i - double precision :: int_direct, int_exc_12, int_exc_13, integral - - !!!! double alpha/beta - hthree = 0.d0 - - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) - enddo - - do ii = Ne(2) + 1, Ne(1) ! purely open-shell part - i = occ(ii,1) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) - enddo - - return -end - -! --- - -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: h1, p1, h2, p2, i - integer :: hh1, hh2, pp1, pp2 - integer :: Ne(2) - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree - - PROVIDE N_int - - allocate( key_i_core(N_int,2) ) - allocate( occ(N_int*bit_kind_size,2) ) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) - else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) - endif - - normal_two_body_bi_orth_ab = 0.d0 - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) - - normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree - enddo - enddo - enddo - enddo - - deallocate( key_i_core ) - deallocate( occ ) - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] - - BEGIN_DOC - ! Normal ordered two-body sector of the three-body terms for same spin double excitations - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i,ii,j,h1,p1,h2,p2 - integer :: hh1,hh2,pp1,pp2 - integer :: Ne(2) - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree_aab, hthree_aaa - - PROVIDE N_int - - allocate( key_i_core(N_int,2) ) - allocate( occ(N_int*bit_kind_size,2) ) - - if(core_tc_op)then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - normal_two_body_bi_orth_aa_bb = 0.d0 - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1 , n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1 , n_act_orb - p2 = list_act(pp2) - if(h1h2 - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - endif - normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa - enddo - enddo - enddo - enddo - - deallocate( key_i_core ) - deallocate( occ ) - -END_PROVIDER - -! --- - subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) BEGIN_DOC @@ -388,3 +224,199 @@ end ! --- +BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + + print*,' Providing no_aba_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , 0.d0, tmp_3d, mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp, n_points_final_grid, tmp2, n_points_final_grid & + , 1.d0, no_aba_contraction(p2,h2,1,1), mo_num*mo_num) + + enddo ! p1 + enddo ! h1 + enddo ! i + + + double precision :: integral, int_direct, int_exc_13, int_exc_12 + + ! TODO + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + enddo + endif + + ! --- + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + + !$OMP PARALLEL DO PRIVATE(h1,h2,p1,p2) + do h1 = 1, mo_num + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (no_aba_contraction(p2,h2,p1,h1) + no_aba_contraction(p1,h1,p2,h2)) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + + diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f new file mode 100644 index 00000000..553cafdb --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -0,0 +1,390 @@ + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordering of the three body interaction on the HF density + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + + integer :: i, h1, p1, h2, p2 + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + + print*,' Providing normal_two_body_bi_orth_old ...' + call wall_time(wall0) + + PROVIDE N_int + + if(read_tc_norm_ord) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="read") + read(11) normal_two_body_bi_orth_old + close(11) + + else + + PROVIDE N_int + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth_old = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth_old) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations + ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant + + + ! opposite spin double excitations : s1 /= s2 + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + + ! same spin double excitations : s1 == s2 + if(h1h2 + ! same spin double excitations with same spin contributions + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + normal_two_body_bi_orth_old(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate( occ ) + deallocate( key_i_core ) + endif + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth_old + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(wall1) + print*,' Wall time for normal_two_body_bi_orth_old ', wall1-wall0 + +END_PROVIDER + +! --- + +subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii, i + double precision :: int_direct, int_exc_12, int_exc_13, integral + + !!!! double alpha/beta + hthree = 0.d0 + + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) + enddo + + do ii = Ne(2) + 1, Ne(1) ! purely open-shell part + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + enddo + + return +end + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: h1, p1, h2, p2, i + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision :: hthree + + PROVIDE N_int + + allocate( key_i_core(N_int,2) ) + allocate( occ(N_int*bit_kind_size,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth_ab = 0.d0 + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) + + normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree + enddo + enddo + enddo + enddo + + deallocate( key_i_core ) + deallocate( occ ) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] + + BEGIN_DOC + ! Normal ordered two-body sector of the three-body terms for same spin double excitations + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i,ii,j,h1,p1,h2,p2 + integer :: hh1,hh2,pp1,pp2 + integer :: Ne(2) + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision :: hthree_aab, hthree_aaa + + PROVIDE N_int + + allocate( key_i_core(N_int,2) ) + allocate( occ(N_int*bit_kind_size,2) ) + + if(core_tc_op)then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + normal_two_body_bi_orth_aa_bb = 0.d0 + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1 , n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1 , n_act_orb + p2 = list_act(pp2) + if(h1h2 + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa + enddo + enddo + enddo + enddo + + deallocate( key_i_core ) + deallocate( occ ) + +END_PROVIDER + +! --- + +subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + BEGIN_DOC + ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii,i + double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 + double precision :: integral,int_exc_l,int_exc_ll + + hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll= -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12= -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13= -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23= -1.d0 * integral + + hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) + enddo + + do ii = Ne(2)+1,Ne(1) ! purely open-shell part + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23 = -1.d0 * integral + + hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + enddo + + return +end + +! --- + +subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii, i + double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 + double precision :: integral, int_exc_l, int_exc_ll + + hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) + int_exc_23= -1.d0 * integral + + hthree += 1.d0 * int_direct - int_exc_23 + enddo + + return +end + +! --- + From 471283634919dd134e294aa71c0bac0a37d4872c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 11:17:08 +0200 Subject: [PATCH 56/79] normal ordering: aba-DGEMM OK --- src/tc_bi_ortho/normal_ordered.irp.f | 495 ++++++++++++++++++------- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 48 ++- 2 files changed, 411 insertions(+), 132 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index a092762b..59e78b92 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -22,8 +22,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing normal_two_body_bi_orth ...' call wall_time(wall0) - PROVIDE N_int - if(read_tc_norm_ord) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") @@ -48,12 +46,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif ! opposite spin double excitations : s1 /= s2 - normal_two_body_bi_orth(:,:,:,:) = no_aba_contraction(:,:,:,:) + PROVIDE no_aba_contraction - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & + !$OMP no_aba_contraction) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) @@ -97,7 +96,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aab + hthree_aaa) + normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + 0.5d0*(hthree_aab + hthree_aaa) enddo enddo enddo @@ -124,103 +123,103 @@ END_PROVIDER ! --- -subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - BEGIN_DOC - ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii,i - double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 - double precision :: integral,int_exc_l,int_exc_ll - - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) - int_exc_l = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) - int_exc_ll= -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12= -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13= -1.d0 * integral - - call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) - int_exc_23= -1.d0 * integral - - hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) - enddo - - do ii = Ne(2)+1,Ne(1) ! purely open-shell part - i = occ(ii,1) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) - int_exc_l = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) - int_exc_ll = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) - int_exc_23 = -1.d0 * integral - - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) - enddo - - return -end +!subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) +! +! BEGIN_DOC +! ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 +! END_DOC +! +! use bitmasks ! you need to include the bitmasks_module.f90 features +! +! implicit none +! integer, intent(in) :: Nint, h1, h2, p1, p2 +! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) +! double precision, intent(out) :: hthree +! integer :: ii,i +! double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 +! double precision :: integral,int_exc_l,int_exc_ll +! +! hthree = 0.d0 +! do ii = 1, Ne(2) ! purely closed shell part +! i = occ(ii,2) +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) +! int_exc_l = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) +! int_exc_ll= -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12= -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13= -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) +! int_exc_23= -1.d0 * integral +! +! hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) +! enddo +! +! do ii = Ne(2)+1,Ne(1) ! purely open-shell part +! i = occ(ii,1) +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) +! int_exc_l = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) +! int_exc_ll = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) +! int_exc_23 = -1.d0 * integral +! +! hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) +! enddo +! +! return +!end ! --- -subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii, i - double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 - double precision :: integral, int_exc_l, int_exc_ll - - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) - int_exc_23= -1.d0 * integral - - hthree += 1.d0 * int_direct - int_exc_23 - enddo - - return -end +!subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) +! +! use bitmasks ! you need to include the bitmasks_module.f90 features +! +! implicit none +! integer, intent(in) :: Nint, h1, h2, p1, p2 +! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) +! double precision, intent(out) :: hthree +! integer :: ii, i +! double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 +! double precision :: integral, int_exc_l, int_exc_ll +! +! hthree = 0.d0 +! do ii = 1, Ne(2) ! purely closed shell part +! i = occ(ii,2) +! +! call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) +! int_exc_23= -1.d0 * integral +! +! hthree += 1.d0 * int_direct - int_exc_23 +! enddo +! +! return +!end ! --- @@ -264,6 +263,10 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ allocate(tmpvec_1(n_points_final_grid,3)) allocate(tmpvec_2(n_points_final_grid,3)) + double precision, allocatable :: tmp_2d(:,:) + allocate(tmp_2d(mo_num,mo_num)) + + ! purely closed shell part do ii = 1, Ne(2) i = occ(ii,2) @@ -313,9 +316,10 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & - , 0.d0, tmp_3d, mo_num) + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num @@ -364,38 +368,163 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp, n_points_final_grid, tmp2, n_points_final_grid & - , 1.d0, no_aba_contraction(p2,h2,1,1), mo_num*mo_num) + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 enddo ! i - double precision :: integral, int_direct, int_exc_13, int_exc_12 - ! TODO + + + + + + ! purely open-shell part if(Ne(2) < Ne(1)) then - do ii = Ne(2) + 1, Ne(1) i = occ(ii,1) - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral + do h1 = 1, mo_num - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) - enddo + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i endif - ! --- + + + + + + + + + + + + + + + deallocate(tmp_3d) deallocate(tmp1, tmp2) @@ -403,17 +532,121 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpvec_1, tmpvec_2) - !$OMP PARALLEL DO PRIVATE(h1,h2,p1,p2) - do h1 = 1, mo_num - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (no_aba_contraction(p2,h2,p1,h1) + no_aba_contraction(p1,h1,p2,h2)) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO + + + + + + + + no_aba_contraction = -0.5d0 * no_aba_contraction + call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (tmp_4d(p2,h2,p1,h1) + tmp_4d(p1,h1,p2,h2)) +! enddo +! enddo +! enddo +! enddo + + + ! --- + + double precision :: integral, int_direct, int_exc_13, int_exc_12 + +! no_aba_contraction = 0.d0 +! +! ! purely closed shell part +! do ii = 1, Ne(2) +! i = occ(ii,1) +! +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & +! !$OMP SHARED (mo_num, i, no_aba_contraction) +! !$OMP DO SCHEDULE (static) +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12 = -1.d0 * integral +! +! !no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! enddo + +! ! purely open-shell part +! if(Ne(2) < Ne(1)) then +! +! do ii = Ne(2) + 1, Ne(1) +! i = occ(ii,1) +! +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & +! !$OMP SHARED (mo_num, i, no_aba_contraction) +! !$OMP DO SCHEDULE (static) +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12 = -1.d0 * integral +! +! no_aba_contraction(p2,h2,p1,h1) += 0.5d0 * int_direct - 0.25d0 * (int_exc_13 + int_exc_12) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! enddo +! endif + + ! --- + +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (h1, h2, p1, p2, integral) & +! !$OMP SHARED (mo_num, N_int,Ne, occ, no_aba_contraction) +! !$OMP DO SCHEDULE (static) +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, integral) +! no_aba_contraction(p2,h2,p1,h1) = 0.5d0 * integral +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL + + END_PROVIDER diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index df86ea65..33b5c5aa 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -11,12 +11,14 @@ program tc_bi_ortho touch read_wf touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call test_h_u0 +! call test_h_u0 ! call test_slater_tc_opt ! call timing_tot ! call timing_diag ! call timing_single ! call timing_double + + call test_no() end subroutine test_h_u0 @@ -252,3 +254,47 @@ subroutine timing_double end +! --- + +subroutine test_no() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing normal_two_body_bi_orth ...' + + thr = 1d-8 + + PROVIDE normal_two_body_bi_orth_old + PROVIDE normal_two_body_bi_orth + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = normal_two_body_bi_orth (l,k,j,i) + ref = normal_two_body_bi_orth_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on normal_two_body_bi_orth' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on normal_two_body_bi_orth = ', accu / dble(mo_num)**4 + + return +end + +! --- + + From 3a5dd05d7eb61dc21b1ec16eba330e3687b54001 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 15:13:18 +0200 Subject: [PATCH 57/79] NO: working on AAB contractions --- src/tc_bi_ortho/normal_ordered.irp.f | 428 +++++++++++---------------- src/utils/util.irp.f | 37 ++- 2 files changed, 217 insertions(+), 248 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 59e78b92..b3c413d3 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -45,14 +45,14 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - ! opposite spin double excitations : s1 /= s2 PROVIDE no_aba_contraction + PROVIDE no_aab_contraction !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & - !$OMP no_aba_contraction) + !$OMP no_aba_contraction,no_aab_contraction) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) @@ -72,9 +72,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ ! same spin double excitations : s1 == s2 if((h1 < h2) .and. (p1 > p2)) then - ! with opposite spin contributions - call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2 - ! same spin double excitations with same spin contributions if(Ne(2) .ge. 3) then call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 @@ -84,9 +81,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2) .ge. 3) then ! same spin double excitations with same spin contributions call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) @@ -96,7 +90,9 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif - normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + 0.5d0*(hthree_aab + hthree_aaa) + normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) & + + no_aab_contraction(p2,h2,p1,h1) & + + 0.5d0 * hthree_aaa enddo enddo enddo @@ -123,106 +119,6 @@ END_PROVIDER ! --- -!subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) -! -! BEGIN_DOC -! ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 -! END_DOC -! -! use bitmasks ! you need to include the bitmasks_module.f90 features -! -! implicit none -! integer, intent(in) :: Nint, h1, h2, p1, p2 -! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) -! double precision, intent(out) :: hthree -! integer :: ii,i -! double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 -! double precision :: integral,int_exc_l,int_exc_ll -! -! hthree = 0.d0 -! do ii = 1, Ne(2) ! purely closed shell part -! i = occ(ii,2) -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) -! int_exc_l = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) -! int_exc_ll= -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12= -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13= -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) -! int_exc_23= -1.d0 * integral -! -! hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) -! enddo -! -! do ii = Ne(2)+1,Ne(1) ! purely open-shell part -! i = occ(ii,1) -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) -! int_exc_l = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) -! int_exc_ll = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) -! int_exc_23 = -1.d0 * integral -! -! hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) -! enddo -! -! return -!end - -! --- - -!subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) -! -! use bitmasks ! you need to include the bitmasks_module.f90 features -! -! implicit none -! integer, intent(in) :: Nint, h1, h2, p1, p2 -! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) -! double precision, intent(out) :: hthree -! integer :: ii, i -! double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 -! double precision :: integral, int_exc_l, int_exc_ll -! -! hthree = 0.d0 -! do ii = 1, Ne(2) ! purely closed shell part -! i = occ(ii,2) -! -! call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) -! int_exc_23= -1.d0 * integral -! -! hthree += 1.d0 * int_direct - int_exc_23 -! enddo -! -! return -!end - -! --- - BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] use bitmasks ! you need to include the bitmasks_module.f90 features @@ -236,6 +132,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ double precision, allocatable :: tmp_3d(:,:,:) double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) print*,' Providing no_aba_contraction ...' call wall_time(wall0) @@ -262,8 +159,6 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ allocate(tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) allocate(tmpvec_2(n_points_final_grid,3)) - - double precision, allocatable :: tmp_2d(:,:) allocate(tmp_2d(mo_num,mo_num)) @@ -386,13 +281,6 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ enddo ! i - - - - - - - ! purely open-shell part if(Ne(2) < Ne(1)) then do ii = Ne(2) + 1, Ne(1) @@ -510,146 +398,192 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ enddo !i endif - - - - - - - - - - - - - - - - deallocate(tmp_3d) deallocate(tmp1, tmp2) deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) - - - - - - - - no_aba_contraction = -0.5d0 * no_aba_contraction call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (tmp_4d(p2,h2,p1,h1) + tmp_4d(p1,h1,p2,h2)) -! enddo -! enddo -! enddo -! enddo - - - ! --- - - double precision :: integral, int_direct, int_exc_13, int_exc_12 - -! no_aba_contraction = 0.d0 -! -! ! purely closed shell part -! do ii = 1, Ne(2) -! i = occ(ii,1) -! -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & -! !$OMP SHARED (mo_num, i, no_aba_contraction) -! !$OMP DO SCHEDULE (static) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12 = -1.d0 * integral -! -! !no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL -! enddo - -! ! purely open-shell part -! if(Ne(2) < Ne(1)) then -! -! do ii = Ne(2) + 1, Ne(1) -! i = occ(ii,1) -! -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & -! !$OMP SHARED (mo_num, i, no_aba_contraction) -! !$OMP DO SCHEDULE (static) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12 = -1.d0 * integral -! -! no_aba_contraction(p2,h2,p1,h1) += 0.5d0 * int_direct - 0.25d0 * (int_exc_13 + int_exc_12) -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL -! enddo -! endif - - ! --- - -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (h1, h2, p1, p2, integral) & -! !$OMP SHARED (mo_num, N_int,Ne, occ, no_aba_contraction) -! !$OMP DO SCHEDULE (static) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, integral) -! no_aba_contraction(p2,h2,p1,h1) = 0.5d0 * integral -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL - - + call wall_time(wall1) + print*,' Wall time for no_aba_contraction', wall1-wall0 END_PROVIDER ! --- +BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpvec_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + no_aab_contraction = 0.5d0 * no_aab_contraction + call sub_A_At(no_aab_contraction(1,1,1,1), mo_num*mo_num) + + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction', wall1-wall0 + + +END_PROVIDER + +! --- diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index aba99c2b..a9f1a438 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -490,7 +490,7 @@ end subroutine check_sym subroutine sum_A_At(A, N) !BEGIN_DOC - ! useful for symmetrizing a tensor without a temporary tensor + ! add a tensor with its transpose without a temporary tensor !END_DOC implicit none @@ -521,3 +521,38 @@ subroutine sum_A_At(A, N) end +! --- + +subroutine sub_A_At(A, N) + + !BEGIN_DOC + ! substruct a tensor with its transpose without a temporary tensor + !END_DOC + + implicit none + integer, intent(in) :: N + double precision, intent(inout) :: A(N,N) + integer :: i, j + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j) & + !$OMP SHARED (A, N) + !$OMP DO + do j = 1, N + do i = j, N + A(i,j) -= A(j,i) + enddo + enddo + !$OMP END DO + + !$OMP DO + do j = 2, N + do i = 1, j-1 + A(i,j) = -A(j,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end From b0da0ac04d49b3fbbbe0eb9649b0b6da87cce6d2 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 16:08:46 +0200 Subject: [PATCH 58/79] normal ordering: aab-DGEMM OK --- src/tc_bi_ortho/normal_ordered.irp.f | 73 +++++++++++++++++++--------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index b3c413d3..3a1e79fd 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -230,11 +230,11 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid @@ -398,7 +398,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ enddo !i endif - deallocate(tmp_3d) + deallocate(tmp_2d, tmp_3d) deallocate(tmp1, tmp2) deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) @@ -446,12 +446,12 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif + allocate(tmp_2d(mo_num,mo_num)) allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) allocate(tmpval_1(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmp_2d(mo_num,mo_num)) ! purely closed shell part @@ -471,10 +471,10 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP tmpval_1, tmpvec_1) !$OMP DO do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) enddo !$OMP END DO !$OMP END PARALLEL @@ -515,17 +515,17 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) enddo !$OMP END DO !$OMP END PARALLEL @@ -567,9 +567,38 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpval_1) deallocate(tmpvec_1) - no_aab_contraction = 0.5d0 * no_aab_contraction - call sub_A_At(no_aab_contraction(1,1,1,1), mo_num*mo_num) + no_aab_contraction = -0.5d0 * no_aab_contraction + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO do h1 = 1, mo_num-1 do h2 = h1+1, mo_num do p1 = 2, mo_num @@ -579,11 +608,11 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ enddo enddo enddo + !$OMP END PARALLEL call wall_time(wall1) print*,' Wall time for no_aab_contraction', wall1-wall0 - END_PROVIDER ! --- From aafca191f1fe271575f80f5d16eb80587290213c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 20:59:34 +0200 Subject: [PATCH 59/79] normal ordering: aaa-DGEMM OK --- src/tc_bi_ortho/normal_ordered.irp.f | 593 +++++++++++++++++++++-- src/tc_bi_ortho/normal_ordered_old.irp.f | 4 +- 2 files changed, 568 insertions(+), 29 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 3a1e79fd..fea229c9 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -47,12 +47,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ PROVIDE no_aba_contraction PROVIDE no_aab_contraction + PROVIDE no_aaa_contraction !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & - !$OMP no_aba_contraction,no_aab_contraction) + !$OMP no_aba_contraction, no_aab_contraction, no_aaa_contraction) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) @@ -66,33 +67,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ do pp2 = 1, n_act_orb p2 = list_act(pp2) - ! all contributions from the 3-e terms to the double excitations - ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - - ! same spin double excitations : s1 == s2 - if((h1 < h2) .and. (p1 > p2)) then - - ! same spin double excitations with same spin contributions - if(Ne(2) .ge. 3) then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - - else - - if(Ne(2) .ge. 3) then - ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - - endif - - normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) & - + no_aab_contraction(p2,h2,p1,h1) & - + 0.5d0 * hthree_aaa + normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + no_aab_contraction(p2,h2,p1,h1) + no_aaa_contraction(p2,h2,p1,h1) enddo enddo enddo @@ -616,3 +591,565 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ END_PROVIDER ! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! + ! if: + ! h1 < h2 + ! p1 > p2 + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! else: + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! + ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) + ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > + ! + < p2 p1 i | i h2 h1 > + ! + < p1 i p2 | i h2 h1 > ] + ! + ! + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2, tmp3) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + + no_aaa_contraction = -0.5d0 * no_aaa_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction', wall1-wall0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f index 553cafdb..417580dd 100644 --- a/src/tc_bi_ortho/normal_ordered_old.irp.f +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -89,6 +89,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, hthree_aaa = 0.d0 endif endif + normal_two_body_bi_orth_old(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) enddo enddo @@ -350,7 +351,8 @@ subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) int_exc_23 = -1.d0 * integral - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + !hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + hthree += 0.5d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) enddo return From 4b9b2a25603cda0d7687938e13384cb53877b9a9 Mon Sep 17 00:00:00 2001 From: ydamour Date: Thu, 8 Jun 2023 10:49:06 +0200 Subject: [PATCH 60/79] update ROHF F matrix with gamess parametrization --- src/scf_utils/fock_matrix.irp.f | 194 +++++++++++++++++++++++--------- 1 file changed, 138 insertions(+), 56 deletions(-) diff --git a/src/scf_utils/fock_matrix.irp.f b/src/scf_utils/fock_matrix.irp.f index 61633d3b..1942e542 100644 --- a/src/scf_utils/fock_matrix.irp.f +++ b/src/scf_utils/fock_matrix.irp.f @@ -5,6 +5,90 @@ ! Fock matrix on the MO basis. ! For open shells, the ROHF Fock Matrix is :: ! + ! | Rcc | F^b | Fcv | + ! |-----------------------| + ! | F^b | Roo | F^a | + ! |-----------------------| + ! | Fcv | F^a | Rvv | + ! + ! C: Core, O: Open, V: Virtual + ! + ! Rcc = Acc Fcc^a + Bcc Fcc^b + ! Roo = Aoo Foo^a + Boo Foo^b + ! Rvv = Avv Fvv^a + Bvv Fvv^b + ! Fcv = (F^a + F^b)/2 + ! + ! F^a: Fock matrix alpha (MO), F^b: Fock matrix beta (MO) + ! A,B: Coupling parameters + ! + ! J. Chem. Phys. 133, 141102 (2010), https://doi.org/10.1063/1.3503173 + ! Coupling parameters from J. Chem. Phys. 125, 204110 (2006); https://doi.org/10.1063/1.2393223. + ! cc oo vv + ! A -0.5 0.5 1.5 + ! B 1.5 0.5 -0.5 + ! + END_DOC + integer :: i,j,n + if (elec_alpha_num == elec_beta_num) then + Fock_matrix_mo = Fock_matrix_mo_alpha + else + ! Core + do j = 1, elec_beta_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = - 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 1.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = fock_matrix_mo_beta(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + enddo + ! Open + do j = elec_beta_num+1, elec_alpha_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = fock_matrix_mo_alpha(i,j) + enddo + enddo + ! Virtual + do j = elec_alpha_num+1, mo_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = fock_matrix_mo_alpha(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = 1.5d0 * fock_matrix_mo_alpha(i,j) & + - 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + enddo + endif + + ! Old + ! BEGIN_DOC + ! Fock matrix on the MO basis. + ! For open shells, the ROHF Fock Matrix is :: + ! ! | F-K | F + K/2 | F | ! |---------------------------------| ! | F + K/2 | F | F - K/2 | @@ -16,64 +100,64 @@ ! ! K = Fb - Fa ! - END_DOC - integer :: i,j,n - if (elec_alpha_num == elec_beta_num) then - Fock_matrix_mo = Fock_matrix_mo_alpha - else + ! END_DOC + !integer :: i,j,n + !if (elec_alpha_num == elec_beta_num) then + ! Fock_matrix_mo = Fock_matrix_mo_alpha + !else - do j=1,elec_beta_num - ! F-K - do i=1,elec_beta_num !CC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F+K/2 - do i=elec_beta_num+1,elec_alpha_num !CA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F - do i=elec_alpha_num+1, mo_num !CV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - enddo + ! do j=1,elec_beta_num + ! ! F-K + ! do i=1,elec_beta_num !CC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F+K/2 + ! do i=elec_beta_num+1,elec_alpha_num !CA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F + ! do i=elec_alpha_num+1, mo_num !CV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! enddo - do j=elec_beta_num+1,elec_alpha_num - ! F+K/2 - do i=1,elec_beta_num !AC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F - do i=elec_beta_num+1,elec_alpha_num !AA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_alpha_num+1, mo_num !AV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - enddo + ! do j=elec_beta_num+1,elec_alpha_num + ! ! F+K/2 + ! do i=1,elec_beta_num !AC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F + ! do i=elec_beta_num+1,elec_alpha_num !AA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! ! F-K/2 + ! do i=elec_alpha_num+1, mo_num !AV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! enddo - do j=elec_alpha_num+1, mo_num - ! F - do i=1,elec_beta_num !VC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_beta_num+1,elec_alpha_num !VA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F+K - do i=elec_alpha_num+1,mo_num !VV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) & - + (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - enddo + ! do j=elec_alpha_num+1, mo_num + ! ! F + ! do i=1,elec_beta_num !VC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! ! F-K/2 + ! do i=elec_beta_num+1,elec_alpha_num !VA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F+K + ! do i=elec_alpha_num+1,mo_num !VV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) & + ! + (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! enddo - endif + !endif do i = 1, mo_num Fock_matrix_diag_mo(i) = Fock_matrix_mo(i,i) @@ -115,8 +199,6 @@ END_PROVIDER - - BEGIN_PROVIDER [ double precision, Fock_matrix_mo_alpha, (mo_num,mo_num) ] implicit none BEGIN_DOC From 374a88bc624396370660182f6da3d876934b35b9 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 8 Jun 2023 15:51:52 +0200 Subject: [PATCH 61/79] normal ordering with DGEMM: OK --- src/tc_bi_ortho/normal_ordered.irp.f | 1230 ++++++++--------- .../normal_ordered_contractions.irp.f | 1062 ++++++++++++++ 2 files changed, 1615 insertions(+), 677 deletions(-) create mode 100644 src/tc_bi_ortho/normal_ordered_contractions.irp.f diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index fea229c9..7259c270 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -11,16 +11,15 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ implicit none - integer :: i, h1, p1, h2, p2 + integer :: i, ii, h1, p1, h2, p2, ipoint integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) - double precision :: hthree_aaa, hthree_aab - double precision :: wall0, wall1 + double precision :: wall0, wall1, walli, wallf integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) print*,' Providing normal_two_body_bi_orth ...' - call wall_time(wall0) + call wall_time(walli) if(read_tc_norm_ord) then @@ -30,6 +29,11 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ else + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + double precision, allocatable :: tmp(:,:,:,:) + PROVIDE N_int allocate( occ(N_int*bit_kind_size,2) ) @@ -45,224 +49,33 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - PROVIDE no_aba_contraction - PROVIDE no_aab_contraction - PROVIDE no_aaa_contraction + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & - !$OMP no_aba_contraction, no_aab_contraction, no_aaa_contraction) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb - h1 = list_act(hh1) + ! --- + ! aba contraction - do pp1 = 1, n_act_orb - p1 = list_act(pp1) + print*,' Providing aba_contraction ...' + call wall_time(wall0) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) + tmp = 0.d0 - do pp2 = 1, n_act_orb - p2 = list_act(pp2) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) - normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + no_aab_contraction(p2,h2,p1,h1) + no_aaa_contraction(p2,h2,p1,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - deallocate( occ ) - deallocate( key_i_core ) - endif - - if(write_tc_norm_ord.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") - call ezfio_set_work_empty(.False.) - write(11) normal_two_body_bi_orth - close(11) - call ezfio_set_tc_keywords_io_tc_integ('Read') - endif - - call wall_time(wall1) - print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: Ne(2) - double precision :: wall0, wall1 - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision, allocatable :: tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) - double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) - double precision, allocatable :: tmp_2d(:,:) - - print*,' Providing no_aba_contraction ...' - call wall_time(wall0) - - PROVIDE N_int - - allocate(occ(N_int*bit_kind_size,2)) - allocate(key_i_core(N_int,2)) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpval_2(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmpvec_2(n_points_final_grid,3)) - allocate(tmp_2d(mo_num,mo_num)) - - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) ! to avoid tmp(N^4) - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo ! i - - - ! purely open-shell part - if(Ne(2) < Ne(1)) then - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) - do h1 = 1, mo_num + ! to minimize the number of operations !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint) & @@ -304,29 +117,30 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo !$OMP END PARALLEL DO + ! to avoid tmp(N^4) do p1 = 1, mo_num ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid @@ -355,313 +169,171 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 - enddo !i - endif + enddo ! i - deallocate(tmp_2d, tmp_3d) - deallocate(tmp1, tmp2) - deallocate(tmpval_1, tmpval_2) - deallocate(tmpvec_1, tmpvec_2) + ! purely open-shell part + if(Ne(2) < Ne(1)) then + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) - no_aba_contraction = -0.5d0 * no_aba_contraction - call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + do h1 = 1, mo_num - call wall_time(wall1) - print*,' Wall time for no_aba_contraction', wall1-wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: Ne(2) - double precision :: wall0, wall1 - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision, allocatable :: tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) - double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) - double precision, allocatable :: tmp_2d(:,:) - - print*,' Providing no_aab_contraction ...' - call wall_time(wall0) - - PROVIDE N_int - - allocate(occ(N_int*bit_kind_size,2)) - allocate(key_i_core(N_int,2)) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - allocate(tmp_2d(mo_num,mo_num)) - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpvec_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpvec_1, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END DO + !$OMP END PARALLEL - enddo ! p1 - enddo ! h1 - enddo ! i + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - deallocate(tmp_3d) - deallocate(tmp1, tmp2) - deallocate(tmpval_1) - deallocate(tmpvec_1) + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO - no_aab_contraction = -0.5d0 * no_aab_contraction + do p1 = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aab_contraction, mo_num) + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 1, mo_num - do p2 = p1, mo_num - no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) - !$OMP DO - do h1 = 1, mo_num-1 - do h2 = h1+1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aab_contraction(p2,h2,p1,h1) *= -1.d0 - enddo - enddo - enddo - enddo - !$OMP END PARALLEL + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO - call wall_time(wall1) - print*,' Wall time for no_aab_contraction', wall1-wall0 + enddo ! p1 + enddo ! h1 + enddo !i + endif -END_PROVIDER + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmp_2d) -! --- + tmp = -0.5d0 * tmp + call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) -BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + call wall_time(wall1) + print*,' Wall time for aba_contraction', wall1-wall0 - BEGIN_DOC - ! - ! if: - ! h1 < h2 - ! p1 > p2 - ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] - ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] - ! - ! else: - ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] - ! - ! - ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) - ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > - ! + < p2 p1 i | i h2 h1 > - ! + < p1 i p2 | i h2 h1 > ] - ! - ! - END_DOC + normal_two_body_bi_orth = tmp - use bitmasks ! you need to include the bitmasks_module.f90 features + ! --- + ! aab contraction - implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: Ne(2) - double precision :: wall0, wall1 - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) - double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + print*,' Providing aab_contraction ...' + call wall_time(wall0) - print*,' Providing no_aaa_contraction ...' - call wall_time(wall0) - - PROVIDE N_int - - allocate(occ(N_int*bit_kind_size,2)) - allocate(key_i_core(N_int,2)) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - if(Ne(2) .lt. 3) then - - no_aaa_contraction = 0.d0 - - else + tmp = 0.d0 allocate(tmp_2d(mo_num,mo_num)) allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num)) allocate(tmpval_1(n_points_final_grid)) - allocate(tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmpvec_2(n_points_final_grid,3)) - allocate(tmpvec_3(n_points_final_grid,3)) ! purely closed shell part do ii = 1, Ne(2) @@ -677,21 +349,13 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP SHARED (n_points_final_grid, i, h1, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP tmpval_1, tmpvec_1) !$OMP DO do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo !$OMP END DO !$OMP END PARALLEL @@ -722,39 +386,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_2, tmpvec_2, tmp1) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -763,58 +395,32 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ ! to avoid tmp(N^4) do p1 = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - - tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) enddo !$OMP END DO !$OMP END PARALLEL - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, & - !$OMP mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) !$OMP DO do h2 = 1, mo_num do ipoint = 1, n_points_final_grid - - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) - + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo !$OMP END DO @@ -828,47 +434,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -877,14 +443,85 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ enddo ! h1 enddo ! i + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + tmp = -0.5d0 * tmp - ! purely open-shell part - if(Ne(2) < Ne(1)) then + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aab_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + + ! --- + ! aaa contraction + + if(Ne(2) .ge. 3) then + + print*,' Providing aaa_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) ! to avoid tmp(N^4) do h1 = 1, mo_num @@ -932,16 +569,16 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -964,16 +601,16 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) enddo enddo enddo @@ -1039,15 +676,15 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -1074,82 +711,321 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & , 1.d0, tmp_2d(1,1), mo_num) !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 - enddo !i - endif + enddo ! i - deallocate(tmp_2d, tmp_3d) - deallocate(tmp1, tmp2, tmp3) - deallocate(tmpval_1, tmpval_2) - deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + ! purely open-shell part + if(Ne(2) < Ne(1)) then - no_aaa_contraction = -0.5d0 * no_aaa_contraction + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aaa_contraction, mo_num) + ! to avoid tmp(N^4) + do h1 = 1, mo_num - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 1, mo_num - do p2 = p1, mo_num - no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO - !$OMP DO - do h1 = 1, mo_num-1 - do h2 = h1+1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL + !$OMP END PARALLEL + call wall_time(wallf) + print*,' Wall time for aaa_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + endif ! Ne(2) .ge. 3 + + deallocate(tmp) + + endif ! read_tc_norm_ord + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif - call wall_time(wall1) - print*,' Wall time for no_aaa_contraction', wall1-wall0 + call wall_time(wallf) + print*,' Wall time for normal_two_body_bi_orth ', wallf-walli -END_PROVIDER +END_PROVIDER ! --- + diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f new file mode 100644 index 00000000..855cfd17 --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -0,0 +1,1062 @@ + +! --- + +BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aba_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + no_aba_contraction = -0.5d0 * no_aba_contraction + call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for no_aba_contraction', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpvec_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + no_aab_contraction = -0.5d0 * no_aab_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! + ! if: + ! h1 < h2 + ! p1 > p2 + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! else: + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! + ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) + ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > + ! + < p2 p1 i | i h2 h1 > + ! + < p1 i p2 | i h2 h1 > ] + ! + ! + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2, tmp3) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + + no_aaa_contraction = -0.5d0 * no_aaa_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction', wall1-wall0 + +END_PROVIDER + +! --- From ee06ddf85e2b3fc83faa25515e80b262a2932aa7 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 8 Jun 2023 15:59:14 +0200 Subject: [PATCH 62/79] free two (3xN_gridxMOxMO) tables in TC-CIPSI --- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index f9bda058..3e6f229b 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -63,7 +63,9 @@ subroutine run_cipsi_tc call provide_all_three_ints_bi_ortho() endif endif - ! --- + + FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp + write(json_unit,json_array_open_fmt) 'fci_tc' if (do_pt2) then @@ -78,13 +80,16 @@ subroutine run_cipsi_tc call json_close else + PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks + if(elec_alpha_num+elec_beta_num.ge.3)then if(three_body_h_tc)then call provide_all_three_ints_bi_ortho endif endif - ! --- + + FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp call run_slave_cipsi From 22e1dcd1c4a5cdce7159d926443968b83dfb271c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 9 Jun 2023 21:32:13 +0200 Subject: [PATCH 63/79] 4-idx tensors: DGEMM with tmp(N3) added --- src/bi_ort_ints/bi_ort_ints.irp.f | 192 +++++--- src/bi_ort_ints/three_body_ijmk.irp.f | 564 +++++++---------------- src/bi_ort_ints/three_body_ijmk_n4.irp.f | 484 +++++++++++++++++++ 3 files changed, 771 insertions(+), 469 deletions(-) create mode 100644 src/bi_ort_ints/three_body_ijmk_n4.irp.f diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 75af8fb1..7f90c6f3 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -18,10 +18,11 @@ program bi_ort_ints ! call test_3e ! call test_5idx ! call test_5idx2 - !call test_4idx - call test_4idx2() - call test_5idx2 - call test_5idx + call test_4idx() + call test_4idx_n4() + !call test_4idx2() + !call test_5idx2 + !call test_5idx end subroutine test_5idx2 @@ -167,13 +168,138 @@ end ! --- +subroutine test_4idx_n4() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + thr = 1d-10 + + PROVIDE three_e_4_idx_direct_bi_ort_old + PROVIDE three_e_4_idx_direct_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_direct_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_direct_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_direct_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch13_bi_ort_old + PROVIDE three_e_4_idx_exch13_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch13_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch13_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch13_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_cycle_1_bi_ort_old + PROVIDE three_e_4_idx_cycle_1_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_cycle_1_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_cycle_1_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_cycle_1_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch23_bi_ort_old + PROVIDE three_e_4_idx_exch23_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch23_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch23_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch23_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + return +end + +! --- + subroutine test_4idx() implicit none integer :: i, j, k, l double precision :: accu, contrib, new, ref, thr - thr = 1d-5 + thr = 1d-10 PROVIDE three_e_4_idx_direct_bi_ort_old PROVIDE three_e_4_idx_direct_bi_ort @@ -231,34 +357,6 @@ subroutine test_4idx() ! --- -! PROVIDE three_e_4_idx_exch12_bi_ort_old -! PROVIDE three_e_4_idx_exch12_bi_ort -! -! accu = 0.d0 -! do i = 1, mo_num -! do j = 1, mo_num -! do k = 1, mo_num -! do l = 1, mo_num -! -! new = three_e_4_idx_exch12_bi_ort (l,k,j,i) -! ref = three_e_4_idx_exch12_bi_ort_old(l,k,j,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. thr) then -! print*, ' problem in three_e_4_idx_exch12_bi_ort' -! print*, l, k, j, i -! print*, ref, new, contrib -! stop -! endif -! -! enddo -! enddo -! enddo -! enddo -! print*, ' accu on three_e_4_idx_exch12_bi_ort = ', accu / dble(mo_num)**4 - - ! --- - PROVIDE three_e_4_idx_cycle_1_bi_ort_old PROVIDE three_e_4_idx_cycle_1_bi_ort @@ -287,34 +385,6 @@ subroutine test_4idx() ! --- -! PROVIDE three_e_4_idx_cycle_2_bi_ort_old -! PROVIDE three_e_4_idx_cycle_2_bi_ort -! -! accu = 0.d0 -! do i = 1, mo_num -! do j = 1, mo_num -! do k = 1, mo_num -! do l = 1, mo_num -! -! new = three_e_4_idx_cycle_2_bi_ort (l,k,j,i) -! ref = three_e_4_idx_cycle_2_bi_ort_old(l,k,j,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. thr) then -! print*, ' problem in three_e_4_idx_cycle_2_bi_ort' -! print*, l, k, j, i -! print*, ref, new, contrib -! stop -! endif -! -! enddo -! enddo -! enddo -! enddo -! print*, ' accu on three_e_4_idx_cycle_2_bi_ort = ', accu / dble(mo_num)**4 - - ! --- - PROVIDE three_e_4_idx_exch23_bi_ort_old PROVIDE three_e_4_idx_exch23_bi_ort diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index ee7e88ef..0d466f9f 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -3,9 +3,8 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] -!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)] -!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -13,28 +12,25 @@ ! ! three_e_4_idx_direct_bi_ort (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO ! three_e_4_idx_exch13_bi_ort (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! = three_e_4_idx_exch13_bi_ort (j,m,k,i) + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) ! ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! ! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki ! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm ! END_DOC implicit none - integer :: ipoint, i, j, k, l, m + integer :: ipoint, i, j, k, m, n double precision :: wall1, wall0 - double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) - double precision, allocatable :: tmp_4d(:,:,:,:) - double precision, allocatable :: tmp4(:,:,:) - double precision, allocatable :: tmp5(:,:) - double precision, allocatable :: tmp_3d(:,:,:) + double precision :: tmp_loc_1, tmp_loc_2 + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:,:) + double precision, allocatable :: tmp_2d(:,:) + double precision, allocatable :: tmp_aux_1(:,:,:), tmp_aux_2(:,:) print *, ' Providing the three_e_4_idx_bi_ort ...' call wall_time(wall0) @@ -42,324 +38,188 @@ provide mos_r_in_r_array_transp mos_l_in_r_array_transp - allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) - - allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) + ! to reduce the number of operations + allocate(tmp_aux_1(n_points_final_grid,4,mo_num)) + allocate(tmp_aux_2(n_points_final_grid,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, l, ipoint) & + !$OMP PRIVATE (n, ipoint) & !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1, tmp2, tmp3) - !$OMP DO COLLAPSE(2) - do i = 1, mo_num - do l = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) - tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) - tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) - - tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i) - tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) - tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) - - tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - deallocate(tmp1) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_4d(m,i,j,k) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, l, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1) - !$OMP DO COLLAPSE(2) - do i = 1, mo_num - do l = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - deallocate(tmp2) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_4d(m,k,j,i) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1, 3*n_points_final_grid, tmp3, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - deallocate(tmp3) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, l, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1) - !$OMP DO COLLAPSE(2) - do i = 1, mo_num - do l = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - deallocate(tmp1) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_direct_bi_ort(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - deallocate(tmp_4d) - - - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp5(n_points_final_grid,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, & - !$OMP tmp5) + !$OMP tmp_aux_1, tmp_aux_2) !$OMP DO - do i = 1, mo_num + do n = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmp_aux_1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * final_weight_at_r_vector(ipoint) + tmp_aux_1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * final_weight_at_r_vector(ipoint) + tmp_aux_1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * final_weight_at_r_vector(ipoint) + tmp_aux_1(ipoint,4,n) = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,n) * final_weight_at_r_vector(ipoint) + + tmp_aux_2(ipoint,n) = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,n) enddo enddo !$OMP END DO !$OMP END PARALLEL + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,4,mo_num)) + allocate(tmp2(n_points_final_grid,4,mo_num)) - allocate(tmp4(n_points_final_grid,mo_num,mo_num)) - - do m = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, m, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp4) - !$OMP DO COLLAPSE(2) + ! loops approach to break the O(N^4) scaling in memory + do k = 1, mo_num do i = 1, mo_num - do k = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & - + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & - + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) - enddo + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & + !$OMP SHARED (mo_num, n_points_final_grid, i, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_aux_2, tmp1) + !$OMP DO + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) + tmp_loc_2 = tmp_aux_2(ipoint,n) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,i) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,i) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,i) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,n) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,n,n) * int2_grad1_u12_bimo_t(ipoint,3,k,i) + enddo enddo !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & - , tmp5, n_points_final_grid, tmp4, n_points_final_grid & - , 0.d0, tmp_3d, mo_num) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp_aux_1(1,1,1), 4*n_points_final_grid, tmp1(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) - !$OMP PARALLEL DO PRIVATE(i,j,k) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_3d(j,k,i) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, k, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, m, & - !$OMP mos_l_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp4) - !$OMP DO COLLAPSE(2) - do k = 1, mo_num - do j = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & - * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & - + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & - + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & - , 0.d0, tmp_3d, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(m,j,k,i) - tmp_3d(j,k,i) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - enddo - - deallocate(tmp5) - deallocate(tmp_3d) - - - - do i = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (m, j, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, & - !$OMP mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp4) - !$OMP DO COLLAPSE(2) + !$OMP PARALLEL DO PRIVATE(j,m) do j = 1, mo_num do m = 1, mo_num - do ipoint = 1, n_points_final_grid + three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_2d(m,j) + enddo + enddo + !$OMP END PARALLEL DO - tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & - * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - enddo + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & + !$OMP SHARED (mo_num, n_points_final_grid, i, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2) + !$OMP DO + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) + tmp_loc_2 = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,i) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,n) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,n) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,n) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * int2_grad1_u12_bimo_t(ipoint,1,k,n) & + + int2_grad1_u12_bimo_t(ipoint,2,n,i) * int2_grad1_u12_bimo_t(ipoint,2,k,n) & + + int2_grad1_u12_bimo_t(ipoint,3,n,i) * int2_grad1_u12_bimo_t(ipoint,3,k,n) + + tmp2(ipoint,1,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,n) + tmp2(ipoint,2,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,n) + tmp2(ipoint,3,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,n) + tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,n) enddo enddo !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & - , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & - , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp_aux_1(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) - enddo + !$OMP PARALLEL DO PRIVATE(j,m) + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_2d(m,j) + enddo + enddo + !$OMP END PARALLEL DO - deallocate(tmp4) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + !$OMP PARALLEL DO PRIVATE(j,m) + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,i,k,j) = -tmp_2d(m,j) + enddo + enddo + !$OMP END PARALLEL DO -! !$OMP PARALLEL DO PRIVATE(i,j,k,m) -! do i = 1, mo_num -! do k = 1, mo_num -! do j = 1, mo_num -! do m = 1, mo_num -! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort (j,m,k,i) -! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) -! enddo -! enddo -! enddo -! enddo -! !$OMP END PARALLEL DO + enddo ! i + + do j = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & + !$OMP SHARED (mo_num, n_points_final_grid, j, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2) + !$OMP DO + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,n) + tmp_loc_2 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,j) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,j,n) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,j,n) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,j,n) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,j) * int2_grad1_u12_bimo_t(ipoint,1,j,n) & + + int2_grad1_u12_bimo_t(ipoint,2,n,j) * int2_grad1_u12_bimo_t(ipoint,2,j,n) & + + int2_grad1_u12_bimo_t(ipoint,3,n,j) * int2_grad1_u12_bimo_t(ipoint,3,j,n) + + tmp2(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,k,n) + tmp2(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,k,n) + tmp2(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,k,n) + tmp2(ipoint,4,n) = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(i,m) + do i = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch23_bi_ort(m,j,k,i) = -tmp_2d(m,i) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! j + enddo !k + + deallocate(tmp_2d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp_aux_1) + deallocate(tmp_aux_2) call wall_time(wall1) @@ -370,115 +230,3 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - ! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki - ! - END_DOC - - implicit none - integer :: i, j, k, l, m, ipoint - double precision :: wall1, wall0 - double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:) - double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:) - - print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - - allocate(tmp5(n_points_final_grid,mo_num,mo_num)) - allocate(tmp6(n_points_final_grid,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, l, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp5, tmp6) - !$OMP DO COLLAPSE(2) - do i = 1, mo_num - do l = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) & - + int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) & - + int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l) - - tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 & - , tmp5, n_points_final_grid, tmp6, n_points_final_grid & - , 0.d0, three_e_4_idx_exch23_bi_ort, mo_num*mo_num) - - deallocate(tmp5) - deallocate(tmp6) - - - allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, l, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1) - !$OMP DO COLLAPSE(2) - do i = 1, mo_num - do l = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) - tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) - tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - deallocate(tmp1) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_exch23_bi_ort(m,j,k,i) = three_e_4_idx_exch23_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - deallocate(tmp_4d) - - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - diff --git a/src/bi_ort_ints/three_body_ijmk_n4.irp.f b/src/bi_ort_ints/three_body_ijmk_n4.irp.f new file mode 100644 index 00000000..157b70f4 --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmk_n4.irp.f @@ -0,0 +1,484 @@ + +! --- + + BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_direct_bi_ort_n4 (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch13_bi_ort_n4 (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) + ! three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) + ! + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort_n4 can be directly used to compute Slater rules with a + sign + ! + ! three_e_4_idx_direct_bi_ort_n4 (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki + ! three_e_4_idx_exch13_bi_ort_n4 (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm + ! + END_DOC + + implicit none + integer :: ipoint, i, j, k, l, m + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) + double precision, allocatable :: tmp_4d(:,:,:,:) + double precision, allocatable :: tmp4(:,:,:) + double precision, allocatable :: tmp5(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + + print *, ' Providing the O(N^4) three_e_4_idx_bi_ort ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2, tmp3) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + + tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) + + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1,1), 3*n_points_final_grid, tmp1(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = -tmp_4d(m,i,j,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp2) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) - tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, tmp3(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp3) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp5(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, & + !$OMP tmp5) + !$OMP DO + do i = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + allocate(tmp4(n_points_final_grid,mo_num,mo_num)) + + do m = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & + , tmp5(1,1), n_points_final_grid, tmp4(1,1,1), n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP mos_l_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do k = 1, mo_num + do j = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & + , 0.d0, tmp_3d, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo + + deallocate(tmp5) + deallocate(tmp_3d) + + + + do i = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (m, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do j = 1, mo_num + do m = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & + , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + , 1.d0, three_e_4_idx_cycle_1_bi_ort_n4(1,1,1,i), mo_num*mo_num) + + enddo + + deallocate(tmp4) + + +! !$OMP PARALLEL DO PRIVATE(i,j,k,m) +! do i = 1, mo_num +! do k = 1, mo_num +! do j = 1, mo_num +! do m = 1, mo_num +! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) +! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) +! enddo +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO + + + call wall_time(wall1) + print *, ' wall time for O(N^4) three_e_4_idx_bi_ort', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort_n4 (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort_n4 can be directly used to compute Slater rules with a + sign + ! + ! three_e_4_idx_exch23_bi_ort_n4 (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki + ! + END_DOC + + implicit none + integer :: i, j, k, l, m, ipoint + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:) + double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:) + + print *, ' Providing the O(N^4) three_e_4_idx_exch23_bi_ort_n4 ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + + allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp6(n_points_final_grid,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5, tmp6) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) & + + int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) & + + int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l) + + tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 & + , tmp5(1,1,1), n_points_final_grid, tmp6(1,1,1), n_points_final_grid & + , 0.d0, three_e_4_idx_exch23_bi_ort_n4(1,1,1,1), mo_num*mo_num) + + deallocate(tmp5) + deallocate(tmp6) + + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch23_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch23_bi_ort_n4(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + call wall_time(wall1) + print *, ' wall time for O(N^4) three_e_4_idx_exch23_bi_ort_n4', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + From ba65e672166d5f9f41cebdf28b05f26f3adfef61 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 9 Jun 2023 22:05:55 +0200 Subject: [PATCH 64/79] 4-idx tensors seems to be correct --- src/bi_ort_ints/three_body_ijmk.irp.f | 2 +- src/bi_ort_ints/three_body_ijmk_n4.irp.f | 38 +++++++++++++----------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 0d466f9f..669861b7 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -194,7 +194,7 @@ tmp2(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,k,n) tmp2(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,k,n) tmp2(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,k,n) - tmp2(ipoint,4,n) = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) + tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) enddo enddo !$OMP END DO diff --git a/src/bi_ort_ints/three_body_ijmk_n4.irp.f b/src/bi_ort_ints/three_body_ijmk_n4.irp.f index 157b70f4..e3faeff0 100644 --- a/src/bi_ort_ints/three_body_ijmk_n4.irp.f +++ b/src/bi_ort_ints/three_body_ijmk_n4.irp.f @@ -1,11 +1,11 @@ ! --- - BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] -!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)] -!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -13,11 +13,11 @@ ! ! three_e_4_idx_direct_bi_ort_n4 (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO ! three_e_4_idx_exch13_bi_ort_n4 (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) + ! three_e_4_idx_exch12_bi_ort_n4 (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) ! three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) + ! three_e_4_idx_cycle_2_bi_ort_n4(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) ! ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort_n4 can be directly used to compute Slater rules with a + sign ! @@ -77,6 +77,7 @@ !$OMP END DO !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & , tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) @@ -97,7 +98,6 @@ , tmp3(1,1,1,1), 3*n_points_final_grid, tmp1(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) - deallocate(tmp1) !$OMP PARALLEL DO PRIVATE(i,j,k,m) do i = 1, mo_num @@ -133,10 +133,12 @@ !$OMP END DO !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & , tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + deallocate(tmp2) !$OMP PARALLEL DO PRIVATE(i,j,k,m) @@ -202,7 +204,7 @@ do k = 1, mo_num do j = 1, mo_num do m = 1, mo_num - three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = three_e_4_idx_direct_bi_ort_n4(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) enddo enddo enddo @@ -294,9 +296,9 @@ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & - , 0.d0, tmp_3d, mo_num*mo_num) + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp4(1,1,1), n_points_final_grid, mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k) do i = 1, mo_num @@ -339,8 +341,8 @@ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & - , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & + , tmp4(1,1,1), n_points_final_grid, mos_l_in_r_array_transp(1,1), n_points_final_grid & , 1.d0, three_e_4_idx_cycle_1_bi_ort_n4(1,1,1,i), mo_num*mo_num) enddo @@ -353,8 +355,8 @@ ! do k = 1, mo_num ! do j = 1, mo_num ! do m = 1, mo_num -! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) -! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) +! three_e_4_idx_exch12_bi_ort_n4 (m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) +! three_e_4_idx_cycle_2_bi_ort_n4(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) ! enddo ! enddo ! enddo From 6e31ca280d5a11db7b09c5fa04e2f36a7d11c39f Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 10 Jun 2023 10:42:32 +0200 Subject: [PATCH 65/79] // in Norm_Ord --- .../normal_ordered_contractions.irp.f | 289 +++++++++++++++++- 1 file changed, 280 insertions(+), 9 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f index 855cfd17..6f70516d 100644 --- a/src/tc_bi_ortho/normal_ordered_contractions.irp.f +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] +BEGIN_PROVIDER [ double precision, no_aba_contraction_v0, (mo_num,mo_num,mo_num,mo_num)] use bitmasks ! you need to include the bitmasks_module.f90 features @@ -16,7 +16,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) double precision, allocatable :: tmp_2d(:,:) - print*,' Providing no_aba_contraction ...' + print*,' Providing no_aba_contraction_v0 ...' call wall_time(wall0) PROVIDE N_int @@ -102,7 +102,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -153,7 +153,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -220,7 +220,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -270,7 +270,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -285,11 +285,11 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) - no_aba_contraction = -0.5d0 * no_aba_contraction - call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + no_aba_contraction_v0 = -0.5d0 * no_aba_contraction_v0 + call sum_A_At(no_aba_contraction_v0(1,1,1,1), mo_num*mo_num) call wall_time(wall1) - print*,' Wall time for no_aba_contraction', wall1-wall0 + print*,' Wall time for no_aba_contraction_v0', wall1-wall0 END_PROVIDER @@ -1060,3 +1060,274 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ END_PROVIDER ! --- + +BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aba_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aba_contraction) + + + allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + !$OMP DO + + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + + + allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + no_aba_contraction = -0.5d0 * no_aba_contraction + call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for no_aba_contraction', wall1-wall0 + +END_PROVIDER From d9921922fc00efd0146aa5669219c15bb0c408e9 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 10 Jun 2023 11:24:06 +0200 Subject: [PATCH 66/79] NO aba // ok --- .../normal_ordered_contractions.irp.f | 118 +++++++++--------- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 44 ++++++- 2 files changed, 101 insertions(+), 61 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f index 6f70516d..980181e7 100644 --- a/src/tc_bi_ortho/normal_ordered_contractions.irp.f +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -1104,12 +1104,20 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP no_aba_contraction) - allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + tmp_3d = 0.d0 + tmp_2d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + !$OMP DO do ii = 1, Ne(2) @@ -1147,7 +1155,9 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num + !$OMP CRITICAL no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL enddo enddo enddo @@ -1177,7 +1187,9 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = 1, mo_num do p2 = 1, mo_num + !$OMP CRITICAL no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL enddo enddo @@ -1195,28 +1207,40 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END PARALLEL - allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) - - ! purely open-shell part if(Ne(2) < Ne(1)) then + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aba_contraction) + + Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + Allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + Allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + Allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + Tmp_3d = 0.d0 + Tmp_2d = 0.d0 + Tmp1 = 0.d0 + Tmp2 = 0.d0 + Tmpval_1 = 0.d0 + Tmpval_2 = 0.d0 + Tmpvec_1 = 0.d0 + Tmpvec_2 = 0.d0 + + !$OMP DO + do ii = Ne(2) + 1, Ne(1) i = occ(ii,1) do h1 = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) - !$OMP DO do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) @@ -1227,16 +1251,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) enddo - !$OMP END DO - !$OMP END PARALLEL - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) - !$OMP DO do p1 = 1, mo_num do ipoint = 1, n_points_final_grid tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & @@ -1247,82 +1262,65 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) enddo enddo - !$OMP END DO - !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num + !$OMP CRITICAL no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL enddo enddo enddo - !$OMP END PARALLEL DO do p1 = 1, mo_num - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) enddo - !$OMP END DO - !$OMP END PARALLEL - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO do h2 = 1, mo_num do ipoint = 1, n_points_final_grid tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - !$OMP END DO - !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & , mos_l_in_r_array_transp(1,1), n_points_final_grid & , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) - !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num + !$OMP CRITICAL no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL enddo enddo - !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 enddo !i - endif + !$OMP END DO - deallocate(tmp_2d, tmp_3d) - deallocate(tmp1, tmp2) - deallocate(tmpval_1, tmpval_2) - deallocate(tmpvec_1, tmpvec_2) + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + endif no_aba_contraction = -0.5d0 * no_aba_contraction call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 33b5c5aa..a3cb1692 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -18,7 +18,8 @@ program tc_bi_ortho ! call timing_single ! call timing_double - call test_no() + !call test_no() + call test_no_aba() end subroutine test_h_u0 @@ -297,4 +298,45 @@ end ! --- +subroutine test_no_aba() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing no_aba_contraction ...' + + thr = 1d-8 + + PROVIDE no_aba_contraction_v0 + PROVIDE no_aba_contraction + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_aba_contraction (l,k,j,i) + ref = no_aba_contraction_v0(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on no_aba_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on no_aba_contraction = ', accu / dble(mo_num)**4 + + return +end + +! --- + From 92a72a096840c829d9ae5bb8ec0d683bc62ec0d9 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 10 Jun 2023 11:38:41 +0200 Subject: [PATCH 67/79] no aab // --- .../normal_ordered_contractions.irp.f | 210 +++++++++++++++++- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 43 ++++ 2 files changed, 243 insertions(+), 10 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f index 980181e7..f066c958 100644 --- a/src/tc_bi_ortho/normal_ordered_contractions.irp.f +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -295,7 +295,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] +BEGIN_PROVIDER [ double precision, no_aab_contraction_v0, (mo_num,mo_num,mo_num,mo_num)] use bitmasks ! you need to include the bitmasks_module.f90 features @@ -310,7 +310,7 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) double precision, allocatable :: tmp_2d(:,:) - print*,' Providing no_aab_contraction ...' + print*,' Providing no_aab_contraction_v0 ...' call wall_time(wall0) PROVIDE N_int @@ -387,7 +387,7 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aab_contraction_v0(p2,h2,p1,h1) = no_aab_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -435,7 +435,7 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aab_contraction_v0(p2,h2,p1,h1) = no_aab_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -449,19 +449,19 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpval_1) deallocate(tmpvec_1) - no_aab_contraction = -0.5d0 * no_aab_contraction + no_aab_contraction_v0 = -0.5d0 * no_aab_contraction_v0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aab_contraction, mo_num) + !$OMP SHARED (no_aab_contraction_v0, mo_num) !$OMP DO do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 1, mo_num do p2 = p1, mo_num - no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + no_aab_contraction_v0(p2,h2,p1,h1) -= no_aab_contraction_v0(p1,h2,p2,h1) enddo enddo enddo @@ -473,7 +473,7 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = 1, mo_num do p1 = 2, mo_num do p2 = 1, p1-1 - no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + no_aab_contraction_v0(p2,h2,p1,h1) = -no_aab_contraction_v0(p1,h2,p2,h1) enddo enddo enddo @@ -485,15 +485,16 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = h1+1, mo_num do p1 = 2, mo_num do p2 = 1, p1-1 - no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + no_aab_contraction_v0(p2,h2,p1,h1) *= -1.d0 enddo enddo enddo enddo + !$OMP END DO !$OMP END PARALLEL call wall_time(wall1) - print*,' Wall time for no_aab_contraction', wall1-wall0 + print*,' Wall time for no_aab_contraction_v0', wall1-wall0 END_PROVIDER @@ -1329,3 +1330,192 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ print*,' Wall time for no_aba_contraction', wall1-wall0 END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP tmpval_1, tmpvec_1) & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aab_contraction) + + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpvec_1 = 0.d0 + + !$OMP DO + + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + + !$OMP END DO + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + !$OMP END PARALLEL + + no_aab_contraction = -0.5d0 * no_aab_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction', wall1-wall0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index a3cb1692..4f190407 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -20,6 +20,7 @@ program tc_bi_ortho !call test_no() call test_no_aba() + call test_no_aab() end subroutine test_h_u0 @@ -340,3 +341,45 @@ end ! --- +subroutine test_no_aab() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing no_aab_contraction ...' + + thr = 1d-8 + + PROVIDE no_aab_contraction_v0 + PROVIDE no_aab_contraction + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_aab_contraction (l,k,j,i) + ref = no_aab_contraction_v0(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on no_aab_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on no_aab_contraction = ', accu / dble(mo_num)**4 + + return +end + +! --- + + From caa11f20ea4a9aa812e6bc0c6dcd2faa3e0d485b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 10 Jun 2023 11:56:07 +0200 Subject: [PATCH 68/79] Fixed singles when no beta exc --- scripts/compilation/cache_compile.py | 2 +- src/cipsi/selection.irp.f | 425 ++++----------------------- src/cipsi/selection_old.irp.f | 350 ++++++++++++++++++++++ src/cipsi/selection_singles.irp.f | 356 ++++++++++++++++++++++ src/ezfio_files/00.create.bats | 28 ++ src/fci/40.fci.bats | 113 ++++--- src/hartree_fock/10.hf.bats | 29 ++ src/tools/print_wf.irp.f | 1 + tests/input/h2_1.xyz | 6 + tests/input/h2_3.xyz | 6 + tests/input/h3_2.xyz | 7 + tests/input/h3_4.xyz | 7 + tests/input/h4_1.xyz | 7 + tests/input/h4_3.xyz | 7 + tests/input/h4_5.xyz | 7 + 15 files changed, 947 insertions(+), 404 deletions(-) create mode 100644 src/cipsi/selection_old.irp.f create mode 100644 src/cipsi/selection_singles.irp.f create mode 100644 tests/input/h2_1.xyz create mode 100644 tests/input/h2_3.xyz create mode 100644 tests/input/h3_2.xyz create mode 100644 tests/input/h3_4.xyz create mode 100644 tests/input/h4_1.xyz create mode 100644 tests/input/h4_3.xyz create mode 100644 tests/input/h4_5.xyz diff --git a/scripts/compilation/cache_compile.py b/scripts/compilation/cache_compile.py index 440f6498..473976e7 100755 --- a/scripts/compilation/cache_compile.py +++ b/scripts/compilation/cache_compile.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ Save the .o from a .f90 -and is the .o is asked a second time, retur it +and is the .o is asked a second time, return it Take in argv command like: ifort -g -openmp -I IRPF90_temp/Ezfio_files/ -c IRPF90_temp/Integrals_Monoelec/kin_ao_ints.irp.module.F90 -o IRPF90_temp/Integrals_Monoelec/kin_ao_ints.irp.module.o """ diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 0705d103..b8fa2895 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -88,6 +88,10 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) ) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) ) enddo + if ((subset == 1).and.(sum(hole_mask(:,2)) == 0_bit_kind)) then + ! No beta electron to excite + call select_singles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b) + endif call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b,subset,csubset) deallocate(fock_diag_tmp) end subroutine @@ -142,7 +146,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d use selection_types implicit none BEGIN_DOC -! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted +! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted END_DOC integer, intent(in) :: i_generator, subset, csubset @@ -237,7 +241,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d enddo ! Iterate on 0S alpha, and find betas TQ such that exc_degree <= 4 - ! Remove also contributions < 1.d-20) do j=1,N_det_alpha_unique call get_excitation_degree_spin(psi_det_alpha_unique(1,j), & psi_det_generators(1,1,i_generator), nt, N_int) @@ -480,7 +483,9 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d do s2=s1,2 sp = s1 - if(s1 /= s2) sp = 3 + if(s1 /= s2) then + sp = 3 + endif ib = 1 if(s1 == s2) ib = i1+1 @@ -528,7 +533,10 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) deallocate(banned, bannedOrb,mat) end subroutine -subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) + +BEGIN_TEMPLATE + +subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) use bitmasks use selection_types implicit none @@ -562,7 +570,20 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d s1 = sp s2 = sp end if - call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + + if ($IS_DOUBLE) then + if (h2 == 0) then + print *, 'h2=0 in '//trim(irp_here) + stop + endif + call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + else + if (h2 /= 0) then + print *, 'h2 /= in '//trim(irp_here) + stop + endif + call apply_hole(psi_det_generators(1,1,i_generator), s1, h1, mask, ok, N_int) + endif E_shift = 0.d0 if (h0_type == 'CFG') then @@ -570,12 +591,15 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d E_shift = psi_det_Hii(i_generator) - psi_configuration_Hii(j) endif - do p1=1,mo_num - if(bannedOrb(p1, s1)) cycle + $DO_p1 +! do p1=1,mo_num + + if (bannedOrb(p1, s1)) cycle ib = 1 if(sp /= 3) ib = p1+1 - do p2=ib,mo_num + $DO_p2 + ! do p2=ib,mo_num ! ----- ! /!\ Generating only single excited determinants doesn't work because a @@ -584,9 +608,10 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! detected as already generated when generating in the future with a ! double excitation. ! ----- - - if(bannedOrb(p2, s2)) cycle - if(banned(p1,p2)) cycle + if ($IS_DOUBLE) then + if(bannedOrb(p2, s2)) cycle + if(banned(p1,p2)) cycle + endif if(pseudo_sym)then if(dabs(mat(1, p1, p2)).lt.thresh_sym)then @@ -596,7 +621,11 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d val = maxval(abs(mat(1:N_states, p1, p2))) if( val == 0d0) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + if ($IS_DOUBLE) then + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + else + call apply_particle(mask, s1, p1, det, ok, N_int) + endif if (do_only_cas) then integer, external :: number_of_holes, number_of_particles @@ -797,7 +826,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d case(5) ! Variance selection if (h0_type == 'CFG') then - w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & + w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & / c0_weight(istate) else w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) @@ -857,10 +886,19 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(w <= buf%mini) then call add_to_selection_buffer(buf, det, w) end if - end do - end do + ! enddo + $ENDDO_p1 +! enddo + $ENDDO_p2 end +SUBST [ DOUBLE , DO_p1 , ENDDO_p1 , DO_p2 , ENDDO_p2 , IS_DOUBLE ] + +double ; do p1=1,mo_num ; enddo ; do p2=ib,mo_num ; enddo ; .True. ;; +single ; do p1=1,mo_num ; enddo ; p2=1 ; ; .False. ;; + +END_TEMPLATE + subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) use bitmasks implicit none @@ -882,6 +920,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere PROVIDE psi_selectors_coef_transp psi_det_sorted mat = 0d0 + p=0 do i=1,N_int negMask(i,1) = not(mask(i,1)) @@ -1435,7 +1474,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,sp) p2 = p(2,sp) do puti=1, mo_num - if(bannedOrb(puti, sp)) cycle + if (bannedOrb(puti, sp)) cycle call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) do putj=puti+1, mo_num @@ -1446,7 +1485,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) if (hij == 0.d0) cycle else - hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj)) + hij = hij_cache1(putj) - hij_cache2(putj) if (hij == 0.d0) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if @@ -1506,7 +1545,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none BEGIN_DOC -! Identify the determinants in det which are in the internal space. These are +! Identify the determinants in det that are in the internal space. These are ! the determinants that can be produced by creating two particles on the mask. END_DOC @@ -1534,7 +1573,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl end do - ! If det(i) < det(i_gen), it hs already been considered + ! If det(i) < det(i_gen), it has already been considered if(interesting(i) < i_gen) then fullMatch = .true. return @@ -1585,352 +1624,4 @@ end -! OLD unoptimized routines for debugging -! ====================================== - -subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(N_int,2) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - integer(bit_kind) :: det(N_int, 2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - integer :: i, j, s, h1, h2, p1, p2, puti, putj - double precision :: hij, phase - double precision, external :: get_phase_bi, mo_two_e_integral - logical :: ok - - integer :: bant - bant = 1 - - - if(sp == 3) then ! AB - h1 = p(1,1) - h2 = p(1,2) - do p1=1, mo_num - if(bannedOrb(p1, 1)) cycle - do p2=1, mo_num - if(bannedOrb(p2,2)) cycle - if(banned(p1, p2, bant)) cycle ! rentable? - if(p1 == h1 .or. p2 == h2) then - call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - else - phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = mo_two_e_integral(p1, p2, h1, h2) * phase - end if - mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij - end do - end do - else ! AA BB - p1 = p(1,sp) - p2 = p(2,sp) - do puti=1, mo_num - if(bannedOrb(puti, sp)) cycle - do putj=puti+1, mo_num - if(bannedOrb(putj, sp)) cycle - if(banned(puti, putj, bant)) cycle ! rentable? - if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then - call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - else - hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) - end if - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - end if -end - -subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(N_int,2) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - integer(bit_kind) :: det(N_int, 2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) - double precision, external :: get_phase_bi, mo_two_e_integral - logical :: ok - - logical, allocatable :: lbanned(:,:) - integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j - integer :: hfix, pfix, h1, h2, p1, p2, ib - - integer, parameter :: turn2(2) = (/2,1/) - integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - - integer :: bant - - - allocate (lbanned(mo_num, 2)) - lbanned = bannedOrb - - do i=1, p(0,1) - lbanned(p(i,1), 1) = .true. - end do - do i=1, p(0,2) - lbanned(p(i,2), 2) = .true. - end do - - ma = 1 - if(p(0,2) >= 2) ma = 2 - mi = turn2(ma) - - bant = 1 - - if(sp == 3) then - !move MA - if(ma == 2) bant = 2 - puti = p(1,mi) - hfix = h(1,ma) - p1 = p(1,ma) - p2 = p(2,ma) - if(.not. bannedOrb(puti, mi)) then - tmp_row = 0d0 - do putj=1, hfix-1 - if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle - hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) - end do - do putj=hfix+1, mo_num - if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle - hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) - end do - - if(ma == 1) then - mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num) - else - mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num) - end if - end if - - !MOVE MI - pfix = p(1,mi) - tmp_row = 0d0 - tmp_row2 = 0d0 - do puti=1,mo_num - if(lbanned(puti,mi)) cycle - !p1 fixed - putj = p1 - if(.not. banned(putj,puti,bant)) then - hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) - tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) - end if - - putj = p2 - if(.not. banned(putj,puti,bant)) then - hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) - tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) - end if - end do - - if(mi == 1) then - mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) - mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) - else - mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:) - mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:) - end if - else - if(p(0,ma) == 3) then - do i=1,3 - hfix = h(1,ma) - puti = p(i, ma) - p1 = p(turn3(1,i), ma) - p2 = p(turn3(2,i), ma) - tmp_row = 0d0 - do putj=1,hfix-1 - if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle - hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) - end do - do putj=hfix+1,mo_num - if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle - hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) - end do - - mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) - mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:) - end do - else - hfix = h(1,mi) - pfix = p(1,mi) - p1 = p(1,ma) - p2 = p(2,ma) - tmp_row = 0d0 - tmp_row2 = 0d0 - do puti=1,mo_num - if(lbanned(puti,ma)) cycle - putj = p2 - if(.not. banned(puti,putj,1)) then - hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) - tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) - end if - - putj = p1 - if(.not. banned(puti,putj,1)) then - hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) - tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) - end if - end do - mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) - mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:) - mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) - mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:) - end if - end if - deallocate(lbanned) - - !! MONO - if(sp == 3) then - s1 = 1 - s2 = 2 - else - s1 = sp - s2 = sp - end if - - do i1=1,p(0,s1) - ib = 1 - if(s1 == s2) ib = i1+1 - do i2=ib,p(0,s2) - p1 = p(i1,s1) - p2 = p(i2,s2) - if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij - end do - end do -end - -subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(2,N_int) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - double precision, external :: get_phase_bi, mo_two_e_integral - - integer :: i, j, tip, ma, mi, puti, putj - integer :: h1, h2, p1, p2, i1, i2 - double precision :: hij, phase - - integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) - integer, parameter :: turn2(2) = (/2, 1/) - integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - - integer :: bant - bant = 1 - - tip = p(0,1) * p(0,2) - - ma = sp - if(p(0,1) > p(0,2)) ma = 1 - if(p(0,1) < p(0,2)) ma = 2 - mi = mod(ma, 2) + 1 - - if(sp == 3) then - if(ma == 2) bant = 2 - - if(tip == 3) then - puti = p(1, mi) - do i = 1, 3 - putj = p(i, ma) - if(banned(putj,puti,bant)) cycle - i1 = turn3(1,i) - i2 = turn3(2,i) - p1 = p(i1, ma) - p2 = p(i2, ma) - h1 = h(1, ma) - h2 = h(2, ma) - - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) - if(ma == 1) then - mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij - else - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end if - end do - else - h1 = h(1,1) - h2 = h(1,2) - do j = 1,2 - putj = p(j, 2) - p2 = p(turn2(j), 2) - do i = 1,2 - puti = p(i, 1) - - if(banned(puti,putj,bant)) cycle - p1 = p(turn2(i), 1) - - hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - end if - - else - if(tip == 0) then - h1 = h(1, ma) - h2 = h(2, ma) - do i=1,3 - puti = p(i, ma) - do j=i+1,4 - putj = p(j, ma) - if(banned(puti,putj,1)) cycle - - i1 = turn2d(1, i, j) - i2 = turn2d(2, i, j) - p1 = p(i1, ma) - p2 = p(i2, ma) - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - else if(tip == 3) then - h1 = h(1, mi) - h2 = h(1, ma) - p1 = p(1, mi) - do i=1,3 - puti = p(turn3(1,i), ma) - putj = p(turn3(2,i), ma) - if(banned(puti,putj,1)) cycle - p2 = p(i, ma) - - hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int) - mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij - end do - else ! tip == 4 - puti = p(1, sp) - putj = p(2, sp) - if(.not. banned(puti,putj,1)) then - p1 = p(1, mi) - p2 = p(2, mi) - h1 = h(1, mi) - h2 = h(2, mi) - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end if - end if - end if -end - diff --git a/src/cipsi/selection_old.irp.f b/src/cipsi/selection_old.irp.f new file mode 100644 index 00000000..8fd5bc2b --- /dev/null +++ b/src/cipsi/selection_old.irp.f @@ -0,0 +1,350 @@ + +! OLD unoptimized routines for debugging +! ====================================== + +subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, s, h1, h2, p1, p2, puti, putj + double precision :: hij, phase + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + integer :: bant + bant = 1 + + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_num + if(bannedOrb(p1, 1)) cycle + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = mo_two_e_integral(p1, p2, h1, h2) * phase + end if + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_num +! do not cycle here? otherwise singles will be missed?? + if(bannedOrb(puti, sp)) cycle + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + end if + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + end if +end + +subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: hfix, pfix, h1, h2, p1, p2, ib + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + + + allocate (lbanned(mo_num, 2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then + tmp_row = 0d0 + do putj=1, hfix-1 + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + end do + do putj=hfix+1, mo_num + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + end do + + if(ma == 1) then + mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num) + else + mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num) + end if + end if + + !MOVE MI + pfix = p(1,mi) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_num + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) + end if + + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) + end if + end do + + if(mi == 1) then + mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) + mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) + else + mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:) + mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:) + end if + else + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) + tmp_row = 0d0 + do putj=1,hfix-1 + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + end do + do putj=hfix+1,mo_num + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + end do + + mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) + mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:) + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) + end if + end do + mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) + mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:) + mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) + mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:) + end if + end if + deallocate(lbanned) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=ib,p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do +end + +subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(2,N_int) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi, mo_two_e_integral + + integer :: i, j, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: hij, phase + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) + + ma = sp + if(p(0,1) > p(0,2)) ma = 1 + if(p(0,1) < p(0,2)) ma = 2 + mi = mod(ma, 2) + 1 + + if(sp == 3) then + if(ma == 2) bant = 2 + + if(tip == 3) then + puti = p(1, mi) + do i = 1, 3 + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + h1 = h(1, ma) + h2 = h(2, ma) + + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + if(ma == 1) then + mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij + else + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end if + end do + else + h1 = h(1,1) + h2 = h(1,2) + do j = 1,2 + putj = p(j, 2) + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + + if(banned(puti,putj,bant)) cycle + p1 = p(turn2(i), 1) + + hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + end if + + else + if(tip == 0) then + h1 = h(1, ma) + h2 = h(2, ma) + do i=1,3 + puti = p(i, ma) + do j=i+1,4 + putj = p(j, ma) + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + else if(tip == 3) then + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + do i=1,3 + puti = p(turn3(1,i), ma) + putj = p(turn3(2,i), ma) + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + + hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int) + mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij + end do + else ! tip == 4 + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end if + end if + end if +end + diff --git a/src/cipsi/selection_singles.irp.f b/src/cipsi/selection_singles.irp.f new file mode 100644 index 00000000..3821576c --- /dev/null +++ b/src/cipsi/selection_singles.irp.f @@ -0,0 +1,356 @@ +use bitmasks + +subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,buf) + use bitmasks + use selection_types + implicit none + BEGIN_DOC +! Select determinants connected to i_det by H + END_DOC + integer, intent(in) :: i_gen + integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) + double precision, intent(in) :: fock_diag_tmp(mo_num) + double precision, intent(in) :: E0(N_states) + type(pt2_type), intent(inout) :: pt2_data + type(selection_buffer), intent(inout) :: buf + + logical, allocatable :: banned(:,:), bannedOrb(:) + double precision, allocatable :: mat(:,:,:) + integer :: i, j, k + integer :: h1,h2,s1,s2,i1,i2,ib,sp + integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2) + logical :: fullMatch, ok + + + do k=1,N_int + hole (k,1) = iand(psi_det_generators(k,1,i_gen), hole_mask(k,1)) + hole (k,2) = iand(psi_det_generators(k,2,i_gen), hole_mask(k,2)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_gen)), particle_mask(k,1)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_gen)), particle_mask(k,2)) + enddo + + allocate(banned(mo_num,mo_num), bannedOrb(mo_num), mat(N_states, mo_num, 1)) + banned = .False. + + ! Create lists of holes and particles + ! ----------------------------------- + + integer :: N_holes(2), N_particles(2) + integer :: hole_list(N_int*bit_kind_size,2) + integer :: particle_list(N_int*bit_kind_size,2) + + call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) + call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + + do sp=1,2 + do i=1, N_holes(sp) + h1 = hole_list(i,sp) + call apply_hole(psi_det_generators(1,1,i_gen), sp, h1, mask, ok, N_int) + bannedOrb = .true. + do j=1,N_particles(sp) + bannedOrb(particle_list(j, sp)) = .false. + end do + call spot_hasBeen(mask, sp, psi_det_sorted, i_gen, N_det, bannedOrb, fullMatch) + if(fullMatch) cycle + mat = 0d0 + call splash_p(mask, sp, psi_selectors(1,1,i_gen), psi_selectors_coef_transp(1,i_gen), N_det_selectors - i_gen + 1, bannedOrb, mat(1,1,1)) + call fill_buffer_single(i_gen, sp, h1, 0, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) + end do + enddo +end subroutine + + +subroutine spot_hasBeen(mask, sp, det, i_gen, N, banned, fullMatch) + use bitmasks + implicit none + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) + integer, intent(in) :: i_gen, N, sp + logical, intent(inout) :: banned(mo_num) + logical, intent(out) :: fullMatch + + + integer :: i, j, na, nb, list(3), nt + integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) + + fullMatch = .false. + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + genl : do i=1, N + nt = 0 + + do j=1, N_int + myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) + myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) + nt += popcnt(myMask(j, 1)) + popcnt(myMask(j, 2)) + end do + + if(nt > 3) cycle + + if(nt <= 2 .and. i < i_gen) then + fullMatch = .true. + return + end if + + call bitstring_to_list(myMask(1,sp), list(1), na, N_int) + + if(nt == 3 .and. i < i_gen) then + do j=1,na + banned(list(j)) = .true. + end do + else if(nt == 1 .and. na == 1) then + banned(list(1)) = .true. + end if + end do genl +end subroutine + + +subroutine splash_p(mask, sp, det, coefs, N_sel, bannedOrb, vect) + use bitmasks + implicit none + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int,2,N_sel) + double precision, intent(in) :: coefs(N_states, N_sel) + integer, intent(in) :: sp, N_sel + logical, intent(inout) :: bannedOrb(mo_num) + double precision, intent(inout) :: vect(N_states, mo_num) + + integer :: i, j, h(0:2,2), p(0:3,2), nt + integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) + integer(bit_kind) :: phasemask(N_int, 2) + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + do i=1, N_sel + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt > 3) cycle + + do j=1,N_int + perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) + perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) + end do + + call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) + call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) + + call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) + + call get_mask_phase(psi_det_sorted(1,1,i), phasemask, N_int) + + if(nt == 3) then + call get_m2(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + else if(nt == 2) then + call get_m1(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + else + call get_m0(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + end if + end do +end subroutine + + +subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, j, h1, h2, p1, p2, sfix, hfix, pfix, hmob, pmob, puti + double precision :: hij + double precision, external :: get_phase_bi, mo_two_e_integral + + integer, parameter :: turn3_2(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + integer, parameter :: turn2(2) = (/2,1/) + + if(h(0,sp) == 2) then + h1 = h(1, sp) + h2 = h(2, sp) + do i=1,3 + puti = p(i, sp) + if(bannedOrb(puti)) cycle + p1 = p(turn3_2(1,i), sp) + p2 = p(turn3_2(2,i), sp) + hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2) + hij *= get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2) + vect(:, puti) += hij * coefs + end do + else if(h(0,sp) == 1) then + sfix = turn2(sp) + hfix = h(1,sfix) + pfix = p(1,sfix) + hmob = h(1,sp) + do j=1,2 + puti = p(j, sp) + if(bannedOrb(puti)) cycle + pmob = p(turn2(j), sp) + hij = mo_two_e_integral(pfix, pmob, hfix, hmob) + hij *= get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix) + vect(:, puti) += hij * coefs + end do + else + puti = p(1,sp) + if(.not. bannedOrb(puti)) then + sfix = turn2(sp) + p1 = p(1,sfix) + p2 = p(2,sfix) + h1 = h(1,sfix) + h2 = h(2,sfix) + hij = (mo_two_e_integral(p1,p2,h1,h2) - mo_two_e_integral(p2,p1,h1,h2)) + hij *= get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2) + vect(:, puti) += hij * coefs + end if + end if +end subroutine + +subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, hole, p1, p2, sh + logical :: ok, lbanned(mo_num) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + double precision, external :: get_phase_bi,mo_two_e_integral + + lbanned = bannedOrb + sh = 1 + if(h(0,2) == 1) sh = 2 + hole = h(1, sh) + lbanned(p(1,sp)) = .true. + if(p(0,sp) == 2) lbanned(p(2,sp)) = .true. + !print *, "SPm1", sp, sh + + p1 = p(1, sp) + + if(sp == sh) then + p2 = p(2, sp) + lbanned(p2) = .true. + + do i=1,hole-1 + if(lbanned(i)) cycle + hij = (mo_two_e_integral(p1, p2, i, hole) - mo_two_e_integral(p2, p1, i, hole)) + hij *= get_phase_bi(phasemask, sp, sp, i, p1, hole, p2) + vect(:,i) += hij * coefs + end do + do i=hole+1,mo_num + if(lbanned(i)) cycle + hij = (mo_two_e_integral(p1, p2, hole, i) - mo_two_e_integral(p2, p1, hole, i)) + hij *= get_phase_bi(phasemask, sp, sp, hole, p1, i, p2) + vect(:,i) += hij * coefs + end do + + call apply_particle(mask, sp, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, p2) += hij * coefs + else + p2 = p(1, sh) + do i=1,mo_num + if(lbanned(i)) cycle + hij = mo_two_e_integral(p1, p2, i, hole) + hij *= get_phase_bi(phasemask, sp, sh, i, p1, hole, p2) + vect(:,i) += hij * coefs + end do + end if + + call apply_particle(mask, sp, p1, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, p1) += hij * coefs +end subroutine + +subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i + logical :: ok, lbanned(mo_num) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + + lbanned = bannedOrb + lbanned(p(1,sp)) = .true. + do i=1,mo_num + if(lbanned(i)) cycle + call apply_particle(mask, sp, i, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, i) += hij * coefs + end do +end subroutine + + + +! +!subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) +! use bitmasks +! use selection_types +! implicit none +! +! integer, intent(in) :: i_generator, sp, h1 +! double precision, intent(in) :: vect(N_states, mo_num) +! logical, intent(in) :: bannedOrb(mo_num) +! double precision, intent(in) :: fock_diag_tmp(mo_num) +! double precision, intent(in) :: E0(N_states) +! double precision, intent(inout) :: pt2(N_states) +! type(selection_buffer), intent(inout) :: buf +! logical :: ok +! integer :: s1, s2, p1, p2, ib, istate +! integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) +! double precision :: e_pert, delta_E, val, Hii, max_e_pert, tmp +! double precision, external :: diag_H_mat_elem_fock +! +! +! call apply_hole(psi_det_generators(1,1,i_generator), sp, h1, mask, ok, N_int) +! +! do p1=1,mo_num +! if(bannedOrb(p1)) cycle +! if(vect(1, p1) == 0d0) cycle +! call apply_particle(mask, sp, p1, det, ok, N_int) +! +! +! Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) +! max_e_pert = 0d0 +! +! do istate=1,N_states +! val = vect(istate, p1) + vect(istate, p1) +! delta_E = E0(istate) - Hii +! tmp = dsqrt(delta_E * delta_E + val * val) +! if (delta_E < 0.d0) then +! tmp = -tmp +! endif +! e_pert = 0.5d0 * ( tmp - delta_E) +! pt2(istate) += e_pert +! if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert +! end do +! +! if(dabs(max_e_pert) > buf%mini) call add_to_selection_buffer(buf, det, max_e_pert) +! end do +!end subroutine +! diff --git a/src/ezfio_files/00.create.bats b/src/ezfio_files/00.create.bats index cfa6247d..49430a0b 100644 --- a/src/ezfio_files/00.create.bats +++ b/src/ezfio_files/00.create.bats @@ -23,6 +23,34 @@ function run { qp set mo_two_e_ints io_mo_two_e_integrals "Write" } +@test "H2_1" { + run h2_1.xyz 1 0 cc-pvdz +} + +@test "H2_3" { + run h2_3.xyz 3 0 cc-pvdz +} + +@test "H3_2" { + run h3_2.xyz 2 0 cc-pvdz +} + +@test "H3_4" { + run h3_4.xyz 4 0 cc-pvdz +} + +@test "H4_1" { + run h4_1.xyz 1 0 cc-pvdz +} + +@test "H4_3" { + run h4_3.xyz 3 0 cc-pvdz +} + +@test "H4_5" { + run h4_5.xyz 5 0 cc-pvdz +} + @test "B-B" { qp set_file b2_stretched.ezfio diff --git a/src/fci/40.fci.bats b/src/fci/40.fci.bats index 4523d0e0..3c4a93c7 100644 --- a/src/fci/40.fci.bats +++ b/src/fci/40.fci.bats @@ -10,8 +10,8 @@ function run() { qp set perturbation do_pt2 False qp set determinants n_det_max 8000 qp set determinants n_states 1 - qp set davidson threshold_davidson 1.e-10 - qp set davidson n_states_diag 8 + qp set davidson_keywords threshold_davidson 1.e-10 + qp set davidson_keywords n_states_diag 8 qp run fci energy1="$(ezfio get fci energy | tr '[]' ' ' | cut -d ',' -f 1)" eq $energy1 $1 $thresh @@ -24,99 +24,134 @@ function run_stoch() { qp set perturbation do_pt2 True qp set determinants n_det_max $3 qp set determinants n_states 1 - qp set davidson threshold_davidson 1.e-10 - qp set davidson n_states_diag 1 + qp set davidson_keywords threshold_davidson 1.e-10 + qp set davidson_keywords n_states_diag 1 qp run fci energy1="$(ezfio get fci energy_pt2 | tr '[]' ' ' | cut -d ',' -f 1)" eq $energy1 $1 $thresh } -@test "B-B" { +@test "H2_1" { # 1s + qp set_file h2_1.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.06415255 1.e-8 10000 +} + +@test "H2_3" { # 1s + qp set_file h2_3.ezfio + qp set perturbation pt2_max 0. + run_stoch -0.96029881 1.e-8 10000 +} + +@test "H3_2" { # 3s + qp set_file h3_2.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.61003132 1.e-8 10000 +} + +@test "H3_4" { # 2s + qp set_file h3_4.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.02434843 1.e-8 10000 +} + +@test "H4_1" { # 13s + qp set_file h4_1.ezfio + qp set perturbation pt2_max 0. + run_stoch -2.01675062 1.e-8 10000 +} + +@test "H4_3" { # 10s + qp set_file h4_3.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.95927626 1.e-8 10000 +} + +@test "H4_5" { # 3s + qp set_file h4_5.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.25852765 1.e-8 10000 +} + +@test "B-B" { # 10s qp set_file b2_stretched.ezfio qp set determinants n_det_max 10000 qp set_frozen_core run_stoch -49.14103054419 3.e-4 10000 } -@test "F2" { # 4.07m - [[ -n $TRAVIS ]] && skip - qp set_file f2.ezfio - qp set_frozen_core - run_stoch -199.304922384814 3.e-3 100000 -} - -@test "NH3" { # 10.6657s +@test "NH3" { # 8s qp set_file nh3.ezfio qp set_mo_class --core="[1-4]" --act="[5-72]" run -56.244753429144986 3.e-4 100000 } -@test "DHNO" { # 11.4721s +@test "DHNO" { # 8s qp set_file dhno.ezfio qp set_mo_class --core="[1-7]" --act="[8-64]" - run -130.459020029816 3.e-4 100000 + run -130.466208113547 3.e-4 100000 } -@test "HCO" { # 12.2868s +@test "HCO" { # 32s qp set_file hco.ezfio - run -113.393356604085 1.e-3 100000 + run -113.395751656985 1.e-3 100000 } -@test "H2O2" { # 12.9214s +@test "H2O2" { # 21s qp set_file h2o2.ezfio qp set_mo_class --core="[1-2]" --act="[3-24]" --del="[25-38]" run -151.005848404095 1.e-3 100000 } -@test "HBO" { # 13.3144s +@test "HBO" { # 18s [[ -n $TRAVIS ]] && skip qp set_file hbo.ezfio - run -100.213 1.5e-3 100000 + run -100.214 1.5e-3 100000 } -@test "H2O" { # 11.3727s +@test "H2O" { # 16s [[ -n $TRAVIS ]] && skip qp set_file h2o.ezfio - run -76.2361605151999 5.e-4 100000 + run -76.238051555276 5.e-4 100000 } -@test "ClO" { # 13.3755s +@test "ClO" { # 47s [[ -n $TRAVIS ]] && skip qp set_file clo.ezfio - run -534.546453546852 1.e-3 100000 + run -534.548529710256 1.e-3 100000 } -@test "SO" { # 13.4952s +@test "SO" { # 23s [[ -n $TRAVIS ]] && skip qp set_file so.ezfio run -26.015 3.e-3 100000 } -@test "H2S" { # 13.6745s +@test "H2S" { # 37s [[ -n $TRAVIS ]] && skip qp set_file h2s.ezfio - run -398.859577605891 5.e-4 100000 + run -398.864853669111 5.e-4 100000 } -@test "OH" { # 13.865s +@test "OH" { # 12s [[ -n $TRAVIS ]] && skip qp set_file oh.ezfio - run -75.6121856748294 3.e-4 100000 + run -75.615 1.5e-3 100000 } -@test "SiH2_3B1" { # 13.938ss +@test "SiH2_3B1" { # 10s [[ -n $TRAVIS ]] && skip qp set_file sih2_3b1.ezfio - run -290.0175411299477 3.e-4 100000 + run -290.0206626734517 3.e-4 100000 } -@test "H3COH" { # 14.7299s +@test "H3COH" { # 33s [[ -n $TRAVIS ]] && skip qp set_file h3coh.ezfio - run -115.205632960026 1.e-3 100000 + run -115.206784386204 1.e-3 100000 } -@test "SiH3" { # 15.99s +@test "SiH3" { # 15s [[ -n $TRAVIS ]] && skip qp set_file sih3.ezfio run -5.572 1.e-3 100000 @@ -132,7 +167,7 @@ function run_stoch() { @test "ClF" { # 16.8864s [[ -n $TRAVIS ]] && skip qp set_file clf.ezfio - run -559.169748890031 1.5e-3 100000 + run -559.174371468224 1.5e-3 100000 } @test "SO2" { # 17.5645s @@ -170,7 +205,6 @@ function run_stoch() { run -187.970184372047 1.6e-3 100000 } - @test "[Cu(NH3)4]2+" { # 25.0417s [[ -n $TRAVIS ]] && skip qp set_file cu_nh3_4_2plus.ezfio @@ -185,3 +219,10 @@ function run_stoch() { run -93.078 2.e-3 100000 } +@test "F2" { # 4.07m + [[ -n $TRAVIS ]] && skip + qp set_file f2.ezfio + qp set_frozen_core + run_stoch -199.304922384814 3.e-3 100000 +} + diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index df566032..3647b775 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -48,6 +48,35 @@ good=-92.76613324421798 rm -rf $EZFIO } + +@test "H2_1" { # 1s + run h2_1.ezfio -1.005924963288527 +} + +@test "H2_3" { # 1s + run h2_3.ezfio -0.9591011604845440 +} + +@test "H3_2" { # 1s + run h3_2.ezfio -1.558273529860488 +} + +@test "H3_4" { # 1s + run h3_4.ezfio -1.0158684760025190 +} + +@test "H4_1" { # 1s + run h4_1.ezfio -1.932022805374405 +} + +@test "H4_3" { # 1s + run h4_3.ezfio -1.8948449927787350 +} + +@test "H4_5" { # 1s + run h4_5.ezfio -1.2408338805496990 +} + @test "point charges" { run_pt_charges } diff --git a/src/tools/print_wf.irp.f b/src/tools/print_wf.irp.f index 64eb1a1f..9621ee89 100644 --- a/src/tools/print_wf.irp.f +++ b/src/tools/print_wf.irp.f @@ -47,6 +47,7 @@ subroutine routine do i = 1, min(N_det_print_wf,N_det) print*,'' print*,'i = ',i + print *,psi_det_sorted(1,1,i) call debug_det(psi_det_sorted(1,1,i),N_int) call get_excitation_degree(psi_det_sorted(1,1,i),psi_det_sorted(1,1,1),degree,N_int) print*,'degree = ',degree diff --git a/tests/input/h2_1.xyz b/tests/input/h2_1.xyz new file mode 100644 index 00000000..8ecd7dab --- /dev/null +++ b/tests/input/h2_1.xyz @@ -0,0 +1,6 @@ +2 +H2 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 + + diff --git a/tests/input/h2_3.xyz b/tests/input/h2_3.xyz new file mode 100644 index 00000000..8ecd7dab --- /dev/null +++ b/tests/input/h2_3.xyz @@ -0,0 +1,6 @@ +2 +H2 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 + + diff --git a/tests/input/h3_2.xyz b/tests/input/h3_2.xyz new file mode 100644 index 00000000..7c251c35 --- /dev/null +++ b/tests/input/h3_2.xyz @@ -0,0 +1,7 @@ +3 +h3 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.0 0.0 + + diff --git a/tests/input/h3_4.xyz b/tests/input/h3_4.xyz new file mode 100644 index 00000000..7c251c35 --- /dev/null +++ b/tests/input/h3_4.xyz @@ -0,0 +1,7 @@ +3 +h3 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.0 0.0 + + diff --git a/tests/input/h4_1.xyz b/tests/input/h4_1.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_1.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 + diff --git a/tests/input/h4_3.xyz b/tests/input/h4_3.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_3.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 + diff --git a/tests/input/h4_5.xyz b/tests/input/h4_5.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_5.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 + From 93adc8d6c1316f95151fe7ab32829db657cbd34c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 10 Jun 2023 11:57:28 +0200 Subject: [PATCH 69/79] no aaa // --- .../normal_ordered_contractions.irp.f | 539 +++++++++++++++++- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 40 ++ 2 files changed, 560 insertions(+), 19 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f index f066c958..d11c6727 100644 --- a/src/tc_bi_ortho/normal_ordered_contractions.irp.f +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -500,7 +500,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] +BEGIN_PROVIDER [ double precision, no_aaa_contraction_v0, (mo_num,mo_num,mo_num,mo_num)] BEGIN_DOC ! @@ -508,12 +508,12 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ ! h1 < h2 ! p1 > p2 ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! no_aaa_contraction_v0(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] ! ! else: ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! no_aaa_contraction_v0(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] ! ! ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) @@ -536,7 +536,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) - print*,' Providing no_aaa_contraction ...' + print*,' Providing no_aaa_contraction_v0 ...' call wall_time(wall0) PROVIDE N_int @@ -556,7 +556,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ if(Ne(2) .lt. 3) then - no_aaa_contraction = 0.d0 + no_aaa_contraction_v0 = 0.d0 else @@ -630,7 +630,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -662,7 +662,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) enddo enddo enddo @@ -736,7 +736,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -776,7 +776,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -849,7 +849,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -881,7 +881,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) enddo enddo enddo @@ -955,7 +955,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -995,7 +995,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -1010,19 +1010,19 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2, tmpvec_3) - no_aaa_contraction = -0.5d0 * no_aaa_contraction + no_aaa_contraction_v0 = -0.5d0 * no_aaa_contraction_v0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aaa_contraction, mo_num) + !$OMP SHARED (no_aaa_contraction_v0, mo_num) !$OMP DO do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 1, mo_num do p2 = p1, mo_num - no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + no_aaa_contraction_v0(p2,h2,p1,h1) -= no_aaa_contraction_v0(p1,h2,p2,h1) enddo enddo enddo @@ -1034,7 +1034,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = 1, mo_num do p1 = 2, mo_num do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + no_aaa_contraction_v0(p2,h2,p1,h1) = -no_aaa_contraction_v0(p1,h2,p2,h1) enddo enddo enddo @@ -1046,17 +1046,18 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = h1+1, mo_num do p1 = 2, mo_num do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + no_aaa_contraction_v0(p2,h2,p1,h1) *= -1.d0 enddo enddo enddo enddo + !$OMP END DO !$OMP END PARALLEL endif call wall_time(wall1) - print*,' Wall time for no_aaa_contraction', wall1-wall0 + print*,' Wall time for no_aaa_contraction_v0', wall1-wall0 END_PROVIDER @@ -1519,3 +1520,503 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ END_PROVIDER ! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aaa_contraction) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12