From 86b48454127d011a0d361dd651a6ef6ef2b798d5 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 26 May 2023 08:10:18 +0200 Subject: [PATCH 01/18] IO TC normal ordering added --- src/tc_bi_ortho/normal_ordered.irp.f | 147 +++++++++++++++------------ src/tc_keywords/EZFIO.cfg | 6 ++ 2 files changed, 90 insertions(+), 63 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 8adc7a63..f9728d05 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -1,3 +1,6 @@ + +! --- + BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC @@ -15,79 +18,97 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ integer(bit_kind), allocatable :: key_i_core(:,:) double precision :: hthree_aba,hthree_aaa,hthree_aab double precision :: wall0,wall1 - - PROVIDE N_int - allocate( occ(N_int*bit_kind_size,2) ) - allocate( key_i_core(N_int,2) ) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) - else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) - endif - - normal_two_body_bi_orth = 0.d0 - print*,'Providing normal_two_body_bi_orth ...' + print*,' Providing normal_two_body_bi_orth ...' call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - ! all contributions from the 3-e terms to the double excitations - ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - + if(read_tc_norm_ord) then - ! opposite spin double excitations : s1 /= s2 - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") + read(11) normal_two_body_bi_orth + close(11) - ! same spin double excitations : s1 == s2 - if(h1h2 - ! same spin double excitations with same spin contributions - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then - ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + else + + PROVIDE N_int + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations + ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant + + + ! opposite spin double excitations : s1 /= s2 + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + + ! same spin double excitations : s1 == s2 + if(h1h2 + ! same spin double excitations with same spin contributions + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + enddo enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + + deallocate( occ ) + deallocate( key_i_core ) + endif + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif call wall_time(wall1) - print*,'Wall time for normal_two_body_bi_orth ',wall1-wall0 - - deallocate( occ ) - deallocate( key_i_core ) + print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 END_PROVIDER diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 3a26a6eb..de638da9 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -202,6 +202,12 @@ doc: Read/Write integrals int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_l interface: ezfio,provider,ocaml default: None +[io_tc_norm_ord] +type: Disk_access +doc: Read/Write normal_two_body_bi_orth from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + [debug_tc_pt2] type: integer doc: If :: 1 then you compute the TC-PT2 the old way, :: 2 then you check with the new version but without three-body From b8bfab1d7cd8576c9597d92f70822d903628a6a6 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 27 May 2023 22:34:40 +0200 Subject: [PATCH 02/18] start working on NO --- src/tc_bi_ortho/normal_ordered.irp.f | 252 ++++++++++++++++----------- 1 file changed, 150 insertions(+), 102 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 8adc7a63..c30cd1ef 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -1,3 +1,6 @@ + +! --- + BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC @@ -8,13 +11,16 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ implicit none - integer :: i,h1,p1,h2,p2 - integer :: hh1,hh2,pp1,pp2 + integer :: i, h1, p1, h2, p2 + integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) + double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: wall0, wall1 integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree_aba,hthree_aaa,hthree_aab - double precision :: wall0,wall1 + + print*,' Providing normal_two_body_bi_orth ...' + call wall_time(wall0) PROVIDE N_int @@ -23,23 +29,21 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ if(core_tc_op) then do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - normal_two_body_bi_orth = 0.d0 - print*,'Providing normal_two_body_bi_orth ...' - call wall_time(wall0) + normal_two_body_bi_orth(1:mo_num,1:mo_num,1:mo_num,1:mo_num) = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) - !$OMP DO SCHEDULE (static) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) + !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) do pp1 = 1, n_act_orb @@ -48,50 +52,57 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ h2 = list_act(hh2) do pp2 = 1, n_act_orb p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - ! opposite spin double excitations : s1 /= s2 call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) ! same spin double excitations : s1 == s2 - if(h1h2 - ! same spin double excitations with same spin contributions - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then + if((h1 .lt. h2) .and. (p1 .gt. p2)) then + + ! with opposite spin contributions + call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2 + ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif + if(Ne(2) .ge. 3) then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + + else + + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + + if(Ne(2) .ge. 3) then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + + normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0 * (hthree_aba + hthree_aab + hthree_aaa) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print*,'Wall time for normal_two_body_bi_orth ',wall1-wall0 + !$OMP END DO + !$OMP END PARALLEL deallocate( occ ) deallocate( key_i_core ) + call wall_time(wall1) + print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 + END_PROVIDER - +! --- subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) @@ -106,30 +117,41 @@ subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) !!!! double alpha/beta hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part i = occ(ii,2) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) int_exc_13 = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2, i,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12 = -1.d0 * integral - hthree += 2.d0 * int_direct - 1.d0 * ( int_exc_13 + int_exc_12) + + hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) enddo + do ii = Ne(2) + 1, Ne(1) ! purely open-shell part - i = occ(ii,1) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) int_exc_13 = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2, i,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12 = -1.d0 * integral - hthree += 1.d0 * int_direct - 0.5d0* ( int_exc_13 + int_exc_12) + + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) enddo -end subroutine give_aba_contraction - + return +end +! --- BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] @@ -152,29 +174,31 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, allocate( key_i_core(N_int,2) ) allocate( occ(N_int*bit_kind_size,2) ) - if(core_tc_op)then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) endif + normal_two_body_bi_orth_ab = 0.d0 do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) - normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree - enddo + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) + + normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree + enddo + enddo enddo - enddo enddo deallocate( key_i_core ) @@ -182,7 +206,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, END_PROVIDER - +! --- BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] @@ -250,13 +274,14 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_ END_PROVIDER - +! --- subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) BEGIN_DOC -! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 + ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 END_DOC + use bitmasks ! you need to include the bitmasks_module.f90 features implicit none @@ -270,48 +295,64 @@ subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) hthree = 0.d0 do ii = 1, Ne(2) ! purely closed shell part i = occ(ii,2) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) int_exc_l = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) int_exc_ll= -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12= -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) int_exc_13= -1.d0 * integral - call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) int_exc_23= -1.d0 * integral - hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23 ) + hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) enddo + do ii = Ne(2)+1,Ne(1) ! purely open-shell part i = occ(ii,1) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) - int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral) - int_exc_l = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral) - int_exc_ll= -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral) - int_exc_12= -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) - int_exc_13= -1.d0 * integral - call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral) - int_exc_23= -1.d0 * integral - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23 )) + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23 = -1.d0 * integral + + hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) enddo -end subroutine give_aaa_contraction - + return +end +! --- subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - implicit none + use bitmasks ! you need to include the bitmasks_module.f90 features - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) double precision, intent(out) :: hthree integer :: ii, i double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 @@ -320,11 +361,18 @@ subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) hthree = 0.d0 do ii = 1, Ne(2) ! purely closed shell part i = occ(ii,2) - call give_integrals_3_body_bi_ort(p2,p1,i,h2,h1,i,integral) + + call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2,i,h2,h1,i,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) int_exc_23= -1.d0 * integral - hthree += 1.d0 * int_direct - int_exc_23 + + hthree += 1.d0 * int_direct - int_exc_23 enddo -end subroutine give_aab_contraction + return +end + +! --- + From 5ab6a1d7fba6fbff88ac858747783bb9292b9a89 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 1 Jun 2023 19:59:25 +0200 Subject: [PATCH 03/18] few modifs --- src/bi_ort_ints/semi_num_ints_mo.irp.f | 5 +++ src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 2 +- src/non_h_ints_mu/grad_squared.irp.f | 21 ++++++++++- src/non_h_ints_mu/new_grad_tc.irp.f | 1 + src/non_h_ints_mu/tc_integ.irp.f | 6 +++ src/non_h_ints_mu/total_tc_int.irp.f | 5 +++ src/tc_scf/rh_tcscf_diis.irp.f | 39 +++++++++++++++++++- src/tc_scf/tc_scf.irp.f | 4 ++ 8 files changed, 80 insertions(+), 3 deletions(-) diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 0d727785..771d3274 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -138,10 +138,13 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo + FREE int2_grad1_u12_ao + endif call wall_time(wall1) print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -200,6 +203,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, enddo enddo + FREE int2_grad1_u12_bimo_transp + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index e8b56307..5a3730b3 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -79,7 +79,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integer, intent(in) :: n, l, k, m, j, i double precision, intent(out) :: integral integer :: ipoint - double precision :: weight + double precision :: weight, tmp PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_t diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 3f1a9bf5..44a6ae65 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -231,6 +231,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time0) PROVIDE j1b_type + PROVIDE int2_grad1u2_grad2u2_j1b2 do ipoint = 1, n_points_final_grid tmp1 = v_1b(ipoint) @@ -242,6 +243,8 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g enddo enddo + FREE int2_grad1u2_grad2u2_j1b2 + !if(j1b_type .eq. 0) then ! grad12_j12 = 0.d0 ! do ipoint = 1, n_points_final_grid @@ -262,6 +265,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 + call print_memory_usage() END_PROVIDER @@ -278,6 +282,9 @@ BEGIN_PROVIDER [double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_g print*, ' providing u12sq_j1bsq ...' call wall_time(time0) + ! do not free here + PROVIDE int2_u2_j1b2 + do ipoint = 1, n_points_final_grid tmp_x = v_1b_grad(1,ipoint) tmp_y = v_1b_grad(2,ipoint) @@ -292,6 +299,7 @@ BEGIN_PROVIDER [double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for u12sq_j1bsq = ', time1 - time0 + call print_memory_usage() END_PROVIDER @@ -310,6 +318,9 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, print*, ' providing u12_grad1_u12_j1b_grad1_j1b ...' call wall_time(time0) + PROVIDE int2_u_grad1u_j1b2 + PROVIDE int2_u_grad1u_x_j1b2 + do ipoint = 1, n_points_final_grid x = final_grid_points(1,ipoint) @@ -340,14 +351,17 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, enddo enddo + FREE int2_u_grad1u_j1b2 + FREE int2_u_grad1u_x_j1b2 + call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b = ', time1 - time0 + call print_memory_usage() END_PROVIDER ! --- - BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)] BEGIN_DOC @@ -401,6 +415,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 0.d0, tc_grad_square_ao, ao_num*ao_num) + FREE int2_grad1_u12_square_ao + ! --- if(((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) .and. use_ipp) then @@ -442,6 +458,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_u2_j1b2(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao, ao_num*ao_num) + + FREE int2_u2_j1b2 endif ! --- @@ -478,6 +496,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao call wall_time(time1) print*, ' Wall time for tc_grad_square_ao = ', time1 - time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 24e7e743..499ffe9d 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -284,6 +284,7 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, call wall_time(time1) print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index d5995ae5..8251fc71 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -176,6 +176,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f call wall_time(time1) print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + call print_memory_usage() END_PROVIDER @@ -242,6 +243,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL + FREE u12sq_j1bsq grad12_j12 + else PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 @@ -262,6 +265,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL + FREE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + endif elseif(j1b_type .ge. 100) then @@ -324,6 +329,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p call wall_time(time1) print*, ' wall time for int2_grad1_u12_square_ao =', time1-time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 450bbef0..2034872a 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -84,8 +84,13 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao enddo endif + FREE tc_grad_square_ao + FREE tc_grad_and_lapl_ao + FREE ao_two_e_coul + call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 20260a95..0504373c 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -11,6 +11,7 @@ subroutine rh_tcscf_diis() integer :: i, j, it integer :: dim_DIIS, index_dim_DIIS + logical :: converged double precision :: etc_tot, etc_1e, etc_2e, etc_3e, e_save, e_delta double precision :: tc_grad, g_save, g_delta, g_delta_th double precision :: level_shift_save, rate_th @@ -92,8 +93,9 @@ subroutine rh_tcscf_diis() PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot + converged = .false. !do while((tc_grad .gt. dsqrt(thresh_tcscf)) .and. (er_DIIS .gt. dsqrt(thresh_tcscf))) - do while(er_DIIS .gt. dsqrt(thresh_tcscf)) + do while(.not. converged) call wall_time(t0) @@ -218,21 +220,56 @@ subroutine rh_tcscf_diis() !g_delta_th = dabs(tc_grad) ! g_delta) er_delta_th = dabs(er_DIIS) !er_delta) + converged = er_DIIS .lt. dsqrt(thresh_tcscf) + call wall_time(t1) !write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & ! it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 + +! Write data in JSON file + + call lock_io + if (it == 1) then + write(json_unit, json_dict_uopen_fmt) + else + write(json_unit, json_dict_close_uopen_fmt) + endif + write(json_unit, json_int_fmt) ' iteration ', it + write(json_unit, json_real_fmt) ' SCF TC Energy ', etc_tot + write(json_unit, json_real_fmt) ' E(1e) ', etc_1e + write(json_unit, json_real_fmt) ' E(2e) ', etc_2e + write(json_unit, json_real_fmt) ' E(3e) ', etc_3e + write(json_unit, json_real_fmt) ' delta Energy ', e_delta + write(json_unit, json_real_fmt) ' DIIS error ', er_DIIS + write(json_unit, json_real_fmt) ' level_shift ', level_shift_tcscf + write(json_unit, json_real_fmt) ' DIIS ', dim_DIIS + write(json_unit, json_real_fmt) ' Wall time (min)', (t1-t0)/60.d0 + call unlock_io + if(er_delta .lt. 0.d0) then call ezfio_set_tc_scf_bitc_energy(etc_tot) call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + write(json_unit, json_true_fmt) 'saved' + else + write(json_unit, json_false_fmt) 'saved' endif + call lock_io + if (converged) then + write(json_unit, json_true_fmtx) 'converged' + else + write(json_unit, json_false_fmtx) 'converged' + endif + call unlock_io if(qp_stop()) exit enddo + write(json_unit, json_dict_close_fmtx) + ! --- print *, ' TCSCF DIIS converged !' diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 88ddd26c..04c4f92d 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -8,6 +8,8 @@ program tc_scf implicit none + write(json_unit,json_array_open_fmt) 'tc-scf' + print *, ' starting ...' my_grid_becke = .True. @@ -57,6 +59,8 @@ program tc_scf endif + write(json_unit,json_array_close_fmtx) + call json_close end From 6d01eb42ca24a4265710b20913d64c9fb3117298 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:16:39 +0200 Subject: [PATCH 04/18] print mem details --- src/bi_ort_ints/three_body_ijm.irp.f | 7 +++++++ src/bi_ort_ints/three_body_ijmk.irp.f | 6 ++++++ src/bi_ort_ints/three_body_ijmkl.irp.f | 6 ++++++ src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 1 + src/non_h_ints_mu/tc_integ.irp.f | 2 ++ src/non_h_ints_mu/total_tc_int.irp.f | 4 +--- 6 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f index 4d21cb93..b34638b8 100644 --- a/src/bi_ort_ints/three_body_ijm.irp.f +++ b/src/bi_ort_ints/three_body_ijm.irp.f @@ -49,6 +49,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_direct_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -102,6 +103,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_3_idx_cycle_1_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -155,6 +157,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_3_idx_cycle_2_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -208,6 +211,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch23_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -261,6 +265,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch13_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -306,6 +311,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch12_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -359,6 +365,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort_new, (mo_num, mo_ call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch12_bi_ort_new', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 5afd49ab..95b57e37 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -43,6 +43,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_direct_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -90,6 +91,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_4_idx_cycle_1_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -137,6 +139,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_4_idx_cycle_2_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -184,6 +187,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -230,6 +234,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_exch13_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -277,6 +282,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_exch12_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index ae4c9bd5..507408e5 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -44,6 +44,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -93,6 +94,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -142,6 +144,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -191,6 +194,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -240,6 +244,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -289,6 +294,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index 5a3730b3..f82e8725 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -57,6 +57,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n call wall_time(wall1) print *, ' wall time for three_body_ints_bi_ort', wall1 - wall0 + call print_memory_usage() ! if(write_three_body_ints_bi_ort)then ! print*,'Writing three_body_ints_bi_ort on disk ...' ! call write_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file) diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index 8251fc71..b2c0df31 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -100,6 +100,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f !$OMP END DO !$OMP END PARALLEL + FREE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + elseif(j1b_type .ge. 100) then PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 2034872a..afa10305 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -84,9 +84,7 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao enddo endif - FREE tc_grad_square_ao - FREE tc_grad_and_lapl_ao - FREE ao_two_e_coul + FREE tc_grad_square_ao tc_grad_and_lapl_ao ao_two_e_coul call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0 From 107cc3f2fbfb13fdea4bad734c8bc5d11d9d8df8 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:19:25 +0200 Subject: [PATCH 05/18] fixed bug in TC-VAR --- src/tc_bi_ortho/tc_utils.irp.f | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index f8f648e8..737c393b 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -38,15 +38,16 @@ subroutine write_tc_var() implicit none integer :: i, j, k - double precision :: hmono, htwoe, hthree, htot + double precision :: hmono, htwoe, hthree, htot_1j, htot_j1 double precision :: SIGMA_TC do k = 1, n_states SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) - SIGMA_TC = SIGMA_TC + htot * htot + call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo print *, " state : ", k From 82b2d8bd98e9f3d543b74f766553d28166486094 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:48:23 +0200 Subject: [PATCH 06/18] avoid long name in cosgtos --- .../two_e_Coul_integrals_cosgtos.irp.f | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f index 527a98d5..ea9ff009 100644 --- a/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f +++ b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f @@ -29,14 +29,14 @@ double precision function ao_two_e_integral_cosgtos(i, j, k, l) complex*16 :: integral5, integral6, integral7, integral8 complex*16 :: integral_tot - double precision :: ao_two_e_integral_cosgtos_schwartz_accel + double precision :: ao_2e_cosgtos_schwartz_accel complex*16 :: ERI_cosgtos complex*16 :: general_primitive_integral_cosgtos if(ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024) then !print *, ' with shwartz acc ' - ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + ao_two_e_integral_cosgtos = ao_2e_cosgtos_schwartz_accel(i, j, k, l) else !print *, ' without shwartz acc ' @@ -294,7 +294,7 @@ end function ao_two_e_integral_cosgtos ! --- -double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) +double precision function ao_2e_cosgtos_schwartz_accel(i, j, k, l) BEGIN_DOC ! integral of the AO basis or (ij|kl) @@ -329,7 +329,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) complex*16 :: ERI_cosgtos complex*16 :: general_primitive_integral_cosgtos - ao_two_e_integral_cosgtos_schwartz_accel = 0.d0 + ao_2e_cosgtos_schwartz_accel = 0.d0 dim1 = n_pt_max_integrals @@ -519,8 +519,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 - ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & - + coef4 * 2.d0 * real(integral_tot) + ao_2e_cosgtos_schwartz_accel = ao_2e_cosgtos_schwartz_accel + coef4 * 2.d0 * real(integral_tot) enddo ! s enddo ! r enddo ! q @@ -698,8 +697,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 - ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & - + coef4 * 2.d0 * real(integral_tot) + ao_2e_cosgtos_schwartz_accel = ao_2e_cosgtos_schwartz_accel + coef4 * 2.d0 * real(integral_tot) enddo ! s enddo ! r enddo ! q @@ -709,11 +707,11 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) deallocate(schwartz_kl) -end function ao_two_e_integral_cosgtos_schwartz_accel +end function ao_2e_cosgtos_schwartz_accel ! --- -BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_2e_cosgtos_schwartz, (ao_num,ao_num)] BEGIN_DOC ! Needed to compute Schwartz inequalities @@ -723,16 +721,16 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,a integer :: i, k double precision :: ao_two_e_integral_cosgtos - ao_two_e_integral_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) + ao_2e_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) - !$OMP PARALLEL DO PRIVATE(i,k) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(ao_num, ao_two_e_integral_cosgtos_schwartz) & + !$OMP PARALLEL DO PRIVATE(i,k) & + !$OMP DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_2e_cosgtos_schwartz) & !$OMP SCHEDULE(dynamic) do i = 1, ao_num do k = 1, i - ao_two_e_integral_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) - ao_two_e_integral_cosgtos_schwartz(k,i) = ao_two_e_integral_cosgtos_schwartz(i,k) + ao_2e_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) + ao_2e_cosgtos_schwartz(k,i) = ao_2e_cosgtos_schwartz(i,k) enddo enddo !$OMP END PARALLEL DO From cab3b12b9b397933ca438717846d28d3164d4804 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:55:51 +0200 Subject: [PATCH 07/18] minor modif in names --- src/tc_bi_ortho/tc_utils.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index e0f29eb8..9023e2f0 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -45,8 +45,8 @@ subroutine write_tc_var() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo From 072bea8041a5414da00bc8ddc001186c3c9ff269 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 3 Jun 2023 22:12:30 +0200 Subject: [PATCH 08/18] Improve 4idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 186 +++++- src/bi_ort_ints/three_body_ijm.irp.f | 14 +- src/bi_ort_ints/three_body_ijmk.irp.f | 686 +++++++++++++-------- src/bi_ort_ints/three_body_ijmk_old.irp.f | 290 +++++++++ src/bi_ort_ints/three_body_ijmkl.irp.f | 299 +++++---- src/tc_bi_ortho/slater_tc_opt.irp.f | 3 +- src/tc_bi_ortho/slater_tc_opt_single.irp.f | 7 +- src/tc_bi_ortho/symmetrized_3_e_int.irp.f | 6 +- 8 files changed, 1071 insertions(+), 420 deletions(-) create mode 100644 src/bi_ort_ints/three_body_ijmk_old.irp.f diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 42bbe315..bb0424cd 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -8,8 +8,9 @@ program bi_ort_ints my_n_pt_a_grid = 14 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid ! call test_3e - call test_5idx -! call test_5idx2 +! call test_5idx +! call test_5idx2 + call test_4idx end subroutine test_5idx2 @@ -145,3 +146,184 @@ subroutine test_5idx end + +! --- + +subroutine test_4idx() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + thr = 1d-5 + + PROVIDE three_e_4_idx_direct_bi_ort_old + PROVIDE three_e_4_idx_direct_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_direct_bi_ort (l,k,j,i) + ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_direct_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_direct_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch13_bi_ort_old + PROVIDE three_e_4_idx_exch13_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch13_bi_ort (l,k,j,i) + ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch13_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch13_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + +! PROVIDE three_e_4_idx_exch12_bi_ort_old +! PROVIDE three_e_4_idx_exch12_bi_ort +! +! accu = 0.d0 +! do i = 1, mo_num +! do j = 1, mo_num +! do k = 1, mo_num +! do l = 1, mo_num +! +! new = three_e_4_idx_exch12_bi_ort (l,k,j,i) +! ref = three_e_4_idx_exch12_bi_ort_old(l,k,j,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. thr) then +! print*, ' problem in three_e_4_idx_exch12_bi_ort' +! print*, l, k, j, i +! print*, ref, new, contrib +! stop +! endif +! +! enddo +! enddo +! enddo +! enddo +! print*, ' accu on three_e_4_idx_exch12_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_cycle_1_bi_ort_old + PROVIDE three_e_4_idx_cycle_1_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_cycle_1_bi_ort (l,k,j,i) + ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_cycle_1_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_cycle_1_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + +! PROVIDE three_e_4_idx_cycle_2_bi_ort_old +! PROVIDE three_e_4_idx_cycle_2_bi_ort +! +! accu = 0.d0 +! do i = 1, mo_num +! do j = 1, mo_num +! do k = 1, mo_num +! do l = 1, mo_num +! +! new = three_e_4_idx_cycle_2_bi_ort (l,k,j,i) +! ref = three_e_4_idx_cycle_2_bi_ort_old(l,k,j,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. thr) then +! print*, ' problem in three_e_4_idx_cycle_2_bi_ort' +! print*, l, k, j, i +! print*, ref, new, contrib +! stop +! endif +! +! enddo +! enddo +! enddo +! enddo +! print*, ' accu on three_e_4_idx_cycle_2_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch23_bi_ort_old + PROVIDE three_e_4_idx_exch23_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch23_bi_ort (l,k,j,i) + ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch23_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch23_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + return +end diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f index b34638b8..ae100fb5 100644 --- a/src/bi_ort_ints/three_body_ijm.irp.f +++ b/src/bi_ort_ints/three_body_ijm.irp.f @@ -23,11 +23,11 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, provide mos_r_in_r_array_transp mos_l_in_r_array_transp - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,integral) & - !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,integral) & + !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort) + !$OMP DO SCHEDULE (dynamic) do i = 1, mo_num do j = 1, mo_num do m = j, mo_num @@ -36,8 +36,8 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do i = 1, mo_num do j = 1, mo_num diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 95b57e37..39a31751 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -1,287 +1,467 @@ ! --- -BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_4_idx_direct_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_direct_bi_ort (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_exch13_bi_ort (j,m,k,i) + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_direct_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_direct_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral) - three_e_4_idx_direct_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_direct_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm ! END_DOC implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 + integer :: ipoint, i, j, k, l, m + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:), tmp4(:,:,:,:) + double precision, allocatable :: tmp_4d(:,:,:,:) + double precision, allocatable :: tmp5(:,:,:) + double precision, allocatable :: tmp7(:,:) + double precision, allocatable :: tmp_3d(:,:,:) - three_e_4_idx_cycle_1_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_cycle_1_bi_ort ...' + print *, ' Providing the three_e_4_idx_bi_ort ...' call wall_time(wall0) provide mos_r_in_r_array_transp mos_l_in_r_array_transp - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp4(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2, tmp3, tmp4) + !$OMP DO COLLAPSE(2) do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral) - three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + do l = 1, mo_num + do ipoint = 1, n_points_final_grid - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_cycle_1_bi_ort', wall1 - wall0 - call print_memory_usage() + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) -END_PROVIDER + tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) -! -- + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) -BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_cycle_2_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral) - three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_cycle_2_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch23_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch23_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral) - three_e_4_idx_exch23_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch13_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch13_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch13_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral) - three_e_4_idx_exch13_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch13_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch12_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch12_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch12_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral) - three_e_4_idx_exch12_bi_ort(m,j,k,i) = -1.d0 * integral - enddo + tmp4(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp4(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp4(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) enddo enddo enddo !$OMP END DO !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp4, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_4d(m,i,j,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp2) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3, 3*n_points_final_grid, tmp4, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp3) + deallocate(tmp4) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp7(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, & + !$OMP tmp7) + !$OMP DO + do i = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp7(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + + do m = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp5) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & + , tmp7, n_points_final_grid, tmp5, n_points_final_grid & + , 0.d0, tmp_3d, mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP mos_l_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5) + !$OMP DO COLLAPSE(2) + do k = 1, mo_num + do j = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp5, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & + , 0.d0, tmp_3d, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo + + deallocate(tmp7) + deallocate(tmp_3d) + + + + do i = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (m, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5) + !$OMP DO COLLAPSE(2) + do j = 1, mo_num + do m = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & + , tmp5, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) + + enddo + + deallocate(tmp5) + + +! !$OMP PARALLEL DO PRIVATE(i,j,k,m) +! do i = 1, mo_num +! do k = 1, mo_num +! do j = 1, mo_num +! do m = 1, mo_num +! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort (j,m,k,i) +! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) +! enddo +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO + + call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch12_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_4_idx_bi_ort', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki + ! + END_DOC + + implicit none + integer :: i, j, k, l, m, ipoint + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:) + double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:) + + print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + + allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp6(n_points_final_grid,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5, tmp6) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) & + + int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) & + + int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l) + + tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 & + , tmp5, n_points_final_grid, tmp6, n_points_final_grid & + , 0.d0, three_e_4_idx_exch23_bi_ort, mo_num*mo_num) + + deallocate(tmp5) + deallocate(tmp6) + + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch23_bi_ort(m,j,k,i) = three_e_4_idx_exch23_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmk_old.irp.f b/src/bi_ort_ints/three_body_ijmk_old.irp.f new file mode 100644 index 00000000..1a67f35b --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmk_old.irp.f @@ -0,0 +1,290 @@ + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_direct_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_direct_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_direct_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral) + three_e_4_idx_direct_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_direct_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_cycle_1_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_cycle_1_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_cycle_1_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral) + three_e_4_idx_cycle_1_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_cycle_1_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! -- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_cycle_2_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_cycle_2_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_cycle_2_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral) + three_e_4_idx_cycle_2_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_cycle_2_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch23_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch23_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral) + three_e_4_idx_exch23_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch23_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch13_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch13_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch13_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral) + three_e_4_idx_exch13_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch13_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch12_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral) + three_e_4_idx_exch12_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch12_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index d67e1434..3e4412a3 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -19,17 +19,17 @@ end ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! - END_DOC implicit none - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint + integer :: i, j, k, m, l + integer :: ipoint + double precision :: wall1, wall0 double precision, allocatable :: grad_mli(:,:,:), orb_mat(:,:,:) double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) double precision, allocatable :: tmp_mat(:,:,:,:) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp @@ -38,201 +38,196 @@ end print *, ' Providing the three_e_5_idx_bi_ort ...' call wall_time(wall0) - do m = 1, mo_num + do m = 1, mo_num - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP grad_mli, orb_mat) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & - int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & - int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & - int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP grad_mli, orb_mat) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid - orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) - enddo + orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + + enddo + enddo enddo - enddo + !$OMP END DO + !$OMP END PARALLEL - !$OMP END DO - !$OMP END PARALLEL + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & - orb_mat, n_points_final_grid, & - grad_mli, n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - deallocate(orb_mat,grad_mli) + deallocate(orb_mat,grad_mli) + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) - allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - - rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) - rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) - rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) - - enddo + rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + enddo + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) + !$OMP END DO + !$OMP END PARALLEL + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,j,k) three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = - tmp_mat(k,j,i,l) three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = - tmp_mat(k,i,j,l) three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = - tmp_mat(l,j,i,k) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - - deallocate(lm_grad_ik) - - allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid - - lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - - enddo + !$OMP END PARALLEL DO + + deallocate(lm_grad_ik) + + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + + enddo + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l,i) three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k,i) three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(k,i,l,j) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,j,i,k) three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,i,j,l) three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,j,i,l) three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,i,j,k) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - deallocate(lk_grad_mi) - deallocate(rm_grad_ik) - deallocate(rk_grad_im) + !$OMP END PARALLEL DO + + deallocate(lk_grad_mi) + deallocate(rm_grad_ik) + deallocate(rk_grad_im) enddo + deallocate(tmp_mat) call wall_time(wall1) print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0 diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 882470ed..a2077f0f 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -9,8 +9,7 @@ subroutine provide_all_three_ints_bi_ortho PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort endif if(three_e_4_idx_term)then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif if(.not.double_normal_ord.and.three_e_5_idx_term)then PROVIDE three_e_5_idx_direct_bi_ort diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 7178d6d9..9719a6e7 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -243,7 +243,9 @@ subroutine fock_ac_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree, do j = 1, nb jj = occ(j,other_spin) direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR - exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR + ! TODO + ! use transpose + exchange_int = three_e_4_idx_exch13_bi_ort(iorb,jj,p_fock,h_fock) ! USES 4-IDX TENSOR hthree += direct_int - exchange_int enddo else !! ispin NE to ispin_fock @@ -322,7 +324,8 @@ subroutine fock_a_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,N do j = 1, nb jj = occ(j,other_spin) direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR - exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR + ! TODO use transpose + exchange_int = three_e_4_idx_exch13_bi_ort(iorb,jj,p_fock,h_fock) ! USES 4-IDX TENSOR hthree -= direct_int - exchange_int enddo else !! ispin NE to ispin_fock diff --git a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f index e725d8e5..3180d946 100644 --- a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f +++ b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f @@ -96,9 +96,11 @@ double precision function three_e_single_parrallel_spin(m,j,k,i) implicit none integer, intent(in) :: i,k,j,m three_e_single_parrallel_spin = three_e_4_idx_direct_bi_ort(m,j,k,i) ! direct - three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_2_bi_ort(m,j,k,i) & ! two cyclic permutations + three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_1_bi_ort(j,m,k,i) & ! two cyclic permutations - three_e_4_idx_exch23_bi_ort(m,j,k,i) - three_e_4_idx_exch13_bi_ort(m,j,k,i) & ! two first exchange - - three_e_4_idx_exch12_bi_ort(m,j,k,i) ! last exchange + - three_e_4_idx_exch13_bi_ort(j,m,k,i) ! last exchange + ! TODO + ! use transpose end double precision function three_e_double_parrallel_spin(m,l,j,k,i) From a791a28523b787618d571947452fffbc4e7340c6 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 4 Jun 2023 09:19:34 +0200 Subject: [PATCH 09/18] working on memory footprint --- .../grid_becke_vector.irp.f | 18 +++- src/bi_ort_ints/semi_num_ints_mo.irp.f | 23 ++++-- src/bi_ort_ints/three_body_ijmk.irp.f | 82 +++++++++++-------- src/bi_ortho_mos/bi_ort_mos_in_r.irp.f | 6 +- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 8 +- src/tc_bi_ortho/normal_ordered.irp.f | 6 -- src/tc_bi_ortho/slater_tc_opt.irp.f | 50 +++++++---- 7 files changed, 121 insertions(+), 72 deletions(-) diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index fd185641..8982fe83 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -62,20 +62,30 @@ END_PROVIDER enddo enddo + FREE grid_points_per_atom + FREE final_weight_at_r + END_PROVIDER ! --- BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] - implicit none + BEGIN_DOC -! Transposed final_grid_points + ! Transposed final_grid_points END_DOC + implicit none integer :: i,j - do j=1,3 - do i=1,n_points_final_grid + + do j = 1, 3 + do i = 1, n_points_final_grid final_grid_points_transp(i,j) = final_grid_points(j,i) enddo enddo + END_PROVIDER + +! --- + + diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 771d3274..6354b393 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -124,6 +124,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo + FREE int2_grad1_u12_ao_test + else PROVIDE int2_grad1_u12_ao @@ -153,14 +155,14 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] implicit none - integer :: ipoint + integer :: ipoint double precision :: wall0, wall1 PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_ao_transp - !print *, ' providing int2_grad1_u12_bimo_transp' - !call wall_time(wall0) + print *, ' providing int2_grad1_u12_bimo_transp' + call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -178,8 +180,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL - !call wall_time(wall1) - !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + call wall_time(wall1) + print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -188,7 +191,11 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] implicit none - integer :: i, j, ipoint + integer :: i, j, ipoint + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -205,6 +212,10 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, FREE int2_grad1_u12_bimo_transp + call wall_time(wall1) + print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 39a31751..ee7e88ef 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -30,10 +30,10 @@ implicit none integer :: ipoint, i, j, k, l, m double precision :: wall1, wall0 - double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:), tmp4(:,:,:,:) + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) double precision, allocatable :: tmp_4d(:,:,:,:) - double precision, allocatable :: tmp5(:,:,:) - double precision, allocatable :: tmp7(:,:) + double precision, allocatable :: tmp4(:,:,:) + double precision, allocatable :: tmp5(:,:) double precision, allocatable :: tmp_3d(:,:,:) print *, ' Providing the three_e_4_idx_bi_ort ...' @@ -47,7 +47,6 @@ allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp4(n_points_final_grid,3,mo_num,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -55,7 +54,7 @@ !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1, tmp2, tmp3, tmp4) + !$OMP tmp1, tmp2, tmp3) !$OMP DO COLLAPSE(2) do i = 1, mo_num do l = 1, mo_num @@ -69,13 +68,9 @@ tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) - tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - - tmp4(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp4(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp4(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) enddo enddo enddo @@ -99,7 +94,7 @@ !$OMP END PARALLEL DO call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp4, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp1) @@ -116,8 +111,30 @@ enddo !$OMP END PARALLEL DO + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp2) @@ -135,11 +152,10 @@ !$OMP END PARALLEL DO call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3, 3*n_points_final_grid, tmp4, 3*n_points_final_grid & + , tmp1, 3*n_points_final_grid, tmp3, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp3) - deallocate(tmp4) !$OMP PARALLEL DO PRIVATE(i,j,k,m) do i = 1, mo_num @@ -155,8 +171,6 @@ - allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) - !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, l, ipoint) & @@ -199,26 +213,26 @@ allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp7(n_points_final_grid,mo_num)) + allocate(tmp5(n_points_final_grid,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, ipoint) & !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, & - !$OMP tmp7) + !$OMP final_weight_at_r_vector, & + !$OMP tmp5) !$OMP DO do i = 1, mo_num do ipoint = 1, n_points_final_grid - tmp7(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) enddo enddo !$OMP END DO !$OMP END PARALLEL - allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp4(n_points_final_grid,mo_num,mo_num)) do m = 1, mo_num @@ -227,13 +241,13 @@ !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (mo_num, n_points_final_grid, m, & !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) enddo @@ -243,7 +257,7 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & - , tmp7, n_points_final_grid, tmp5, n_points_final_grid & + , tmp5, n_points_final_grid, tmp4, n_points_final_grid & , 0.d0, tmp_3d, mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k) @@ -264,13 +278,13 @@ !$OMP SHARED (mo_num, n_points_final_grid, m, & !$OMP mos_l_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do k = 1, mo_num do j = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) @@ -281,7 +295,7 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp5, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & + , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & , 0.d0, tmp_3d, mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k) @@ -296,7 +310,7 @@ enddo - deallocate(tmp7) + deallocate(tmp5) deallocate(tmp_3d) @@ -309,13 +323,13 @@ !$OMP SHARED (mo_num, n_points_final_grid, i, & !$OMP mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do j = 1, mo_num do m = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) @@ -326,12 +340,12 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & - , tmp5, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) enddo - deallocate(tmp5) + deallocate(tmp4) ! !$OMP PARALLEL DO PRIVATE(i,j,k,m) diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f index 42130575..8667683e 100644 --- a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f +++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f @@ -46,6 +46,8 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, mos_r_in_r_array_transp(i,j) = mos_r_in_r_array(j,i) enddo enddo + + FREE mos_r_in_r_array END_PROVIDER @@ -116,7 +118,7 @@ end subroutine give_all_mos_l_at_r ! --- -BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo_num)] +BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,mo_num)] BEGIN_DOC ! mos_l_in_r_array_transp(i,j) = value of the jth mo on the ith grid point @@ -130,6 +132,8 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo mos_l_in_r_array_transp(i,j) = mos_l_in_r_array(j,i) enddo enddo + + FREE mos_l_in_r_array END_PROVIDER diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index ed75c882..f9bda058 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -54,11 +54,13 @@ subroutine run_cipsi_tc implicit none - if (.not.is_zmq_slave) then + if (.not. is_zmq_slave) then + PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e - if(elec_alpha_num+elec_beta_num.ge.3)then + + if(elec_alpha_num+elec_beta_num .ge. 3) then if(three_body_h_tc)then - call provide_all_three_ints_bi_ortho + call provide_all_three_ints_bi_ortho() endif endif ! --- diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index cc01d144..f8e310df 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -24,9 +24,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ PROVIDE N_int - print*,' Providing normal_two_body_bi_orth ...' - call wall_time(wall0) - if(read_tc_norm_ord) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") @@ -115,9 +112,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call wall_time(wall1) print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - call wall_time(wall1) - print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index a2077f0f..42c59308 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,24 +1,38 @@ -subroutine provide_all_three_ints_bi_ortho - implicit none - BEGIN_DOC -! routine that provides all necessary three-electron integrals - END_DOC - if(three_body_h_tc)then - if(three_e_3_idx_term)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - endif - if(three_e_4_idx_term)then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort - endif - if(.not.double_normal_ord.and.three_e_5_idx_term)then - PROVIDE three_e_5_idx_direct_bi_ort - elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then - PROVIDE normal_two_body_bi_orth - endif + +! --- + +subroutine provide_all_three_ints_bi_ortho() + + BEGIN_DOC + ! routine that provides all necessary three-electron integrals + END_DOC + + implicit none + + if(three_body_h_tc) then + + if(three_e_3_idx_term) then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + + if(three_e_4_idx_term) then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort + endif + + if(.not. double_normal_ord. and. three_e_5_idx_term) then + PROVIDE three_e_5_idx_direct_bi_ort + elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then + PROVIDE normal_two_body_bi_orth + endif + endif + + return end +! --- + subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC From 501b9d648702c0f5a2ba0f684ef40ce69d0cb6ce Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 4 Jun 2023 09:58:29 +0200 Subject: [PATCH 10/18] minor modifs --- .../grid_becke_vector.irp.f | 21 +++++++++++++--- src/bi_ort_ints/bi_ort_ints.irp.f | 25 ++++++++++++++----- src/bi_ort_ints/semi_num_ints_mo.irp.f | 22 ++++++++-------- src/bi_ortho_mos/bi_ort_mos_in_r.irp.f | 4 --- src/tc_bi_ortho/slater_tc_opt.irp.f | 2 +- 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 8982fe83..0386f3c6 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -1,10 +1,13 @@ BEGIN_PROVIDER [integer, n_points_final_grid] - implicit none + BEGIN_DOC ! Number of points which are non zero END_DOC - integer :: i,j,k,l + + implicit none + integer :: i, j, k, l + n_points_final_grid = 0 do j = 1, nucl_num do i = 1, n_points_radial_grid -1 @@ -16,9 +19,11 @@ BEGIN_PROVIDER [integer, n_points_final_grid] enddo enddo enddo - print*,'n_points_final_grid = ',n_points_final_grid - print*,'n max point = ',n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) + + print*,' n_points_final_grid = ', n_points_final_grid + print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) + END_PROVIDER ! --- @@ -41,6 +46,10 @@ END_PROVIDER implicit none integer :: i, j, k, l, i_count double precision :: r(3) + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing final_grid_points ...' i_count = 0 do j = 1, nucl_num @@ -65,6 +74,10 @@ END_PROVIDER FREE grid_points_per_atom FREE final_weight_at_r + call wall_time(wall1) + print *, ' wall time for final_grid_points,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index bb0424cd..e64892d7 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -1,22 +1,35 @@ +! --- + program bi_ort_ints - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + my_grid_becke = .True. - my_n_pt_r_grid = 10 - my_n_pt_a_grid = 14 - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + !my_n_pt_r_grid = 10 + !my_n_pt_a_grid = 14 + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + ! call test_3e ! call test_5idx ! call test_5idx2 - call test_4idx + !call test_4idx + call test_4idx2() end subroutine test_5idx2 PROVIDE three_e_5_idx_cycle_2_bi_ort end +subroutine test_4idx2() + PROVIDE three_e_4_idx_direct_bi_ort +end + subroutine test_3e implicit none integer :: i,k,j,l,m,n,ipoint diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 6354b393..355fa38f 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -54,7 +54,7 @@ BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_poi enddo enddo -! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu + !FREE mo_v_ki_bi_ortho_erf_rk_cst_mu END_PROVIDER @@ -161,8 +161,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_ao_transp - print *, ' providing int2_grad1_u12_bimo_transp' - call wall_time(wall0) + !print *, ' providing int2_grad1_u12_bimo_transp' + !call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -180,9 +180,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL - call wall_time(wall1) - print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 - call print_memory_usage() + !call wall_time(wall1) + !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + !call print_memory_usage() END_PROVIDER @@ -194,8 +194,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, integer :: i, j, ipoint double precision :: wall0, wall1 - call wall_time(wall0) - print *, ' Providing int2_grad1_u12_bimo_t ...' + !call wall_time(wall0) + !print *, ' Providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -212,9 +212,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, FREE int2_grad1_u12_bimo_transp - call wall_time(wall1) - print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 - call print_memory_usage() + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + !call print_memory_usage() END_PROVIDER diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f index 8667683e..25572854 100644 --- a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f +++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f @@ -47,8 +47,6 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, enddo enddo - FREE mos_r_in_r_array - END_PROVIDER ! --- @@ -133,8 +131,6 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,m enddo enddo - FREE mos_l_in_r_array - END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 42c59308..7acb0d0f 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -20,7 +20,7 @@ subroutine provide_all_three_ints_bi_ortho() PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif - if(.not. double_normal_ord. and. three_e_5_idx_term) then + if(.not. double_normal_ord .and. three_e_5_idx_term) then PROVIDE three_e_5_idx_direct_bi_ort elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then PROVIDE normal_two_body_bi_orth From b984d7a1f4a734ae459a3c91e2ca9ee2ea26bc50 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 4 Jun 2023 15:27:07 +0200 Subject: [PATCH 11/18] minor modif --- src/bi_ort_ints/bi_ort_ints.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index e64892d7..5e465d0f 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -27,7 +27,8 @@ subroutine test_5idx2 end subroutine test_4idx2() - PROVIDE three_e_4_idx_direct_bi_ort + !PROVIDE three_e_4_idx_direct_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort end subroutine test_3e From b48e6b269d624ecfabcbb4895c75bd397646c0d8 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 01:33:55 +0200 Subject: [PATCH 12/18] start optim normal ordering --- src/tc_bi_ortho/normal_ordered.irp.f | 438 ++++++++++++----------- src/tc_bi_ortho/normal_ordered_old.irp.f | 390 ++++++++++++++++++++ 2 files changed, 625 insertions(+), 203 deletions(-) create mode 100644 src/tc_bi_ortho/normal_ordered_old.irp.f diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index f8e310df..a092762b 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -14,7 +14,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ integer :: i, h1, p1, h2, p2 integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) - double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: hthree_aaa, hthree_aab double precision :: wall0, wall1 integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) @@ -39,57 +39,65 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ if(core_tc_op) then do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - normal_two_body_bi_orth = 0.d0 + ! opposite spin double excitations : s1 /= s2 + normal_two_body_bi_orth(:,:,:,:) = no_aba_contraction(:,:,:,:) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) + do pp1 = 1, n_act_orb p1 = list_act(pp1) + do hh2 = 1, n_act_orb h2 = list_act(hh2) + do pp2 = 1, n_act_orb p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - - - ! opposite spin double excitations : s1 /= s2 - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) ! same spin double excitations : s1 == s2 - if(h1h2 - ! same spin double excitations with same spin contributions - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then + if((h1 < h2) .and. (p1 > p2)) then + + ! with opposite spin contributions + call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2 + ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif + if(Ne(2) .ge. 3) then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + + else + + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + + if(Ne(2) .ge. 3) then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + + normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aab + hthree_aaa) enddo enddo enddo @@ -116,178 +124,6 @@ END_PROVIDER ! --- -subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii, i - double precision :: int_direct, int_exc_12, int_exc_13, integral - - !!!! double alpha/beta - hthree = 0.d0 - - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) - enddo - - do ii = Ne(2) + 1, Ne(1) ! purely open-shell part - i = occ(ii,1) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) - enddo - - return -end - -! --- - -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: h1, p1, h2, p2, i - integer :: hh1, hh2, pp1, pp2 - integer :: Ne(2) - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree - - PROVIDE N_int - - allocate( key_i_core(N_int,2) ) - allocate( occ(N_int*bit_kind_size,2) ) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) - else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) - endif - - normal_two_body_bi_orth_ab = 0.d0 - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) - - normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree - enddo - enddo - enddo - enddo - - deallocate( key_i_core ) - deallocate( occ ) - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] - - BEGIN_DOC - ! Normal ordered two-body sector of the three-body terms for same spin double excitations - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i,ii,j,h1,p1,h2,p2 - integer :: hh1,hh2,pp1,pp2 - integer :: Ne(2) - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree_aab, hthree_aaa - - PROVIDE N_int - - allocate( key_i_core(N_int,2) ) - allocate( occ(N_int*bit_kind_size,2) ) - - if(core_tc_op)then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - normal_two_body_bi_orth_aa_bb = 0.d0 - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1 , n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1 , n_act_orb - p2 = list_act(pp2) - if(h1h2 - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - endif - normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa - enddo - enddo - enddo - enddo - - deallocate( key_i_core ) - deallocate( occ ) - -END_PROVIDER - -! --- - subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) BEGIN_DOC @@ -388,3 +224,199 @@ end ! --- +BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + + print*,' Providing no_aba_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , 0.d0, tmp_3d, mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp, n_points_final_grid, tmp2, n_points_final_grid & + , 1.d0, no_aba_contraction(p2,h2,1,1), mo_num*mo_num) + + enddo ! p1 + enddo ! h1 + enddo ! i + + + double precision :: integral, int_direct, int_exc_13, int_exc_12 + + ! TODO + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + enddo + endif + + ! --- + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + + !$OMP PARALLEL DO PRIVATE(h1,h2,p1,p2) + do h1 = 1, mo_num + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (no_aba_contraction(p2,h2,p1,h1) + no_aba_contraction(p1,h1,p2,h2)) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + + diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f new file mode 100644 index 00000000..553cafdb --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -0,0 +1,390 @@ + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordering of the three body interaction on the HF density + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + + integer :: i, h1, p1, h2, p2 + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + + print*,' Providing normal_two_body_bi_orth_old ...' + call wall_time(wall0) + + PROVIDE N_int + + if(read_tc_norm_ord) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="read") + read(11) normal_two_body_bi_orth_old + close(11) + + else + + PROVIDE N_int + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth_old = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth_old) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations + ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant + + + ! opposite spin double excitations : s1 /= s2 + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + + ! same spin double excitations : s1 == s2 + if(h1h2 + ! same spin double excitations with same spin contributions + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + normal_two_body_bi_orth_old(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate( occ ) + deallocate( key_i_core ) + endif + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth_old + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(wall1) + print*,' Wall time for normal_two_body_bi_orth_old ', wall1-wall0 + +END_PROVIDER + +! --- + +subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii, i + double precision :: int_direct, int_exc_12, int_exc_13, integral + + !!!! double alpha/beta + hthree = 0.d0 + + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) + enddo + + do ii = Ne(2) + 1, Ne(1) ! purely open-shell part + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + enddo + + return +end + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: h1, p1, h2, p2, i + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision :: hthree + + PROVIDE N_int + + allocate( key_i_core(N_int,2) ) + allocate( occ(N_int*bit_kind_size,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth_ab = 0.d0 + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) + + normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree + enddo + enddo + enddo + enddo + + deallocate( key_i_core ) + deallocate( occ ) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] + + BEGIN_DOC + ! Normal ordered two-body sector of the three-body terms for same spin double excitations + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i,ii,j,h1,p1,h2,p2 + integer :: hh1,hh2,pp1,pp2 + integer :: Ne(2) + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision :: hthree_aab, hthree_aaa + + PROVIDE N_int + + allocate( key_i_core(N_int,2) ) + allocate( occ(N_int*bit_kind_size,2) ) + + if(core_tc_op)then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + normal_two_body_bi_orth_aa_bb = 0.d0 + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1 , n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1 , n_act_orb + p2 = list_act(pp2) + if(h1h2 + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa + enddo + enddo + enddo + enddo + + deallocate( key_i_core ) + deallocate( occ ) + +END_PROVIDER + +! --- + +subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + BEGIN_DOC + ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii,i + double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 + double precision :: integral,int_exc_l,int_exc_ll + + hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll= -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12= -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13= -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23= -1.d0 * integral + + hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) + enddo + + do ii = Ne(2)+1,Ne(1) ! purely open-shell part + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23 = -1.d0 * integral + + hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + enddo + + return +end + +! --- + +subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii, i + double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 + double precision :: integral, int_exc_l, int_exc_ll + + hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) + int_exc_23= -1.d0 * integral + + hthree += 1.d0 * int_direct - int_exc_23 + enddo + + return +end + +! --- + From 471283634919dd134e294aa71c0bac0a37d4872c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 11:17:08 +0200 Subject: [PATCH 13/18] normal ordering: aba-DGEMM OK --- src/tc_bi_ortho/normal_ordered.irp.f | 495 ++++++++++++++++++------- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 48 ++- 2 files changed, 411 insertions(+), 132 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index a092762b..59e78b92 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -22,8 +22,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing normal_two_body_bi_orth ...' call wall_time(wall0) - PROVIDE N_int - if(read_tc_norm_ord) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") @@ -48,12 +46,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif ! opposite spin double excitations : s1 /= s2 - normal_two_body_bi_orth(:,:,:,:) = no_aba_contraction(:,:,:,:) + PROVIDE no_aba_contraction - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & + !$OMP no_aba_contraction) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) @@ -97,7 +96,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aab + hthree_aaa) + normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + 0.5d0*(hthree_aab + hthree_aaa) enddo enddo enddo @@ -124,103 +123,103 @@ END_PROVIDER ! --- -subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - BEGIN_DOC - ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii,i - double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 - double precision :: integral,int_exc_l,int_exc_ll - - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) - int_exc_l = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) - int_exc_ll= -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12= -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13= -1.d0 * integral - - call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) - int_exc_23= -1.d0 * integral - - hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) - enddo - - do ii = Ne(2)+1,Ne(1) ! purely open-shell part - i = occ(ii,1) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) - int_exc_l = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) - int_exc_ll = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) - int_exc_23 = -1.d0 * integral - - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) - enddo - - return -end +!subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) +! +! BEGIN_DOC +! ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 +! END_DOC +! +! use bitmasks ! you need to include the bitmasks_module.f90 features +! +! implicit none +! integer, intent(in) :: Nint, h1, h2, p1, p2 +! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) +! double precision, intent(out) :: hthree +! integer :: ii,i +! double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 +! double precision :: integral,int_exc_l,int_exc_ll +! +! hthree = 0.d0 +! do ii = 1, Ne(2) ! purely closed shell part +! i = occ(ii,2) +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) +! int_exc_l = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) +! int_exc_ll= -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12= -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13= -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) +! int_exc_23= -1.d0 * integral +! +! hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) +! enddo +! +! do ii = Ne(2)+1,Ne(1) ! purely open-shell part +! i = occ(ii,1) +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) +! int_exc_l = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) +! int_exc_ll = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) +! int_exc_23 = -1.d0 * integral +! +! hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) +! enddo +! +! return +!end ! --- -subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii, i - double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 - double precision :: integral, int_exc_l, int_exc_ll - - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) - int_exc_23= -1.d0 * integral - - hthree += 1.d0 * int_direct - int_exc_23 - enddo - - return -end +!subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) +! +! use bitmasks ! you need to include the bitmasks_module.f90 features +! +! implicit none +! integer, intent(in) :: Nint, h1, h2, p1, p2 +! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) +! double precision, intent(out) :: hthree +! integer :: ii, i +! double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 +! double precision :: integral, int_exc_l, int_exc_ll +! +! hthree = 0.d0 +! do ii = 1, Ne(2) ! purely closed shell part +! i = occ(ii,2) +! +! call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) +! int_exc_23= -1.d0 * integral +! +! hthree += 1.d0 * int_direct - int_exc_23 +! enddo +! +! return +!end ! --- @@ -264,6 +263,10 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ allocate(tmpvec_1(n_points_final_grid,3)) allocate(tmpvec_2(n_points_final_grid,3)) + double precision, allocatable :: tmp_2d(:,:) + allocate(tmp_2d(mo_num,mo_num)) + + ! purely closed shell part do ii = 1, Ne(2) i = occ(ii,2) @@ -313,9 +316,10 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & - , 0.d0, tmp_3d, mo_num) + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num @@ -364,38 +368,163 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp, n_points_final_grid, tmp2, n_points_final_grid & - , 1.d0, no_aba_contraction(p2,h2,1,1), mo_num*mo_num) + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 enddo ! i - double precision :: integral, int_direct, int_exc_13, int_exc_12 - ! TODO + + + + + + ! purely open-shell part if(Ne(2) < Ne(1)) then - do ii = Ne(2) + 1, Ne(1) i = occ(ii,1) - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral + do h1 = 1, mo_num - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) - enddo + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i endif - ! --- + + + + + + + + + + + + + + + deallocate(tmp_3d) deallocate(tmp1, tmp2) @@ -403,17 +532,121 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpvec_1, tmpvec_2) - !$OMP PARALLEL DO PRIVATE(h1,h2,p1,p2) - do h1 = 1, mo_num - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (no_aba_contraction(p2,h2,p1,h1) + no_aba_contraction(p1,h1,p2,h2)) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO + + + + + + + + no_aba_contraction = -0.5d0 * no_aba_contraction + call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (tmp_4d(p2,h2,p1,h1) + tmp_4d(p1,h1,p2,h2)) +! enddo +! enddo +! enddo +! enddo + + + ! --- + + double precision :: integral, int_direct, int_exc_13, int_exc_12 + +! no_aba_contraction = 0.d0 +! +! ! purely closed shell part +! do ii = 1, Ne(2) +! i = occ(ii,1) +! +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & +! !$OMP SHARED (mo_num, i, no_aba_contraction) +! !$OMP DO SCHEDULE (static) +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12 = -1.d0 * integral +! +! !no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! enddo + +! ! purely open-shell part +! if(Ne(2) < Ne(1)) then +! +! do ii = Ne(2) + 1, Ne(1) +! i = occ(ii,1) +! +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & +! !$OMP SHARED (mo_num, i, no_aba_contraction) +! !$OMP DO SCHEDULE (static) +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12 = -1.d0 * integral +! +! no_aba_contraction(p2,h2,p1,h1) += 0.5d0 * int_direct - 0.25d0 * (int_exc_13 + int_exc_12) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! enddo +! endif + + ! --- + +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (h1, h2, p1, p2, integral) & +! !$OMP SHARED (mo_num, N_int,Ne, occ, no_aba_contraction) +! !$OMP DO SCHEDULE (static) +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, integral) +! no_aba_contraction(p2,h2,p1,h1) = 0.5d0 * integral +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL + + END_PROVIDER diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index df86ea65..33b5c5aa 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -11,12 +11,14 @@ program tc_bi_ortho touch read_wf touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call test_h_u0 +! call test_h_u0 ! call test_slater_tc_opt ! call timing_tot ! call timing_diag ! call timing_single ! call timing_double + + call test_no() end subroutine test_h_u0 @@ -252,3 +254,47 @@ subroutine timing_double end +! --- + +subroutine test_no() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing normal_two_body_bi_orth ...' + + thr = 1d-8 + + PROVIDE normal_two_body_bi_orth_old + PROVIDE normal_two_body_bi_orth + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = normal_two_body_bi_orth (l,k,j,i) + ref = normal_two_body_bi_orth_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on normal_two_body_bi_orth' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on normal_two_body_bi_orth = ', accu / dble(mo_num)**4 + + return +end + +! --- + + From 3a5dd05d7eb61dc21b1ec16eba330e3687b54001 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 15:13:18 +0200 Subject: [PATCH 14/18] NO: working on AAB contractions --- src/tc_bi_ortho/normal_ordered.irp.f | 428 +++++++++++---------------- src/utils/util.irp.f | 37 ++- 2 files changed, 217 insertions(+), 248 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 59e78b92..b3c413d3 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -45,14 +45,14 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - ! opposite spin double excitations : s1 /= s2 PROVIDE no_aba_contraction + PROVIDE no_aab_contraction !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & - !$OMP no_aba_contraction) + !$OMP no_aba_contraction,no_aab_contraction) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) @@ -72,9 +72,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ ! same spin double excitations : s1 == s2 if((h1 < h2) .and. (p1 > p2)) then - ! with opposite spin contributions - call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2 - ! same spin double excitations with same spin contributions if(Ne(2) .ge. 3) then call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 @@ -84,9 +81,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2) .ge. 3) then ! same spin double excitations with same spin contributions call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) @@ -96,7 +90,9 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif - normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + 0.5d0*(hthree_aab + hthree_aaa) + normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) & + + no_aab_contraction(p2,h2,p1,h1) & + + 0.5d0 * hthree_aaa enddo enddo enddo @@ -123,106 +119,6 @@ END_PROVIDER ! --- -!subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) -! -! BEGIN_DOC -! ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 -! END_DOC -! -! use bitmasks ! you need to include the bitmasks_module.f90 features -! -! implicit none -! integer, intent(in) :: Nint, h1, h2, p1, p2 -! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) -! double precision, intent(out) :: hthree -! integer :: ii,i -! double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 -! double precision :: integral,int_exc_l,int_exc_ll -! -! hthree = 0.d0 -! do ii = 1, Ne(2) ! purely closed shell part -! i = occ(ii,2) -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) -! int_exc_l = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) -! int_exc_ll= -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12= -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13= -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) -! int_exc_23= -1.d0 * integral -! -! hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) -! enddo -! -! do ii = Ne(2)+1,Ne(1) ! purely open-shell part -! i = occ(ii,1) -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) -! int_exc_l = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) -! int_exc_ll = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) -! int_exc_23 = -1.d0 * integral -! -! hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) -! enddo -! -! return -!end - -! --- - -!subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) -! -! use bitmasks ! you need to include the bitmasks_module.f90 features -! -! implicit none -! integer, intent(in) :: Nint, h1, h2, p1, p2 -! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) -! double precision, intent(out) :: hthree -! integer :: ii, i -! double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 -! double precision :: integral, int_exc_l, int_exc_ll -! -! hthree = 0.d0 -! do ii = 1, Ne(2) ! purely closed shell part -! i = occ(ii,2) -! -! call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) -! int_exc_23= -1.d0 * integral -! -! hthree += 1.d0 * int_direct - int_exc_23 -! enddo -! -! return -!end - -! --- - BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] use bitmasks ! you need to include the bitmasks_module.f90 features @@ -236,6 +132,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ double precision, allocatable :: tmp_3d(:,:,:) double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) print*,' Providing no_aba_contraction ...' call wall_time(wall0) @@ -262,8 +159,6 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ allocate(tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) allocate(tmpvec_2(n_points_final_grid,3)) - - double precision, allocatable :: tmp_2d(:,:) allocate(tmp_2d(mo_num,mo_num)) @@ -386,13 +281,6 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ enddo ! i - - - - - - - ! purely open-shell part if(Ne(2) < Ne(1)) then do ii = Ne(2) + 1, Ne(1) @@ -510,146 +398,192 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ enddo !i endif - - - - - - - - - - - - - - - - deallocate(tmp_3d) deallocate(tmp1, tmp2) deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) - - - - - - - - no_aba_contraction = -0.5d0 * no_aba_contraction call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (tmp_4d(p2,h2,p1,h1) + tmp_4d(p1,h1,p2,h2)) -! enddo -! enddo -! enddo -! enddo - - - ! --- - - double precision :: integral, int_direct, int_exc_13, int_exc_12 - -! no_aba_contraction = 0.d0 -! -! ! purely closed shell part -! do ii = 1, Ne(2) -! i = occ(ii,1) -! -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & -! !$OMP SHARED (mo_num, i, no_aba_contraction) -! !$OMP DO SCHEDULE (static) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12 = -1.d0 * integral -! -! !no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL -! enddo - -! ! purely open-shell part -! if(Ne(2) < Ne(1)) then -! -! do ii = Ne(2) + 1, Ne(1) -! i = occ(ii,1) -! -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & -! !$OMP SHARED (mo_num, i, no_aba_contraction) -! !$OMP DO SCHEDULE (static) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12 = -1.d0 * integral -! -! no_aba_contraction(p2,h2,p1,h1) += 0.5d0 * int_direct - 0.25d0 * (int_exc_13 + int_exc_12) -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL -! enddo -! endif - - ! --- - -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (h1, h2, p1, p2, integral) & -! !$OMP SHARED (mo_num, N_int,Ne, occ, no_aba_contraction) -! !$OMP DO SCHEDULE (static) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, integral) -! no_aba_contraction(p2,h2,p1,h1) = 0.5d0 * integral -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL - - + call wall_time(wall1) + print*,' Wall time for no_aba_contraction', wall1-wall0 END_PROVIDER ! --- +BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpvec_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + no_aab_contraction = 0.5d0 * no_aab_contraction + call sub_A_At(no_aab_contraction(1,1,1,1), mo_num*mo_num) + + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction', wall1-wall0 + + +END_PROVIDER + +! --- diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index aba99c2b..a9f1a438 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -490,7 +490,7 @@ end subroutine check_sym subroutine sum_A_At(A, N) !BEGIN_DOC - ! useful for symmetrizing a tensor without a temporary tensor + ! add a tensor with its transpose without a temporary tensor !END_DOC implicit none @@ -521,3 +521,38 @@ subroutine sum_A_At(A, N) end +! --- + +subroutine sub_A_At(A, N) + + !BEGIN_DOC + ! substruct a tensor with its transpose without a temporary tensor + !END_DOC + + implicit none + integer, intent(in) :: N + double precision, intent(inout) :: A(N,N) + integer :: i, j + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j) & + !$OMP SHARED (A, N) + !$OMP DO + do j = 1, N + do i = j, N + A(i,j) -= A(j,i) + enddo + enddo + !$OMP END DO + + !$OMP DO + do j = 2, N + do i = 1, j-1 + A(i,j) = -A(j,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end From b0da0ac04d49b3fbbbe0eb9649b0b6da87cce6d2 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 16:08:46 +0200 Subject: [PATCH 15/18] normal ordering: aab-DGEMM OK --- src/tc_bi_ortho/normal_ordered.irp.f | 73 +++++++++++++++++++--------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index b3c413d3..3a1e79fd 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -230,11 +230,11 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid @@ -398,7 +398,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ enddo !i endif - deallocate(tmp_3d) + deallocate(tmp_2d, tmp_3d) deallocate(tmp1, tmp2) deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) @@ -446,12 +446,12 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif + allocate(tmp_2d(mo_num,mo_num)) allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) allocate(tmpval_1(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmp_2d(mo_num,mo_num)) ! purely closed shell part @@ -471,10 +471,10 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP tmpval_1, tmpvec_1) !$OMP DO do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) enddo !$OMP END DO !$OMP END PARALLEL @@ -515,17 +515,17 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) enddo !$OMP END DO !$OMP END PARALLEL @@ -567,9 +567,38 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpval_1) deallocate(tmpvec_1) - no_aab_contraction = 0.5d0 * no_aab_contraction - call sub_A_At(no_aab_contraction(1,1,1,1), mo_num*mo_num) + no_aab_contraction = -0.5d0 * no_aab_contraction + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO do h1 = 1, mo_num-1 do h2 = h1+1, mo_num do p1 = 2, mo_num @@ -579,11 +608,11 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ enddo enddo enddo + !$OMP END PARALLEL call wall_time(wall1) print*,' Wall time for no_aab_contraction', wall1-wall0 - END_PROVIDER ! --- From aafca191f1fe271575f80f5d16eb80587290213c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 20:59:34 +0200 Subject: [PATCH 16/18] normal ordering: aaa-DGEMM OK --- src/tc_bi_ortho/normal_ordered.irp.f | 593 +++++++++++++++++++++-- src/tc_bi_ortho/normal_ordered_old.irp.f | 4 +- 2 files changed, 568 insertions(+), 29 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 3a1e79fd..fea229c9 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -47,12 +47,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ PROVIDE no_aba_contraction PROVIDE no_aab_contraction + PROVIDE no_aaa_contraction !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & - !$OMP no_aba_contraction,no_aab_contraction) + !$OMP no_aba_contraction, no_aab_contraction, no_aaa_contraction) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) @@ -66,33 +67,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ do pp2 = 1, n_act_orb p2 = list_act(pp2) - ! all contributions from the 3-e terms to the double excitations - ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - - ! same spin double excitations : s1 == s2 - if((h1 < h2) .and. (p1 > p2)) then - - ! same spin double excitations with same spin contributions - if(Ne(2) .ge. 3) then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - - else - - if(Ne(2) .ge. 3) then - ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - - endif - - normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) & - + no_aab_contraction(p2,h2,p1,h1) & - + 0.5d0 * hthree_aaa + normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + no_aab_contraction(p2,h2,p1,h1) + no_aaa_contraction(p2,h2,p1,h1) enddo enddo enddo @@ -616,3 +591,565 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ END_PROVIDER ! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! + ! if: + ! h1 < h2 + ! p1 > p2 + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! else: + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! + ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) + ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > + ! + < p2 p1 i | i h2 h1 > + ! + < p1 i p2 | i h2 h1 > ] + ! + ! + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2, tmp3) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + + no_aaa_contraction = -0.5d0 * no_aaa_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction', wall1-wall0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f index 553cafdb..417580dd 100644 --- a/src/tc_bi_ortho/normal_ordered_old.irp.f +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -89,6 +89,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, hthree_aaa = 0.d0 endif endif + normal_two_body_bi_orth_old(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) enddo enddo @@ -350,7 +351,8 @@ subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) int_exc_23 = -1.d0 * integral - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + !hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + hthree += 0.5d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) enddo return From 374a88bc624396370660182f6da3d876934b35b9 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 8 Jun 2023 15:51:52 +0200 Subject: [PATCH 17/18] normal ordering with DGEMM: OK --- src/tc_bi_ortho/normal_ordered.irp.f | 1230 ++++++++--------- .../normal_ordered_contractions.irp.f | 1062 ++++++++++++++ 2 files changed, 1615 insertions(+), 677 deletions(-) create mode 100644 src/tc_bi_ortho/normal_ordered_contractions.irp.f diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index fea229c9..7259c270 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -11,16 +11,15 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ implicit none - integer :: i, h1, p1, h2, p2 + integer :: i, ii, h1, p1, h2, p2, ipoint integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) - double precision :: hthree_aaa, hthree_aab - double precision :: wall0, wall1 + double precision :: wall0, wall1, walli, wallf integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) print*,' Providing normal_two_body_bi_orth ...' - call wall_time(wall0) + call wall_time(walli) if(read_tc_norm_ord) then @@ -30,6 +29,11 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ else + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + double precision, allocatable :: tmp(:,:,:,:) + PROVIDE N_int allocate( occ(N_int*bit_kind_size,2) ) @@ -45,224 +49,33 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - PROVIDE no_aba_contraction - PROVIDE no_aab_contraction - PROVIDE no_aaa_contraction + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & - !$OMP no_aba_contraction, no_aab_contraction, no_aaa_contraction) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb - h1 = list_act(hh1) + ! --- + ! aba contraction - do pp1 = 1, n_act_orb - p1 = list_act(pp1) + print*,' Providing aba_contraction ...' + call wall_time(wall0) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) + tmp = 0.d0 - do pp2 = 1, n_act_orb - p2 = list_act(pp2) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) - normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + no_aab_contraction(p2,h2,p1,h1) + no_aaa_contraction(p2,h2,p1,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - deallocate( occ ) - deallocate( key_i_core ) - endif - - if(write_tc_norm_ord.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") - call ezfio_set_work_empty(.False.) - write(11) normal_two_body_bi_orth - close(11) - call ezfio_set_tc_keywords_io_tc_integ('Read') - endif - - call wall_time(wall1) - print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: Ne(2) - double precision :: wall0, wall1 - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision, allocatable :: tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) - double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) - double precision, allocatable :: tmp_2d(:,:) - - print*,' Providing no_aba_contraction ...' - call wall_time(wall0) - - PROVIDE N_int - - allocate(occ(N_int*bit_kind_size,2)) - allocate(key_i_core(N_int,2)) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpval_2(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmpvec_2(n_points_final_grid,3)) - allocate(tmp_2d(mo_num,mo_num)) - - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) ! to avoid tmp(N^4) - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo ! i - - - ! purely open-shell part - if(Ne(2) < Ne(1)) then - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) - do h1 = 1, mo_num + ! to minimize the number of operations !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint) & @@ -304,29 +117,30 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo !$OMP END PARALLEL DO + ! to avoid tmp(N^4) do p1 = 1, mo_num ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid @@ -355,313 +169,171 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 - enddo !i - endif + enddo ! i - deallocate(tmp_2d, tmp_3d) - deallocate(tmp1, tmp2) - deallocate(tmpval_1, tmpval_2) - deallocate(tmpvec_1, tmpvec_2) + ! purely open-shell part + if(Ne(2) < Ne(1)) then + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) - no_aba_contraction = -0.5d0 * no_aba_contraction - call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + do h1 = 1, mo_num - call wall_time(wall1) - print*,' Wall time for no_aba_contraction', wall1-wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: Ne(2) - double precision :: wall0, wall1 - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision, allocatable :: tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) - double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) - double precision, allocatable :: tmp_2d(:,:) - - print*,' Providing no_aab_contraction ...' - call wall_time(wall0) - - PROVIDE N_int - - allocate(occ(N_int*bit_kind_size,2)) - allocate(key_i_core(N_int,2)) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - allocate(tmp_2d(mo_num,mo_num)) - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpvec_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpvec_1, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END DO + !$OMP END PARALLEL - enddo ! p1 - enddo ! h1 - enddo ! i + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - deallocate(tmp_3d) - deallocate(tmp1, tmp2) - deallocate(tmpval_1) - deallocate(tmpvec_1) + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO - no_aab_contraction = -0.5d0 * no_aab_contraction + do p1 = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aab_contraction, mo_num) + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 1, mo_num - do p2 = p1, mo_num - no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) - !$OMP DO - do h1 = 1, mo_num-1 - do h2 = h1+1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aab_contraction(p2,h2,p1,h1) *= -1.d0 - enddo - enddo - enddo - enddo - !$OMP END PARALLEL + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO - call wall_time(wall1) - print*,' Wall time for no_aab_contraction', wall1-wall0 + enddo ! p1 + enddo ! h1 + enddo !i + endif -END_PROVIDER + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmp_2d) -! --- + tmp = -0.5d0 * tmp + call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) -BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + call wall_time(wall1) + print*,' Wall time for aba_contraction', wall1-wall0 - BEGIN_DOC - ! - ! if: - ! h1 < h2 - ! p1 > p2 - ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] - ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] - ! - ! else: - ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] - ! - ! - ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) - ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > - ! + < p2 p1 i | i h2 h1 > - ! + < p1 i p2 | i h2 h1 > ] - ! - ! - END_DOC + normal_two_body_bi_orth = tmp - use bitmasks ! you need to include the bitmasks_module.f90 features + ! --- + ! aab contraction - implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: Ne(2) - double precision :: wall0, wall1 - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) - double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + print*,' Providing aab_contraction ...' + call wall_time(wall0) - print*,' Providing no_aaa_contraction ...' - call wall_time(wall0) - - PROVIDE N_int - - allocate(occ(N_int*bit_kind_size,2)) - allocate(key_i_core(N_int,2)) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - if(Ne(2) .lt. 3) then - - no_aaa_contraction = 0.d0 - - else + tmp = 0.d0 allocate(tmp_2d(mo_num,mo_num)) allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num)) allocate(tmpval_1(n_points_final_grid)) - allocate(tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmpvec_2(n_points_final_grid,3)) - allocate(tmpvec_3(n_points_final_grid,3)) ! purely closed shell part do ii = 1, Ne(2) @@ -677,21 +349,13 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP SHARED (n_points_final_grid, i, h1, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP tmpval_1, tmpvec_1) !$OMP DO do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo !$OMP END DO !$OMP END PARALLEL @@ -722,39 +386,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_2, tmpvec_2, tmp1) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -763,58 +395,32 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ ! to avoid tmp(N^4) do p1 = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - - tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) enddo !$OMP END DO !$OMP END PARALLEL - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, & - !$OMP mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) !$OMP DO do h2 = 1, mo_num do ipoint = 1, n_points_final_grid - - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) - + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo !$OMP END DO @@ -828,47 +434,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -877,14 +443,85 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ enddo ! h1 enddo ! i + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + tmp = -0.5d0 * tmp - ! purely open-shell part - if(Ne(2) < Ne(1)) then + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aab_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + + ! --- + ! aaa contraction + + if(Ne(2) .ge. 3) then + + print*,' Providing aaa_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) ! to avoid tmp(N^4) do h1 = 1, mo_num @@ -932,16 +569,16 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -964,16 +601,16 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) enddo enddo enddo @@ -1039,15 +676,15 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -1074,82 +711,321 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & , 1.d0, tmp_2d(1,1), mo_num) !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 - enddo !i - endif + enddo ! i - deallocate(tmp_2d, tmp_3d) - deallocate(tmp1, tmp2, tmp3) - deallocate(tmpval_1, tmpval_2) - deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + ! purely open-shell part + if(Ne(2) < Ne(1)) then - no_aaa_contraction = -0.5d0 * no_aaa_contraction + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aaa_contraction, mo_num) + ! to avoid tmp(N^4) + do h1 = 1, mo_num - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 1, mo_num - do p2 = p1, mo_num - no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO - !$OMP DO - do h1 = 1, mo_num-1 - do h2 = h1+1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL + !$OMP END PARALLEL + call wall_time(wallf) + print*,' Wall time for aaa_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + endif ! Ne(2) .ge. 3 + + deallocate(tmp) + + endif ! read_tc_norm_ord + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif - call wall_time(wall1) - print*,' Wall time for no_aaa_contraction', wall1-wall0 + call wall_time(wallf) + print*,' Wall time for normal_two_body_bi_orth ', wallf-walli -END_PROVIDER +END_PROVIDER ! --- + diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f new file mode 100644 index 00000000..855cfd17 --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -0,0 +1,1062 @@ + +! --- + +BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aba_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + no_aba_contraction = -0.5d0 * no_aba_contraction + call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for no_aba_contraction', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpvec_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + no_aab_contraction = -0.5d0 * no_aab_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! + ! if: + ! h1 < h2 + ! p1 > p2 + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! else: + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! + ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) + ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > + ! + < p2 p1 i | i h2 h1 > + ! + < p1 i p2 | i h2 h1 > ] + ! + ! + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2, tmp3) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + + no_aaa_contraction = -0.5d0 * no_aaa_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction', wall1-wall0 + +END_PROVIDER + +! --- From ee06ddf85e2b3fc83faa25515e80b262a2932aa7 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 8 Jun 2023 15:59:14 +0200 Subject: [PATCH 18/18] free two (3xN_gridxMOxMO) tables in TC-CIPSI --- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index f9bda058..3e6f229b 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -63,7 +63,9 @@ subroutine run_cipsi_tc call provide_all_three_ints_bi_ortho() endif endif - ! --- + + FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp + write(json_unit,json_array_open_fmt) 'fci_tc' if (do_pt2) then @@ -78,13 +80,16 @@ subroutine run_cipsi_tc call json_close else + PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks + if(elec_alpha_num+elec_beta_num.ge.3)then if(three_body_h_tc)then call provide_all_three_ints_bi_ortho endif endif - ! --- + + FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp call run_slave_cipsi