From eaab1b80648bd2b4ec38aa17722a751fccf42ce0 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 7 Mar 2024 07:34:59 +0100 Subject: [PATCH 01/10] few modif for HHG --- .../multi_s_dipole_moment.irp.f | 23 ++++++++++++++++ src/utils/constants.include.F | 27 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f index 913ae2f3..f21e08cd 100644 --- a/src/mol_properties/multi_s_dipole_moment.irp.f +++ b/src/mol_properties/multi_s_dipole_moment.irp.f @@ -91,3 +91,26 @@ BEGIN_PROVIDER [double precision, multi_s_dipole_moment, (N_states, N_states)] enddo END_PROVIDER + +! --- + + BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment_eigenvec, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment_eigenvec, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment_eigenvec, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment_eigenval, (N_states)] +&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment_eigenval, (N_states)] +&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment_eigenval, (N_states)] + + implicit none + + PROVIDE multi_s_x_dipole_moment multi_s_y_dipole_moment multi_s_z_dipole_moment + + call lapack_diag(multi_s_x_dipole_moment_eigenval(1), multi_s_x_dipole_moment_eigenvec(1,1), multi_s_x_dipole_moment(1,1), N_states, N_states) + call lapack_diag(multi_s_y_dipole_moment_eigenval(1), multi_s_y_dipole_moment_eigenvec(1,1), multi_s_y_dipole_moment(1,1), N_states, N_states) + call lapack_diag(multi_s_z_dipole_moment_eigenval(1), multi_s_z_dipole_moment_eigenvec(1,1), multi_s_z_dipole_moment(1,1), N_states, N_states) + +END_PROVIDER + +! --- + + diff --git a/src/utils/constants.include.F b/src/utils/constants.include.F index 422eff95..7b01f888 100644 --- a/src/utils/constants.include.F +++ b/src/utils/constants.include.F @@ -18,3 +18,30 @@ double precision, parameter :: c_4_3 = 4.d0/3.d0 double precision, parameter :: c_1_3 = 1.d0/3.d0 double precision, parameter :: sq_op5 = dsqrt(0.5d0) double precision, parameter :: dlog_2pi = dlog(2.d0*dacos(-1.d0)) + +! physical constants and units conversion factors +double precision, parameter :: k_boltzman_si = 1.38066d-23 ! K k^-1 +double precision, parameter :: k_boltzman_au = 3.1667d-6 ! Hartree k^-1 +double precision, parameter :: k_boltzman_m1_au = 315795.26d0 ! Hartree^-1 k +double precision, parameter :: bohr_radius_si = 0.529177d-10 ! m +double precision, parameter :: bohr_radius_cm = 0.529177d-8 ! cm +double precision, parameter :: bohr_radius_angs = 0.529177d0 ! Angstrom +double precision, parameter :: electronmass_si = 9.10953d-31 ! Kg +double precision, parameter :: electronmass_uma = 5.4858d-4 ! uma +double precision, parameter :: electronvolt_si = 1.6021892d-19 ! J +double precision, parameter :: uma_si = 1.66057d-27 ! Kg +double precision, parameter :: debye_si = 3.33564d-30 ! coulomb meter +double precision, parameter :: debye_au = 0.393427228d0 ! e * Bohr +double precision, parameter :: angstrom_to_au = 1.889727d0 ! au +double precision, parameter :: au_to_ohmcmm1 = 46000.0d0 ! (ohm cm)^-1 +double precision, parameter :: au_to_kb = 294210.0d0 ! kbar +double precision, parameter :: au_to_eV = 27.211652d0 +double precision, parameter :: uma_to_au = 1822.89d0 +double precision, parameter :: au_to_terahertz = 2.4189d-5 +double precision, parameter :: au_to_sec = 2.4189d-17 +double precision, parameter :: au_to_fsec = 2.4189d-2 +double precision, parameter :: Wcm2 = 3.5d16 +double precision, parameter :: amconv = 1.66042d-24/9.1095d-28*0.5d0 ! mass conversion: a.m.u to a.u. (ry) +double precision, parameter :: uakbar = 147105.d0 ! pressure conversion from ry/(a.u)^3 to k + + From d405aea95785060f7550be7901c90d133b287a65 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Mon, 11 Mar 2024 10:21:59 +0100 Subject: [PATCH 02/10] few mom opt --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 99 +++++++++++++++---- .../local/non_h_ints_mu/total_tc_int.irp.f | 2 + src/tools/print_detweights.irp.f | 66 +++++++++++++ 3 files changed, 148 insertions(+), 19 deletions(-) create mode 100644 src/tools/print_detweights.irp.f diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 5e6a24e9..bf5cc36f 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -40,38 +40,95 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, END_DOC implicit none - integer :: i, j, k, l, m, n, p, q + integer :: i, j, k, l, m, n, p, q, s, r + double precision :: t1, t2 double precision, allocatable :: a1(:,:,:,:), a2(:,:,:,:) + double precision, allocatable :: a_jkp(:,:,:), a_kpq(:,:,:), a_pqr(:,:,:) + + print *, ' PROVIDING mo_bi_ortho_tc_two_e_chemist ...' + call wall_time(t1) + call print_memory_usage() PROVIDE mo_r_coef mo_l_coef + PROVIDe ao_two_e_tc_tot - allocate(a2(ao_num,ao_num,ao_num,mo_num)) + if(ao_to_mo_tc_n3) then - call dgemm( 'T', 'N', ao_num*ao_num*ao_num, mo_num, ao_num, 1.d0 & - , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & - , 0.d0 , a2(1,1,1,1), ao_num*ao_num*ao_num) + print*, ' memory scale of TC ao -> mo: O(N3) ' - allocate(a1(ao_num,ao_num,mo_num,mo_num)) + allocate(a_jkp(ao_num,ao_num,mo_num)) + allocate(a_kpq(ao_num,mo_num,mo_num)) + allocate(a_pqr(mo_num,mo_num,mo_num)) - call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 & - , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & - , 0.d0, a1(1,1,1,1), ao_num*ao_num*mo_num) + do s = 1, mo_num + mo_bi_ortho_tc_two_e_chemist(:,:,:,s) = 0.d0 - deallocate(a2) - allocate(a2(ao_num,mo_num,mo_num,mo_num)) + do l = 1, ao_num - call dgemm( 'T', 'N', ao_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & - , a1(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & - , 0.d0, a2(1,1,1,1), ao_num*mo_num*mo_num) + call dgemm( 'T', 'N', ao_num*ao_num, mo_num, ao_num, 1.d0 & + , ao_two_e_tc_tot(1,1,1,l), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a_jkp(1,1,1), ao_num*ao_num) + + call dgemm( 'T', 'N', ao_num*mo_num, mo_num, ao_num, 1.d0 & + , a_jkp(1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, a_kpq(1,1,1), ao_num*mo_num) + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a_kpq(1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a_pqr(1,1,1), mo_num*mo_num) - deallocate(a1) + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, q, r) & + !$OMP SHARED(s, l, mo_num, mo_bi_ortho_tc_two_e_chemist, mo_r_coef, a_pqr) + !$OMP DO COLLAPSE(2) + do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + mo_bi_ortho_tc_two_e_chemist(p,q,r,s) = mo_bi_ortho_tc_two_e_chemist(p,q,r,s) + mo_r_coef(l,s) * a_pqr(p,q,r) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & - , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & - , 0.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,1), mo_num*mo_num*mo_num) + enddo ! l + enddo ! s - deallocate(a2) + deallocate(a_jkp, a_kpq, a_pqr) + else + + print*, ' memory scale of TC ao -> mo: O(N4) ' + + allocate(a2(ao_num,ao_num,ao_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*ao_num*ao_num, mo_num, ao_num, 1.d0 & + , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a2(1,1,1,1), ao_num*ao_num*ao_num) + + allocate(a1(ao_num,ao_num,mo_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 & + , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, a1(1,1,1,1), ao_num*ao_num*mo_num) + + deallocate(a2) + allocate(a2(ao_num,mo_num,mo_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a1(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a2(1,1,1,1), ao_num*mo_num*mo_num) + + deallocate(a1) + + call dgemm( 'T', 'N', mo_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,1), mo_num*mo_num*mo_num) + + deallocate(a2) + + endif !allocate(a1(mo_num,ao_num,ao_num,ao_num)) !a1 = 0.d0 @@ -135,6 +192,10 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, !enddo !deallocate(a1) + call wall_time(t2) + print *, ' WALL TIME for PROVIDING mo_bi_ortho_tc_two_e_chemist (min)', (t2-t1)/60.d0 + call print_memory_usage() + END_PROVIDER ! --- diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index 9d3cf565..ba078d9b 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -201,6 +201,8 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP END DO !$OMP END PARALLEL + call clear_ao_map() + if(tc_integ_type .eq. "numeric") then FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num endif diff --git a/src/tools/print_detweights.irp.f b/src/tools/print_detweights.irp.f new file mode 100644 index 00000000..d5b0f2c9 --- /dev/null +++ b/src/tools/print_detweights.irp.f @@ -0,0 +1,66 @@ +program print_detweights + + implicit none + + read_wf = .True. + touch read_wf + + call main() + +end + +! --- + +subroutine main() + + implicit none + integer :: i + integer :: degree + integer :: ios + integer, allocatable :: deg(:), ii(:), deg_sorted(:) + double precision, allocatable :: c(:) + + PROVIDE N_int + PROVIDE N_det + PROVIDE psi_det + PROVIDe psi_coef + + allocate(deg(N_det), ii(N_det), deg_sorted(N_det), c(N_det)) + + do i = 1, N_det + + call debug_det(psi_det(1,1,i), N_int) + call get_excitation_degree(psi_det(1,1,i), psi_det(1,1,1), degree, N_int) + + ii (i) = i + deg(i) = degree + c (i) = dabs(psi_coef(i,1)) + enddo + + call dsort(c, ii, N_det) + + do i = 1, N_det + deg_sorted(i) = deg(ii(i)) + enddo + + print *, ' saving psi' + + ! Writing output in binary format + open(unit=10, file="coef.bin", status="replace", action="write", iostat=ios, form="unformatted") + + if(ios /= 0) then + print *, ' Error opening file!' + stop + endif + + write(10) N_det + write(10) deg_sorted + write(10) c + + close(10) + + deallocate(deg, ii, deg_sorted, c) + +end + + From 1dbde5643920054cc16f148c50d84fed01a88b13 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Wed, 13 Mar 2024 07:04:54 +0100 Subject: [PATCH 03/10] O(N4) -> O(N3) transformations --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 19 ++++++++++++++++--- plugins/local/tc_keywords/EZFIO.cfg | 7 +++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index bf5cc36f..79bfd336 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -41,7 +41,7 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, implicit none integer :: i, j, k, l, m, n, p, q, s, r - double precision :: t1, t2 + double precision :: t1, t2, tt1, tt2 double precision, allocatable :: a1(:,:,:,:), a2(:,:,:,:) double precision, allocatable :: a_jkp(:,:,:), a_kpq(:,:,:), a_pqr(:,:,:) @@ -60,9 +60,11 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, allocate(a_kpq(ao_num,mo_num,mo_num)) allocate(a_pqr(mo_num,mo_num,mo_num)) - do s = 1, mo_num - mo_bi_ortho_tc_two_e_chemist(:,:,:,s) = 0.d0 + call wall_time(tt1) + do s = 1, mo_num + + mo_bi_ortho_tc_two_e_chemist(:,:,:,s) = 0.d0 do l = 1, ao_num call dgemm( 'T', 'N', ao_num*ao_num, mo_num, ao_num, 1.d0 & @@ -93,6 +95,17 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, !$OMP END PARALLEL enddo ! l + + if(s == 2) then + call wall_time(tt2) + print*, ' 1 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-1)*(tt2-tt1)/60.d0 + elseif(s == 11) then + call wall_time(tt2) + print*, ' 10 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-10)*(tt2-tt1)/600.d0 + endif + enddo ! s deallocate(a_jkp, a_kpq, a_pqr) diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index 93ff790f..a8491660 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -280,3 +280,10 @@ doc: approach used to evaluate TC integrals [ analytic | numeric | semi-analytic interface: ezfio,ocaml,provider default: semi-analytic +[ao_to_mo_tc_n3] +type: logical +doc: If |true|, memory scale of TC ao -> mo: O(N3) +interface: ezfio,provider,ocaml +default: False + + From 83ed57312d9bc86dc2ec4cbc486491ded16d7053 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 19 Mar 2024 17:23:41 +0100 Subject: [PATCH 04/10] few modif in ao tc integ --- .../local/bi_ort_ints/total_twoe_pot.irp.f | 25 +++++++++++++++++-- .../local/non_h_ints_mu/tc_integ_num.irp.f | 2 +- .../local/non_h_ints_mu/total_tc_int.irp.f | 3 ++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 79bfd336..1e558038 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -50,7 +50,7 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, call print_memory_usage() PROVIDE mo_r_coef mo_l_coef - PROVIDe ao_two_e_tc_tot + PROVIDE ao_two_e_tc_tot if(ao_to_mo_tc_n3) then @@ -103,9 +103,30 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, elseif(s == 11) then call wall_time(tt2) print*, ' 10 / mo_num done in (min)', (tt2-tt1)/60.d0 - print*, ' estimated time required (min)', dble(mo_num-10)*(tt2-tt1)/600.d0 + print*, ' estimated time required (min)', dble(mo_num-10)*(tt2-tt1)/(60.d0*10.d0) + elseif(s == 26) then + call wall_time(tt2) + print*, ' 25 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-25)*(tt2-tt1)/(60.d0*25.d0) + elseif(s == 51) then + call wall_time(tt2) + print*, ' 50 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-50)*(tt2-tt1)/(60.d0*50.d0) + elseif(s == 101) then + call wall_time(tt2) + print*, ' 100 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-100)*(tt2-tt1)/(60.d0*100.d0) + elseif(s == 201) then + call wall_time(tt2) + print*, ' 200 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-200)*(tt2-tt1)/(60.d0*200.d0) + elseif(s == 501) then + call wall_time(tt2) + print*, ' 500 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-500)*(tt2-tt1)/(60.d0*500.d0) endif + enddo ! s deallocate(a_jkp, a_kpq, a_pqr) diff --git a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f index e5d75c3d..6d446037 100644 --- a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f +++ b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f @@ -131,7 +131,7 @@ deallocate(tmp) call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num =', time1-time0 + print*, ' wall time for int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num = (min)', (time1-time0) / 60.d0 call print_memory_usage() END_PROVIDER diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index ba078d9b..c7230dc3 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -201,7 +201,8 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP END DO !$OMP END PARALLEL - call clear_ao_map() + !call clear_ao_map() + FREE ao_integrals_map if(tc_integ_type .eq. "numeric") then FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num From d872d60e70f8eedb3913f5566d4f35d198d4aad5 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 7 Apr 2024 00:29:40 +0200 Subject: [PATCH 05/10] saving olympe2 modif --- plugins/local/bi_ortho_mos/overlap.irp.f | 8 +- .../lapack_diag_non_hermit.irp.f | 1 + plugins/local/tc_scf/minimize_tc_angles.irp.f | 2 +- plugins/local/tc_scf/routines_rotates.irp.f | 79 ++++++++++--------- src/tools/print_detweights.irp.f | 35 +++++++- src/utils/block_diag_degen.irp.f | 2 +- 6 files changed, 81 insertions(+), 46 deletions(-) diff --git a/plugins/local/bi_ortho_mos/overlap.irp.f b/plugins/local/bi_ortho_mos/overlap.irp.f index ff5d5c84..7f07929f 100644 --- a/plugins/local/bi_ortho_mos/overlap.irp.f +++ b/plugins/local/bi_ortho_mos/overlap.irp.f @@ -56,10 +56,10 @@ print*,'Average trace of overlap_bi_ortho is different from 1 by ', dabs(accu_d-1.d0) print*,'And bi orthogonality is off by an average of ',accu_nd print*,'****************' - print*,'Overlap matrix betwee mo_l_coef and mo_r_coef ' - do i = 1, mo_num - write(*,'(100(F16.10,X))')overlap_bi_ortho(i,:) - enddo + !print*,'Overlap matrix betwee mo_l_coef and mo_r_coef ' + !do i = 1, mo_num + ! write(*,'(100(F16.10,X))')overlap_bi_ortho(i,:) + !enddo endif print*,'Average trace of overlap_bi_ortho (should be 1.)' print*,'accu_d = ',accu_d diff --git a/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f b/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f index cb38347e..4d4bc047 100644 --- a/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f +++ b/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f @@ -2144,6 +2144,7 @@ subroutine impose_biorthog_degen_eigvec(n, deg_num, e0, L0, R0) enddo !print*,' accu_nd after = ', accu_nd if(accu_nd .gt. 1d-12) then + print*, ' accu_nd =', accu_nd print*, ' your strategy for degenerates orbitals failed !' print*, m, 'deg on', i stop diff --git a/plugins/local/tc_scf/minimize_tc_angles.irp.f b/plugins/local/tc_scf/minimize_tc_angles.irp.f index c7752930..e5f6cf87 100644 --- a/plugins/local/tc_scf/minimize_tc_angles.irp.f +++ b/plugins/local/tc_scf/minimize_tc_angles.irp.f @@ -20,7 +20,7 @@ program minimize_tc_angles ! TODO ! check if rotations of orbitals affect the TC energy ! and refuse the step - call minimize_tc_orb_angles + call minimize_tc_orb_angles() end diff --git a/plugins/local/tc_scf/routines_rotates.irp.f b/plugins/local/tc_scf/routines_rotates.irp.f index c42e846e..2c5510f2 100644 --- a/plugins/local/tc_scf/routines_rotates.irp.f +++ b/plugins/local/tc_scf/routines_rotates.irp.f @@ -40,9 +40,6 @@ subroutine LTxSxR(n, m, L, S, R, C) end subroutine LTxR -! --- - - ! --- subroutine minimize_tc_orb_angles() @@ -103,7 +100,10 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:) double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:) - E_thr = 1d-04 + PROVIDE TC_HF_energy + PROVIDE mo_r_coef mo_l_coef + + E_thr = 1d-07 E_old = TC_HF_energy allocate(mo_l_coef_old(ao_num,mo_num), mo_r_coef_old(ao_num,mo_num)) mo_r_coef_old = mo_r_coef @@ -111,7 +111,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) good_angles = .False. - allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num)) + allocate(mo_l_coef_good(ao_num,mo_num), mo_r_coef_good(ao_num,mo_num)) print *, ' ***************************************' print *, ' ***************************************' @@ -123,7 +123,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) mo_r_coef_good = mo_r_coef mo_l_coef_good = mo_l_coef - allocate(mo_r_coef_new(ao_num, mo_num)) + allocate(mo_r_coef_new(ao_num,mo_num)) mo_r_coef_new = mo_r_coef do i = 1, mo_num norm = 1.d0/dsqrt(overlap_mo_r(i,i)) @@ -141,10 +141,11 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat) ! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) if(n_core_orb.ne.0)then - call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list) + call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list) else - call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) endif + print *, ' fock_matrix_mo' do i = 1, mo_num print *, i, fock_diag(i), angle_left_right(i) @@ -156,50 +157,52 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! n_degen = ilast - ifirst +1 n_degen = list_degen(i,0) - if(n_degen .ge. 1000)n_degen = 1 ! convention for core orbitals + if(n_degen .ge. 1000) n_degen = 1 ! convention for core orbitals if(n_degen .eq. 1) cycle + print*, ' working on orbital', i + print*, ' multiplicity =', n_degen allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen)) allocate(mo_r_coef_tmp(ao_num,n_degen), mo_l_coef_tmp(ao_num,n_degen), mo_l_coef_new(ao_num,n_degen)) allocate(T(n_degen,n_degen), Snew(n_degen,n_degen)) print*,'Right orbitals before' - do j = 1, n_degen - write(*,'(100(F16.10,X))') mo_r_coef_new(1:ao_num,list_degen(i,j)) - enddo + do j = 1, n_degen + write(*,'(1000(F16.10,X))') mo_r_coef_new(1:ao_num,list_degen(i,j)) + enddo print*,'Left orbitals before' - do j = 1, n_degen - write(*,'(100(F16.10,X))')mo_l_coef(1:ao_num,list_degen(i,j)) - enddo + do j = 1, n_degen + write(*,'(1000(F16.10,X))') mo_l_coef(1:ao_num,list_degen(i,j)) + enddo if(angle_left_right(list_degen(i,1)).gt.80.d0.and.n_degen==2)then - integer :: i_list, j_list - i_list = list_degen(i,1) - j_list = list_degen(i,2) - print*,'Huge angle !!! == ',angle_left_right(list_degen(i,1)),angle_left_right(list_degen(i,2)) - print*,'i_list = ',i_list - print*,'i_list = ',j_list - print*,'Swapping left/right orbitals' - call print_strong_overlap(i_list, j_list) - mo_r_coef_tmp(1:ao_num,1) = mo_r_coef_new(1:ao_num,i_list) - mo_r_coef_tmp(1:ao_num,2) = mo_l_coef(1:ao_num,i_list) - mo_l_coef_tmp(1:ao_num,1) = mo_l_coef(1:ao_num,j_list) - mo_l_coef_tmp(1:ao_num,2) = mo_r_coef_new(1:ao_num,j_list) + integer :: i_list, j_list + i_list = list_degen(i,1) + j_list = list_degen(i,2) + print*,'Huge angle !!! == ',angle_left_right(list_degen(i,1)),angle_left_right(list_degen(i,2)) + print*,'i_list = ',i_list + print*,'i_list = ',j_list + print*,'Swapping left/right orbitals' + call print_strong_overlap(i_list, j_list) + mo_r_coef_tmp(1:ao_num,1) = mo_r_coef_new(1:ao_num,i_list) + mo_r_coef_tmp(1:ao_num,2) = mo_l_coef(1:ao_num,i_list) + mo_l_coef_tmp(1:ao_num,1) = mo_l_coef(1:ao_num,j_list) + mo_l_coef_tmp(1:ao_num,2) = mo_r_coef_new(1:ao_num,j_list) else - do j = 1, n_degen - print*,'i_list = ',list_degen(i,j) - mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j)) - mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j)) - enddo + do j = 1, n_degen + print*,'i_list = ',list_degen(i,j) + mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j)) + mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j)) + enddo endif print*,'Right orbitals ' - do j = 1, n_degen - write(*,'(100(F16.10,X))')mo_r_coef_tmp(1:ao_num,j) - enddo + do j = 1, n_degen + write(*,'(1000(F16.10,X))') mo_r_coef_tmp(1:ao_num,j) + enddo print*,'Left orbitals ' - do j = 1, n_degen - write(*,'(100(F16.10,X))')mo_l_coef_tmp(1:ao_num,j) - enddo + do j = 1, n_degen + write(*,'(100(F16.10,X))') mo_l_coef_tmp(1:ao_num,j) + enddo ! Orthogonalization of right functions print *, ' Orthogonalization of RIGHT functions' print *, ' ------------------------------------' diff --git a/src/tools/print_detweights.irp.f b/src/tools/print_detweights.irp.f index d5b0f2c9..5e5f2bb1 100644 --- a/src/tools/print_detweights.irp.f +++ b/src/tools/print_detweights.irp.f @@ -5,7 +5,8 @@ program print_detweights read_wf = .True. touch read_wf - call main() + call print_exc() + !call main() end @@ -41,6 +42,7 @@ subroutine main() do i = 1, N_det deg_sorted(i) = deg(ii(i)) + print *, deg_sorted(i), c(i) enddo print *, ' saving psi' @@ -52,7 +54,7 @@ subroutine main() print *, ' Error opening file!' stop endif - + write(10) N_det write(10) deg_sorted write(10) c @@ -63,4 +65,33 @@ subroutine main() end +! --- + +subroutine print_exc() + + implicit none + + integer :: i + integer, allocatable :: deg(:) + + PROVIDE N_int + PROVIDE N_det + PROVIDE psi_det + + allocate(deg(N_det)) + + do i = 1, N_det + call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i), deg(i), N_int) + enddo + + open(unit=10, file="exc.dat", action="write") + write(10,*) N_det + write(10,*) deg + close(10) + + deallocate(deg) + +end + + diff --git a/src/utils/block_diag_degen.irp.f b/src/utils/block_diag_degen.irp.f index 188bfa58..1a9ca8d6 100644 --- a/src/utils/block_diag_degen.irp.f +++ b/src/utils/block_diag_degen.irp.f @@ -191,7 +191,7 @@ subroutine give_degen_full_list(A, n, thr, list_degen, n_degen_list) list_degen(n_degen_list,1) = i icount = 1 do j = i+1, n - if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then + if(dabs(A(i)-A(j)).lt.thr .and. is_ok(j)) then is_ok(j) = .False. icount += 1 list_degen(n_degen_list,icount) = j From f8bff471222ac9cd2e6f23342f7d7a7aff5d62cd Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 28 Mar 2024 15:27:11 +0100 Subject: [PATCH 06/10] added loops --- .../local/non_h_ints_mu/total_tc_int.irp.f | 165 +++++++++++++----- 1 file changed, 121 insertions(+), 44 deletions(-) diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index c7230dc3..72fd0f53 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -65,27 +65,59 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE int2_grad1_u12_square_ao - allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + if(tc_save_mem) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & - !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid - c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + print*, ' LOOPS are used to evaluate Hermitian part of ao_two_e_tc_tot ...' + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l, ipoint, ao_i_r, ao_k_r, weight1) & + !$OMP SHARED (ao_num, n_points_final_grid, ao_two_e_tc_tot, & + !$OMP aos_in_r_array_transp, final_weight_at_r_vector, int2_grad1_u12_square_ao) + !$OMP DO COLLAPSE(4) + do i = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + do j = 1, ao_num + ao_two_e_tc_tot(j,l,k,i) = 0.d0 + do ipoint = 1, n_points_final_grid + weight1 = final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) + int2_grad1_u12_square_ao(j,l,ipoint) * weight1 * ao_i_r * ao_k_r + enddo + enddo + enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & - , 0.d0, ao_two_e_tc_tot, ao_num*ao_num) + else + print*, ' DGEMM are used to evaluate Hermitian part of ao_two_e_tc_tot ...' + + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 0.d0, ao_two_e_tc_tot, ao_num*ao_num) + deallocate(c_mat) + endif + FREE int2_grad1_u12_square_ao if( (tc_integ_type .eq. "semi-analytic") .and. & @@ -96,6 +128,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n ! an additional term is added here directly instead of ! being added in int2_grad1_u12_square_ao for performance + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) PROVIDE int2_u2_env2 !$OMP PARALLEL & @@ -127,10 +160,13 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n , int2_u2_env2(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) + deallocate(c_mat) FREE int2_u2_env2 endif ! use_ipp - deallocate(c_mat) + call wall_time(time1) + print*, ' done with Hermitian part after (min) ', (time1 - time0) / 60.d0 + call print_memory_usage() ! --- @@ -138,38 +174,73 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE int2_grad1_u12_ao - allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) + if(tc_save_mem) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & - !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid + print*, ' LOOPS are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' - weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) - ao_i_r = aos_in_r_array_transp(ipoint,i) - ao_k_r = aos_in_r_array_transp(ipoint,k) - - b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) - b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) - b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l, ipoint, ao_i_r, ao_k_r, weight1) & + !$OMP SHARED (ao_num, n_points_final_grid, ao_two_e_tc_tot, & + !$OMP aos_in_r_array_transp, final_weight_at_r_vector, & + !$OMP int2_grad1_u12_ao, aos_grad_in_r_array_transp_bis) + !$OMP DO COLLAPSE(4) + do i = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + do j = 1, ao_num + do ipoint = 1, n_points_final_grid + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) & + + weight1 * int2_grad1_u12_ao(j,l,ipoint,1) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) & + + weight1 * int2_grad1_u12_ao(j,l,ipoint,2) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) & + + weight1 * int2_grad1_u12_ao(j,l,ipoint,3) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + enddo + enddo + enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL - do m = 1, 3 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & - , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & - , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) - enddo - deallocate(b_mat) + else + print*, ' DGEMM are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' + + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) + b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) + b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do m = 1, 3 + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & + , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) + enddo + deallocate(b_mat) + + end if FREE int2_grad1_u12_ao if(tc_integ_type .eq. "semi-analytic") then @@ -178,16 +249,22 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n endif ! var_tc + call wall_time(time1) + print*, ' done with non-Hermitian part after (min) ', (time1 - time0) / 60.d0 + call print_memory_usage() + ! --- call sum_A_At(ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) + ! --- + PROVIDE ao_integrals_map !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & !$OMP PRIVATE(i, j, k, l) - !$OMP DO + !$OMP DO COLLAPSE(4) do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num From 002aff90f5e6ecd7a4929eb48e75608d94f9e3a8 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 28 Mar 2024 17:05:00 +0100 Subject: [PATCH 07/10] working on mem reduction --- .../local/non_h_ints_mu/total_tc_int.irp.f | 98 ++++++++++++------- plugins/local/tc_keywords/EZFIO.cfg | 6 ++ 2 files changed, 67 insertions(+), 37 deletions(-) diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index 72fd0f53..b8379006 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -33,8 +33,10 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n double precision :: weight1, ao_k_r, ao_i_r double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq double precision :: time0, time1 - double precision, allocatable :: b_mat(:,:,:,:), c_mat(:,:,:) + double precision, allocatable :: c_mat(:,:,:) + logical, external :: ao_two_e_integral_zero double precision, external :: get_ao_two_e_integral + double precision, external :: ao_two_e_integral PROVIDe tc_integ_type PROVIDE env_type @@ -194,9 +196,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n ao_i_r = aos_in_r_array_transp(ipoint,i) ao_k_r = aos_in_r_array_transp(ipoint,k) ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) & - + weight1 * int2_grad1_u12_ao(j,l,ipoint,1) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) & - + weight1 * int2_grad1_u12_ao(j,l,ipoint,2) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) & - + weight1 * int2_grad1_u12_ao(j,l,ipoint,3) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + - weight1 * int2_grad1_u12_ao(j,l,ipoint,1) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) & + - weight1 * int2_grad1_u12_ao(j,l,ipoint,2) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) & + - weight1 * int2_grad1_u12_ao(j,l,ipoint,3) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) enddo enddo enddo @@ -209,39 +211,37 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n print*, ' DGEMM are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' - allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & - !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + do m = 1, 3 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector, m) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid - weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) - ao_i_r = aos_in_r_array_transp(ipoint,i) - ao_k_r = aos_in_r_array_transp(ipoint,k) + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) - b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) - b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) - b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m)) + enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - do m = 1, 3 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & - , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) enddo - deallocate(b_mat) + deallocate(c_mat) end if - FREE int2_grad1_u12_ao + !FREE int2_grad1_u12_ao if(tc_integ_type .eq. "semi-analytic") then FREE int2_grad1_u2e_ao @@ -258,19 +258,26 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n call sum_A_At(ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) ! --- + + logical :: integ_zero + double precision :: integ_val - PROVIDE ao_integrals_map + print*, ' adding ERI to ao_two_e_tc_tot ...' - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & - !$OMP PRIVATE(i, j, k, l) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i, j, k, l, integ_zero, integ_val) & + !$OMP SHARED(ao_num, ao_two_e_tc_tot) !$OMP DO COLLAPSE(4) do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num - ! < 1:i, 2:j | 1:k, 2:l > - ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + integ_zero = ao_two_e_integral_zero(i,j,k,l) + if(.not. integ_zero) then + ! i,k : r1 j,l : r2 + integ_val = ao_two_e_integral(i,k,j,l) + ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + integ_val + endif enddo enddo enddo @@ -278,8 +285,25 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n !$OMP END DO !$OMP END PARALLEL - !call clear_ao_map() - FREE ao_integrals_map + !PROVIDE ao_integrals_map + !!$OMP PARALLEL DEFAULT(NONE) & + !!$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & + !!$OMP PRIVATE(i, j, k, l) + !!$OMP DO COLLAPSE(4) + !do j = 1, ao_num + ! do l = 1, ao_num + ! do i = 1, ao_num + ! do k = 1, ao_num + ! ! < 1:i, 2:j | 1:k, 2:l > + ! ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + !!$OMP END PARALLEL + !!call clear_ao_map() + !FREE ao_integrals_map if(tc_integ_type .eq. "numeric") then FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index a8491660..24362796 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -286,4 +286,10 @@ doc: If |true|, memory scale of TC ao -> mo: O(N3) interface: ezfio,provider,ocaml default: False +[tc_save_mem] +type: logical +doc: If |true|, use loops to save memory TC +interface: ezfio,provider,ocaml +default: False + From 1a36d974b0bd5cd0c06453a15c96a8492c4baecc Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 7 Apr 2024 00:33:09 +0200 Subject: [PATCH 08/10] saving lcpq-ampere --- .../local/bi_ort_ints/semi_num_ints_mo.irp.f | 68 ++++--- .../bi_ort_ints/three_body_ints_bi_ort.irp.f | 2 +- .../local/bi_ort_ints/total_twoe_pot.irp.f | 87 ++++----- plugins/local/non_h_ints_mu/tc_integ.irp.f | 175 ++++++++++++++++-- .../local/non_h_ints_mu/total_tc_int.irp.f | 93 +++++----- plugins/local/tc_keywords/EZFIO.cfg | 8 +- .../local/tc_scf/fock_3e_bi_ortho_cs.irp.f | 121 +++++++----- .../local/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 2 +- plugins/local/tc_scf/fock_tc.irp.f | 2 +- .../local/tc_scf/write_ao_2e_tc_integ.irp.f | 58 ++++++ .../multi_s_dipole_moment.irp.f | 22 ++- 11 files changed, 442 insertions(+), 196 deletions(-) create mode 100644 plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f diff --git a/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f b/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f index 51f0cba4..77e4cb9b 100644 --- a/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f @@ -107,8 +107,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, integer :: i, j, ipoint double precision :: wall0, wall1 - print *, ' providing int2_grad1_u12_ao_transp ...' - call wall_time(wall0) + !print *, ' providing int2_grad1_u12_ao_transp ...' + !call wall_time(wall0) if(test_cycle_tc) then @@ -142,15 +142,15 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, endif - call wall_time(wall1) - print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0 - call print_memory_usage() + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_ao_transp (min) = ', (wall1 - wall0) / 60.d0 + !call print_memory_usage() END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] +BEGIN_PROVIDER [double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] implicit none integer :: ipoint @@ -159,7 +159,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_ao_transp - !print *, ' providing int2_grad1_u12_bimo_transp' + !print *, ' providing int2_grad1_u12_bimo_transp ...' !call wall_time(wall0) !$OMP PARALLEL & @@ -167,33 +167,35 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP PRIVATE (ipoint) & !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp) !$OMP DO SCHEDULE (dynamic) - do ipoint = 1, n_points_final_grid - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,1,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,2,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,3,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - enddo + do ipoint = 1, n_points_final_grid + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,1,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,2,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,3,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + enddo !$OMP END DO !$OMP END PARALLEL + !FREE int2_grad1_u12_ao_transp + !call wall_time(wall1) - !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + !print *, ' wall time for int2_grad1_u12_bimo_transp (min) =', (wall1 - wall0) / 60.d0 !call print_memory_usage() END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] +BEGIN_PROVIDER [double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] implicit none integer :: i, j, ipoint double precision :: wall0, wall1 !call wall_time(wall0) - !print *, ' Providing int2_grad1_u12_bimo_t ...' + !print *, ' providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -211,17 +213,21 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, FREE int2_grad1_u12_bimo_transp !call wall_time(wall1) - !print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + !print *, ' wall time for int2_grad1_u12_bimo_t (min) =', (wall1 - wall0) / 60.d0 !call print_memory_usage() END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)] +BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)] implicit none - integer :: i, j, ipoint + integer :: i, j, ipoint + double precision :: wall0, wall1 + + !call wall_time(wall0) + !print *, ' providing int2_grad1_u12_ao_t ...' PROVIDE int2_grad1_u12_ao @@ -235,6 +241,10 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, enddo enddo + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_ao_t (min) =', (wall1 - wall0) / 60.d0 + !call print_memory_usage() + END_PROVIDER ! --- @@ -275,8 +285,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk, (n_points_final_grid, double precision :: xyz double precision :: wall0, wall1 - print*, ' providing x_W_ki_bi_ortho_erf_rk ...' - call wall_time(wall0) + !print*, ' providing x_W_ki_bi_ortho_erf_rk ...' + !call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -300,8 +310,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk, (n_points_final_grid, ! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu_transp ! FREE mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp - call wall_time(wall1) - print *, ' time to provide x_W_ki_bi_ortho_erf_rk = ', wall1 - wall0 + !call wall_time(wall1) + !print *, ' time to provide x_W_ki_bi_ortho_erf_rk = ', wall1 - wall0 END_PROVIDER @@ -323,8 +333,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk_diag, (n_points_final_ double precision :: xyz double precision :: wall0, wall1 - print*,'providing x_W_ki_bi_ortho_erf_rk_diag ...' - call wall_time(wall0) + !print*,'providing x_W_ki_bi_ortho_erf_rk_diag ...' + !call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -343,8 +353,8 @@ BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk_diag, (n_points_final_ !$OMP END DO !$OMP END PARALLEL - call wall_time(wall1) - print*,'time to provide x_W_ki_bi_ortho_erf_rk_diag = ',wall1 - wall0 + !call wall_time(wall1) + !print*,'time to provide x_W_ki_bi_ortho_erf_rk_diag = ',wall1 - wall0 END_PROVIDER diff --git a/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f b/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f index 726e48ba..fd4a162f 100644 --- a/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -168,7 +168,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integral = integral + tmp * final_weight_at_r_vector(ipoint) enddo -end subroutine give_integrals_3_body_bi_ort +end ! --- diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 1e558038..e34a7b7b 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -16,10 +16,10 @@ double precision function bi_ortho_mo_ints(l, k, j, i) integer :: m, n, p, q bi_ortho_mo_ints = 0.d0 - do m = 1, ao_num - do p = 1, ao_num - do n = 1, ao_num - do q = 1, ao_num + do p = 1, ao_num + do m = 1, ao_num + do q = 1, ao_num + do n = 1, ao_num ! p1h1p2h2 l1 l2 r1 r2 bi_ortho_mo_ints += ao_two_e_tc_tot(n,q,m,p) * mo_l_coef(m,l) * mo_l_coef(n,k) * mo_r_coef(p,j) * mo_r_coef(q,i) enddo @@ -27,7 +27,7 @@ double precision function bi_ortho_mo_ints(l, k, j, i) enddo enddo -end function bi_ortho_mo_ints +end ! --- @@ -43,93 +43,68 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, integer :: i, j, k, l, m, n, p, q, s, r double precision :: t1, t2, tt1, tt2 double precision, allocatable :: a1(:,:,:,:), a2(:,:,:,:) - double precision, allocatable :: a_jkp(:,:,:), a_kpq(:,:,:), a_pqr(:,:,:) + double precision, allocatable :: a_jkp(:,:,:), a_kpq(:,:,:), ao_two_e_tc_tot_tmp(:,:,:) print *, ' PROVIDING mo_bi_ortho_tc_two_e_chemist ...' call wall_time(t1) call print_memory_usage() PROVIDE mo_r_coef mo_l_coef - PROVIDE ao_two_e_tc_tot if(ao_to_mo_tc_n3) then print*, ' memory scale of TC ao -> mo: O(N3) ' + if(.not.read_tc_integ) then + stop 'read_tc_integ needs to be set to true' + endif + allocate(a_jkp(ao_num,ao_num,mo_num)) allocate(a_kpq(ao_num,mo_num,mo_num)) - allocate(a_pqr(mo_num,mo_num,mo_num)) + allocate(ao_two_e_tc_tot_tmp(ao_num,ao_num,ao_num)) + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="read") call wall_time(tt1) - do s = 1, mo_num + mo_bi_ortho_tc_two_e_chemist(:,:,:,:) = 0.d0 + do l = 1, ao_num + read(11) ao_two_e_tc_tot_tmp(:,:,:) - mo_bi_ortho_tc_two_e_chemist(:,:,:,s) = 0.d0 - do l = 1, ao_num + do s = 1, mo_num - call dgemm( 'T', 'N', ao_num*ao_num, mo_num, ao_num, 1.d0 & - , ao_two_e_tc_tot(1,1,1,l), ao_num, mo_l_coef(1,1), ao_num & + call dgemm( 'T', 'N', ao_num*ao_num, mo_num, ao_num, 1.d0 & + , ao_two_e_tc_tot_tmp(1,1,1), ao_num, mo_l_coef(1,1), ao_num & , 0.d0, a_jkp(1,1,1), ao_num*ao_num) - + call dgemm( 'T', 'N', ao_num*mo_num, mo_num, ao_num, 1.d0 & , a_jkp(1,1,1), ao_num, mo_r_coef(1,1), ao_num & , 0.d0, a_kpq(1,1,1), ao_num*mo_num) - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, ao_num, 1.d0 & - , a_kpq(1,1,1), ao_num, mo_l_coef(1,1), ao_num & - , 0.d0, a_pqr(1,1,1), mo_num*mo_num) - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(p, q, r) & - !$OMP SHARED(s, l, mo_num, mo_bi_ortho_tc_two_e_chemist, mo_r_coef, a_pqr) - !$OMP DO COLLAPSE(2) - do p = 1, mo_num - do q = 1, mo_num - do r = 1, mo_num - mo_bi_ortho_tc_two_e_chemist(p,q,r,s) = mo_bi_ortho_tc_two_e_chemist(p,q,r,s) + mo_r_coef(l,s) * a_pqr(p,q,r) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, ao_num, mo_r_coef(l,s) & + , a_kpq(1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 1.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,s), mo_num*mo_num) - enddo ! l + enddo ! s - if(s == 2) then + if(l == 2) then call wall_time(tt2) print*, ' 1 / mo_num done in (min)', (tt2-tt1)/60.d0 print*, ' estimated time required (min)', dble(mo_num-1)*(tt2-tt1)/60.d0 - elseif(s == 11) then + elseif(l == 11) then call wall_time(tt2) print*, ' 10 / mo_num done in (min)', (tt2-tt1)/60.d0 print*, ' estimated time required (min)', dble(mo_num-10)*(tt2-tt1)/(60.d0*10.d0) - elseif(s == 26) then - call wall_time(tt2) - print*, ' 25 / mo_num done in (min)', (tt2-tt1)/60.d0 - print*, ' estimated time required (min)', dble(mo_num-25)*(tt2-tt1)/(60.d0*25.d0) - elseif(s == 51) then - call wall_time(tt2) - print*, ' 50 / mo_num done in (min)', (tt2-tt1)/60.d0 - print*, ' estimated time required (min)', dble(mo_num-50)*(tt2-tt1)/(60.d0*50.d0) - elseif(s == 101) then + elseif(l == 101) then call wall_time(tt2) print*, ' 100 / mo_num done in (min)', (tt2-tt1)/60.d0 print*, ' estimated time required (min)', dble(mo_num-100)*(tt2-tt1)/(60.d0*100.d0) - elseif(s == 201) then - call wall_time(tt2) - print*, ' 200 / mo_num done in (min)', (tt2-tt1)/60.d0 - print*, ' estimated time required (min)', dble(mo_num-200)*(tt2-tt1)/(60.d0*200.d0) - elseif(s == 501) then - call wall_time(tt2) - print*, ' 500 / mo_num done in (min)', (tt2-tt1)/60.d0 - print*, ' estimated time required (min)', dble(mo_num-500)*(tt2-tt1)/(60.d0*500.d0) endif + enddo ! l + close(11) - enddo ! s - - deallocate(a_jkp, a_kpq, a_pqr) + deallocate(a_jkp, a_kpq, ao_two_e_tc_tot_tmp) else @@ -141,6 +116,8 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & , 0.d0, a2(1,1,1,1), ao_num*ao_num*ao_num) + FREE ao_two_e_tc_tot + allocate(a1(ao_num,ao_num,mo_num,mo_num)) call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 & diff --git a/plugins/local/non_h_ints_mu/tc_integ.irp.f b/plugins/local/non_h_ints_mu/tc_integ.irp.f index 775a9e4c..58e3db48 100644 --- a/plugins/local/non_h_ints_mu/tc_integ.irp.f +++ b/plugins/local/non_h_ints_mu/tc_integ.irp.f @@ -44,14 +44,92 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f elseif(tc_integ_type .eq. "numeric") then print *, ' Numerical integration over r1 and r2 will be performed' - - ! TODO combine 1shot & int2_grad1_u12_ao_num - PROVIDE int2_grad1_u12_ao_num - int2_grad1_u12_ao = int2_grad1_u12_ao_num + if(tc_save_mem) then - !PROVIDE int2_grad1_u12_ao_num_1shot - !int2_grad1_u12_ao = int2_grad1_u12_ao_num_1shot + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: mem, n_double + double precision, allocatable :: tmp(:,:,:), xx(:) + double precision, allocatable :: tmp_grad1_u12(:,:,:) + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num), xx(n_points_extra_final_grid)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO COLLAPSE(2) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, xx, tmp_grad1_u12) + !$OMP DO + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), tmp_grad1_u12(1,i_blocks,3), xx(1)) + enddo + !$OMP END DO + !$OMP END PARALLEL + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + enddo + deallocate(tmp_grad1_u12) + if(n_rest .gt. 0) then + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) + ii = n_pass*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, xx, tmp_grad1_u12) + !$OMP DO + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), tmp_grad1_u12(1,i_rest,3), xx(1)) + enddo + !$OMP END DO + !$OMP END PARALLEL + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + deallocate(tmp_grad1_u12) + endif + deallocate(tmp,xx) + + else + ! TODO combine 1shot & int2_grad1_u12_ao_num + PROVIDE int2_grad1_u12_ao_num + int2_grad1_u12_ao = int2_grad1_u12_ao_num + !PROVIDE int2_grad1_u12_ao_num_1shot + !int2_grad1_u12_ao = int2_grad1_u12_ao_num_1shot + endif elseif(tc_integ_type .eq. "semi-analytic") then @@ -177,13 +255,88 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p print *, ' Numerical integration over r1 and r2 will be performed' - ! TODO combine 1shot & int2_grad1_u12_square_ao_num + if(tc_save_mem) then - PROVIDE int2_grad1_u12_square_ao_num - int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: mem, n_double + double precision, allocatable :: tmp(:,:,:), xx(:,:,:) + double precision, allocatable :: tmp_grad1_u12_squared(:,:) - !PROVIDE int2_grad1_u12_square_ao_num_1shot - !int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num_1shot + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO COLLAPSE(2) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks), xx(n_points_extra_final_grid,n_blocks,3)) + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, xx, final_grid_points, tmp_grad1_u12_squared) + !$OMP DO + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, xx(1,i_blocks,1), xx(1,i_blocks,2), xx(1,i_blocks,3), tmp_grad1_u12_squared(1,i_blocks)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) + enddo + deallocate(tmp_grad1_u12_squared, xx) + if(n_rest .gt. 0) then + ii = n_pass*n_blocks + 1 + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest), xx(n_points_extra_final_grid,n_rest,3)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, xx, final_grid_points, tmp_grad1_u12_squared) + !$OMP DO + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, xx(1,i_rest,1), xx(1,i_rest,2), xx(1,i_rest,3), tmp_grad1_u12_squared(1,i_rest)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) + deallocate(tmp_grad1_u12_squared, xx) + endif + deallocate(tmp) + + else + + ! TODO combine 1shot & int2_grad1_u12_square_ao_num + PROVIDE int2_grad1_u12_square_ao_num + int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + !PROVIDE int2_grad1_u12_square_ao_num_1shot + !int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num_1shot + endif elseif(tc_integ_type .eq. "semi-analytic") then diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index b8379006..a1bbd6e0 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -55,7 +55,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n print*, ' Reading ao_two_e_tc_tot from ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="read") - read(11) ao_two_e_tc_tot + do i = 1, ao_num + read(11) ao_two_e_tc_tot(:,:,:,i) + enddo close(11) else @@ -67,7 +69,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE int2_grad1_u12_square_ao - if(tc_save_mem) then + if(tc_save_mem_loops) then print*, ' LOOPS are used to evaluate Hermitian part of ao_two_e_tc_tot ...' @@ -176,7 +178,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE int2_grad1_u12_ao - if(tc_save_mem) then + if(tc_save_mem_loops) then print*, ' LOOPS are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' @@ -241,7 +243,6 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n deallocate(c_mat) end if - !FREE int2_grad1_u12_ao if(tc_integ_type .eq. "semi-analytic") then FREE int2_grad1_u2e_ao @@ -264,48 +265,52 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n print*, ' adding ERI to ao_two_e_tc_tot ...' - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i, j, k, l, integ_zero, integ_val) & - !$OMP SHARED(ao_num, ao_two_e_tc_tot) - !$OMP DO COLLAPSE(4) - do j = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do k = 1, ao_num - integ_zero = ao_two_e_integral_zero(i,j,k,l) - if(.not. integ_zero) then - ! i,k : r1 j,l : r2 - integ_val = ao_two_e_integral(i,k,j,l) - ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + integ_val - endif + if(tc_save_mem) then + print*, ' ao_integrals_map will not be used' + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i, j, k, l, integ_zero, integ_val) & + !$OMP SHARED(ao_num, ao_two_e_tc_tot) + !$OMP DO COLLAPSE(4) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + integ_zero = ao_two_e_integral_zero(i,j,k,l) + if(.not. integ_zero) then + ! i,k : r1 j,l : r2 + integ_val = ao_two_e_integral(i,k,j,l) + ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + integ_val + endif + enddo enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + else + print*, ' ao_integrals_map will be used' + PROVIDE ao_integrals_map + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & + !$OMP PRIVATE(i, j, k, l) + !$OMP DO COLLAPSE(4) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ! < 1:i, 2:j | 1:k, 2:l > + ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + !call clear_ao_map() + FREE ao_integrals_map + endif - !PROVIDE ao_integrals_map - !!$OMP PARALLEL DEFAULT(NONE) & - !!$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & - !!$OMP PRIVATE(i, j, k, l) - !!$OMP DO COLLAPSE(4) - !do j = 1, ao_num - ! do l = 1, ao_num - ! do i = 1, ao_num - ! do k = 1, ao_num - ! ! < 1:i, 2:j | 1:k, 2:l > - ! ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) - ! enddo - ! enddo - ! enddo - !enddo - !!$OMP END DO - !!$OMP END PARALLEL - !!call clear_ao_map() - !FREE ao_integrals_map - - if(tc_integ_type .eq. "numeric") then + if((tc_integ_type .eq. "numeric") .and. (.not. tc_save_mem)) then FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num endif @@ -315,7 +320,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n print*, ' Saving ao_two_e_tc_tot in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="write") call ezfio_set_work_empty(.False.) - write(11) ao_two_e_tc_tot + do i = 1, ao_num + write(11) ao_two_e_tc_tot(:,:,:,i) + enddo close(11) call ezfio_set_tc_keywords_io_tc_integ('Read') endif diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index 24362796..70169189 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -286,10 +286,16 @@ doc: If |true|, memory scale of TC ao -> mo: O(N3) interface: ezfio,provider,ocaml default: False -[tc_save_mem] +[tc_save_mem_loops] type: logical doc: If |true|, use loops to save memory TC interface: ezfio,provider,ocaml default: False +[tc_save_mem] +type: logical +doc: If |true|, more calc but less mem +interface: ezfio,provider,ocaml +default: False + diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f index 0b883865..8fd5e5b6 100644 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f +++ b/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f @@ -9,7 +9,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] double precision :: loc_1, loc_2, loc_3 double precision, allocatable :: Okappa(:), Jkappa(:,:) double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) - double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:), tmp_22(:,:,:) double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) PROVIDE mo_l_coef mo_r_coef @@ -63,17 +63,13 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] allocate(tmp_1(n_points_final_grid,4)) do ipoint = 1, n_points_final_grid - loc_1 = 2.d0 * Okappa(ipoint) - tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) - tmp_1(ipoint,4) = Okappa(ipoint) enddo - !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & @@ -112,58 +108,81 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] ! --- - allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + if(tc_save_mem) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - tmp_2(:,4,b,a) = 0.d0 - do i = 1, elec_beta_num + allocate(tmp_22(n_points_final_grid,4,mo_num)) + do a = 1, mo_num + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, a, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_22) + !$OMP DO + do b = 1, mo_num do ipoint = 1, n_points_final_grid - tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + tmp_22(ipoint,1,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_22(ipoint,2,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_22(ipoint,3,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + tmp_22(:,4,b) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_22(ipoint,4,b) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv( 'T', 4*n_points_final_grid, mo_num, -2.d0 & + , tmp_22(1,1,1), size(tmp_22, 1) * size(tmp_22, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_cs(1,a), 1) + enddo + deallocate(tmp_22) + + else + + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) + deallocate(tmp_2) - ! --- + endif - call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) - - deallocate(tmp_1, tmp_2) + deallocate(tmp_1) ! --- @@ -272,7 +291,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] ! --- !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti + !print *, ' total Wall time for fock_3e_uhf_mo_cs =', (tf - ti) / 60.d0 END_PROVIDER diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 63a1e162..47ee5b48 100644 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -32,7 +32,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] endif !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + !print *, ' Wall time for fock_3e_uhf_mo_a (min) =', (tf - ti)/60.d0 END_PROVIDER diff --git a/plugins/local/tc_scf/fock_tc.irp.f b/plugins/local/tc_scf/fock_tc.irp.f index 282f9873..d3ddb8ad 100644 --- a/plugins/local/tc_scf/fock_tc.irp.f +++ b/plugins/local/tc_scf/fock_tc.irp.f @@ -175,7 +175,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] +BEGIN_PROVIDER [double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num)] BEGIN_DOC ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis diff --git a/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f new file mode 100644 index 00000000..7ce57578 --- /dev/null +++ b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f @@ -0,0 +1,58 @@ +! --- + +program write_ao_2e_tc_integ + + implicit none + + PROVIDE j1e_type + PROVIDE j2e_type + + print *, ' j1e_type = ', j1e_type + print *, ' j2e_type = ', j2e_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + if(tc_integ_type .eq. "numeric") then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + endif + + call main() + +end + +! --- + +subroutine main() + + implicit none + + PROVIDE io_tc_integ + + print*, 'io_tc_integ = ', io_tc_integ + + if(io_tc_integ .ne. "Write") then + print*, 'io_tc_integ != Write' + print*, io_tc_integ + stop + endif + + PROVIDE ao_two_e_tc_tot + +end + +! --- + diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f index f21e08cd..c7216a61 100644 --- a/src/mol_properties/multi_s_dipole_moment.irp.f +++ b/src/mol_properties/multi_s_dipole_moment.irp.f @@ -102,12 +102,28 @@ END_PROVIDER &BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment_eigenval, (N_states)] implicit none + double precision, allocatable :: eigval(:), eigvec(:,:), A(:,:) PROVIDE multi_s_x_dipole_moment multi_s_y_dipole_moment multi_s_z_dipole_moment - call lapack_diag(multi_s_x_dipole_moment_eigenval(1), multi_s_x_dipole_moment_eigenvec(1,1), multi_s_x_dipole_moment(1,1), N_states, N_states) - call lapack_diag(multi_s_y_dipole_moment_eigenval(1), multi_s_y_dipole_moment_eigenvec(1,1), multi_s_y_dipole_moment(1,1), N_states, N_states) - call lapack_diag(multi_s_z_dipole_moment_eigenval(1), multi_s_z_dipole_moment_eigenvec(1,1), multi_s_z_dipole_moment(1,1), N_states, N_states) + allocate(A(N_states,N_states), eigvec(N_states,N_states), eigval(N_states)) + + A = multi_s_x_dipole_moment + call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states) + multi_s_x_dipole_moment_eigenval = eigval + multi_s_x_dipole_moment_eigenvec = eigvec + + A = multi_s_y_dipole_moment + call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states) + multi_s_y_dipole_moment_eigenval = eigval + multi_s_y_dipole_moment_eigenvec = eigvec + + A = multi_s_z_dipole_moment + call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states) + multi_s_z_dipole_moment_eigenval = eigval + multi_s_z_dipole_moment_eigenvec = eigvec + + deallocate(A, eigvec, eigval) END_PROVIDER From e65d7913bfdf83159ffd50eb39c76e63dea221d5 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 7 Apr 2024 00:43:32 +0200 Subject: [PATCH 09/10] saving lcpq --- bin/qp_convert_output_to_ezfio | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/qp_convert_output_to_ezfio b/bin/qp_convert_output_to_ezfio index 1b33f156..6f2d02d0 100755 --- a/bin/qp_convert_output_to_ezfio +++ b/bin/qp_convert_output_to_ezfio @@ -227,8 +227,8 @@ def write_ezfio(res, filename): shell_index += [nshell_tot] * len(b.prim) shell_num = len(ang_mom) - assert(shell_index[0] = 1) - assert(shell_index[-1] = shell_num) + assert(shell_index[0] == 1) + assert(shell_index[-1] == shell_num) # ~#~#~#~#~ # # W r i t e # From 2c899e6dd71247ae26cd337ede2bb13ce9419489 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 20 Apr 2024 12:39:39 +0200 Subject: [PATCH 10/10] few modif in grids --- .../extra_grid_vector.irp.f | 23 +++++++++++++++++-- .../grid_becke_vector.irp.f | 6 ++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f index ae167282..16a52dc6 100644 --- a/src/becke_numerical_grid/extra_grid_vector.irp.f +++ b/src/becke_numerical_grid/extra_grid_vector.irp.f @@ -47,8 +47,12 @@ END_PROVIDER END_DOC implicit none - integer :: i,j,k,l,i_count - double precision :: r(3) + integer :: i, j, k, l, i_count + double precision :: r(3) + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing extra_final_grid_points ...' i_count = 0 do j = 1, nucl_num @@ -66,10 +70,25 @@ END_PROVIDER index_final_points_extra(2,i_count) = i index_final_points_extra(3,i_count) = j index_final_points_extra_reverse(k,i,j) = i_count + + if(final_weight_at_r_vector_extra(i_count) .lt. 0.d0) then + print *, ' !!! WARNING !!!' + print *, ' negative weight !!!!' + print *, i_count, final_weight_at_r_vector_extra(i_count) + if(dabs(final_weight_at_r_vector_extra(i_count)) .lt. 1d-10) then + final_weight_at_r_vector_extra(i_count) = 0.d0 + else + stop + endif + endif enddo enddo enddo + call wall_time(wall1) + print *, ' wall time for extra_final_grid_points,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 473096d0..c35918c3 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -72,7 +72,11 @@ END_PROVIDER print *, ' !!! WARNING !!!' print *, ' negative weight !!!!' print *, i_count, final_weight_at_r_vector(i_count) - stop + if(dabs(final_weight_at_r_vector(i_count)) .lt. 1d-10) then + final_weight_at_r_vector(i_count) = 0.d0 + else + stop + endif endif enddo enddo