From a791a28523b787618d571947452fffbc4e7340c6 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 4 Jun 2023 09:19:34 +0200 Subject: [PATCH] working on memory footprint --- .../grid_becke_vector.irp.f | 18 +++- src/bi_ort_ints/semi_num_ints_mo.irp.f | 23 ++++-- src/bi_ort_ints/three_body_ijmk.irp.f | 82 +++++++++++-------- src/bi_ortho_mos/bi_ort_mos_in_r.irp.f | 6 +- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 8 +- src/tc_bi_ortho/normal_ordered.irp.f | 6 -- src/tc_bi_ortho/slater_tc_opt.irp.f | 50 +++++++---- 7 files changed, 121 insertions(+), 72 deletions(-) diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index fd185641..8982fe83 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -62,20 +62,30 @@ END_PROVIDER enddo enddo + FREE grid_points_per_atom + FREE final_weight_at_r + END_PROVIDER ! --- BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] - implicit none + BEGIN_DOC -! Transposed final_grid_points + ! Transposed final_grid_points END_DOC + implicit none integer :: i,j - do j=1,3 - do i=1,n_points_final_grid + + do j = 1, 3 + do i = 1, n_points_final_grid final_grid_points_transp(i,j) = final_grid_points(j,i) enddo enddo + END_PROVIDER + +! --- + + diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 771d3274..6354b393 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -124,6 +124,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo + FREE int2_grad1_u12_ao_test + else PROVIDE int2_grad1_u12_ao @@ -153,14 +155,14 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] implicit none - integer :: ipoint + integer :: ipoint double precision :: wall0, wall1 PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_ao_transp - !print *, ' providing int2_grad1_u12_bimo_transp' - !call wall_time(wall0) + print *, ' providing int2_grad1_u12_bimo_transp' + call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -178,8 +180,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL - !call wall_time(wall1) - !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + call wall_time(wall1) + print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -188,7 +191,11 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] implicit none - integer :: i, j, ipoint + integer :: i, j, ipoint + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -205,6 +212,10 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, FREE int2_grad1_u12_bimo_transp + call wall_time(wall1) + print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 39a31751..ee7e88ef 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -30,10 +30,10 @@ implicit none integer :: ipoint, i, j, k, l, m double precision :: wall1, wall0 - double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:), tmp4(:,:,:,:) + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) double precision, allocatable :: tmp_4d(:,:,:,:) - double precision, allocatable :: tmp5(:,:,:) - double precision, allocatable :: tmp7(:,:) + double precision, allocatable :: tmp4(:,:,:) + double precision, allocatable :: tmp5(:,:) double precision, allocatable :: tmp_3d(:,:,:) print *, ' Providing the three_e_4_idx_bi_ort ...' @@ -47,7 +47,6 @@ allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp4(n_points_final_grid,3,mo_num,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -55,7 +54,7 @@ !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1, tmp2, tmp3, tmp4) + !$OMP tmp1, tmp2, tmp3) !$OMP DO COLLAPSE(2) do i = 1, mo_num do l = 1, mo_num @@ -69,13 +68,9 @@ tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) - tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - - tmp4(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp4(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp4(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) enddo enddo enddo @@ -99,7 +94,7 @@ !$OMP END PARALLEL DO call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp4, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp1) @@ -116,8 +111,30 @@ enddo !$OMP END PARALLEL DO + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp2) @@ -135,11 +152,10 @@ !$OMP END PARALLEL DO call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3, 3*n_points_final_grid, tmp4, 3*n_points_final_grid & + , tmp1, 3*n_points_final_grid, tmp3, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp3) - deallocate(tmp4) !$OMP PARALLEL DO PRIVATE(i,j,k,m) do i = 1, mo_num @@ -155,8 +171,6 @@ - allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) - !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, l, ipoint) & @@ -199,26 +213,26 @@ allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp7(n_points_final_grid,mo_num)) + allocate(tmp5(n_points_final_grid,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, ipoint) & !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, & - !$OMP tmp7) + !$OMP final_weight_at_r_vector, & + !$OMP tmp5) !$OMP DO do i = 1, mo_num do ipoint = 1, n_points_final_grid - tmp7(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) enddo enddo !$OMP END DO !$OMP END PARALLEL - allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp4(n_points_final_grid,mo_num,mo_num)) do m = 1, mo_num @@ -227,13 +241,13 @@ !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (mo_num, n_points_final_grid, m, & !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) enddo @@ -243,7 +257,7 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & - , tmp7, n_points_final_grid, tmp5, n_points_final_grid & + , tmp5, n_points_final_grid, tmp4, n_points_final_grid & , 0.d0, tmp_3d, mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k) @@ -264,13 +278,13 @@ !$OMP SHARED (mo_num, n_points_final_grid, m, & !$OMP mos_l_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do k = 1, mo_num do j = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) @@ -281,7 +295,7 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp5, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & + , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & , 0.d0, tmp_3d, mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k) @@ -296,7 +310,7 @@ enddo - deallocate(tmp7) + deallocate(tmp5) deallocate(tmp_3d) @@ -309,13 +323,13 @@ !$OMP SHARED (mo_num, n_points_final_grid, i, & !$OMP mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do j = 1, mo_num do m = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) @@ -326,12 +340,12 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & - , tmp5, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) enddo - deallocate(tmp5) + deallocate(tmp4) ! !$OMP PARALLEL DO PRIVATE(i,j,k,m) diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f index 42130575..8667683e 100644 --- a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f +++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f @@ -46,6 +46,8 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, mos_r_in_r_array_transp(i,j) = mos_r_in_r_array(j,i) enddo enddo + + FREE mos_r_in_r_array END_PROVIDER @@ -116,7 +118,7 @@ end subroutine give_all_mos_l_at_r ! --- -BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo_num)] +BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,mo_num)] BEGIN_DOC ! mos_l_in_r_array_transp(i,j) = value of the jth mo on the ith grid point @@ -130,6 +132,8 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo mos_l_in_r_array_transp(i,j) = mos_l_in_r_array(j,i) enddo enddo + + FREE mos_l_in_r_array END_PROVIDER diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index ed75c882..f9bda058 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -54,11 +54,13 @@ subroutine run_cipsi_tc implicit none - if (.not.is_zmq_slave) then + if (.not. is_zmq_slave) then + PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e - if(elec_alpha_num+elec_beta_num.ge.3)then + + if(elec_alpha_num+elec_beta_num .ge. 3) then if(three_body_h_tc)then - call provide_all_three_ints_bi_ortho + call provide_all_three_ints_bi_ortho() endif endif ! --- diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index cc01d144..f8e310df 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -24,9 +24,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ PROVIDE N_int - print*,' Providing normal_two_body_bi_orth ...' - call wall_time(wall0) - if(read_tc_norm_ord) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") @@ -115,9 +112,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call wall_time(wall1) print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - call wall_time(wall1) - print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index a2077f0f..42c59308 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,24 +1,38 @@ -subroutine provide_all_three_ints_bi_ortho - implicit none - BEGIN_DOC -! routine that provides all necessary three-electron integrals - END_DOC - if(three_body_h_tc)then - if(three_e_3_idx_term)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - endif - if(three_e_4_idx_term)then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort - endif - if(.not.double_normal_ord.and.three_e_5_idx_term)then - PROVIDE three_e_5_idx_direct_bi_ort - elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then - PROVIDE normal_two_body_bi_orth - endif + +! --- + +subroutine provide_all_three_ints_bi_ortho() + + BEGIN_DOC + ! routine that provides all necessary three-electron integrals + END_DOC + + implicit none + + if(three_body_h_tc) then + + if(three_e_3_idx_term) then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + + if(three_e_4_idx_term) then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort + endif + + if(.not. double_normal_ord. and. three_e_5_idx_term) then + PROVIDE three_e_5_idx_direct_bi_ort + elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then + PROVIDE normal_two_body_bi_orth + endif + endif + + return end +! --- + subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC