working on memory footprint

2025-04-18 22:40:27 +02:00 · 2023-06-04 09:19:34 +02:00 · 2023-06-04 09:19:34 +02:00 · a791a28523
commit a791a28523
parent 072bea8041
7 changed files with 121 additions and 72 deletions
--- a/src/becke_numerical_grid/grid_becke_vector.irp.f
+++ b/src/becke_numerical_grid/grid_becke_vector.irp.f
@ -62,20 +62,30 @@ END_PROVIDER
    enddo
  enddo

+  FREE grid_points_per_atom
+  FREE final_weight_at_r
+
 END_PROVIDER

 ! ---

 BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)]
-  implicit none
+
  BEGIN_DOC
-! Transposed final_grid_points
+  ! Transposed final_grid_points
  END_DOC

+  implicit none
  integer :: i,j
-  do j=1,3
-    do i=1,n_points_final_grid
+
+  do j = 1, 3
+    do i = 1, n_points_final_grid
      final_grid_points_transp(i,j) = final_grid_points(j,i)
    enddo
  enddo
+
 END_PROVIDER
+
+! ---
+
+
--- a/src/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@ -124,6 +124,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3,
      enddo
    enddo

+    FREE int2_grad1_u12_ao_test
+
  else

    PROVIDE int2_grad1_u12_ao
@ -153,14 +155,14 @@ END_PROVIDER
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)]

  implicit none
-  integer :: ipoint
+  integer          :: ipoint
  double precision :: wall0, wall1

  PROVIDE mo_l_coef mo_r_coef
  PROVIDE int2_grad1_u12_ao_transp

-  !print *, ' providing int2_grad1_u12_bimo_transp'
-  !call wall_time(wall0)
+  print *, ' providing int2_grad1_u12_bimo_transp'
+  call wall_time(wall0)

  !$OMP PARALLEL         &
  !$OMP DEFAULT (NONE)   &
@ -178,8 +180,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num,
  !$OMP END DO
  !$OMP END PARALLEL

-  !call wall_time(wall1)
-  !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
+  call wall_time(wall1)
+  print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
+  call print_memory_usage()

 END_PROVIDER 

@ -188,7 +191,11 @@ END_PROVIDER
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)]

  implicit none
-  integer :: i, j, ipoint
+  integer          :: i, j, ipoint
+  double precision :: wall0, wall1
+
+  call wall_time(wall0)
+  print *, ' Providing int2_grad1_u12_bimo_t ...'

  PROVIDE mo_l_coef mo_r_coef
  PROVIDE int2_grad1_u12_bimo_transp
@ -205,6 +212,10 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,

  FREE int2_grad1_u12_bimo_transp

+  call wall_time(wall1)
+  print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0
+  call print_memory_usage()
+
 END_PROVIDER 

 ! ---
--- a/src/bi_ort_ints/three_body_ijmk.irp.f
+++ b/src/bi_ort_ints/three_body_ijmk.irp.f
@ -30,10 +30,10 @@
  implicit none
  integer                       :: ipoint, i, j, k, l, m
  double precision              :: wall1, wall0
-  double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:), tmp4(:,:,:,:)
+  double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:)
  double precision, allocatable :: tmp_4d(:,:,:,:)
-  double precision, allocatable :: tmp5(:,:,:)
-  double precision, allocatable :: tmp7(:,:)
+  double precision, allocatable :: tmp4(:,:,:)
+  double precision, allocatable :: tmp5(:,:)
  double precision, allocatable :: tmp_3d(:,:,:)

  print *, ' Providing the three_e_4_idx_bi_ort ...'
@ -47,7 +47,6 @@
  allocate(tmp1(n_points_final_grid,3,mo_num,mo_num))
  allocate(tmp2(n_points_final_grid,3,mo_num,mo_num))
  allocate(tmp3(n_points_final_grid,3,mo_num,mo_num))
-  allocate(tmp4(n_points_final_grid,3,mo_num,mo_num))

  !$OMP PARALLEL                                                  &
  !$OMP DEFAULT (NONE)                                            &
@ -55,7 +54,7 @@
  !$OMP SHARED (mo_num, n_points_final_grid,                      &
  !$OMP         mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
  !$OMP         int2_grad1_u12_bimo_t, final_weight_at_r_vector,  &
-  !$OMP         tmp1, tmp2, tmp3, tmp4)
+  !$OMP         tmp1, tmp2, tmp3)
  !$OMP DO COLLAPSE(2)
  do i = 1, mo_num
    do l = 1, mo_num
@ -69,13 +68,9 @@
        tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i)
        tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i)

-        tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
-        tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
-        tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
-
-        tmp4(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l)
-        tmp4(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l)
-        tmp4(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l)
+        tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l)
+        tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l)
+        tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l)
      enddo
    enddo
  enddo
@ -99,7 +94,7 @@
  !$OMP END PARALLEL DO

  call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
-            , tmp4, 3*n_points_final_grid, tmp1, 3*n_points_final_grid            &
+            , tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid            &
            , 0.d0, tmp_4d, mo_num*mo_num)

  deallocate(tmp1)
@ -116,8 +111,30 @@
  enddo
  !$OMP END PARALLEL DO

+
+
+  !$OMP PARALLEL                                                  &
+  !$OMP DEFAULT (NONE)                                            &
+  !$OMP PRIVATE (i, l, ipoint)                                    &
+  !$OMP SHARED (mo_num, n_points_final_grid,                      &
+  !$OMP         mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
+  !$OMP         int2_grad1_u12_bimo_t, final_weight_at_r_vector,  &
+  !$OMP         tmp1)
+  !$OMP DO COLLAPSE(2)
+  do i = 1, mo_num
+    do l = 1, mo_num
+      do ipoint = 1, n_points_final_grid
+        tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
+        tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
+        tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
  call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
-            , tmp3, 3*n_points_final_grid, tmp2, 3*n_points_final_grid            &
+            , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid            &
            , 0.d0, tmp_4d, mo_num*mo_num)

  deallocate(tmp2)
@ -135,11 +152,10 @@
  !$OMP END PARALLEL DO

  call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
-            , tmp3, 3*n_points_final_grid, tmp4, 3*n_points_final_grid            &
+            , tmp1, 3*n_points_final_grid, tmp3, 3*n_points_final_grid            &
            , 0.d0, tmp_4d, mo_num*mo_num)

  deallocate(tmp3)
-  deallocate(tmp4)

  !$OMP PARALLEL DO PRIVATE(i,j,k,m)
  do i = 1, mo_num
@ -155,8 +171,6 @@



-  allocate(tmp1(n_points_final_grid,3,mo_num,mo_num))
-
  !$OMP PARALLEL                                                  &
  !$OMP DEFAULT (NONE)                                            &
  !$OMP PRIVATE (i, l, ipoint)                                    &
@ -199,26 +213,26 @@


  allocate(tmp_3d(mo_num,mo_num,mo_num))
-  allocate(tmp7(n_points_final_grid,mo_num))
+  allocate(tmp5(n_points_final_grid,mo_num))

  !$OMP PARALLEL                                                  &
  !$OMP DEFAULT (NONE)                                            &
  !$OMP PRIVATE (i, ipoint)                                       &
  !$OMP SHARED (mo_num, n_points_final_grid,                      &
  !$OMP         mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
-  !$OMP         final_weight_at_r_vector,  &
-  !$OMP         tmp7)
+  !$OMP         final_weight_at_r_vector,                         &
+  !$OMP         tmp5)
  !$OMP DO
  do i = 1, mo_num
    do ipoint = 1, n_points_final_grid
-      tmp7(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
+      tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
    enddo
  enddo
  !$OMP END DO
  !$OMP END PARALLEL


-  allocate(tmp5(n_points_final_grid,mo_num,mo_num))
+  allocate(tmp4(n_points_final_grid,mo_num,mo_num))

  do m = 1, mo_num

@ -227,13 +241,13 @@
    !$OMP PRIVATE (i, k, ipoint)                                   &
    !$OMP SHARED (mo_num, n_points_final_grid, m,                  &
    !$OMP         int2_grad1_u12_bimo_t,                           &
-    !$OMP         tmp5)
+    !$OMP         tmp4)
    !$OMP DO COLLAPSE(2)
    do i = 1, mo_num
      do k = 1, mo_num
        do ipoint = 1, n_points_final_grid

-          tmp5(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) &
+          tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) &
                           + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) &
                           + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i)
        enddo
@ -243,7 +257,7 @@
    !$OMP END PARALLEL

    call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 &
-              , tmp7, n_points_final_grid, tmp5, n_points_final_grid       &
+              , tmp5, n_points_final_grid, tmp4, n_points_final_grid       &
              , 0.d0, tmp_3d, mo_num)

    !$OMP PARALLEL DO PRIVATE(i,j,k)
@ -264,13 +278,13 @@
    !$OMP SHARED (mo_num, n_points_final_grid, m,                  &
    !$OMP         mos_l_in_r_array_transp,                         &
    !$OMP         int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
-    !$OMP         tmp5)
+    !$OMP         tmp4)
    !$OMP DO COLLAPSE(2)
    do k = 1, mo_num
      do j = 1, mo_num
        do ipoint = 1, n_points_final_grid

-          tmp5(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j)        &
+          tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j)        &
                           * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) &
                             + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) &
                             + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) )
@ -281,7 +295,7 @@
    !$OMP END PARALLEL

    call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0              &
-              , tmp5, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid &
+              , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid &
              , 0.d0, tmp_3d, mo_num*mo_num)

    !$OMP PARALLEL DO PRIVATE(i,j,k)
@ -296,7 +310,7 @@

  enddo

-  deallocate(tmp7)
+  deallocate(tmp5)
  deallocate(tmp_3d)


@ -309,13 +323,13 @@
    !$OMP SHARED (mo_num, n_points_final_grid, i,                  &
    !$OMP         mos_r_in_r_array_transp,                         &
    !$OMP         int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
-    !$OMP         tmp5)
+    !$OMP         tmp4)
    !$OMP DO COLLAPSE(2)
    do j = 1, mo_num
      do m = 1, mo_num
        do ipoint = 1, n_points_final_grid

-          tmp5(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m)        &
+          tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m)        &
                           * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) &
                             + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) &
                             + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) )
@ -326,12 +340,12 @@
    !$OMP END PARALLEL

    call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0             &
-              , tmp5, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid &
+              , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid &
              , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num)

  enddo

-  deallocate(tmp5)
+  deallocate(tmp4)


 !  !$OMP PARALLEL DO PRIVATE(i,j,k,m)
--- a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f
+++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f
@ -46,6 +46,8 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid,
      mos_r_in_r_array_transp(i,j) = mos_r_in_r_array(j,i) 
    enddo
  enddo
+
+  FREE mos_r_in_r_array
 
 END_PROVIDER

@ -116,7 +118,7 @@ end subroutine give_all_mos_l_at_r

 ! ---

-BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo_num)]
+BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,mo_num)]

  BEGIN_DOC
  ! mos_l_in_r_array_transp(i,j) = value of the jth mo on the ith grid point
@ -130,6 +132,8 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo
      mos_l_in_r_array_transp(i,j) = mos_l_in_r_array(j,i) 
    enddo
  enddo
+
+  FREE mos_l_in_r_array
 
 END_PROVIDER

--- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f
+++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f
@ -54,11 +54,13 @@ subroutine run_cipsi_tc

  implicit none

-  if (.not.is_zmq_slave) then
+  if (.not. is_zmq_slave) then
+
    PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e
-    if(elec_alpha_num+elec_beta_num.ge.3)then
+
+    if(elec_alpha_num+elec_beta_num .ge. 3) then
      if(three_body_h_tc)then
-        call provide_all_three_ints_bi_ortho
+        call provide_all_three_ints_bi_ortho()
      endif
    endif
    ! ---
--- a/src/tc_bi_ortho/normal_ordered.irp.f
+++ b/src/tc_bi_ortho/normal_ordered.irp.f
@ -24,9 +24,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_
 
  PROVIDE N_int

-  print*,' Providing normal_two_body_bi_orth ...'
-  call wall_time(wall0)
-
  if(read_tc_norm_ord) then

    open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read")
@ -115,9 +112,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_
  call wall_time(wall1)
  print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0

-  call wall_time(wall1)
-  print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0
-
 END_PROVIDER 

 ! ---
--- a/src/tc_bi_ortho/slater_tc_opt.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt.irp.f
@ -1,24 +1,38 @@
-subroutine provide_all_three_ints_bi_ortho
- implicit none
- BEGIN_DOC
-! routine that provides all necessary three-electron integrals 
- END_DOC
- if(three_body_h_tc)then
-  if(three_e_3_idx_term)then
-   PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort
-   PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort
-  endif
-  if(three_e_4_idx_term)then
-   PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort 
-  endif
-  if(.not.double_normal_ord.and.three_e_5_idx_term)then
-   PROVIDE three_e_5_idx_direct_bi_ort 
-  elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then
-   PROVIDE normal_two_body_bi_orth
-  endif
+
+! ---
+
+subroutine provide_all_three_ints_bi_ortho()
+
+  BEGIN_DOC
+  ! routine that provides all necessary three-electron integrals 
+  END_DOC
+
+  implicit none
+
+  if(three_body_h_tc) then
+
+    if(three_e_3_idx_term) then
+      PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort
+      PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort
+    endif
+
+    if(three_e_4_idx_term) then
+      PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort 
+    endif
+
+    if(.not. double_normal_ord. and. three_e_5_idx_term) then
+      PROVIDE three_e_5_idx_direct_bi_ort 
+    elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then
+      PROVIDE normal_two_body_bi_orth
+    endif
+
 endif
+
+ return
 end

+! ---
+
 subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot)
 implicit none
  BEGIN_DOC