10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-06-26 15:12:19 +02:00

working on memory footprint

This commit is contained in:
Abdallah Ammar 2023-06-04 09:19:34 +02:00
parent 072bea8041
commit a791a28523
7 changed files with 121 additions and 72 deletions

View File

@ -62,20 +62,30 @@ END_PROVIDER
enddo
enddo
FREE grid_points_per_atom
FREE final_weight_at_r
END_PROVIDER
! ---
BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)]
implicit none
BEGIN_DOC
! Transposed final_grid_points
! Transposed final_grid_points
END_DOC
implicit none
integer :: i,j
do j=1,3
do i=1,n_points_final_grid
do j = 1, 3
do i = 1, n_points_final_grid
final_grid_points_transp(i,j) = final_grid_points(j,i)
enddo
enddo
END_PROVIDER
! ---

View File

@ -124,6 +124,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3,
enddo
enddo
FREE int2_grad1_u12_ao_test
else
PROVIDE int2_grad1_u12_ao
@ -153,14 +155,14 @@ END_PROVIDER
BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)]
implicit none
integer :: ipoint
integer :: ipoint
double precision :: wall0, wall1
PROVIDE mo_l_coef mo_r_coef
PROVIDE int2_grad1_u12_ao_transp
!print *, ' providing int2_grad1_u12_bimo_transp'
!call wall_time(wall0)
print *, ' providing int2_grad1_u12_bimo_transp'
call wall_time(wall0)
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
@ -178,8 +180,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num,
!$OMP END DO
!$OMP END PARALLEL
!call wall_time(wall1)
!print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
call wall_time(wall1)
print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
call print_memory_usage()
END_PROVIDER
@ -188,7 +191,11 @@ END_PROVIDER
BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)]
implicit none
integer :: i, j, ipoint
integer :: i, j, ipoint
double precision :: wall0, wall1
call wall_time(wall0)
print *, ' Providing int2_grad1_u12_bimo_t ...'
PROVIDE mo_l_coef mo_r_coef
PROVIDE int2_grad1_u12_bimo_transp
@ -205,6 +212,10 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,
FREE int2_grad1_u12_bimo_transp
call wall_time(wall1)
print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0
call print_memory_usage()
END_PROVIDER
! ---

View File

@ -30,10 +30,10 @@
implicit none
integer :: ipoint, i, j, k, l, m
double precision :: wall1, wall0
double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:), tmp4(:,:,:,:)
double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:)
double precision, allocatable :: tmp_4d(:,:,:,:)
double precision, allocatable :: tmp5(:,:,:)
double precision, allocatable :: tmp7(:,:)
double precision, allocatable :: tmp4(:,:,:)
double precision, allocatable :: tmp5(:,:)
double precision, allocatable :: tmp_3d(:,:,:)
print *, ' Providing the three_e_4_idx_bi_ort ...'
@ -47,7 +47,6 @@
allocate(tmp1(n_points_final_grid,3,mo_num,mo_num))
allocate(tmp2(n_points_final_grid,3,mo_num,mo_num))
allocate(tmp3(n_points_final_grid,3,mo_num,mo_num))
allocate(tmp4(n_points_final_grid,3,mo_num,mo_num))
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
@ -55,7 +54,7 @@
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp1, tmp2, tmp3, tmp4)
!$OMP tmp1, tmp2, tmp3)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
@ -69,13 +68,9 @@
tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i)
tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i)
tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
tmp4(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l)
tmp4(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l)
tmp4(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l)
tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l)
tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l)
tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l)
enddo
enddo
enddo
@ -99,7 +94,7 @@
!$OMP END PARALLEL DO
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp4, 3*n_points_final_grid, tmp1, 3*n_points_final_grid &
, tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid &
, 0.d0, tmp_4d, mo_num*mo_num)
deallocate(tmp1)
@ -116,8 +111,30 @@
enddo
!$OMP END PARALLEL DO
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp1)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp3, 3*n_points_final_grid, tmp2, 3*n_points_final_grid &
, tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid &
, 0.d0, tmp_4d, mo_num*mo_num)
deallocate(tmp2)
@ -135,11 +152,10 @@
!$OMP END PARALLEL DO
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp3, 3*n_points_final_grid, tmp4, 3*n_points_final_grid &
, tmp1, 3*n_points_final_grid, tmp3, 3*n_points_final_grid &
, 0.d0, tmp_4d, mo_num*mo_num)
deallocate(tmp3)
deallocate(tmp4)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
@ -155,8 +171,6 @@
allocate(tmp1(n_points_final_grid,3,mo_num,mo_num))
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
@ -199,26 +213,26 @@
allocate(tmp_3d(mo_num,mo_num,mo_num))
allocate(tmp7(n_points_final_grid,mo_num))
allocate(tmp5(n_points_final_grid,mo_num))
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP final_weight_at_r_vector, &
!$OMP tmp7)
!$OMP final_weight_at_r_vector, &
!$OMP tmp5)
!$OMP DO
do i = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp7(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
allocate(tmp5(n_points_final_grid,mo_num,mo_num))
allocate(tmp4(n_points_final_grid,mo_num,mo_num))
do m = 1, mo_num
@ -227,13 +241,13 @@
!$OMP PRIVATE (i, k, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, m, &
!$OMP int2_grad1_u12_bimo_t, &
!$OMP tmp5)
!$OMP tmp4)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do k = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp5(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) &
tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) &
+ int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) &
+ int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i)
enddo
@ -243,7 +257,7 @@
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 &
, tmp7, n_points_final_grid, tmp5, n_points_final_grid &
, tmp5, n_points_final_grid, tmp4, n_points_final_grid &
, 0.d0, tmp_3d, mo_num)
!$OMP PARALLEL DO PRIVATE(i,j,k)
@ -264,13 +278,13 @@
!$OMP SHARED (mo_num, n_points_final_grid, m, &
!$OMP mos_l_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp5)
!$OMP tmp4)
!$OMP DO COLLAPSE(2)
do k = 1, mo_num
do j = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp5(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) &
tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) &
* ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) &
+ int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) &
+ int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) )
@ -281,7 +295,7 @@
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 &
, tmp5, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid &
, tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid &
, 0.d0, tmp_3d, mo_num*mo_num)
!$OMP PARALLEL DO PRIVATE(i,j,k)
@ -296,7 +310,7 @@
enddo
deallocate(tmp7)
deallocate(tmp5)
deallocate(tmp_3d)
@ -309,13 +323,13 @@
!$OMP SHARED (mo_num, n_points_final_grid, i, &
!$OMP mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp5)
!$OMP tmp4)
!$OMP DO COLLAPSE(2)
do j = 1, mo_num
do m = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp5(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) &
tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) &
* ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) &
+ int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) &
+ int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) )
@ -326,12 +340,12 @@
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 &
, tmp5, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid &
, tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid &
, 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num)
enddo
deallocate(tmp5)
deallocate(tmp4)
! !$OMP PARALLEL DO PRIVATE(i,j,k,m)

View File

@ -46,6 +46,8 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid,
mos_r_in_r_array_transp(i,j) = mos_r_in_r_array(j,i)
enddo
enddo
FREE mos_r_in_r_array
END_PROVIDER
@ -116,7 +118,7 @@ end subroutine give_all_mos_l_at_r
! ---
BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo_num)]
BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,mo_num)]
BEGIN_DOC
! mos_l_in_r_array_transp(i,j) = value of the jth mo on the ith grid point
@ -130,6 +132,8 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo
mos_l_in_r_array_transp(i,j) = mos_l_in_r_array(j,i)
enddo
enddo
FREE mos_l_in_r_array
END_PROVIDER

View File

@ -54,11 +54,13 @@ subroutine run_cipsi_tc
implicit none
if (.not.is_zmq_slave) then
if (.not. is_zmq_slave) then
PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e
if(elec_alpha_num+elec_beta_num.ge.3)then
if(elec_alpha_num+elec_beta_num .ge. 3) then
if(three_body_h_tc)then
call provide_all_three_ints_bi_ortho
call provide_all_three_ints_bi_ortho()
endif
endif
! ---

View File

@ -24,9 +24,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_
PROVIDE N_int
print*,' Providing normal_two_body_bi_orth ...'
call wall_time(wall0)
if(read_tc_norm_ord) then
open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read")
@ -115,9 +112,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_
call wall_time(wall1)
print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0
call wall_time(wall1)
print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0
END_PROVIDER
! ---

View File

@ -1,24 +1,38 @@
subroutine provide_all_three_ints_bi_ortho
implicit none
BEGIN_DOC
! routine that provides all necessary three-electron integrals
END_DOC
if(three_body_h_tc)then
if(three_e_3_idx_term)then
PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort
PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort
endif
if(three_e_4_idx_term)then
PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort
endif
if(.not.double_normal_ord.and.three_e_5_idx_term)then
PROVIDE three_e_5_idx_direct_bi_ort
elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then
PROVIDE normal_two_body_bi_orth
endif
! ---
subroutine provide_all_three_ints_bi_ortho()
BEGIN_DOC
! routine that provides all necessary three-electron integrals
END_DOC
implicit none
if(three_body_h_tc) then
if(three_e_3_idx_term) then
PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort
PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort
endif
if(three_e_4_idx_term) then
PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort
endif
if(.not. double_normal_ord. and. three_e_5_idx_term) then
PROVIDE three_e_5_idx_direct_bi_ort
elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then
PROVIDE normal_two_body_bi_orth
endif
endif
return
end
! ---
subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot)
implicit none
BEGIN_DOC