9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2025-01-08 19:32:58 +01:00

working on mem reduction

This commit is contained in:
AbdAmmar 2024-03-28 17:05:00 +01:00
parent f8bff47122
commit 002aff90f5
2 changed files with 67 additions and 37 deletions

View File

@ -33,8 +33,10 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
double precision :: weight1, ao_k_r, ao_i_r double precision :: weight1, ao_k_r, ao_i_r
double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq
double precision :: time0, time1 double precision :: time0, time1
double precision, allocatable :: b_mat(:,:,:,:), c_mat(:,:,:) double precision, allocatable :: c_mat(:,:,:)
logical, external :: ao_two_e_integral_zero
double precision, external :: get_ao_two_e_integral double precision, external :: get_ao_two_e_integral
double precision, external :: ao_two_e_integral
PROVIDe tc_integ_type PROVIDe tc_integ_type
PROVIDE env_type PROVIDE env_type
@ -194,9 +196,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
ao_i_r = aos_in_r_array_transp(ipoint,i) ao_i_r = aos_in_r_array_transp(ipoint,i)
ao_k_r = aos_in_r_array_transp(ipoint,k) ao_k_r = aos_in_r_array_transp(ipoint,k)
ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) & ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) &
+ weight1 * int2_grad1_u12_ao(j,l,ipoint,1) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) & - weight1 * int2_grad1_u12_ao(j,l,ipoint,1) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) &
+ weight1 * int2_grad1_u12_ao(j,l,ipoint,2) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) & - weight1 * int2_grad1_u12_ao(j,l,ipoint,2) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) &
+ weight1 * int2_grad1_u12_ao(j,l,ipoint,3) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) - weight1 * int2_grad1_u12_ao(j,l,ipoint,3) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3))
enddo enddo
enddo enddo
enddo enddo
@ -209,12 +211,13 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
print*, ' DGEMM are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' print*, ' DGEMM are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...'
allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) allocate(c_mat(n_points_final_grid,ao_num,ao_num))
do m = 1, 3
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP DEFAULT (NONE) & !$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) &
!$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, &
!$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector, m)
!$OMP DO SCHEDULE (static) !$OMP DO SCHEDULE (static)
do i = 1, ao_num do i = 1, ao_num
do k = 1, ao_num do k = 1, ao_num
@ -224,24 +227,21 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
ao_i_r = aos_in_r_array_transp(ipoint,i) ao_i_r = aos_in_r_array_transp(ipoint,i)
ao_k_r = aos_in_r_array_transp(ipoint,k) ao_k_r = aos_in_r_array_transp(ipoint,k)
b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m))
b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2))
b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3))
enddo enddo
enddo enddo
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
do m = 1, 3
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 &
, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
, 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num)
enddo enddo
deallocate(b_mat) deallocate(c_mat)
end if end if
FREE int2_grad1_u12_ao !FREE int2_grad1_u12_ao
if(tc_integ_type .eq. "semi-analytic") then if(tc_integ_type .eq. "semi-analytic") then
FREE int2_grad1_u2e_ao FREE int2_grad1_u2e_ao
@ -259,18 +259,25 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
! --- ! ---
PROVIDE ao_integrals_map logical :: integ_zero
double precision :: integ_val
print*, ' adding ERI to ao_two_e_tc_tot ...'
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & !$OMP PRIVATE(i, j, k, l, integ_zero, integ_val) &
!$OMP PRIVATE(i, j, k, l) !$OMP SHARED(ao_num, ao_two_e_tc_tot)
!$OMP DO COLLAPSE(4) !$OMP DO COLLAPSE(4)
do j = 1, ao_num do j = 1, ao_num
do l = 1, ao_num do l = 1, ao_num
do i = 1, ao_num do i = 1, ao_num
do k = 1, ao_num do k = 1, ao_num
! < 1:i, 2:j | 1:k, 2:l > integ_zero = ao_two_e_integral_zero(i,j,k,l)
ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) if(.not. integ_zero) then
! i,k : r1 j,l : r2
integ_val = ao_two_e_integral(i,k,j,l)
ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + integ_val
endif
enddo enddo
enddo enddo
enddo enddo
@ -278,8 +285,25 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
!call clear_ao_map() !PROVIDE ao_integrals_map
FREE ao_integrals_map !!$OMP PARALLEL DEFAULT(NONE) &
!!$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) &
!!$OMP PRIVATE(i, j, k, l)
!!$OMP DO COLLAPSE(4)
!do j = 1, ao_num
! do l = 1, ao_num
! do i = 1, ao_num
! do k = 1, ao_num
! ! < 1:i, 2:j | 1:k, 2:l >
! ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map)
! enddo
! enddo
! enddo
!enddo
!!$OMP END DO
!!$OMP END PARALLEL
!!call clear_ao_map()
!FREE ao_integrals_map
if(tc_integ_type .eq. "numeric") then if(tc_integ_type .eq. "numeric") then
FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num

View File

@ -286,4 +286,10 @@ doc: If |true|, memory scale of TC ao -> mo: O(N3)
interface: ezfio,provider,ocaml interface: ezfio,provider,ocaml
default: False default: False
[tc_save_mem]
type: logical
doc: If |true|, use loops to save memory TC
interface: ezfio,provider,ocaml
default: False