mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-12-21 11:03:29 +01:00
Reduced memory in cholesky SCF
This commit is contained in:
parent
e35f847341
commit
905d88529f
@ -29,7 +29,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
double precision, pointer :: L(:,:), L_old(:,:)
|
||||
|
||||
|
||||
double precision, parameter :: s = 1.d-1
|
||||
double precision :: s
|
||||
double precision, parameter :: dscale = 1.d0
|
||||
|
||||
double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:)
|
||||
@ -47,6 +47,11 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
integer :: block_size, iblock, ierr
|
||||
|
||||
integer(omp_lock_kind), allocatable :: lock(:)
|
||||
|
||||
double precision :: rss
|
||||
double precision, external :: memory_of_double, memory_of_int
|
||||
|
||||
|
||||
PROVIDE nucl_coord
|
||||
|
||||
if (.not.do_direct_integrals) then
|
||||
@ -57,6 +62,9 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
ndim = ao_num*ao_num
|
||||
tau = ao_cholesky_threshold
|
||||
|
||||
rss = 6.d0 * memory_of_double(ndim) + &
|
||||
6.d0 * memory_of_int(ndim)
|
||||
call check_mem(rss, irp_here)
|
||||
|
||||
allocate(L(ndim,1))
|
||||
|
||||
@ -97,7 +105,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
else
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided)
|
||||
do i=1,ndim
|
||||
D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), &
|
||||
addr(2,i), addr(2,i), &
|
||||
@ -130,6 +138,14 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
! a.
|
||||
i = i+1
|
||||
|
||||
logical :: memory_ok
|
||||
memory_ok = .False.
|
||||
|
||||
s = 1.d-2
|
||||
|
||||
! Inrease s until the arrays fit in memory
|
||||
do
|
||||
|
||||
! b.
|
||||
Dmin = max(s*Dmax,tau)
|
||||
|
||||
@ -147,6 +163,26 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
endif
|
||||
enddo
|
||||
|
||||
call resident_memory(rss)
|
||||
rss = rss &
|
||||
+ np*memory_of_double(nq) & ! Delta(np,nq)
|
||||
+ (rank+nq)* memory_of_double(ndim) & ! L(ndim,rank+nq)
|
||||
+ (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size)
|
||||
! Ltmp_q(nq,block_size)
|
||||
|
||||
if (rss > qp_max_mem) then
|
||||
s = s*2.d0
|
||||
else
|
||||
exit
|
||||
endif
|
||||
|
||||
if (nq == 0) then
|
||||
print *, 'Not enough memory. Reduce cholesky threshold'
|
||||
stop -1
|
||||
endif
|
||||
|
||||
enddo
|
||||
|
||||
! d., e.
|
||||
block_size = max(N,24)
|
||||
|
||||
@ -198,7 +234,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO SCHEDULE(dynamic,8)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do m=1,nq
|
||||
|
||||
call omp_set_lock(lock(m))
|
||||
|
@ -460,7 +460,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num)
|
||||
!$OMP PARALLEL DO PRIVATE(i,k) &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP SHARED (ao_num,ao_two_e_integral_schwartz) &
|
||||
!$OMP SCHEDULE(dynamic)
|
||||
!$OMP SCHEDULE(guided)
|
||||
do i=1,ao_num
|
||||
do k=1,i
|
||||
ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k))
|
||||
@ -975,7 +975,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_10,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_10,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd)
|
||||
|
||||
nx = nd
|
||||
!DIR$ LOOP COUNT(8)
|
||||
@ -998,7 +999,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
|
||||
endif
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_00,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
|
||||
endif
|
||||
|
||||
ny=0
|
||||
@ -1017,7 +1019,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,C_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,C_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
|
||||
end
|
||||
|
||||
recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
@ -1057,7 +1060,8 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_00,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
|
||||
|
||||
ny=0
|
||||
|
||||
@ -1069,7 +1073,8 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,C_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,C_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
|
||||
|
||||
end
|
||||
|
||||
@ -1098,7 +1103,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_10,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_10,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd)
|
||||
|
||||
nx = nd
|
||||
!DIR$ LOOP COUNT(8)
|
||||
@ -1118,7 +1124,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_00,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
|
||||
|
||||
ny=0
|
||||
!DIR$ LOOP COUNT(8)
|
||||
@ -1130,7 +1137,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,C_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,C_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
|
||||
end
|
||||
|
||||
recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
@ -1177,9 +1185,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
Y(1) = D_00(1)
|
||||
Y(2) = D_00(2)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,D_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,D_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd)
|
||||
|
||||
return
|
||||
|
||||
@ -1199,7 +1207,8 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_01,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_01,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd)
|
||||
|
||||
ny = 0
|
||||
!DIR$ LOOP COUNT(6)
|
||||
@ -1208,9 +1217,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
enddo
|
||||
call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,D_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,D_00,d,nd)
|
||||
!DIR$ FORCEINLINE
|
||||
call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd)
|
||||
|
||||
end select
|
||||
end
|
||||
@ -1300,3 +1309,56 @@ subroutine multiply_poly_local(b,nb,c,nc,d,nd)
|
||||
end
|
||||
|
||||
|
||||
!DIR$ FORCEINLINE
|
||||
subroutine multiply_poly_c2_inline_2e(b,nb,c,d,nd)
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Multiply two polynomials
|
||||
! D(t) += B(t)*C(t)
|
||||
END_DOC
|
||||
|
||||
integer, intent(in) :: nb
|
||||
integer, intent(out) :: nd
|
||||
double precision, intent(in) :: b(0:nb), c(0:2)
|
||||
double precision, intent(inout) :: d(0:nb+2)
|
||||
|
||||
integer :: ndtmp
|
||||
integer :: ib, ic, id, k
|
||||
if(nb < 0) return !False if nb>=0
|
||||
|
||||
select case (nb)
|
||||
case (0)
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(1) * b(0)
|
||||
d(2) = d(2) + c(2) * b(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
|
||||
d(2) = d(2) + c(1) * b(1) + c(2) * b(0)
|
||||
d(3) = d(3) + c(2) * b(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
|
||||
d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0)
|
||||
d(3) = d(3) + c(1) * b(2) + c(2) * b(1)
|
||||
d(4) = d(4) + c(2) * b(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
|
||||
do ib=2,nb
|
||||
d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2)
|
||||
enddo
|
||||
d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1)
|
||||
d(nb+2) = d(nb+2) + c(2) * b(nb)
|
||||
|
||||
end select
|
||||
|
||||
do nd = nb+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
end
|
||||
|
||||
|
@ -101,7 +101,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
|
||||
|
||||
!$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED)
|
||||
e = 0d0
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do a = 1, nV
|
||||
do b = a+1, nV
|
||||
do c = b+1, nV
|
||||
|
@ -117,7 +117,7 @@ END_PROVIDER
|
||||
!$OMP N_det_alpha_unique,N_det_beta_unique,irp_here)
|
||||
allocate(tmp_a(mo_num,mo_num,N_states), tmp_b(mo_num,mo_num,N_states) )
|
||||
tmp_a = 0.d0
|
||||
!$OMP DO SCHEDULE(dynamic,64)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do k_a=1,N_det
|
||||
krow = psi_bilinear_matrix_rows(k_a)
|
||||
ASSERT (krow <= N_det_alpha_unique)
|
||||
@ -173,7 +173,7 @@ END_PROVIDER
|
||||
deallocate(tmp_a)
|
||||
|
||||
tmp_b = 0.d0
|
||||
!$OMP DO SCHEDULE(dynamic,64)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do k_b=1,N_det
|
||||
krow = psi_bilinear_matrix_transp_rows(k_b)
|
||||
ASSERT (krow <= N_det_alpha_unique)
|
||||
|
@ -250,7 +250,7 @@ subroutine remove_duplicates_in_psi_det(found_duplicates)
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO schedule(dynamic,1024)
|
||||
!$OMP DO schedule(guided,64)
|
||||
do i=1,N_det-1
|
||||
if (duplicate(i)) then
|
||||
cycle
|
||||
|
@ -317,7 +317,7 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst
|
||||
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)&
|
||||
!$OMP REDUCTION(+:accu)
|
||||
allocate(idx(0:n))
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do i = n,1,-1 ! Better OMP scheduling
|
||||
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp)
|
||||
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
|
||||
|
@ -190,27 +190,39 @@ END_PROVIDER
|
||||
|
||||
deallocate(X)
|
||||
|
||||
if (elec_alpha_num > elec_beta_num) then
|
||||
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
|
||||
endif
|
||||
|
||||
|
||||
allocate(X2(ao_num,ao_num,cholesky_ao_num,2))
|
||||
double precision :: rss
|
||||
double precision :: memory_of_double
|
||||
|
||||
integer :: iblock
|
||||
integer, parameter :: block_size = 32
|
||||
|
||||
rss = memory_of_double(ao_num*ao_num)
|
||||
call check_mem(2.d0*block_size*rss, irp_here)
|
||||
allocate(X2(ao_num,ao_num,block_size,2))
|
||||
allocate(X3(ao_num,block_size,ao_num,2))
|
||||
|
||||
! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j)
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, &
|
||||
do iblock=1,cholesky_ao_num,block_size
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, &
|
||||
SCF_density_matrix_ao_alpha, ao_num, &
|
||||
cholesky_ao, ao_num, 0.d0, &
|
||||
cholesky_ao(1,1,iblock), ao_num, 0.d0, &
|
||||
X2(1,1,1,1), ao_num)
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, &
|
||||
if (elec_alpha_num > elec_beta_num) then
|
||||
call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, &
|
||||
SCF_density_matrix_ao_beta, ao_num, &
|
||||
cholesky_ao, ao_num, 0.d0, &
|
||||
cholesky_ao(1,1,iblock), ao_num, 0.d0, &
|
||||
X2(1,1,1,2), ao_num)
|
||||
|
||||
allocate(X3(ao_num,cholesky_ao_num,ao_num,2))
|
||||
|
||||
do s=1,ao_num
|
||||
do j=1,cholesky_ao_num
|
||||
do j=1,min(cholesky_ao_num-iblock+1,block_size)
|
||||
do m=1,ao_num
|
||||
X3(m,j,s,1) = X2(m,s,j,1)
|
||||
X3(m,j,s,2) = X2(m,s,j,2)
|
||||
@ -218,19 +230,35 @@ END_PROVIDER
|
||||
enddo
|
||||
enddo
|
||||
|
||||
deallocate(X2)
|
||||
else
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, &
|
||||
cholesky_ao, ao_num, &
|
||||
X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, &
|
||||
do s=1,ao_num
|
||||
do j=1,min(cholesky_ao_num-iblock+1,block_size)
|
||||
do m=1,ao_num
|
||||
X3(m,j,s,1) = X2(m,s,j,1)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
endif
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, &
|
||||
cholesky_ao(1,1,iblock), ao_num, &
|
||||
X3(1,1,1,1), ao_num*block_size, 1.d0, &
|
||||
ao_two_e_integral_alpha_chol, ao_num)
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, &
|
||||
cholesky_ao, ao_num, &
|
||||
X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, &
|
||||
if (elec_alpha_num > elec_beta_num) then
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, &
|
||||
cholesky_ao(1,1,iblock), ao_num, &
|
||||
X3(1,1,1,2), ao_num*block_size, 1.d0, &
|
||||
ao_two_e_integral_beta_chol, ao_num)
|
||||
endif
|
||||
|
||||
deallocate(X3)
|
||||
enddo
|
||||
|
||||
if (elec_alpha_num == elec_beta_num) then
|
||||
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
|
||||
endif
|
||||
deallocate(X2,X3)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
|
@ -5,7 +5,7 @@ BEGIN_PROVIDER [ integer, qp_max_mem ]
|
||||
END_DOC
|
||||
character*(128) :: env
|
||||
|
||||
qp_max_mem = 2000
|
||||
qp_max_mem = 500
|
||||
call getenv('QP_MAXMEM',env)
|
||||
if (trim(env) /= '') then
|
||||
call lock_io()
|
||||
|
Loading…
Reference in New Issue
Block a user