mirror of
https://github.com/LCPQ/quantum_package
synced 2024-10-19 22:41:48 +02:00
Better estimate of the PT2 memory
This commit is contained in:
parent
bd72bace4e
commit
a38b57ac34
@ -1,3 +1,5 @@
|
|||||||
|
# Configuration of EZFIO package
|
||||||
|
|
||||||
export QP_EZFIO=${QP_ROOT}/external/ezfio
|
export QP_EZFIO=${QP_ROOT}/external/ezfio
|
||||||
|
|
||||||
if [[ -f ${QP_EZFIO}/Bash/ezfio.sh ]]; then
|
if [[ -f ${QP_EZFIO}/Bash/ezfio.sh ]]; then
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
# Configuration of IRPF90 package
|
||||||
|
|
||||||
export IRPF90=${QP_ROOT}/bin/irpf90
|
export IRPF90=${QP_ROOT}/bin/irpf90
|
||||||
|
|
||||||
# Load irpman shell completion
|
# Load irpman shell completion
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
# Configuration of additional libraries required for modules
|
||||||
|
|
||||||
QP_LIB=""
|
QP_LIB=""
|
||||||
|
|
||||||
# Include here the optional external libraries to link with your binaries,
|
# Include here the optional external libraries to link with your binaries,
|
||||||
|
15
etc/local.rc
Normal file
15
etc/local.rc
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# Configuration specific to the local machine
|
||||||
|
|
||||||
|
# Maximum allowed memory per node
|
||||||
|
# export QP_MAXMEM=64
|
||||||
|
|
||||||
|
# Target number of threads for Davidson's algorithm
|
||||||
|
# export QP_NTHREADS_DAVIDSON=32
|
||||||
|
|
||||||
|
# Target number of threads for the computation of the PT2
|
||||||
|
# export QP_NTHREADS_PT2=32
|
||||||
|
|
||||||
|
# Name of the network interface to be chosen
|
||||||
|
# export QP_NIC=ib0
|
||||||
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
|||||||
# Choose the correct network interface if the default one is incorrect
|
|
||||||
# export QP_NIC=ib0
|
|
||||||
# export QP_NIC=eth0
|
|
||||||
|
|
||||||
|
|
@ -1 +1,3 @@
|
|||||||
|
# Configuration for the Ninja package
|
||||||
|
|
||||||
export NINJA=${QP_ROOT}/bin/ninja
|
export NINJA=${QP_ROOT}/bin/ninja
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
# Configuration for the OCaml compiler
|
||||||
|
|
||||||
if [[ -z $OPAMROOT ]]
|
if [[ -z $OPAMROOT ]]
|
||||||
then
|
then
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
# Configuration of all the paths to executables and libraries
|
||||||
|
|
||||||
if [[ -z $QP_PYTHON ]]
|
if [[ -z $QP_PYTHON ]]
|
||||||
then
|
then
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
# Configuration of the qp shell command
|
||||||
|
|
||||||
if [[ "$(ps -p $$ -ocomm=)" == "zsh" ]] ; then
|
if [[ "$(ps -p $$ -ocomm=)" == "zsh" ]] ; then
|
||||||
autoload bashcompinit
|
autoload bashcompinit
|
||||||
bashcompinit
|
bashcompinit
|
||||||
|
@ -46,6 +46,9 @@ subroutine run_cipsi
|
|||||||
psi_coef = psi_coef_sorted
|
psi_coef = psi_coef_sorted
|
||||||
N_det = N_det_max
|
N_det = N_det_max
|
||||||
soft_touch N_det psi_det psi_coef
|
soft_touch N_det psi_det psi_coef
|
||||||
|
if (s2_eig) then
|
||||||
|
call make_s2_eigenfunction
|
||||||
|
endif
|
||||||
call diagonalize_CI
|
call diagonalize_CI
|
||||||
call save_wavefunction
|
call save_wavefunction
|
||||||
endif
|
endif
|
||||||
|
@ -185,7 +185,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in)
|
|||||||
ipos=1
|
ipos=1
|
||||||
do i= 1, N_det_generators
|
do i= 1, N_det_generators
|
||||||
do j=1,pt2_F(pt2_J(i))
|
do j=1,pt2_F(pt2_J(i))
|
||||||
write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, pt2_J(i), N
|
write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, pt2_J(i), N_in
|
||||||
ipos += 30
|
ipos += 30
|
||||||
if (ipos > 300000-30) then
|
if (ipos > 300000-30) then
|
||||||
if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
|
if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
|
||||||
@ -213,16 +213,50 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in)
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
double precision :: mem_collector, mem, rss
|
||||||
|
|
||||||
|
call resident_memory(rss)
|
||||||
|
|
||||||
|
mem_collector = 8.d0 * & ! bytes
|
||||||
|
( 1.d0*pt2_n_tasks_max & ! task_id, index
|
||||||
|
+ 0.635d0*N_det_generators & ! f,d
|
||||||
|
+ 3.d0*N_det_generators*N_states & ! eI, vI, nI
|
||||||
|
+ 3.d0*pt2_n_tasks_max*N_states & ! eI_task, vI_task, nI_task
|
||||||
|
+ 4.d0*(pt2_N_teeth+1) & ! S, S2, T2, T3
|
||||||
|
+ 1.d0*(N_int*2.d0*N + N) & ! selection buffer
|
||||||
|
+ 1.d0*(N_int*2.d0*N + N) & ! sort selection buffer
|
||||||
|
) / 1024.d0**3
|
||||||
|
|
||||||
integer :: nproc_target
|
integer :: nproc_target
|
||||||
nproc_target = nproc
|
nproc_target = nthreads_pt2
|
||||||
double precision :: mem
|
|
||||||
mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 + 3.d0 + 5.d0) / (1024.d0**3)
|
do
|
||||||
call write_double(6,mem,'Estimated memory/thread (Gb)')
|
mem = mem_collector + & !
|
||||||
if (qp_max_mem > 0) then
|
nproc_target * 8.d0 * & ! bytes
|
||||||
nproc_target = max(1,int(dble(qp_max_mem)/mem))
|
( 0.5d0*pt2_n_tasks_max & ! task_id
|
||||||
nproc_target = min(nproc_target,nproc)
|
+ 64.d0*pt2_n_tasks_max & ! task
|
||||||
|
+ 3.d0*pt2_n_tasks_max*N_states & ! pt2, variance, norm
|
||||||
|
+ 1.d0*pt2_n_tasks_max & ! i_generator, subset
|
||||||
|
+ 2.d0*(N_int*2.d0*N_in + N_in) & ! selection buffers
|
||||||
|
+ 1.d0*(N_int*2.d0*N_in + N_in) & ! sort/merge selection buffers
|
||||||
|
) / 1024.d0**3
|
||||||
|
|
||||||
|
if (nproc_target == 0) then
|
||||||
|
call check_mem(mem,irp_here)
|
||||||
|
nproc_target = 1
|
||||||
|
exit
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
if (mem+rss < qp_max_mem) then
|
||||||
|
exit
|
||||||
|
endif
|
||||||
|
|
||||||
|
nproc_target = nproc_target - 1
|
||||||
|
|
||||||
|
enddo
|
||||||
|
call write_int(6,nproc_target,'Number of threads for PT2')
|
||||||
|
call write_double(6,mem,'Memory (Gb)')
|
||||||
|
|
||||||
call omp_set_nested(.false.)
|
call omp_set_nested(.false.)
|
||||||
|
|
||||||
|
|
||||||
@ -329,6 +363,8 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, &
|
|||||||
rss += memory_of_double(pt2_N_teeth+1)*4.d0
|
rss += memory_of_double(pt2_N_teeth+1)*4.d0
|
||||||
call check_mem(rss,irp_here)
|
call check_mem(rss,irp_here)
|
||||||
|
|
||||||
|
! If an allocation is added here, the estimate of the memory should also be
|
||||||
|
! updated in ZMQ_pt2
|
||||||
allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators))
|
allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators))
|
||||||
allocate(d(N_det_generators+1))
|
allocate(d(N_det_generators+1))
|
||||||
allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max))
|
allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max))
|
||||||
|
@ -48,6 +48,9 @@ subroutine run_stochastic_cipsi
|
|||||||
psi_coef = psi_coef_sorted
|
psi_coef = psi_coef_sorted
|
||||||
N_det = N_det_max
|
N_det = N_det_max
|
||||||
soft_touch N_det psi_det psi_coef
|
soft_touch N_det psi_det psi_coef
|
||||||
|
if (s2_eig) then
|
||||||
|
call make_s2_eigenfunction
|
||||||
|
endif
|
||||||
call diagonalize_CI
|
call diagonalize_CI
|
||||||
call save_wavefunction
|
call save_wavefunction
|
||||||
endif
|
endif
|
||||||
|
@ -452,10 +452,10 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ]
|
|||||||
END_DOC
|
END_DOC
|
||||||
nthreads_davidson = nproc
|
nthreads_davidson = nproc
|
||||||
character*(32) :: env
|
character*(32) :: env
|
||||||
call getenv('NTHREADS_DAVIDSON',env)
|
call getenv('QP_NTHREADS_DAVIDSON',env)
|
||||||
if (trim(env) /= '') then
|
if (trim(env) /= '') then
|
||||||
read(env,*) nthreads_davidson
|
read(env,*) nthreads_davidson
|
||||||
call write_int(6,nthreads_davidson,'Number of threads for <Psi|H|Psi>')
|
call write_int(6,nthreads_davidson,'Target number of threads for <Psi|H|Psi>')
|
||||||
endif
|
endif
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
@ -115,7 +115,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
integer :: iter2, itertot
|
integer :: iter2, itertot
|
||||||
double precision, allocatable :: W(:,:), U(:,:), S(:,:), overlap(:,:)
|
double precision, allocatable :: W(:,:), U(:,:), S(:,:), overlap(:,:)
|
||||||
double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:)
|
double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:)
|
||||||
double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:)
|
double precision, allocatable :: s_(:,:), s_tmp(:,:)
|
||||||
double precision :: diag_h_mat_elem
|
double precision :: diag_h_mat_elem
|
||||||
double precision, allocatable :: residual_norm(:)
|
double precision, allocatable :: residual_norm(:)
|
||||||
character*(16384) :: write_buffer
|
character*(16384) :: write_buffer
|
||||||
@ -137,6 +137,13 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
|
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
|
||||||
itertot = 0
|
itertot = 0
|
||||||
|
|
||||||
|
if (state_following) then
|
||||||
|
allocate(overlap(N_st_diag*itermax, N_st_diag*itermax))
|
||||||
|
else
|
||||||
|
allocate(overlap(1,1)) ! avoid 'if' for deallocate
|
||||||
|
endif
|
||||||
|
overlap = 0.d0
|
||||||
|
|
||||||
PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse
|
PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse
|
||||||
|
|
||||||
call write_time(6)
|
call write_time(6)
|
||||||
@ -149,25 +156,51 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
call write_int(6,N_st,'Number of states')
|
call write_int(6,N_st,'Number of states')
|
||||||
call write_int(6,N_st_diag,'Number of states in diagonalization')
|
call write_int(6,N_st_diag,'Number of states in diagonalization')
|
||||||
call write_int(6,sze,'Number of determinants')
|
call write_int(6,sze,'Number of determinants')
|
||||||
|
|
||||||
|
! Find max number of cores to fit in memory
|
||||||
|
! -----------------------------------------
|
||||||
|
|
||||||
nproc_target = nproc
|
nproc_target = nproc
|
||||||
double precision :: rss
|
double precision :: rss
|
||||||
|
integer :: maxab
|
||||||
|
maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
|
||||||
|
|
||||||
call resident_memory(rss)
|
call resident_memory(rss)
|
||||||
r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
|
do
|
||||||
+ 3.d0*(N_st_diag*itermax)+nproc*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3)
|
r1 = 8.d0 * &! bytes
|
||||||
do while (r1+rss > qp_max_mem)
|
( 3.d0*(dble(sze)*(N_st_diag*itermax)) &! W,U,S
|
||||||
nproc_target = nproc_target - 1
|
+ 4.d0*(N_st_diag*itermax)**2 &! h,y,s_,s_tmp
|
||||||
r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
|
+ 2.d0*(N_st_diag*itermax) &! s2,lambda
|
||||||
+ 3.d0*(N_st_diag*itermax)+nproc_target*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3)
|
+ 1.d0*(N_st_diag) &! residual_norm
|
||||||
|
! In H_S2_u_0_nstates_zmq
|
||||||
|
+ 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on collector
|
||||||
|
+ 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on slave
|
||||||
|
+ 0.5d0*maxab &! idx0 in H_S2_u_0_nstates_openmp_work_*
|
||||||
|
+ nproc_target * &! In OMP section
|
||||||
|
( 1.d0*(N_int*maxab) &! buffer
|
||||||
|
+ 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx
|
||||||
|
) / 1024.d0**3
|
||||||
|
|
||||||
if (nproc_target == 0) then
|
if (nproc_target == 0) then
|
||||||
call check_mem(r1,irp_here)
|
call check_mem(r1,irp_here)
|
||||||
nproc_target = 1
|
nproc_target = 1
|
||||||
exit
|
exit
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
if (r1+rss < qp_max_mem) then
|
||||||
|
exit
|
||||||
|
endif
|
||||||
|
|
||||||
|
nproc_target = nproc_target - 1
|
||||||
|
|
||||||
enddo
|
enddo
|
||||||
nthreads_davidson = nproc_target
|
nthreads_davidson = nproc_target
|
||||||
TOUCH nthreads_davidson
|
TOUCH nthreads_davidson
|
||||||
call write_int(6,nproc_target,'Number of threads for diagonalization')
|
call write_int(6,nproc_target,'Number of threads for diagonalization')
|
||||||
call write_double(6, r1, 'Memory(Gb)')
|
call write_double(6, r1, 'Memory(Gb)')
|
||||||
|
|
||||||
|
!---------------
|
||||||
|
|
||||||
write(6,'(A)') ''
|
write(6,'(A)') ''
|
||||||
write_buffer = '====='
|
write_buffer = '====='
|
||||||
do i=1,N_st
|
do i=1,N_st
|
||||||
@ -198,9 +231,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
s_(N_st_diag*itermax,N_st_diag*itermax), &
|
s_(N_st_diag*itermax,N_st_diag*itermax), &
|
||||||
s_tmp(N_st_diag*itermax,N_st_diag*itermax), &
|
s_tmp(N_st_diag*itermax,N_st_diag*itermax), &
|
||||||
residual_norm(N_st_diag), &
|
residual_norm(N_st_diag), &
|
||||||
c(N_st_diag*itermax), &
|
|
||||||
s2(N_st_diag*itermax), &
|
s2(N_st_diag*itermax), &
|
||||||
overlap(N_st_diag*itermax, N_st_diag*itermax), &
|
|
||||||
lambda(N_st_diag*itermax))
|
lambda(N_st_diag*itermax))
|
||||||
|
|
||||||
h = 0.d0
|
h = 0.d0
|
||||||
@ -503,7 +534,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
deallocate ( &
|
deallocate ( &
|
||||||
W, residual_norm, &
|
W, residual_norm, &
|
||||||
U, overlap, &
|
U, overlap, &
|
||||||
c, S, &
|
S, &
|
||||||
h, &
|
h, &
|
||||||
y, s_, s_tmp, &
|
y, s_, s_tmp, &
|
||||||
lambda &
|
lambda &
|
||||||
|
14
src/fci/environment.irp.f
Normal file
14
src/fci/environment.irp.f
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
BEGIN_PROVIDER [ integer, nthreads_pt2 ]
|
||||||
|
implicit none
|
||||||
|
BEGIN_DOC
|
||||||
|
! Target number of threads for the computation of the PT2.
! Defaults to nproc. When the QP_NTHREADS_PT2 environment variable is
! not blank, its value is read into nthreads_pt2 instead and reported
! through write_int.
|
||||||
|
END_DOC
|
||||||
|
! Default value, kept when QP_NTHREADS_PT2 is blank
nthreads_pt2 = nproc
|
||||||
|
character*(32) :: env
|
||||||
|
call getenv('QP_NTHREADS_PT2',env)
|
||||||
|
if (trim(env) /= '') then
|
||||||
|
! List-directed read of the integer held in the environment string
read(env,*) nthreads_pt2
|
||||||
|
call write_int(6,nthreads_pt2,'Target number of threads for PT2')
|
||||||
|
endif
|
||||||
|
END_PROVIDER
|
||||||
|
|
Loading…
Reference in New Issue
Block a user