mirror of
https://github.com/LCPQ/quantum_package
synced 2025-01-12 05:58:24 +01:00
Memory model
This commit is contained in:
parent
ef92609192
commit
6d05caffd5
@ -139,7 +139,17 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error)
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) &
|
integer :: nproc_target
|
||||||
|
nproc_target = nproc
|
||||||
|
double precision :: mem
|
||||||
|
mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 + 3.d0 + 5.d0) / (1024.d0**3)
|
||||||
|
call write_double(6,mem,'Estimated memory/thread (Gb)')
|
||||||
|
if (qp_max_mem > 0) then
|
||||||
|
nproc_target = max(1,int(dble(qp_max_mem)/mem))
|
||||||
|
nproc_target = min(nproc_target,nproc)
|
||||||
|
endif
|
||||||
|
|
||||||
|
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1) &
|
||||||
!$OMP PRIVATE(i)
|
!$OMP PRIVATE(i)
|
||||||
i = omp_get_thread_num()
|
i = omp_get_thread_num()
|
||||||
if (i==0) then
|
if (i==0) then
|
||||||
|
@ -29,11 +29,13 @@ subroutine run_wf
|
|||||||
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
|
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
|
||||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||||
double precision :: energy(N_states)
|
double precision :: energy(N_states)
|
||||||
character*(64) :: states(2)
|
character*(64) :: states(3)
|
||||||
|
character*(64) :: old_state
|
||||||
integer :: rc, i, ierr
|
integer :: rc, i, ierr
|
||||||
double precision :: t0, t1
|
double precision :: t0, t1
|
||||||
|
|
||||||
integer, external :: zmq_get_dvector, zmq_get_N_det_generators
|
integer, external :: zmq_get_dvector, zmq_get_N_det_generators
|
||||||
|
integer, external :: zmq_get_ivector
|
||||||
integer, external :: zmq_get_psi, zmq_get_N_det_selectors
|
integer, external :: zmq_get_psi, zmq_get_N_det_selectors
|
||||||
integer, external :: zmq_get_N_states_diag
|
integer, external :: zmq_get_N_states_diag
|
||||||
|
|
||||||
@ -41,30 +43,61 @@ subroutine run_wf
|
|||||||
|
|
||||||
zmq_context = f77_zmq_ctx_new ()
|
zmq_context = f77_zmq_ctx_new ()
|
||||||
states(1) = 'selection'
|
states(1) = 'selection'
|
||||||
states(2) = 'pt2'
|
states(2) = 'davidson'
|
||||||
|
states(3) = 'pt2'
|
||||||
|
old_state = 'Waiting'
|
||||||
|
|
||||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||||
|
|
||||||
do
|
do
|
||||||
|
|
||||||
call wait_for_states(states,zmq_state,size(states))
|
if (mpi_master) then
|
||||||
print *, trim(zmq_state)
|
call wait_for_states(states,zmq_state,size(states))
|
||||||
|
if (zmq_state(1:64) == old_state(1:64)) then
|
||||||
|
call sleep(1)
|
||||||
|
cycle
|
||||||
|
else
|
||||||
|
old_state(1:64) = zmq_state(1:64)
|
||||||
|
endif
|
||||||
|
print *, trim(zmq_state)
|
||||||
|
endif
|
||||||
|
|
||||||
|
IRP_IF MPI
|
||||||
|
call MPI_BCAST (zmq_state, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
|
||||||
|
if (ierr /= MPI_SUCCESS) then
|
||||||
|
print *, irp_here, 'error in broadcast of zmq_state'
|
||||||
|
endif
|
||||||
|
IRP_ENDIF
|
||||||
|
|
||||||
if(zmq_state(1:7) == 'Stopped') then
|
if(zmq_state(1:7) == 'Stopped') then
|
||||||
|
|
||||||
exit
|
exit
|
||||||
|
endif
|
||||||
|
|
||||||
else if (zmq_state(1:9) == 'selection') then
|
|
||||||
|
if (zmq_state(1:9) == 'selection') then
|
||||||
|
|
||||||
! Selection
|
! Selection
|
||||||
! ---------
|
! ---------
|
||||||
|
|
||||||
call wall_time(t0)
|
call wall_time(t0)
|
||||||
if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
|
if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
|
||||||
|
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) cycle
|
||||||
|
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) cycle
|
||||||
|
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
|
||||||
if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
|
if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
|
||||||
if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
|
if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
|
||||||
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
|
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
|
||||||
|
psi_energy(1:N_states) = energy(1:N_states)
|
||||||
|
TOUCH psi_energy state_average_weight threshold_selectors threshold_generators
|
||||||
|
|
||||||
|
if (mpi_master) then
|
||||||
|
print *, 'N_det', N_det
|
||||||
|
print *, 'N_det_generators', N_det_generators
|
||||||
|
print *, 'N_det_selectors', N_det_selectors
|
||||||
|
print *, 'psi_energy', psi_energy
|
||||||
|
print *, 'pt2_stoch_istate', pt2_stoch_istate
|
||||||
|
print *, 'state_average_weight', state_average_weight
|
||||||
|
endif
|
||||||
call wall_time(t1)
|
call wall_time(t1)
|
||||||
call write_double(6,(t1-t0),'Broadcast time')
|
call write_double(6,(t1-t0),'Broadcast time')
|
||||||
|
|
||||||
@ -73,42 +106,50 @@ subroutine run_wf
|
|||||||
call run_selection_slave(0,i,energy)
|
call run_selection_slave(0,i,energy)
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
print *, 'Selection done'
|
print *, 'Selection done'
|
||||||
|
IRP_IF MPI
|
||||||
|
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
|
||||||
|
if (ierr /= MPI_SUCCESS) then
|
||||||
|
print *, irp_here, 'error in barrier'
|
||||||
|
endif
|
||||||
|
IRP_ENDIF
|
||||||
|
print *, 'All selection done'
|
||||||
|
if (N_det < 100000) then
|
||||||
|
exit
|
||||||
|
endif
|
||||||
|
|
||||||
else if (zmq_state(1:3) == 'pt2') then
|
else if (zmq_state(1:8) == 'davidson') then
|
||||||
|
|
||||||
! PT2
|
! Davidson
|
||||||
! ---
|
! --------
|
||||||
|
|
||||||
print *, 'PT2'
|
|
||||||
call wall_time(t0)
|
call wall_time(t0)
|
||||||
if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
|
if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
|
||||||
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
|
if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle
|
||||||
if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
|
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states_diag) == -1) cycle
|
||||||
if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
|
|
||||||
|
|
||||||
call wall_time(t1)
|
call wall_time(t1)
|
||||||
call write_double(6,(t1-t0),'Broadcast time')
|
if (mpi_master) then
|
||||||
|
call write_double(6,(t1-t0),'Broadcast time')
|
||||||
|
endif
|
||||||
|
|
||||||
logical :: lstop
|
call omp_set_nested(.True.)
|
||||||
lstop = .False.
|
call davidson_slave_tcp(0)
|
||||||
!$OMP PARALLEL PRIVATE(i)
|
call omp_set_nested(.False.)
|
||||||
i = omp_get_thread_num()
|
print *, 'Davidson done'
|
||||||
call run_pt2_slave(0,i,energy,lstop)
|
IRP_IF MPI
|
||||||
!$OMP END PARALLEL
|
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
|
||||||
print *, 'PT2 done'
|
if (ierr /= MPI_SUCCESS) then
|
||||||
|
print *, irp_here, 'error in barrier'
|
||||||
|
endif
|
||||||
|
IRP_ENDIF
|
||||||
|
print *, 'All Davidson done'
|
||||||
|
exit
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
IRP_IF MPI
|
|
||||||
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
|
|
||||||
if (ierr /= MPI_SUCCESS) then
|
|
||||||
print *, irp_here, 'error in barrier'
|
|
||||||
endif
|
|
||||||
IRP_ENDIF
|
|
||||||
|
|
||||||
end do
|
end do
|
||||||
IRP_IF MPI
|
IRP_IF MPI
|
||||||
call MPI_finalize(i)
|
call MPI_finalize(ierr)
|
||||||
IRP_ENDIF
|
IRP_ENDIF
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -97,7 +97,17 @@ subroutine ZMQ_selection(N_in, pt2)
|
|||||||
print *, irp_here, ': Failed in zmq_set_running'
|
print *, irp_here, ': Failed in zmq_set_running'
|
||||||
endif
|
endif
|
||||||
|
|
||||||
!$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1)
|
integer :: nproc_target
|
||||||
|
nproc_target = nproc
|
||||||
|
double precision :: mem
|
||||||
|
mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 + 3.d0 + 5.d0) / (1024.d0**3)
|
||||||
|
call write_double(6,mem,'Estimated memory/thread (Gb)')
|
||||||
|
if (qp_max_mem > 0) then
|
||||||
|
nproc_target = max(1,int(dble(qp_max_mem)/mem))
|
||||||
|
nproc_target = min(nproc_target,nproc)
|
||||||
|
endif
|
||||||
|
|
||||||
|
!$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc_target+1)
|
||||||
i = omp_get_thread_num()
|
i = omp_get_thread_num()
|
||||||
if (i==0) then
|
if (i==0) then
|
||||||
call selection_collector(zmq_socket_pull, b, N, pt2)
|
call selection_collector(zmq_socket_pull, b, N, pt2)
|
||||||
|
@ -403,8 +403,8 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ]
|
|||||||
call getenv('NTHREADS_DAVIDSON',env)
|
call getenv('NTHREADS_DAVIDSON',env)
|
||||||
if (trim(env) /= '') then
|
if (trim(env) /= '') then
|
||||||
read(env,*) nthreads_davidson
|
read(env,*) nthreads_davidson
|
||||||
|
call write_int(6,nthreads_davidson,'Number of threads for <Psi|H|Psi>')
|
||||||
endif
|
endif
|
||||||
call write_int(6,nthreads_davidson,'Number of threads for Diagonalization')
|
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
|
||||||
|
@ -138,6 +138,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
integer :: shift, shift2, itermax, istate
|
integer :: shift, shift2, itermax, istate
|
||||||
double precision :: r1, r2
|
double precision :: r1, r2
|
||||||
logical :: state_ok(N_st_diag*davidson_sze_max)
|
logical :: state_ok(N_st_diag*davidson_sze_max)
|
||||||
|
integer :: nproc_target
|
||||||
include 'constants.include.F'
|
include 'constants.include.F'
|
||||||
|
|
||||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
|
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
|
||||||
@ -162,8 +163,22 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
call write_int(6,N_st,'Number of states')
|
call write_int(6,N_st,'Number of states')
|
||||||
call write_int(6,N_st_diag,'Number of states in diagonalization')
|
call write_int(6,N_st_diag,'Number of states in diagonalization')
|
||||||
call write_int(6,sze,'Number of determinants')
|
call write_int(6,sze,'Number of determinants')
|
||||||
|
nproc_target = nproc
|
||||||
r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
|
r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
|
||||||
+ 4.d0*(N_st_diag*itermax)+nproc*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3)
|
+ 3.d0*(N_st_diag*itermax)+nproc*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3)
|
||||||
|
if (qp_max_mem > 0) then
|
||||||
|
do while (r1 > qp_max_mem)
|
||||||
|
nproc_target = nproc_target - 1
|
||||||
|
r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
|
||||||
|
+ 3.d0*(N_st_diag*itermax)+nproc_target*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3)
|
||||||
|
if (nproc_target == 0) then
|
||||||
|
nproc_target = 1
|
||||||
|
exit
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
call omp_set_num_threads(nproc_target)
|
||||||
|
call write_int(6,nproc_target,'Number of threads for diagonalization')
|
||||||
|
endif
|
||||||
call write_double(6, r1, 'Memory(Gb)')
|
call write_double(6, r1, 'Memory(Gb)')
|
||||||
write(6,'(A)') ''
|
write(6,'(A)') ''
|
||||||
write_buffer = '====='
|
write_buffer = '====='
|
||||||
@ -512,6 +527,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
y, s_, s_tmp, &
|
y, s_, s_tmp, &
|
||||||
lambda &
|
lambda &
|
||||||
)
|
)
|
||||||
|
call omp_set_num_threads(nproc)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
@ -286,6 +286,23 @@ BEGIN_PROVIDER [ integer, nproc ]
|
|||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ integer, qp_max_mem ]
|
||||||
|
implicit none
|
||||||
|
BEGIN_DOC
|
||||||
|
! Maximum memory in Gb
|
||||||
|
END_DOC
|
||||||
|
character*(128) :: env
|
||||||
|
|
||||||
|
qp_max_mem = -1
|
||||||
|
call getenv('QP_MAXMEM',env)
|
||||||
|
if (trim(env) /= '') then
|
||||||
|
read(env,*) qp_max_mem
|
||||||
|
call write_int(6,qp_max_mem,'Target maximum memory')
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
END_PROVIDER
|
||||||
|
|
||||||
|
|
||||||
double precision function u_dot_v(u,v,sze)
|
double precision function u_dot_v(u,v,sze)
|
||||||
implicit none
|
implicit none
|
||||||
|
@ -246,7 +246,7 @@ IRP_ENDIF
|
|||||||
! stop 'Unable to set ZMQ_RCVBUF on pull socket'
|
! stop 'Unable to set ZMQ_RCVBUF on pull socket'
|
||||||
! endif
|
! endif
|
||||||
|
|
||||||
rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVHWM,1,4)
|
rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVHWM,nproc,4)
|
||||||
if (rc /= 0) then
|
if (rc /= 0) then
|
||||||
stop 'Unable to set ZMQ_RCVHWM on pull socket'
|
stop 'Unable to set ZMQ_RCVHWM on pull socket'
|
||||||
endif
|
endif
|
||||||
@ -323,7 +323,7 @@ IRP_ENDIF
|
|||||||
stop 'Unable to set ZMQ_LINGER on push socket'
|
stop 'Unable to set ZMQ_LINGER on push socket'
|
||||||
endif
|
endif
|
||||||
|
|
||||||
rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,1,4)
|
rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,3,4)
|
||||||
if (rc /= 0) then
|
if (rc /= 0) then
|
||||||
stop 'Unable to set ZMQ_SNDHWM on push socket'
|
stop 'Unable to set ZMQ_SNDHWM on push socket'
|
||||||
endif
|
endif
|
||||||
|
Loading…
Reference in New Issue
Block a user