10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-11-04 13:13:57 +01:00

Memory model

This commit is contained in:
Anthony Scemama 2018-06-08 21:37:08 +02:00
parent ef92609192
commit 6d05caffd5
7 changed files with 130 additions and 36 deletions

View File

@ -139,7 +139,17 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error)
endif endif
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) & integer :: nproc_target
nproc_target = nproc
double precision :: mem
mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 + 3.d0 + 5.d0) / (1024.d0**3)
call write_double(6,mem,'Estimated memory/thread (Gb)')
if (qp_max_mem > 0) then
nproc_target = max(1,int(dble(qp_max_mem)/mem))
nproc_target = min(nproc_target,nproc)
endif
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1) &
!$OMP PRIVATE(i) !$OMP PRIVATE(i)
i = omp_get_thread_num() i = omp_get_thread_num()
if (i==0) then if (i==0) then

View File

@ -29,11 +29,13 @@ subroutine run_wf
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket
double precision :: energy(N_states) double precision :: energy(N_states)
character*(64) :: states(2) character*(64) :: states(3)
character*(64) :: old_state
integer :: rc, i, ierr integer :: rc, i, ierr
double precision :: t0, t1 double precision :: t0, t1
integer, external :: zmq_get_dvector, zmq_get_N_det_generators integer, external :: zmq_get_dvector, zmq_get_N_det_generators
integer, external :: zmq_get_ivector
integer, external :: zmq_get_psi, zmq_get_N_det_selectors integer, external :: zmq_get_psi, zmq_get_N_det_selectors
integer, external :: zmq_get_N_states_diag integer, external :: zmq_get_N_states_diag
@ -41,30 +43,61 @@ subroutine run_wf
zmq_context = f77_zmq_ctx_new () zmq_context = f77_zmq_ctx_new ()
states(1) = 'selection' states(1) = 'selection'
states(2) = 'pt2' states(2) = 'davidson'
states(3) = 'pt2'
old_state = 'Waiting'
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
do do
if (mpi_master) then
call wait_for_states(states,zmq_state,size(states)) call wait_for_states(states,zmq_state,size(states))
if (zmq_state(1:64) == old_state(1:64)) then
call sleep(1)
cycle
else
old_state(1:64) = zmq_state(1:64)
endif
print *, trim(zmq_state) print *, trim(zmq_state)
endif
IRP_IF MPI
call MPI_BCAST (zmq_state, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here, 'error in broadcast of zmq_state'
endif
IRP_ENDIF
if(zmq_state(1:7) == 'Stopped') then if(zmq_state(1:7) == 'Stopped') then
exit exit
endif
else if (zmq_state(1:9) == 'selection') then
if (zmq_state(1:9) == 'selection') then
! Selection ! Selection
! --------- ! ---------
call wall_time(t0) call wall_time(t0)
if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
psi_energy(1:N_states) = energy(1:N_states)
TOUCH psi_energy state_average_weight threshold_selectors threshold_generators
if (mpi_master) then
print *, 'N_det', N_det
print *, 'N_det_generators', N_det_generators
print *, 'N_det_selectors', N_det_selectors
print *, 'psi_energy', psi_energy
print *, 'pt2_stoch_istate', pt2_stoch_istate
print *, 'state_average_weight', state_average_weight
endif
call wall_time(t1) call wall_time(t1)
call write_double(6,(t1-t0),'Broadcast time') call write_double(6,(t1-t0),'Broadcast time')
@ -73,42 +106,50 @@ subroutine run_wf
call run_selection_slave(0,i,energy) call run_selection_slave(0,i,energy)
!$OMP END PARALLEL !$OMP END PARALLEL
print *, 'Selection done' print *, 'Selection done'
else if (zmq_state(1:3) == 'pt2') then
! PT2
! ---
print *, 'PT2'
call wall_time(t0)
if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
call wall_time(t1)
call write_double(6,(t1-t0),'Broadcast time')
logical :: lstop
lstop = .False.
!$OMP PARALLEL PRIVATE(i)
i = omp_get_thread_num()
call run_pt2_slave(0,i,energy,lstop)
!$OMP END PARALLEL
print *, 'PT2 done'
endif
IRP_IF MPI IRP_IF MPI
call MPI_BARRIER(MPI_COMM_WORLD, ierr) call MPI_BARRIER(MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then if (ierr /= MPI_SUCCESS) then
print *, irp_here, 'error in barrier' print *, irp_here, 'error in barrier'
endif endif
IRP_ENDIF IRP_ENDIF
print *, 'All selection done'
if (N_det < 100000) then
exit
endif
else if (zmq_state(1:8) == 'davidson') then
! Davidson
! --------
call wall_time(t0)
if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states_diag) == -1) cycle
call wall_time(t1)
if (mpi_master) then
call write_double(6,(t1-t0),'Broadcast time')
endif
call omp_set_nested(.True.)
call davidson_slave_tcp(0)
call omp_set_nested(.False.)
print *, 'Davidson done'
IRP_IF MPI
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here, 'error in barrier'
endif
IRP_ENDIF
print *, 'All Davidson done'
exit
endif
end do end do
IRP_IF MPI IRP_IF MPI
call MPI_finalize(i) call MPI_finalize(ierr)
IRP_ENDIF IRP_ENDIF
end end

View File

@ -97,7 +97,17 @@ subroutine ZMQ_selection(N_in, pt2)
print *, irp_here, ': Failed in zmq_set_running' print *, irp_here, ': Failed in zmq_set_running'
endif endif
!$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) integer :: nproc_target
nproc_target = nproc
double precision :: mem
mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 + 3.d0 + 5.d0) / (1024.d0**3)
call write_double(6,mem,'Estimated memory/thread (Gb)')
if (qp_max_mem > 0) then
nproc_target = max(1,int(dble(qp_max_mem)/mem))
nproc_target = min(nproc_target,nproc)
endif
!$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc_target+1)
i = omp_get_thread_num() i = omp_get_thread_num()
if (i==0) then if (i==0) then
call selection_collector(zmq_socket_pull, b, N, pt2) call selection_collector(zmq_socket_pull, b, N, pt2)

View File

@ -403,8 +403,8 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ]
call getenv('NTHREADS_DAVIDSON',env) call getenv('NTHREADS_DAVIDSON',env)
if (trim(env) /= '') then if (trim(env) /= '') then
read(env,*) nthreads_davidson read(env,*) nthreads_davidson
call write_int(6,nthreads_davidson,'Number of threads for <Psi|H|Psi>')
endif endif
call write_int(6,nthreads_davidson,'Number of threads for Diagonalization')
END_PROVIDER END_PROVIDER

View File

@ -138,6 +138,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
integer :: shift, shift2, itermax, istate integer :: shift, shift2, itermax, istate
double precision :: r1, r2 double precision :: r1, r2
logical :: state_ok(N_st_diag*davidson_sze_max) logical :: state_ok(N_st_diag*davidson_sze_max)
integer :: nproc_target
include 'constants.include.F' include 'constants.include.F'
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
@ -162,8 +163,22 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
call write_int(6,N_st,'Number of states') call write_int(6,N_st,'Number of states')
call write_int(6,N_st_diag,'Number of states in diagonalization') call write_int(6,N_st_diag,'Number of states in diagonalization')
call write_int(6,sze,'Number of determinants') call write_int(6,sze,'Number of determinants')
nproc_target = nproc
r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 & r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
+ 4.d0*(N_st_diag*itermax)+nproc*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3) + 3.d0*(N_st_diag*itermax)+nproc*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3)
if (qp_max_mem > 0) then
do while (r1 > qp_max_mem)
nproc_target = nproc_target - 1
r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
+ 3.d0*(N_st_diag*itermax)+nproc_target*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3)
if (nproc_target == 0) then
nproc_target = 1
exit
endif
enddo
call omp_set_num_threads(nproc_target)
call write_int(6,nproc_target,'Number of threads for diagonalization')
endif
call write_double(6, r1, 'Memory(Gb)') call write_double(6, r1, 'Memory(Gb)')
write(6,'(A)') '' write(6,'(A)') ''
write_buffer = '=====' write_buffer = '====='
@ -512,6 +527,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
y, s_, s_tmp, & y, s_, s_tmp, &
lambda & lambda &
) )
call omp_set_num_threads(nproc)
end end

View File

@ -286,6 +286,23 @@ BEGIN_PROVIDER [ integer, nproc ]
!$OMP END PARALLEL !$OMP END PARALLEL
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ integer, qp_max_mem ]
implicit none
BEGIN_DOC
! Maximum memory in Gb
END_DOC
character*(128) :: env
qp_max_mem = -1
call getenv('QP_MAXMEM',env)
if (trim(env) /= '') then
read(env,*) qp_max_mem
call write_int(6,qp_max_mem,'Target maximum memory')
endif
END_PROVIDER
double precision function u_dot_v(u,v,sze) double precision function u_dot_v(u,v,sze)
implicit none implicit none

View File

@ -246,7 +246,7 @@ IRP_ENDIF
! stop 'Unable to set ZMQ_RCVBUF on pull socket' ! stop 'Unable to set ZMQ_RCVBUF on pull socket'
! endif ! endif
rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVHWM,1,4) rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVHWM,nproc,4)
if (rc /= 0) then if (rc /= 0) then
stop 'Unable to set ZMQ_RCVHWM on pull socket' stop 'Unable to set ZMQ_RCVHWM on pull socket'
endif endif
@ -323,7 +323,7 @@ IRP_ENDIF
stop 'Unable to set ZMQ_LINGER on push socket' stop 'Unable to set ZMQ_LINGER on push socket'
endif endif
rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,1,4) rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,3,4)
if (rc /= 0) then if (rc /= 0) then
stop 'Unable to set ZMQ_SNDHWM on push socket' stop 'Unable to set ZMQ_SNDHWM on push socket'
endif endif