9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-12-30 15:15:38 +01:00

Fixing MPI Davidson

This commit is contained in:
Anthony Scemama 2019-01-26 11:18:29 +01:00
parent de1ea90b1e
commit 63fcb619aa
2 changed files with 88 additions and 11 deletions

View File

@ -173,6 +173,7 @@ subroutine run_slave_main
call davidson_slave_tcp(0) call davidson_slave_tcp(0)
call omp_set_nested(.False.) call omp_set_nested(.False.)
print *, mpi_rank, ': Davidson done' print *, mpi_rank, ': Davidson done'
IRP_IF MPI IRP_IF MPI
call MPI_BARRIER(MPI_COMM_WORLD, ierr) call MPI_BARRIER(MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then if (ierr /= MPI_SUCCESS) then
@ -223,14 +224,6 @@ subroutine run_slave_main
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
pt2_e0_denominator(1:N_states) = energy(1:N_states) pt2_e0_denominator(1:N_states) = energy(1:N_states)
SOFT_TOUCH pt2_e0_denominator state_average_weight pt2_stoch_istate threshold_generators SOFT_TOUCH pt2_e0_denominator state_average_weight pt2_stoch_istate threshold_generators
if (mpi_master) then
print *, 'N_det', N_det
print *, 'N_det_generators', N_det_generators
print *, 'N_det_selectors', N_det_selectors
print *, 'pt2_e0_denominator', pt2_e0_denominator
print *, 'pt2_stoch_istate', pt2_stoch_istate
print *, 'state_average_weight', state_average_weight
endif
call wall_time(t1) call wall_time(t1)
call write_double(6,(t1-t0),'Broadcast time') call write_double(6,(t1-t0),'Broadcast time')
@ -281,7 +274,18 @@ subroutine run_slave_main
nproc_target = nproc_target - 1 nproc_target = nproc_target - 1
enddo enddo
!$OMP PARALLEL PRIVATE(i)
if (mpi_master) then
print *, 'N_det', N_det
print *, 'N_det_generators', N_det_generators
print *, 'N_det_selectors', N_det_selectors
print *, 'pt2_e0_denominator', pt2_e0_denominator
print *, 'pt2_stoch_istate', pt2_stoch_istate
print *, 'state_average_weight', state_average_weight
print *, 'Number of threads', nproc_target
endif
!$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1)
i = omp_get_thread_num() i = omp_get_thread_num()
call run_pt2_slave(0,i,pt2_e0_denominator) call run_pt2_slave(0,i,pt2_e0_denominator)
!$OMP END PARALLEL !$OMP END PARALLEL

View File

@ -35,7 +35,7 @@ subroutine davidson_run_slave(thread,iproc)
integer(ZMQ_PTR) :: zmq_socket_push integer(ZMQ_PTR) :: zmq_socket_push
integer, external :: connect_to_taskserver integer, external :: connect_to_taskserver
integer, external :: zmq_get_N_states_diag integer, external :: zmq_get_N_states_diag_notouch
PROVIDE mpi_rank PROVIDE mpi_rank
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
@ -68,10 +68,23 @@ subroutine davidson_run_slave(thread,iproc)
if (doexit == 0) then if (doexit == 0) then
exit exit
else else
print *, irp_here, ': retrying connection (', doexit, ')' if (thread == 0) then
print *, irp_here, ': Connection failed. Exiting Davidson.'
return
endif
endif endif
enddo enddo
do
if (zmq_get_N_states_diag_notouch(zmq_to_qp_run_socket,1) == -1) then
call sleep(1)
print *, irp_here, ': waiting for N_states_diag'
else
exit
endif
enddo
SOFT_TOUCH N_states_diag
call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id) call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id)
integer, external :: disconnect_from_taskserver integer, external :: disconnect_from_taskserver
@ -575,3 +588,63 @@ integer function zmq_get_N_states_diag(zmq_to_qp_run_socket, worker_id)
endif endif
IRP_ENDIF IRP_ENDIF
end end
integer function zmq_get_N_states_diag_notouch(zmq_to_qp_run_socket, worker_id)
use f77_zmq
implicit none
BEGIN_DOC
! Get N_states_diag from the qp_run scheduler
END_DOC
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
integer, intent(in) :: worker_id
integer :: rc
character*(256) :: msg
zmq_get_N_states_diag_notouch = 0
if (mpi_master) then
write(msg,'(A,1X,I8,1X,A200)') 'get_data '//trim(zmq_state), worker_id, 'N_states_diag'
rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
if (rc /= len(trim(msg))) go to 10
rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
if (msg(1:14) /= 'get_data_reply') go to 10
rc = f77_zmq_recv(zmq_to_qp_run_socket,N_states_diag,4,0)
if (rc /= 4) go to 10
endif
IRP_IF MPI_DEBUG
print *, irp_here, mpi_rank
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
IRP_ENDIF
IRP_IF MPI
include 'mpif.h'
integer :: ierr
call MPI_BCAST (zmq_get_N_states_diag_notouch, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here//': Unable to broadcast N_states'
stop -1
endif
if (zmq_get_N_states_diag_notouch == 0) then
call MPI_BCAST (N_states_diag, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here//': Unable to broadcast N_states'
stop -1
endif
endif
IRP_ENDIF
return
! Exception
10 continue
zmq_get_N_states_diag_notouch = -1
IRP_IF MPI
call MPI_BCAST (zmq_get_N_states_diag_notouch, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here//': Unable to broadcast N_states'
stop -1
endif
IRP_ENDIF
end