10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-07-11 22:03:55 +02:00

Added error messages in davidson slave

This commit is contained in:
Anthony Scemama 2019-01-25 17:02:44 +01:00
parent fe86a9b9ed
commit 0992b52dcf
2 changed files with 29 additions and 25 deletions

0
external/Python/.gitignore vendored Normal file
View File

View File

@ -37,43 +37,46 @@ subroutine davidson_run_slave(thread,iproc)
integer, external :: connect_to_taskserver integer, external :: connect_to_taskserver
integer, external :: zmq_get_N_states_diag integer, external :: zmq_get_N_states_diag
PROVIDE mpi_rank
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
zmq_socket_push = new_zmq_push_socket(thread)
integer :: ierr, doexit integer :: ierr, doexit
doexit = 0 do
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then doexit = 0
call sleep(1)
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
doexit=1 call sleep( int(1.5+float(mpi_rank)/10.) )
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
doexit=1
endif
endif endif
endif
IRP_IF MPI IRP_IF MPI
include 'mpif.h' include 'mpif.h'
integer :: sendbuf, recvbuf integer :: sendbuf, recvbuf
sendbuf = doexit sendbuf = doexit
recvbuf = doexit recvbuf = doexit
call MPI_ALLREDUCE(sendbuf, recvbuf, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) call MPI_ALLREDUCE(sendbuf, recvbuf, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then if (ierr /= MPI_SUCCESS) then
print *, irp_here//': Unable to reduce ' print *, irp_here//': Unable to reduce '
stop -1 stop -1
endif
doexit = recvbuf
IRP_ENDIF
if (doexit == 0) then
exit
else
print *, irp_here, ': retrying connection (', doexit, ')'
endif endif
doexit = recvbuf enddo
IRP_ENDIF
if (doexit > 0) then
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
return
endif
zmq_socket_push = new_zmq_push_socket(thread)
do do
if (zmq_get_N_states_diag(zmq_to_qp_run_socket, 1) /= -1) then if (zmq_get_N_states_diag(zmq_to_qp_run_socket, 1) /= -1) then
exit exit
endif endif
print *, 'Waiting for N_states_diag in ', irp_here print *, irp_here, ': Waiting for N_states_diag'
call sleep(1) call sleep(1)
enddo enddo
call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id) call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id)
@ -82,6 +85,7 @@ subroutine davidson_run_slave(thread,iproc)
if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
call sleep(1) call sleep(1)
if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
print *, irp_here, ': disconnect failed'
continue continue
endif endif
endif endif