mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-12-21 19:13:29 +01:00
Added error messages in davidson slave
This commit is contained in:
parent
fe86a9b9ed
commit
0992b52dcf
0
external/Python/.gitignore
vendored
Normal file
0
external/Python/.gitignore
vendored
Normal file
@ -37,43 +37,46 @@ subroutine davidson_run_slave(thread,iproc)
|
|||||||
integer, external :: connect_to_taskserver
|
integer, external :: connect_to_taskserver
|
||||||
integer, external :: zmq_get_N_states_diag
|
integer, external :: zmq_get_N_states_diag
|
||||||
|
|
||||||
|
PROVIDE mpi_rank
|
||||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||||
|
zmq_socket_push = new_zmq_push_socket(thread)
|
||||||
|
|
||||||
|
|
||||||
integer :: ierr, doexit
|
integer :: ierr, doexit
|
||||||
doexit = 0
|
do
|
||||||
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
|
doexit = 0
|
||||||
call sleep(1)
|
|
||||||
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
|
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
|
||||||
doexit=1
|
call sleep( int(1.5+float(mpi_rank)/10.) )
|
||||||
|
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
|
||||||
|
doexit=1
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
|
|
||||||
IRP_IF MPI
|
IRP_IF MPI
|
||||||
include 'mpif.h'
|
include 'mpif.h'
|
||||||
integer :: sendbuf, recvbuf
|
integer :: sendbuf, recvbuf
|
||||||
sendbuf = doexit
|
sendbuf = doexit
|
||||||
recvbuf = doexit
|
recvbuf = doexit
|
||||||
call MPI_ALLREDUCE(sendbuf, recvbuf, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr)
|
call MPI_ALLREDUCE(sendbuf, recvbuf, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr)
|
||||||
if (ierr /= MPI_SUCCESS) then
|
if (ierr /= MPI_SUCCESS) then
|
||||||
print *, irp_here//': Unable to reduce '
|
print *, irp_here//': Unable to reduce '
|
||||||
stop -1
|
stop -1
|
||||||
|
endif
|
||||||
|
doexit = recvbuf
|
||||||
|
IRP_ENDIF
|
||||||
|
|
||||||
|
if (doexit == 0) then
|
||||||
|
exit
|
||||||
|
else
|
||||||
|
print *, irp_here, ': retrying connection (', doexit, ')'
|
||||||
endif
|
endif
|
||||||
doexit = recvbuf
|
enddo
|
||||||
IRP_ENDIF
|
|
||||||
|
|
||||||
if (doexit > 0) then
|
|
||||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
|
|
||||||
zmq_socket_push = new_zmq_push_socket(thread)
|
|
||||||
|
|
||||||
do
|
do
|
||||||
if (zmq_get_N_states_diag(zmq_to_qp_run_socket, 1) /= -1) then
|
if (zmq_get_N_states_diag(zmq_to_qp_run_socket, 1) /= -1) then
|
||||||
exit
|
exit
|
||||||
endif
|
endif
|
||||||
print *, 'Waiting for N_states_diag in ', irp_here
|
print *, irp_here, ': Waiting for N_states_diag'
|
||||||
call sleep(1)
|
call sleep(1)
|
||||||
enddo
|
enddo
|
||||||
call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id)
|
call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id)
|
||||||
@ -82,6 +85,7 @@ subroutine davidson_run_slave(thread,iproc)
|
|||||||
if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
|
if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
|
||||||
call sleep(1)
|
call sleep(1)
|
||||||
if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
|
if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
|
||||||
|
print *, irp_here, ': disconnect failed'
|
||||||
continue
|
continue
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
Loading…
Reference in New Issue
Block a user