diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 45b438f2..68fa133f 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -41,7 +41,7 @@ let debug_env = let debug str = if debug_env then - Printf.printf "TASK : %s%!" str + Printf.eprintf "TASK : %s%!" str diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 7cfcb91d..92ba5b7b 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -30,7 +30,7 @@ subroutine run_pt2_slave(thread,iproc,energy) zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) if(worker_id == -1) then - print *, "WORKER -1" + print *, 'WORKER -1' call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) return @@ -62,7 +62,7 @@ subroutine run_pt2_slave(thread,iproc,energy) endif if(done .or. (ctask == size(task_id)) ) then - if(buf%N == 0 .and. ctask > 0) stop "uninitialized selection_buffer" + if(buf%N == 0 .and. ctask > 0) stop 'uninitialized selection_buffer' do i=1, ctask call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) end do @@ -97,26 +97,30 @@ subroutine push_pt2_results(zmq_socket_push, N, index, pt2_detail, task_id, ntas rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" + if(rc /= 4) stop 'push' rc = f77_zmq_send( zmq_socket_push, index, 4, ZMQ_SNDMORE) - if(rc /= 4*N) stop "push" + if(rc /= 4*N) stop 'push' rc = f77_zmq_send( zmq_socket_push, pt2_detail, 8*N_states*N, ZMQ_SNDMORE) - if(rc /= 8*N_states*N) stop "push" + if(rc /= 8*N_states*N) stop 'push' rc = f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" + if(rc /= 4) stop 'push' rc = f77_zmq_send( zmq_socket_push, task_id, ntask*4, 0) - if(rc /= 4*ntask) stop "push" + if(rc /= 4*ntask) stop 'push' ! Activate is zmq_socket_push is a REQ IRP_IF ZMQ_PUSH IRP_ELSE character*(2) :: ok rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) + if ((rc /= 2).and.(ok(1:2) /= 'ok')) then + print *, irp_here//': error in receiving ok' + stop -1 + endif IRP_ENDIF end subroutine @@ -133,24 +137,28 @@ subroutine pull_pt2_results(zmq_socket_pull, N, index, pt2_detail, task_id, ntas integer :: rc, rn, i rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) - if(rc /= 4) stop "pull" + if(rc /= 4) stop 'pull' rc = f77_zmq_recv( zmq_socket_pull, index, 4, 0) - if(rc /= 4*N) stop "pull" + if(rc /= 4*N) stop 'pull' rc = f77_zmq_recv( zmq_socket_pull, pt2_detail, N_states*8*N, 0) - if(rc /= 8*N_states*N) stop "pull" + if(rc /= 8*N_states*N) stop 'pull' rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0) - if(rc /= 4) stop "pull" + if(rc /= 4) stop 'pull' rc = f77_zmq_recv( zmq_socket_pull, task_id, ntask*4, 0) - if(rc /= 4*ntask) stop "pull" + if(rc /= 4*ntask) stop 'pull' ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) + if (rc /= 2) then + print *, irp_here//': error in sending ok' + stop -1 + endif IRP_ENDIF end subroutine diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index d6f5739f..59b92b0d 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -132,7 +132,12 @@ subroutine push_selection_results(zmq_socket_push, pt2, b, task_id, ntask) ! Activate is zmq_socket_push is a REQ IRP_IF ZMQ_PUSH IRP_ELSE - rc = f77_zmq_recv( zmq_socket_push, task_id(1), ntask*4, 0) + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) + if ((rc /= 2).and.(ok(1:2) /= 'ok')) then + print *, irp_here//': error in receiving ok' + stop -1 + endif IRP_ENDIF end subroutine @@ -186,9 +191,12 @@ subroutine pull_selection_results(zmq_socket_pull, pt2, val, det, N, task_id, nt ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE - rc = f77_zmq_send( zmq_socket_pull, task_id(1), ntask*4, 0) + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) + if (rc /= 2) then + print *, irp_here//': error in sending ok' + stop -1 + endif IRP_ENDIF - end subroutine diff --git a/plugins/MPI/broadcast.irp.f b/plugins/MPI/broadcast.irp.f index 0662052b..148a81b1 100644 --- a/plugins/MPI/broadcast.irp.f +++ b/plugins/MPI/broadcast.irp.f @@ -10,9 +10,8 @@ subroutine broadcast_chunks_$double(A, LDA) ! Broadcast with chunks of ~2GB END_DOC integer :: i, sze, ierr - do i=1,LDA,2000000000/$8 - sze = min(LDA-i+1, 2000000000/$8) -! call MPI_BARRIER(MPI_COMM_WORLD) + do i=1,LDA,200000000/$8 + sze = min(LDA-i+1, 200000000/$8) call MPI_BCAST (A(i), sze, MPI_$DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then print *, irp_here//': Unable to broadcast chuks $double ', i @@ -44,21 +43,18 @@ subroutine mpi_bcast_psi(energy, size_energy) include 'mpif.h' integer :: ierr -! call MPI_BARRIER(MPI_COMM_WORLD) call MPI_BCAST (N_states, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then print *, irp_here//': Unable to broadcast N_states' stop -1 endif -! call MPI_BARRIER(MPI_COMM_WORLD) call MPI_BCAST (N_det, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then print *, irp_here//': Unable to broadcast N_det' stop -1 endif -! call MPI_BARRIER(MPI_COMM_WORLD) call MPI_BCAST (psi_det_size, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then print *, irp_here//': Unable to broadcast psi_det_size' @@ -78,7 +74,6 @@ subroutine mpi_bcast_psi(energy, size_energy) TOUCH psi_det psi_coef endif -! call MPI_BARRIER(MPI_COMM_WORLD) call MPI_BCAST (N_det_generators, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then print *, irp_here//': Unable to broadcast N_det_generators' @@ -89,7 +84,6 @@ subroutine mpi_bcast_psi(energy, size_energy) TOUCH N_det_generators endif -! call MPI_BARRIER(MPI_COMM_WORLD) call MPI_BCAST (N_det_selectors, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then print *, irp_here//': Unable to broadcast N_det_selectors' @@ -100,13 +94,11 @@ subroutine mpi_bcast_psi(energy, size_energy) TOUCH N_det_selectors endif -! call MPI_BARRIER(MPI_COMM_WORLD) call MPI_BCAST (energy, size(energy), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then print *, irp_here//': Unable to broadcast energy' stop -1 endif -! call MPI_BARRIER(MPI_COMM_WORLD) IRP_ENDIF end diff --git a/src/Davidson/NEEDED_CHILDREN_MODULES b/src/Davidson/NEEDED_CHILDREN_MODULES index eb438a3c..aae89501 100644 --- a/src/Davidson/NEEDED_CHILDREN_MODULES +++ b/src/Davidson/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Determinants MPI +Determinants diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 24f2f947..a6af704a 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -38,7 +38,7 @@ subroutine davidson_run_slave(thread,iproc) zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) if(worker_id == -1) then - print *, "WORKER -1" + print *, 'WORKER -1' call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) return @@ -160,28 +160,28 @@ subroutine davidson_push_results(zmq_socket_push, v_t, s_t, imin, imax, task_id) sz = (imax-imin+1)*N_states_diag rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "davidson_push_results failed to push task_id" + if(rc /= 4) stop 'davidson_push_results failed to push task_id' rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "davidson_push_results failed to push imin" + if(rc /= 4) stop 'davidson_push_results failed to push imin' rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "davidson_push_results failed to push imax" + if(rc /= 4) stop 'davidson_push_results failed to push imax' rc8 = f77_zmq_send8( zmq_socket_push, v_t(1,imin), 8_8*sz, ZMQ_SNDMORE) - if(rc8 /= 8_8*sz) stop "davidson_push_results failed to push vt" + if(rc8 /= 8_8*sz) stop 'davidson_push_results failed to push vt' rc8 = f77_zmq_send8( zmq_socket_push, s_t(1,imin), 8_8*sz, 0) - if(rc8 /= 8_8*sz) stop "davidson_push_results failed to push st" + if(rc8 /= 8_8*sz) stop 'davidson_push_results failed to push st' ! Activate is zmq_socket_push is a REQ IRP_IF ZMQ_PUSH IRP_ELSE - integer :: idummy - rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) - if (rc /= 4) then - print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)' - stop 'error' + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) + if ((rc /= 2).and.(ok(1:2)/='ok')) then + print *, irp_here, ': f77_zmq_recv( zmq_socket_push, ok, 2, 0)' + stop -1 endif IRP_ENDIF @@ -202,29 +202,29 @@ subroutine davidson_pull_results(zmq_socket_pull, v_t, s_t, imin, imax, task_id) integer*8 :: rc8 rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) - if(rc /= 4) stop "davidson_pull_results failed to pull task_id" + if(rc /= 4) stop 'davidson_pull_results failed to pull task_id' rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0) - if(rc /= 4) stop "davidson_pull_results failed to pull imin" + if(rc /= 4) stop 'davidson_pull_results failed to pull imin' rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0) - if(rc /= 4) stop "davidson_pull_results failed to pull imax" + if(rc /= 4) stop 'davidson_pull_results failed to pull imax' sz = (imax-imin+1)*N_states_diag rc8 = f77_zmq_recv8( zmq_socket_pull, v_t(1,imin), 8_8*sz, 0) - if(rc8 /= 8*sz) stop "davidson_pull_results failed to pull v_t" + if(rc8 /= 8*sz) stop 'davidson_pull_results failed to pull v_t' rc8 = f77_zmq_recv8( zmq_socket_pull, s_t(1,imin), 8_8*sz, 0) - if(rc8 /= 8*sz) stop "davidson_pull_results failed to pull s_t" + if(rc8 /= 8*sz) stop 'davidson_pull_results failed to pull s_t' ! Activate if zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE - rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) - if (rc /= 4) then + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) + if (rc /= 2) then print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' - stop 'error' + stop -1 endif IRP_ENDIF