10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2025-01-09 04:43:13 +01:00

Improving scaling of pt2 with network

This commit is contained in:
Anthony Scemama 2019-07-31 20:56:00 +02:00
parent c34808d05c
commit 0950d03f09
2 changed files with 61 additions and 12 deletions

View File

@ -6,6 +6,11 @@ type req_or_sub = REQ | SUB
let localport = 42379
let in_time_sum = ref 1.e-9
and in_size_sum = ref 0.
let () =
let open Command_line in
begin
@ -17,6 +22,10 @@ let () =
doc="Downloads the EZFIO directory." ;
arg=Without_arg; } ;
{ short='v' ; long="verbose" ; opt=Optional ;
doc="Prints the transfer speed." ;
arg=Without_arg; } ;
anonymous
"(EZFIO_DIR|ADDRESS)"
Mandatory
@ -39,6 +48,9 @@ let () =
ADDRESS x
in
let verbose =
Command_line.get_bool "verbose"
in
@ -145,12 +157,41 @@ let () =
let action =
if verbose then
begin
match req_or_sub with
| REQ -> (fun () ->
let msg =
Zmq.Socket.recv_all socket_in
in
let t0 = Unix.gettimeofday () in
Zmq.Socket.send_all socket_out msg;
let in_size =
float_of_int ( List.fold_left (fun accu x -> accu + String.length x) 0 msg )
/. 8192. /. 1024.
in
let msg =
Zmq.Socket.recv_all socket_out
in
let t1 = Unix.gettimeofday () in
Zmq.Socket.send_all socket_in msg;
let in_time = t1 -. t0 in
in_time_sum := !in_time_sum +. in_time;
in_size_sum := !in_size_sum +. in_size;
Printf.printf " %16.2f MiB/s -- %16.2f MiB/s\n%!" (in_size /. in_time) (!in_size_sum /. !in_time_sum);
)
| SUB -> (fun () ->
Zmq.Socket.recv_all socket_in |> Zmq.Socket.send_all socket_out)
end
else
begin
match req_or_sub with
| REQ -> (fun () ->
Zmq.Socket.recv_all socket_in |> Zmq.Socket.send_all socket_out;
Zmq.Socket.recv_all socket_out |> Zmq.Socket.send_all socket_in )
| SUB -> (fun () ->
Zmq.Socket.recv_all socket_in |> Zmq.Socket.send_all socket_out)
end
in
@ -171,7 +212,6 @@ let () =
| None -> ()
| Some Zmq.Poll.In_out
| Some Zmq.Poll.Out -> ()
done;
Zmq.Socket.close socket_in;
@ -363,12 +403,21 @@ let () =
|> Zmq.Socket.send socket_in
in
Printf.printf "On remote hosts, create ssh tunnel using:
ssh -L %d:%s:%d -L %d:%s:%d -L %d:%s:%d %s\n%!"
Printf.printf "
On remote hosts, create ssh tunnel using:
ssh -n -L %d:%s:%d -L %d:%s:%d -L %d:%s:%d -L %d:%s:%d %s &
Or from this host connect to clients using:
ssh -n -R %d:localhost:%d -R %d:localhost:%d -R %d:localhost:%d -R %d:localhost:%d <host> &
%!"
(port ) localhost (localport )
(port+1) localhost (localport+1)
(port+2) localhost (localport+2)
(port+9) localhost (localport+9)
(Unix.gethostname ());
(Unix.gethostname ())
(port ) (localport )
(port+1) (localport+1)
(port+2) (localport+2)
(port+9) (localport+9);
Printf.printf "Ready\n%!";
while !run_status do

View File

@ -141,8 +141,8 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
b%cur=0
! ! Try to adjust n_tasks around nproc/2 seconds per job
! n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))
n_tasks = 1
n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))
! n_tasks = 1
end do
integer, external :: disconnect_from_taskserver