mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-11-07 05:53:37 +01:00
Improving scaling of pt2 with network
This commit is contained in:
parent
c34808d05c
commit
0950d03f09
@ -6,6 +6,11 @@ type req_or_sub = REQ | SUB
|
|||||||
|
|
||||||
let localport = 42379
|
let localport = 42379
|
||||||
|
|
||||||
|
|
||||||
|
(* Running totals used by the verbose transfer-speed report. *)

(* Accumulated elapsed time, in seconds.  It starts at a tiny non-zero value
   because the cumulative rate is computed by dividing by this total. *)
let in_time_sum = ref 1.e-9

(* Accumulated size of the forwarded requests. *)
let in_size_sum = ref 0.
|
||||||
|
|
||||||
|
|
||||||
let () =
|
let () =
|
||||||
let open Command_line in
|
let open Command_line in
|
||||||
begin
|
begin
|
||||||
@ -17,6 +22,10 @@ let () =
|
|||||||
doc="Downloads the EZFIO directory." ;
|
doc="Downloads the EZFIO directory." ;
|
||||||
arg=Without_arg; } ;
|
arg=Without_arg; } ;
|
||||||
|
|
||||||
|
{ short='v' ; long="verbose" ; opt=Optional ;
|
||||||
|
doc="Prints the transfer speed." ;
|
||||||
|
arg=Without_arg; } ;
|
||||||
|
|
||||||
anonymous
|
anonymous
|
||||||
"(EZFIO_DIR|ADDRESS)"
|
"(EZFIO_DIR|ADDRESS)"
|
||||||
Mandatory
|
Mandatory
|
||||||
@ -39,6 +48,9 @@ let () =
|
|||||||
ADDRESS x
|
ADDRESS x
|
||||||
in
|
in
|
||||||
|
|
||||||
|
let verbose =
|
||||||
|
Command_line.get_bool "verbose"
|
||||||
|
in
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -145,12 +157,41 @@ let () =
|
|||||||
|
|
||||||
|
|
||||||
let action =
  (* [action ()] forwards one message exchange between [socket_in] and
     [socket_out].  The choice between the verbose and the quiet variant is
     made once, here, so the returned closure does no extra branching.

     - REQ: receive a request on [socket_in], forward it to [socket_out],
       then receive the reply on [socket_out] and forward it to [socket_in].
     - SUB: one-way forwarding from [socket_in] to [socket_out].

     In verbose REQ mode, the closure also prints the instantaneous and the
     cumulative transfer rates after each exchange. *)
  let bytes_per_mib = 1024. *. 1024. in
  if verbose then
    begin
      match req_or_sub with
      | REQ -> (fun () ->
          let request =
            Zmq.Socket.recv_all socket_in
          in
          (* The clock starts once the request has been received, just
             before it is forwarded. *)
          let t0 = Unix.gettimeofday () in
          Zmq.Socket.send_all socket_out request;
          (* Total size of all parts of the request, in MiB.  The previous
             divisor (8192 * 1024) made the printed "MiB/s" figures eight
             times too small. *)
          let in_size =
            let bytes =
              List.fold_left
                (fun accu part -> accu + String.length part) 0 request
            in
            float_of_int bytes /. bytes_per_mib
          in
          let reply =
            Zmq.Socket.recv_all socket_out
          in
          (* The clock stops when the reply has arrived, before it is sent
             back on [socket_in]. *)
          let t1 = Unix.gettimeofday () in
          Zmq.Socket.send_all socket_in reply;
          let in_time = t1 -. t0 in
          in_time_sum := !in_time_sum +. in_time;
          in_size_sum := !in_size_sum +. in_size;
          (* First column: rate of this exchange; second: cumulative rate. *)
          Printf.printf " %16.2f MiB/s -- %16.2f MiB/s\n%!"
            (in_size /. in_time) (!in_size_sum /. !in_time_sum)
        )
      | SUB -> (fun () ->
          Zmq.Socket.recv_all socket_in |> Zmq.Socket.send_all socket_out)
    end
  else
    begin
      match req_or_sub with
      | REQ -> (fun () ->
          Zmq.Socket.recv_all socket_in |> Zmq.Socket.send_all socket_out;
          Zmq.Socket.recv_all socket_out |> Zmq.Socket.send_all socket_in )
      | SUB -> (fun () ->
          Zmq.Socket.recv_all socket_in |> Zmq.Socket.send_all socket_out)
    end
in
|
||||||
|
|
||||||
|
|
||||||
@ -171,7 +212,6 @@ let () =
|
|||||||
| None -> ()
|
| None -> ()
|
||||||
| Some Zmq.Poll.In_out
|
| Some Zmq.Poll.In_out
|
||||||
| Some Zmq.Poll.Out -> ()
|
| Some Zmq.Poll.Out -> ()
|
||||||
|
|
||||||
done;
|
done;
|
||||||
|
|
||||||
Zmq.Socket.close socket_in;
|
Zmq.Socket.close socket_in;
|
||||||
@ -363,12 +403,21 @@ let () =
|
|||||||
|> Zmq.Socket.send socket_in
|
|> Zmq.Socket.send socket_in
|
||||||
in
|
in
|
||||||
|
|
||||||
Printf.printf "On remote hosts, create ssh tunnel using:
|
Printf.printf "
|
||||||
ssh -L %d:%s:%d -L %d:%s:%d -L %d:%s:%d %s\n%!"
|
On remote hosts, create ssh tunnel using:
|
||||||
|
ssh -n -L %d:%s:%d -L %d:%s:%d -L %d:%s:%d -L %d:%s:%d %s &
|
||||||
|
Or from this host connect to clients using:
|
||||||
|
ssh -n -R %d:localhost:%d -R %d:localhost:%d -R %d:localhost:%d -R %d:localhost:%d <host> &
|
||||||
|
%!"
|
||||||
(port ) localhost (localport )
|
(port ) localhost (localport )
|
||||||
(port+1) localhost (localport+1)
|
(port+1) localhost (localport+1)
|
||||||
|
(port+2) localhost (localport+2)
|
||||||
(port+9) localhost (localport+9)
|
(port+9) localhost (localport+9)
|
||||||
(Unix.gethostname ());
|
(Unix.gethostname ())
|
||||||
|
(port ) (localport )
|
||||||
|
(port+1) (localport+1)
|
||||||
|
(port+2) (localport+2)
|
||||||
|
(port+9) (localport+9);
|
||||||
Printf.printf "Ready\n%!";
|
Printf.printf "Ready\n%!";
|
||||||
while !run_status do
|
while !run_status do
|
||||||
|
|
||||||
|
@ -141,8 +141,8 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
|
|||||||
b%cur=0
|
b%cur=0
|
||||||
|
|
||||||
! ! Try to adjust n_tasks around nproc/2 seconds per job
|
! ! Try to adjust n_tasks around nproc/2 seconds per job
|
||||||
! n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))
|
n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))
|
||||||
n_tasks = 1
|
! n_tasks = 1
|
||||||
end do
|
end do
|
||||||
|
|
||||||
integer, external :: disconnect_from_taskserver
|
integer, external :: disconnect_from_taskserver
|
||||||
|
Loading…
Reference in New Issue
Block a user