10
1
mirror of https://gitlab.com/scemama/qmcchem.git synced 2024-12-27 23:03:32 +01:00
qmcchem/ocaml/Watchdog.ml

121 lines
2.3 KiB
OCaml
Raw Normal View History

2019-07-23 17:27:02 +02:00
let _list = ref []
let _running = ref false
let _threads = ref []
2015-12-19 02:35:13 +01:00
(** Kill the current process and all children *)
let kill () =
let kill pid =
2019-07-23 17:27:02 +02:00
Unix.kill pid Sys.sigkill;
2019-07-14 18:50:44 +02:00
Printf.printf "Killed %d\n%!" pid
2015-12-19 02:35:13 +01:00
in
2019-07-14 18:50:44 +02:00
List.iter kill (!_list);
2015-12-19 02:35:13 +01:00
exit 1
2019-07-23 17:27:02 +02:00
2015-12-19 02:35:13 +01:00
(** Start watchdog *)
let start () =
if (!_running) then
failwith "Watchdog error: Already running"
else
begin
_running := true;
let pause () =
2019-07-14 18:50:44 +02:00
Unix.sleep 1
2015-12-19 02:35:13 +01:00
in
let pid_is_running pid =
2019-07-14 18:50:44 +02:00
Sys.file_exists ("/proc/"^(string_of_int pid)^"/stat")
2015-12-19 02:35:13 +01:00
in
let f () =
while (!_running)
do
pause () ;
(*DEBUG
2019-07-14 18:50:44 +02:00
List.iter (fun x -> Printf.printf "%d\n%!" x) (!_list) ;
2015-12-19 02:35:13 +01:00
*)
let continue () =
2019-07-14 18:50:44 +02:00
List.fold_left
( fun accu x -> accu && (pid_is_running x))
true (!_list)
2015-12-19 02:35:13 +01:00
in
if ( not (continue ()) ) then
kill ()
done
in
_threads := ( (Thread.create f) () ) :: (!_threads)
end
2019-07-23 17:27:02 +02:00
2015-12-19 02:35:13 +01:00
(** Stop watchdog *)
let stop () =
if (!_running) then
_running := false
else
failwith "Watchdog error: Already stopped"
2019-07-23 17:27:02 +02:00
2015-12-19 02:35:13 +01:00
(** Add a PID to tracking *)
let add pid =
if (not !_running) then
start ();
_list := pid :: (!_list)
2019-07-23 17:27:02 +02:00
2015-12-19 02:35:13 +01:00
(** Remove a PID from tracking *)
let del pid =
let rec aux accu = function
| [] -> accu
| a :: rest ->
if (a <> pid) then
aux (a::accu) rest
else
aux accu rest
in
_list := aux [] (!_list);
match (!_list) with
| [] -> if (!_running) then stop ()
| _ -> ()
2019-07-23 17:27:02 +02:00
2015-12-19 02:35:13 +01:00
(** Fork and exec a new process *)
2019-07-14 18:50:44 +02:00
let fork_exec ~prog ~args () =
2015-12-19 02:35:13 +01:00
let pid =
2019-07-14 18:50:44 +02:00
match Unix.fork () with
2019-07-23 17:27:02 +02:00
| 0 -> Unix.execvp prog args
2019-07-14 18:50:44 +02:00
| pid -> pid
2015-12-19 02:35:13 +01:00
in
let f () =
add pid;
let success =
2019-07-14 18:50:44 +02:00
match (Unix.waitpid [] pid) with
| pid , Unix.WEXITED n -> true
| pid , Unix.WSIGNALED n ->
( Printf.printf "PID %d killed with signal %d\n%!" pid n;
2015-12-19 02:35:13 +01:00
false )
2019-07-14 18:50:44 +02:00
| pid , Unix.WSTOPPED n ->
( Printf.printf "PID %d stopped with signal %d\n%!" pid n;
2015-12-19 02:35:13 +01:00
false )
in
del pid ;
if (not success) then
kill ()
in
_threads := ( (Thread.create f) () ) :: (!_threads);
pid
2019-07-23 17:27:02 +02:00
2015-12-19 02:35:13 +01:00
(** Wait for threads to finish *)
let join () =
(* if (!_running) then stop (); *)
2019-07-14 18:50:44 +02:00
List.iter Thread.join (!_threads);
2015-12-19 02:35:13 +01:00
assert (not !_running)
2019-07-23 17:27:02 +02:00