10
1
mirror of https://gitlab.com/scemama/qmcchem.git synced 2024-12-22 12:23:30 +01:00
qmcchem/ocaml/Random_variable.ml

880 lines
21 KiB
OCaml
Raw Normal View History

2016-02-19 11:20:34 +01:00
open Qptypes
2015-12-19 02:35:13 +01:00
2022-01-11 13:41:13 +01:00
type t =
2015-12-19 02:35:13 +01:00
{ property : Property.t ;
data : Block.t list;
}
module Average = struct
2022-01-11 13:41:13 +01:00
include Sample
2015-12-19 02:35:13 +01:00
end
module Error = struct
include Sample
end
module Variance = struct
include Sample
end
module Skewness: sig
2022-01-11 13:41:13 +01:00
type t
2015-12-19 02:35:13 +01:00
val to_float : t -> float
val of_float : float -> t
val to_string : t -> string
end = struct
2022-01-11 13:41:13 +01:00
type t = float
2019-07-14 18:50:44 +02:00
let to_string = string_of_float
2015-12-19 02:35:13 +01:00
let to_float x = x
let of_float x = x
end
module Kurtosis: sig
2022-01-11 13:41:13 +01:00
type t
2015-12-19 02:35:13 +01:00
val to_float : t -> float
val of_float : float -> t
val to_string : t -> string
end = struct
2022-01-11 13:41:13 +01:00
type t = float
2019-07-14 18:50:44 +02:00
let to_string = string_of_float
2015-12-19 02:35:13 +01:00
let to_float x = x
let of_float x = x
end
module GaussianDist: sig
2022-01-11 13:41:13 +01:00
type t
2015-12-19 02:35:13 +01:00
val create : mu:Average.t -> sigma2:Variance.t -> t
val eval : g:t -> x:float -> float
end = struct
2022-01-11 13:41:13 +01:00
type t = { mu: Average.t ; sigma2: Variance.t }
2015-12-19 02:35:13 +01:00
let create ~mu ~sigma2 =
{ mu ; sigma2 }
let eval ~g ~x =
2022-01-11 13:41:13 +01:00
let { mu ; sigma2 } =
2015-12-19 02:35:13 +01:00
g
in
2022-01-11 13:41:13 +01:00
let mu =
2015-12-19 02:35:13 +01:00
Average.to_float mu
and sigma2 =
Variance.to_float sigma2
in
let x2 =
(x -. mu) *. ( x -. mu) /. sigma2
in
2022-01-11 13:41:13 +01:00
let pi =
2019-07-14 18:50:44 +02:00
acos (-1.)
2015-12-19 02:35:13 +01:00
in
2022-01-11 13:41:13 +01:00
let c =
2015-12-19 02:35:13 +01:00
1. /. (sqrt (sigma2 *. (pi +. pi)))
in
c *. exp ( -0.5 *. x2)
end
2019-07-17 19:10:22 +02:00
let hashtbl_to_alist table =
Hashtbl.fold (fun k v a -> (k,v) :: a) table []
let hashtbl_change table key f =
let elt =
2022-01-11 13:41:13 +01:00
try
2019-07-17 19:10:22 +02:00
Some (Hashtbl.find table key)
with
| Not_found -> None
in
let new_elt = f elt in
match new_elt with
| None -> Hashtbl.remove table key
| Some value -> Hashtbl.replace table key value
2015-12-19 02:35:13 +01:00
2016-02-19 11:20:34 +01:00
(** Build from raw data. Range values are given in percent. *)
let of_raw_data ?(locked=true) ~range property =
2022-01-11 13:41:13 +01:00
let data =
2016-02-19 11:20:34 +01:00
Block.raw_data ~locked ()
2019-07-14 18:50:44 +02:00
|> List.filter (fun x -> x.Block.property = property)
2020-06-02 00:19:45 +02:00
|> List.sort (fun x y ->
let x = Block_id.to_int x.Block.block_id in
let y = Block_id.to_int y.Block.block_id in
if (x>y) then 1
else if (x<y) then -1
else 0)
2016-02-19 11:20:34 +01:00
in
let data_in_range rmin rmax =
let total_weight =
2019-07-14 18:50:44 +02:00
List.fold_left (fun accu x ->
(Weight.to_float x.Block.weight) +. accu
) 0. data
2016-02-19 11:20:34 +01:00
in
2022-01-11 13:41:13 +01:00
2016-02-19 11:20:34 +01:00
let wmin, wmax =
rmin *. total_weight *. 0.01,
rmax *. total_weight *. 0.01
in
let (_, new_data) =
2019-07-14 18:50:44 +02:00
List.fold_left (fun (wsum, l) x ->
2016-02-19 11:20:34 +01:00
if (wsum > wmax) then
(wsum,l)
else
begin
let wsum_new =
wsum +. (Weight.to_float x.Block.weight)
in
if (wsum_new > wmin) then
(wsum_new, x::l)
else
(wsum_new, l)
2022-01-11 13:41:13 +01:00
end
2019-07-14 18:50:44 +02:00
) (0.,[]) data
2016-02-19 11:20:34 +01:00
in
List.rev new_data
in
2022-01-11 13:41:13 +01:00
let result =
2016-02-19 11:20:34 +01:00
match range with
| (0.,100.) -> { property ; data }
| (rmin,rmax) -> { property ; data=data_in_range rmin rmax }
in
result
2015-12-19 02:35:13 +01:00
(** Compute average *)
let average { property ; data } =
if Property.is_scalar property then
2022-01-11 13:41:13 +01:00
let (num,denom) =
2019-07-14 18:50:44 +02:00
List.fold_left (fun (an, ad) x ->
2015-12-19 02:35:13 +01:00
let num =
(Weight.to_float x.Block.weight) *. (Sample.to_float x.Block.value)
and den =
(Weight.to_float x.Block.weight)
in (an +. num, ad +. den)
2019-07-14 18:50:44 +02:00
) (0., 0.) data
2022-01-11 13:41:13 +01:00
in
2015-12-19 02:35:13 +01:00
num /. denom
|> Average.of_float
else
let dim =
match data with
| [] -> 1
| x :: tl -> Sample.dimension x.Block.value
in
2022-01-11 13:41:13 +01:00
let (num,denom) =
2019-07-14 18:50:44 +02:00
List.fold_left (fun (an, ad) x ->
2015-12-19 02:35:13 +01:00
let num =
2019-07-14 18:50:44 +02:00
Array.map (fun y -> (Weight.to_float x.Block.weight) *. y)
2022-01-11 13:41:13 +01:00
(Sample.to_float_array x.Block.value)
2015-12-19 02:35:13 +01:00
and den = (Weight.to_float x.Block.weight)
2022-01-11 13:41:13 +01:00
in ( Array.mapi (fun i y -> y +. num.(i)) an , ad +. den)
2019-07-14 18:50:44 +02:00
) (Array.make dim 0. , 0.) data
2022-01-11 13:41:13 +01:00
in
2015-12-19 02:35:13 +01:00
let denom_inv =
1. /. denom
in
2019-07-14 18:50:44 +02:00
Array.map (fun x -> x *. denom_inv) num
2015-12-19 02:35:13 +01:00
|> Average.of_float_array ~dim
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
2022-01-11 13:41:13 +01:00
2016-05-02 21:19:36 +02:00
2015-12-19 02:35:13 +01:00
(** Compute sum (for CPU/Wall time) *)
let sum { property ; data } =
2019-07-14 18:50:44 +02:00
List.fold_left (fun accu x ->
2015-12-19 02:35:13 +01:00
let num = (Weight.to_float x.Block.weight) *. (Sample.to_float x.Block.value)
in accu +. num
2022-01-11 13:41:13 +01:00
) 0. data
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Calculation of the average and error bar *)
let ave_error { property ; data } =
2022-01-11 13:41:13 +01:00
(* sum: \sum_k x_k *. w_k
ansum: \sum_k w_k
avsum: \sum_k x_k *. w_k
avcu0: avsum / ansum
avsq: \sum_k (1. -. (w_k /. ansum_k)) *. (x_k -. avcu0)^2 *. w_k)
*)
let rec loop ~sum ~avsq ~ansum ~avsum ~n ?idx = function
| [] ->
2015-12-19 02:35:13 +01:00
begin
if (n > 0.) then
( Average.of_float (sum /. ansum),
2019-07-14 18:50:44 +02:00
Some (Error.of_float (sqrt ( abs_float ( avsq /.( ansum *. n)))) ))
2015-12-19 02:35:13 +01:00
else
( Average.of_float (sum /. ansum), None)
end
2022-01-12 00:39:43 +01:00
| (x, w) :: tail ->
2015-12-19 02:35:13 +01:00
begin
2022-01-11 13:41:13 +01:00
let avcu0 = avsum /. ansum in
let xw = x *. w in
2015-12-19 02:35:13 +01:00
let ansum, avsum, sum =
ansum +. w ,
avsum +. xw ,
sum +. xw
in
loop tail
~sum:sum
~avsq:(avsq +. (1. -. (w /. ansum)) *. (x -. avcu0)
*. (x -. avcu0) *. w)
2022-01-11 13:41:13 +01:00
~avsum:avsum
2015-12-19 02:35:13 +01:00
~ansum:ansum
2022-01-11 13:41:13 +01:00
~n:(n +. 1.)
2015-12-19 02:35:13 +01:00
end
in
let ave_error_scalar = function
| [] -> (Average.of_float 0., None)
| (x,w) :: tail ->
loop tail
~sum:(x *. w)
~avsq:0.
~ansum:w
~avsum:(x *. w)
~n:0.
in
if (Property.is_scalar property) then
2020-04-15 14:40:33 +02:00
List.rev_map (fun x ->
2015-12-19 02:35:13 +01:00
(Sample.to_float x.Block.value,
Weight.to_float x.Block.weight)
2019-07-14 18:50:44 +02:00
) data
2022-01-11 13:41:13 +01:00
|> ave_error_scalar
2015-12-19 02:35:13 +01:00
else
match data with
| [] -> (Average.of_float 0., None)
| head::tail as list_of_samples ->
let dim =
head.Block.value
|> Sample.dimension
in
2022-01-11 13:41:13 +01:00
let result =
2019-07-14 18:50:44 +02:00
Array.init dim (fun idx ->
2020-04-15 14:40:33 +02:00
List.rev_map (fun x ->
2015-12-19 02:35:13 +01:00
(Sample.to_float ~idx x.Block.value,
Weight.to_float x.Block.weight)
2019-07-14 18:50:44 +02:00
) list_of_samples
2015-12-19 02:35:13 +01:00
|> ave_error_scalar
)
in
2022-01-11 13:41:13 +01:00
( Array.map (fun (x,_) -> Average.to_float x) result
|> Average.of_float_array ~dim ,
2015-12-19 02:35:13 +01:00
if (Array.length result < 2) then
None
else
2022-01-11 13:41:13 +01:00
Some (Array.map (function
2015-12-19 02:35:13 +01:00
| (_,Some y) -> Error.to_float y
2022-01-11 13:41:13 +01:00
| (_,None) -> 0.) result
2015-12-19 02:35:13 +01:00
|> Average.of_float_array ~dim)
2022-01-11 13:41:13 +01:00
)
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Fold function for block values *)
let fold_blocks ~f { property ; data } =
2019-07-14 18:50:44 +02:00
let init =
try
let block = List.hd data in
Sample.to_float block.Block.value
with
2019-07-17 19:10:22 +02:00
| Failure _ -> 0.
2015-12-19 02:35:13 +01:00
in
2019-07-14 18:50:44 +02:00
List.fold_left (fun accu block ->
2015-12-19 02:35:13 +01:00
let x = Sample.to_float block.Block.value
in f accu x
2022-01-11 13:41:13 +01:00
) init data
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Convergence plot *)
let convergence { property ; data } =
2022-01-11 13:41:13 +01:00
let rec loop ~sum ~avsq ~ansum ~avsum ~n ~accu = function
2015-12-19 02:35:13 +01:00
| [] -> List.rev accu
| head :: tail ->
begin
let x = Sample.to_float head.Block.value
and w = Weight.to_float head.Block.weight
and avcu0 = avsum /. ansum
in
let xw = x *. w
in
let ansum = ansum +. w
and avsum = avsum +. xw
and sum = sum +. xw
in
2022-01-11 13:41:13 +01:00
let accu =
2015-12-19 02:35:13 +01:00
if (n > 0.) then
2019-07-14 18:50:44 +02:00
(sum /. ansum, sqrt ( abs_float ( avsq /.( ansum *. n))))::accu
2015-12-19 02:35:13 +01:00
else
(sum /. ansum, 0.)::accu
in
loop tail
~sum:sum
~avsq:(avsq +. (1. -. (w /. ansum)) *. (x -. avcu0)
*. (x -. avcu0) *. w)
2022-01-11 13:41:13 +01:00
~avsum:avsum
2015-12-19 02:35:13 +01:00
~ansum:ansum
2022-01-11 13:41:13 +01:00
~n:(n +. 1.)
2015-12-19 02:35:13 +01:00
~accu:accu
end
in
match data with
| [] -> []
| head :: tail ->
begin
let x = Sample.to_float head.Block.value
and w = Weight.to_float head.Block.weight
in
let s = x *. w in
loop tail
~sum:s
~avsq:0.
~ansum:w
~avsum:s
~n:0.
~accu:[ (s /. w, 0.) ]
end
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
let rev_convergence { property ; data } =
let p = { property=property ; data = List.rev data } in
convergence p
|> List.rev
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Min and max of block *)
let min_block =
fold_blocks ~f:(fun accu x ->
if (x < accu) then x
else accu
)
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
let max_block =
fold_blocks ~f:(fun accu x ->
if (x > accu) then x
else accu
)
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Create a hash table for merging *)
2022-01-11 13:41:13 +01:00
let create_hash ~create_key ?(update_block_id=(fun x->x))
2016-05-02 21:19:36 +02:00
?(update_value=(fun wc vc wb vb sw -> (wc *. vc +. wb *. vb) /. sw) )
?(update_weight=(fun wc wb -> wc +. wb) ) t =
2019-07-17 19:10:22 +02:00
let table = Hashtbl.create 63
2015-12-19 02:35:13 +01:00
in
2019-07-17 19:10:22 +02:00
List.iter (fun block ->
2015-12-19 02:35:13 +01:00
let key = create_key block
in
let open Block in
2022-01-11 13:41:13 +01:00
hashtbl_change table key (function
2015-12-19 02:35:13 +01:00
| Some current ->
let wc, wb =
Weight.to_float current.weight,
Weight.to_float block.weight
in
let sw =
2016-05-02 21:19:36 +02:00
update_weight wc wb
2015-12-19 02:35:13 +01:00
in
if (Property.is_scalar current.property) then
2022-01-11 13:41:13 +01:00
let vc, vb =
2015-12-19 02:35:13 +01:00
Sample.to_float current.value,
Sample.to_float block.value
in Some
{ property = current.property ;
weight = Weight.of_float sw ;
2016-05-02 21:19:36 +02:00
value = Sample.of_float (update_value wc vc wb vb sw);
2015-12-19 02:35:13 +01:00
block_id = update_block_id block.block_id;
pid = block.pid ;
compute_node = block.compute_node;
}
else
2022-01-11 13:41:13 +01:00
let vc, vb =
2015-12-19 02:35:13 +01:00
Sample.to_float_array current.value,
Sample.to_float_array block.value
2022-01-11 13:41:13 +01:00
and dim =
2015-12-19 02:35:13 +01:00
Sample.dimension current.value
in Some
{ property = current.property ;
weight = Weight.of_float sw ;
2022-01-11 13:41:13 +01:00
value =
2019-07-17 19:10:22 +02:00
Array.init dim (fun i -> update_value wc vc.(i) wb vb.(i) sw)
2015-12-19 02:35:13 +01:00
|> Sample.of_float_array ~dim ;
block_id = update_block_id block.block_id;
pid = block.pid ;
compute_node = block.compute_node;
}
| None -> Some
{ property = block.property ;
weight = block.weight;
value = block.value ;
block_id = update_block_id block.block_id;
pid = block.pid ;
compute_node = block.compute_node;
}
)
2019-07-17 19:10:22 +02:00
) t.data ;
2015-12-19 02:35:13 +01:00
table
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Genergic merge function *)
2018-03-14 17:02:52 +01:00
let merge ~create_key ?update_block_id ?update_value ?update_weight t =
let table = create_hash ~create_key ?update_block_id ?update_value ?update_weight t
2015-12-19 02:35:13 +01:00
in
{ property = t.property ;
2019-07-17 19:10:22 +02:00
data = hashtbl_to_alist table
|> List.sort (fun x y ->
2015-12-19 02:35:13 +01:00
if (x>y) then 1
else if (x<y) then -1
else 0)
2020-06-01 23:30:07 +02:00
|> List.rev_map snd
2020-04-15 14:40:33 +02:00
|> List.rev
2015-12-19 02:35:13 +01:00
}
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Merge per block id *)
let merge_per_block_id =
2022-01-11 13:41:13 +01:00
merge
2018-03-14 17:02:52 +01:00
~create_key:(fun block -> Block_id.to_string block.Block.block_id)
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Merge per compute_node *)
let merge_per_compute_node =
merge
2022-01-11 13:41:13 +01:00
~create_key:(fun block ->
Printf.sprintf "%s"
2015-12-19 02:35:13 +01:00
(Compute_node.to_string block.Block.compute_node) )
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Merge per Compute_node and PID *)
let merge_per_compute_node_and_pid =
merge
2022-01-11 13:41:13 +01:00
~create_key:(fun block ->
Printf.sprintf "%s %10.10d"
2015-12-19 02:35:13 +01:00
(Compute_node.to_string block.Block.compute_node)
2019-07-17 19:10:22 +02:00
(block.Block.pid) )
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Merge per Compute_node and BlockId *)
let merge_per_compute_node_and_block_id =
merge
2022-01-11 13:41:13 +01:00
~create_key:(fun block ->
Printf.sprintf "%s %10.10d"
2015-12-19 02:35:13 +01:00
(Compute_node.to_string block.Block.compute_node)
(Block_id.to_int block.Block.block_id) )
2016-02-19 11:20:34 +01:00
2018-03-14 17:02:52 +01:00
let error_x_over_y = function
2019-07-17 19:10:22 +02:00
| [] -> (Average.of_float 0., None)
| (x,_)::[] -> (Average.of_float x , None)
| (x,w)::tail ->
2018-03-14 17:02:52 +01:00
begin
let avcu0 = ref 0.
and ansum = ref w
and avsum = ref x
and avbl = ref (x /. w)
and avsq = ref 0.
and n = ref 1.
in
let avcu = ref !avbl
in
2019-07-17 19:10:22 +02:00
List.iter (fun (x,w) ->
avcu0 := !avsum /. !ansum;
ansum := !ansum +. w;
avsum := !avsum +. x;
avbl := x /. w ;
if (!ansum <> 0.) then
avcu := !avsum /. !ansum
else ();
avsq := !avsq +. (1. -. w /. !ansum) *. (!avbl -. !avcu0) *. (!avbl -. !avcu0) *. w;
n := !n +. 1.
) tail ;
2018-03-14 17:02:52 +01:00
let arg =
2019-07-14 18:50:44 +02:00
abs_float (!avsq /.(!ansum *. (!n -. 1.)))
2018-03-14 17:02:52 +01:00
in
let error =
sqrt arg
in
(Average.of_float !avcu, Some (Error.of_float error) )
end
2015-12-19 02:35:13 +01:00
2016-05-02 21:51:09 +02:00
(** Create float, variable operators *)
2022-01-11 13:41:13 +01:00
let one_variable_operator ~update_value p f =
{ p with
data = List.rev @@ List.rev_map (fun b -> { b with
2018-03-14 17:02:52 +01:00
Block.value = Sample.of_float (update_value (Sample.to_float b.Block.value) ) }
) p.data }
2016-05-02 21:51:09 +02:00
let ( +@ ) p f = one_variable_operator p f
2018-03-14 17:02:52 +01:00
~update_value: (fun x -> x +. f )
2016-05-02 21:51:09 +02:00
2022-01-11 13:41:13 +01:00
let ( *@ ) p f = one_variable_operator p f
2018-03-14 17:02:52 +01:00
~update_value: (fun x -> x *. f )
2016-05-02 21:51:09 +02:00
2022-01-11 13:41:13 +01:00
let ( -@ ) p f = one_variable_operator p f
2018-03-14 17:02:52 +01:00
~update_value: (fun x -> x -. f )
2016-05-02 21:51:09 +02:00
2022-01-11 13:41:13 +01:00
let ( /@ ) p f = one_variable_operator p f
2018-03-14 17:02:52 +01:00
~update_value: (fun x -> x /. f )
2016-05-02 21:51:09 +02:00
(** Create two variable operators *)
2022-01-11 13:41:13 +01:00
let two_variable_operator ~update_value p1 p2 =
2016-05-02 21:51:09 +02:00
merge
~update_value
2016-05-02 21:19:36 +02:00
~create_key:(fun block ->
2022-01-11 13:41:13 +01:00
Printf.sprintf "%s %10.10d %10.10d"
2016-05-02 21:19:36 +02:00
(Compute_node.to_string block.Block.compute_node)
2022-01-11 13:41:13 +01:00
(Block_id.to_int block.Block.block_id)
2019-07-17 19:10:22 +02:00
(block.Block.pid) )
2016-05-02 21:19:36 +02:00
~update_weight:(fun wc wb -> wc )
{ property = p1.property ;
2022-01-11 13:41:13 +01:00
data = List.concat [ p1.data ; p2.data ] }
2016-05-02 21:19:36 +02:00
2022-01-11 13:41:13 +01:00
let ( +! ) = two_variable_operator
2016-05-02 21:51:09 +02:00
~update_value: (fun wc vc wb vb sw -> (vc +. vb) )
2022-01-11 13:41:13 +01:00
let ( *! ) = two_variable_operator
2016-05-02 21:51:09 +02:00
~update_value: (fun wc vc wb vb sw -> (vc *. vb) )
2022-01-11 13:41:13 +01:00
let ( -! ) = two_variable_operator
2016-05-02 21:51:09 +02:00
~update_value: (fun wc vc wb vb sw -> (vc -. vb) )
2022-01-11 13:41:13 +01:00
let ( /! ) = two_variable_operator
2016-05-02 21:51:09 +02:00
~update_value: (fun wc vc wb vb sw -> (vc /. vb) )
2015-12-19 02:35:13 +01:00
(** Merge two consecutive blocks *)
let compress =
merge
2022-01-11 13:41:13 +01:00
~create_key:(fun block ->
2020-06-01 23:30:07 +02:00
Printf.sprintf "%s %10.10d %10.10d"
(Compute_node.to_string block.Block.compute_node) block.Block.pid
(((Block_id.to_int block.Block.block_id)+1)/2))
2022-01-11 13:41:13 +01:00
~update_block_id:(fun block_id ->
2015-12-19 02:35:13 +01:00
((Block_id.to_int block_id)+1)/2
|> Block_id.of_int )
2016-02-19 11:20:34 +01:00
2016-05-02 21:19:36 +02:00
2015-12-19 02:35:13 +01:00
(** Last value on each compute node (for wall_time) *)
let max_value_per_compute_node t =
2019-07-19 11:46:29 +02:00
let table = Hashtbl.create 63
2015-12-19 02:35:13 +01:00
in
let create_key block =
2022-01-11 13:41:13 +01:00
Printf.sprintf "%s %10.10d"
2015-12-19 02:35:13 +01:00
(Compute_node.to_string block.Block.compute_node)
2022-01-11 13:41:13 +01:00
(block.Block.pid)
2015-12-19 02:35:13 +01:00
in
2019-07-17 19:10:22 +02:00
List.iter (fun block ->
2015-12-19 02:35:13 +01:00
let key = create_key block
in
let open Block in
2022-01-11 13:41:13 +01:00
hashtbl_change table key (function
2015-12-19 02:35:13 +01:00
| Some current ->
let vc = Sample.to_float current.value
and vb = Sample.to_float block.value
in
if (vc > vb) then
Some current
else
Some block
| None -> Some block
)
2019-07-17 19:10:22 +02:00
) t.data ;
2015-12-19 02:35:13 +01:00
{ property = t.property ;
2019-07-17 19:10:22 +02:00
data = hashtbl_to_alist table
|> List.sort (fun x y ->
2015-12-19 02:35:13 +01:00
if (x>y) then 1
else if (x<y) then -1
else 0)
2020-04-15 14:40:33 +02:00
|> List.rev_map (fun (x,y) -> y)
|> List.rev
2015-12-19 02:35:13 +01:00
}
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** String representation *)
2022-01-11 13:41:13 +01:00
let to_string p =
2015-12-19 02:35:13 +01:00
match p.property with
2019-07-19 11:46:29 +02:00
| Property.Cpu -> Printf.sprintf "%s" (Time.string_of_sec (sum p))
| Property.Wall -> Printf.sprintf "%s" (Time.string_of_sec (sum (max_value_per_compute_node p)))
2015-12-19 02:35:13 +01:00
| Property.Accep -> Printf.sprintf "%16.10f" (average p |> Average.to_float)
2022-01-11 13:41:13 +01:00
| _ ->
2015-12-19 02:35:13 +01:00
begin
if Property.is_scalar p.property then
match ave_error p with
2022-01-11 13:41:13 +01:00
| (ave, Some error) ->
let (ave, error) =
Average.to_float ave,
2015-12-19 02:35:13 +01:00
Error.to_float error
in
Printf.sprintf "%16.10f +/- %16.10f" ave error
2022-01-11 13:41:13 +01:00
| (ave, None) ->
let ave =
2015-12-19 02:35:13 +01:00
Average.to_float ave
in
Printf.sprintf "%16.10f" ave
else
2022-01-12 00:39:43 +01:00
begin
2015-12-19 02:35:13 +01:00
match ave_error p with
2022-01-11 13:41:13 +01:00
| (ave, Some error) ->
2015-12-19 02:35:13 +01:00
let idxmax =
Average.dimension ave
in
let rec f accu idx =
if (idx < idxmax) then
2022-01-11 13:41:13 +01:00
let (ave, error) =
Average.to_float ~idx ave,
2015-12-19 02:35:13 +01:00
Error.to_float ~idx error
in
let s =
2022-01-12 00:39:43 +01:00
Printf.sprintf "%8d : %16.10f +/- %16.10f ;" (idx+1) ave error
2015-12-19 02:35:13 +01:00
in
2022-01-12 00:39:43 +01:00
(f [@tailcall]) (s :: accu) (idx+1)
2015-12-19 02:35:13 +01:00
else
2022-01-12 00:39:43 +01:00
List.rev (" ]" :: accu)
2015-12-19 02:35:13 +01:00
in
2022-01-12 00:39:43 +01:00
f ["[ \n"] 0
|> String.concat "\n"
2022-01-11 13:41:13 +01:00
| (ave, None) ->
2015-12-19 02:35:13 +01:00
Average.to_float ave
2022-01-12 00:39:43 +01:00
|> Printf.sprintf "%16.10f%!"
end
2015-12-19 02:35:13 +01:00
end
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Compress block files : Merge all the blocks computed on the same host *)
let compress_files () =
Block._raw_data := None;
let properties =
Lazy.force Block.properties
in
2022-01-11 13:41:13 +01:00
2015-12-19 02:35:13 +01:00
(* Create temporary file *)
let dir_name =
Block.dir_name
in
2022-01-11 13:41:13 +01:00
let dir_name =
2015-12-19 02:35:13 +01:00
Lazy.force dir_name
in
2022-01-11 13:41:13 +01:00
let files =
2019-07-19 11:46:29 +02:00
Sys.readdir dir_name
|> Array.to_list
2022-01-11 13:41:13 +01:00
|> List.filter (fun x ->
2019-07-19 11:46:29 +02:00
try
Str.search_backward (Str.regexp "locked") x (String.length x) >= 0
with
| Not_found -> true
2015-12-19 02:35:13 +01:00
)
2020-04-15 14:40:33 +02:00
|> List.rev_map (fun x -> dir_name^x)
2020-06-01 23:30:07 +02:00
|> List.rev
2015-12-19 02:35:13 +01:00
in
2022-01-11 13:41:13 +01:00
let out_channel_dir =
2019-07-19 11:46:29 +02:00
let rand_num = Random.int 1000000 |> string_of_int in
2022-01-11 13:41:13 +01:00
let dirname =
Filename.concat !Ezfio.ezfio_filename "blocks"
2019-07-31 15:07:04 +02:00
in
if not ( Sys.file_exists dirname ) then
Unix.mkdir dirname 0o755;
2019-07-19 11:46:29 +02:00
let tmp_dir =
2019-07-31 15:07:04 +02:00
Filename.concat dirname ("qmc"^rand_num)
2019-07-19 11:46:29 +02:00
in
try
2019-07-23 17:27:02 +02:00
Unix.mkdir tmp_dir 0o755;
2019-07-19 11:46:29 +02:00
tmp_dir
2019-07-23 17:39:51 +02:00
with _ ->
let message = Printf.sprintf "Cannot create temp dir %s" tmp_dir in
raise (Sys_error message)
2015-12-19 02:35:13 +01:00
in
2022-01-11 13:41:13 +01:00
let out_channel_name =
2015-12-19 02:35:13 +01:00
let hostname =
2022-01-11 13:41:13 +01:00
Lazy.force Qmcchem_config.hostname
2015-12-19 02:35:13 +01:00
and suffix =
Unix.getpid ()
2022-01-11 13:41:13 +01:00
|> string_of_int
2015-12-19 02:35:13 +01:00
in
2019-07-19 11:46:29 +02:00
String.concat "." [ hostname ; suffix ]
2015-12-19 02:35:13 +01:00
in
2022-01-11 13:41:13 +01:00
let block_channel =
2019-07-19 11:46:29 +02:00
Filename.concat out_channel_dir out_channel_name
|> open_out
2015-12-19 02:35:13 +01:00
in
2019-07-17 19:10:22 +02:00
List.iter (fun p ->
2022-01-11 13:41:13 +01:00
let l =
2015-12-19 02:35:13 +01:00
match p with
2022-01-11 13:41:13 +01:00
| Property.Cpu
2015-12-19 02:35:13 +01:00
| Property.Accep ->
2016-02-19 11:20:34 +01:00
of_raw_data ~locked:false ~range:(0.,100.) p
2015-12-19 02:35:13 +01:00
|> merge_per_compute_node
2022-01-11 13:41:13 +01:00
| Property.Wall ->
2016-02-19 11:20:34 +01:00
of_raw_data ~locked:false ~range:(0.,100.) p
2022-01-11 13:41:13 +01:00
|> max_value_per_compute_node
2015-12-19 02:35:13 +01:00
| _ ->
2016-02-19 11:20:34 +01:00
of_raw_data ~locked:false ~range:(0.,100.) p
2020-06-01 23:59:27 +02:00
(*
2015-12-19 02:35:13 +01:00
|> merge_per_compute_node_and_block_id
2020-06-01 23:59:27 +02:00
*)
2015-12-19 02:35:13 +01:00
in
2019-07-17 19:10:22 +02:00
List.iter (fun x ->
2022-01-11 22:12:02 +01:00
output_string block_channel (Block.to_string_or_bytes x);
if not Qmcchem_config.binary_io then
output_char block_channel '\n';
2022-01-11 13:41:13 +01:00
) l.data
) properties ;
2019-07-19 11:46:29 +02:00
close_out block_channel;
2015-12-19 02:35:13 +01:00
2019-07-19 11:46:29 +02:00
List.iter Unix.unlink files ;
Unix.rename (Filename.concat out_channel_dir out_channel_name)
(Filename.concat dir_name out_channel_name);
2015-12-19 02:35:13 +01:00
Unix.rmdir out_channel_dir
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Autocovariance function (not weighted) *)
let autocovariance { property ; data } =
let ave =
average { property ; data }
|> Average.to_float
and data =
match (merge_per_block_id { property ; data })
with { property ; data } -> Array.of_list data
in
2022-01-11 13:41:13 +01:00
let x_t =
2019-07-17 19:10:22 +02:00
Array.map (fun x -> (Sample.to_float x.Block.value) -. ave) data
2015-12-19 02:35:13 +01:00
in
2022-01-11 13:41:13 +01:00
let f i =
let denom =
2019-07-14 18:50:44 +02:00
if (i > 1) then (float_of_int i) else 1.
2015-12-19 02:35:13 +01:00
in
2022-01-11 13:41:13 +01:00
let r =
2019-07-19 11:46:29 +02:00
Array.sub x_t 0 i
2019-07-17 19:10:22 +02:00
|> Array.fold_left (fun accu x ->
2022-01-11 13:41:13 +01:00
accu +. x *. x_t.(i)) 0.
2015-12-19 02:35:13 +01:00
in
r /. denom
in
2019-07-17 19:10:22 +02:00
Array.init (Array.length data) f
2015-12-19 02:35:13 +01:00
|> Array.to_list
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Computes the first 4 centered cumulants (zero mean) *)
let centered_cumulants { property ; data } =
let ave =
average { property ; data }
|> Average.to_float
in
let centered_data =
2022-01-11 13:41:13 +01:00
List.rev_map (fun x ->
( (Weight.to_float x.Block.weight),
2015-12-19 02:35:13 +01:00
(Sample.to_float x.Block.value) -. ave )
2020-04-15 14:40:33 +02:00
) data
|> List.rev
2015-12-19 02:35:13 +01:00
in
2022-01-11 13:41:13 +01:00
let var =
let (num, denom) =
2019-07-19 11:46:29 +02:00
List.fold_left (fun (a2, ad) (w,x) ->
2015-12-19 02:35:13 +01:00
let x2 = x *. x
in
let var = w *. x2
and den = w
in (a2 +. var, ad +. den)
2019-07-19 11:46:29 +02:00
) (0., 0.) centered_data
2015-12-19 02:35:13 +01:00
in num /. denom
2022-01-11 13:41:13 +01:00
in
2015-12-19 02:35:13 +01:00
let centered_data =
let sigma_inv =
1. /. (sqrt var)
in
2022-01-11 13:41:13 +01:00
List.rev_map (fun x ->
( (Weight.to_float x.Block.weight),
2015-12-19 02:35:13 +01:00
( (Sample.to_float x.Block.value) -. ave ) *. sigma_inv )
2020-04-15 14:40:33 +02:00
) data
|> List.rev
2015-12-19 02:35:13 +01:00
in
2022-01-11 13:41:13 +01:00
let (cum3,cum4) =
2015-12-19 02:35:13 +01:00
let (cum3, cum4, denom) =
2019-07-19 11:46:29 +02:00
List.fold_left (fun (a3, a4, ad) (w,x) ->
2015-12-19 02:35:13 +01:00
let x2 = x *. x
in
let cum3 = w *. x2 *. x
and cum4 = w *. x2 *. x2
and den = w
in (a3 +. cum3, a4 +. cum4, ad +. den)
2019-07-19 11:46:29 +02:00
) (0., 0., 0.) centered_data
2022-01-11 13:41:13 +01:00
in
2015-12-19 02:35:13 +01:00
( cum3 /. denom, cum4 /. denom -. 3. )
2022-01-11 13:41:13 +01:00
in
2015-12-19 02:35:13 +01:00
[| ave ; var ; cum3 ; cum4 |]
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00
(** Computes a histogram *)
let histogram { property ; data } =
2022-01-11 13:41:13 +01:00
let min, max =
2015-12-19 02:35:13 +01:00
(min_block { property ; data }),
(max_block { property ; data })
in
2022-01-11 13:41:13 +01:00
let length =
2015-12-19 02:35:13 +01:00
max -. min
and n =
List.length data
2019-07-14 18:50:44 +02:00
|> float_of_int
2015-12-19 02:35:13 +01:00
|> sqrt
in
2022-01-11 13:41:13 +01:00
let delta_x =
2015-12-19 02:35:13 +01:00
length /. (n-.1.)
and result =
2022-01-11 13:41:13 +01:00
Array.init (int_of_float n + 1) (fun _ -> 0.)
2015-12-19 02:35:13 +01:00
in
2019-07-19 11:46:29 +02:00
List.iter (fun x ->
2015-12-19 02:35:13 +01:00
let w =
(Weight.to_float x.Block.weight)
2022-01-11 13:41:13 +01:00
and x =
2015-12-19 02:35:13 +01:00
(Sample.to_float x.Block.value)
in
let i =
(x -. min) /. delta_x +. 0.5
2019-07-14 18:50:44 +02:00
|> int_of_float
2015-12-19 02:35:13 +01:00
in
result.(i) <- result.(i) +. w
) data
;
2022-01-11 13:41:13 +01:00
let norm =
2015-12-19 02:35:13 +01:00
1. /. ( delta_x *. (
2019-07-19 11:46:29 +02:00
Array.fold_left (fun accu x -> accu +. x) 0. result
2015-12-19 02:35:13 +01:00
) )
in
2019-07-19 11:46:29 +02:00
Array.mapi (fun i x -> (min +. (float_of_int i)*.delta_x, x *. norm) ) result
2015-12-19 02:35:13 +01:00
|> Array.to_list
2016-02-19 11:20:34 +01:00
2015-12-19 02:35:13 +01:00