mirror of
https://gitlab.com/scemama/qmcchem.git
synced 2024-12-22 12:23:30 +01:00
751 lines
18 KiB
OCaml
751 lines
18 KiB
OCaml
open Core.Std
|
|
open Qptypes
|
|
|
|
type t =
|
|
{ property : Property.t ;
|
|
data : Block.t list;
|
|
}
|
|
|
|
|
|
module Average = struct
|
|
include Sample
|
|
end
|
|
|
|
module Error = struct
|
|
include Sample
|
|
end
|
|
|
|
module Variance = struct
|
|
include Sample
|
|
end
|
|
|
|
module Skewness: sig
|
|
type t
|
|
val to_float : t -> float
|
|
val of_float : float -> t
|
|
val to_string : t -> string
|
|
end = struct
|
|
type t = float
|
|
let to_string = Float.to_string
|
|
let to_float x = x
|
|
let of_float x = x
|
|
end
|
|
|
|
module Kurtosis: sig
|
|
type t
|
|
val to_float : t -> float
|
|
val of_float : float -> t
|
|
val to_string : t -> string
|
|
end = struct
|
|
type t = float
|
|
let to_string = Float.to_string
|
|
let to_float x = x
|
|
let of_float x = x
|
|
end
|
|
|
|
module GaussianDist: sig
|
|
type t
|
|
val create : mu:Average.t -> sigma2:Variance.t -> t
|
|
val eval : g:t -> x:float -> float
|
|
end = struct
|
|
type t = { mu: Average.t ; sigma2: Variance.t }
|
|
let create ~mu ~sigma2 =
|
|
{ mu ; sigma2 }
|
|
let eval ~g ~x =
|
|
let { mu ; sigma2 } =
|
|
g
|
|
in
|
|
let mu =
|
|
Average.to_float mu
|
|
and sigma2 =
|
|
Variance.to_float sigma2
|
|
in
|
|
let x2 =
|
|
(x -. mu) *. ( x -. mu) /. sigma2
|
|
in
|
|
let pi =
|
|
acos (-1.)
|
|
in
|
|
let c =
|
|
1. /. (sqrt (sigma2 *. (pi +. pi)))
|
|
in
|
|
c *. exp ( -0.5 *. x2)
|
|
|
|
end
|
|
|
|
|
|
|
|
(** Build from raw data. Range values are given in percent. *)
|
|
let of_raw_data ?(locked=true) ~range property =
|
|
let data =
|
|
Block.raw_data ~locked ()
|
|
|> List.filter ~f:(fun x -> x.Block.property = property)
|
|
in
|
|
|
|
let data_in_range rmin rmax =
|
|
|
|
let total_weight =
|
|
List.fold_left data ~init:0. ~f:(fun accu x ->
|
|
(Weight.to_float x.Block.weight) +. accu
|
|
)
|
|
in
|
|
|
|
let wmin, wmax =
|
|
rmin *. total_weight *. 0.01,
|
|
rmax *. total_weight *. 0.01
|
|
in
|
|
|
|
let (_, new_data) =
|
|
List.fold_left data ~init:(0.,[]) ~f:(fun (wsum, l) x ->
|
|
if (wsum > wmax) then
|
|
(wsum,l)
|
|
else
|
|
begin
|
|
let wsum_new =
|
|
wsum +. (Weight.to_float x.Block.weight)
|
|
in
|
|
if (wsum_new > wmin) then
|
|
(wsum_new, x::l)
|
|
else
|
|
(wsum_new, l)
|
|
end
|
|
)
|
|
in
|
|
List.rev new_data
|
|
in
|
|
|
|
let result =
|
|
match range with
|
|
| (0.,100.) -> { property ; data }
|
|
| (rmin,rmax) -> { property ; data=data_in_range rmin rmax }
|
|
in
|
|
result
|
|
|
|
|
|
|
|
(** Compute average *)
|
|
let average { property ; data } =
|
|
if Property.is_scalar property then
|
|
let (num,denom) =
|
|
List.fold ~init:(0., 0.) ~f:(fun (an, ad) x ->
|
|
let num =
|
|
(Weight.to_float x.Block.weight) *. (Sample.to_float x.Block.value)
|
|
and den =
|
|
(Weight.to_float x.Block.weight)
|
|
in (an +. num, ad +. den)
|
|
) data
|
|
in
|
|
num /. denom
|
|
|> Average.of_float
|
|
else
|
|
let dim =
|
|
match data with
|
|
| [] -> 1
|
|
| x :: tl -> Sample.dimension x.Block.value
|
|
in
|
|
let (num,denom) =
|
|
List.fold ~init:(Array.create ~len:dim 0. , 0.) ~f:(fun (an, ad) x ->
|
|
let num =
|
|
Array.map (Sample.to_float_array x.Block.value) ~f:(fun y ->
|
|
(Weight.to_float x.Block.weight) *. y)
|
|
and den = (Weight.to_float x.Block.weight)
|
|
in (
|
|
Array.mapi an ~f:(fun i y -> y +. num.(i)) ,
|
|
ad +. den)
|
|
) data
|
|
in
|
|
let denom_inv =
|
|
1. /. denom
|
|
in
|
|
Array.map num ~f:(fun x -> x *. denom_inv)
|
|
|> Average.of_float_array ~dim
|
|
|
|
|
|
|
|
(** Compute sum (for CPU/Wall time) *)
|
|
let sum { property ; data } =
|
|
List.fold data ~init:0. ~f:(fun accu x ->
|
|
let num = (Weight.to_float x.Block.weight) *. (Sample.to_float x.Block.value)
|
|
in accu +. num
|
|
)
|
|
|
|
|
|
|
|
(** Calculation of the average and error bar *)
|
|
let ave_error { property ; data } =
|
|
|
|
let rec loop ~sum ~avsq ~ansum ~avsum ~n ?idx = function
|
|
| [] ->
|
|
begin
|
|
if (n > 0.) then
|
|
( Average.of_float (sum /. ansum),
|
|
Some (Error.of_float (sqrt ( Float.abs ( avsq /.( ansum *. n)))) ))
|
|
else
|
|
( Average.of_float (sum /. ansum), None)
|
|
end
|
|
| (x,w) :: tail ->
|
|
begin
|
|
let avcu0 =
|
|
avsum /. ansum
|
|
in
|
|
let xw =
|
|
x *. w
|
|
in
|
|
let ansum, avsum, sum =
|
|
ansum +. w ,
|
|
avsum +. xw ,
|
|
sum +. xw
|
|
in
|
|
loop tail
|
|
~sum:sum
|
|
~avsq:(avsq +. (1. -. (w /. ansum)) *. (x -. avcu0)
|
|
*. (x -. avcu0) *. w)
|
|
~avsum:avsum
|
|
~ansum:ansum
|
|
~n:(n +. 1.)
|
|
end
|
|
in
|
|
|
|
let ave_error_scalar = function
|
|
| [] -> (Average.of_float 0., None)
|
|
| (x,w) :: tail ->
|
|
loop tail
|
|
~sum:(x *. w)
|
|
~avsq:0.
|
|
~ansum:w
|
|
~avsum:(x *. w)
|
|
~n:0.
|
|
in
|
|
|
|
if (Property.is_scalar property) then
|
|
List.map data ~f:(fun x ->
|
|
(Sample.to_float x.Block.value,
|
|
Weight.to_float x.Block.weight)
|
|
)
|
|
|> ave_error_scalar
|
|
else
|
|
match data with
|
|
| [] -> (Average.of_float 0., None)
|
|
| head::tail as list_of_samples ->
|
|
let dim =
|
|
head.Block.value
|
|
|> Sample.dimension
|
|
in
|
|
let result =
|
|
Array.init dim ~f:(fun idx ->
|
|
List.map list_of_samples ~f:(fun x ->
|
|
(Sample.to_float ~idx x.Block.value,
|
|
Weight.to_float x.Block.weight)
|
|
)
|
|
|> ave_error_scalar
|
|
)
|
|
in
|
|
( Array.map result ~f:(fun (x,_) -> Average.to_float x)
|
|
|> Average.of_float_array ~dim ,
|
|
if (Array.length result < 2) then
|
|
None
|
|
else
|
|
Some (Array.map result ~f:(function
|
|
| (_,Some y) -> Error.to_float y
|
|
| (_,None) -> 0.)
|
|
|> Average.of_float_array ~dim)
|
|
)
|
|
|
|
|
|
|
|
|
|
(** Fold function for block values *)
|
|
let fold_blocks ~f { property ; data } =
|
|
let init = match List.hd data with
|
|
| None -> 0.
|
|
| Some block -> Sample.to_float block.Block.value
|
|
in
|
|
List.fold_left data ~init:init ~f:(fun accu block ->
|
|
let x = Sample.to_float block.Block.value
|
|
in f accu x
|
|
)
|
|
|
|
|
|
|
|
(** Convergence plot *)
|
|
let convergence { property ; data } =
|
|
|
|
let rec loop ~sum ~avsq ~ansum ~avsum ~n ~accu = function
|
|
| [] -> List.rev accu
|
|
| head :: tail ->
|
|
begin
|
|
let x = Sample.to_float head.Block.value
|
|
and w = Weight.to_float head.Block.weight
|
|
and avcu0 = avsum /. ansum
|
|
in
|
|
let xw = x *. w
|
|
in
|
|
let ansum = ansum +. w
|
|
and avsum = avsum +. xw
|
|
and sum = sum +. xw
|
|
in
|
|
let accu =
|
|
if (n > 0.) then
|
|
(sum /. ansum, sqrt ( Float.abs ( avsq /.( ansum *. n))))::accu
|
|
else
|
|
(sum /. ansum, 0.)::accu
|
|
in
|
|
loop tail
|
|
~sum:sum
|
|
~avsq:(avsq +. (1. -. (w /. ansum)) *. (x -. avcu0)
|
|
*. (x -. avcu0) *. w)
|
|
~avsum:avsum
|
|
~ansum:ansum
|
|
~n:(n +. 1.)
|
|
~accu:accu
|
|
end
|
|
in
|
|
match data with
|
|
| [] -> []
|
|
| head :: tail ->
|
|
begin
|
|
let x = Sample.to_float head.Block.value
|
|
and w = Weight.to_float head.Block.weight
|
|
in
|
|
let s = x *. w in
|
|
loop tail
|
|
~sum:s
|
|
~avsq:0.
|
|
~ansum:w
|
|
~avsum:s
|
|
~n:0.
|
|
~accu:[ (s /. w, 0.) ]
|
|
end
|
|
|
|
|
|
let rev_convergence { property ; data } =
|
|
let p = { property=property ; data = List.rev data } in
|
|
convergence p
|
|
|> List.rev
|
|
|
|
|
|
|
|
(** Min and max of block *)
|
|
let min_block =
|
|
fold_blocks ~f:(fun accu x ->
|
|
if (x < accu) then x
|
|
else accu
|
|
)
|
|
|
|
|
|
let max_block =
|
|
fold_blocks ~f:(fun accu x ->
|
|
if (x > accu) then x
|
|
else accu
|
|
)
|
|
|
|
|
|
|
|
(** Create a hash table for merging *)
|
|
let create_hash ~hashable ~create_key ?(update_block_id=(fun x->x)) t =
|
|
let table = Hashtbl.create ~hashable:hashable ()
|
|
in
|
|
List.iter t.data ~f:(fun block ->
|
|
let key = create_key block
|
|
in
|
|
let open Block in
|
|
Hashtbl.change table key (function
|
|
| Some current ->
|
|
let wc, wb =
|
|
Weight.to_float current.weight,
|
|
Weight.to_float block.weight
|
|
in
|
|
let sw =
|
|
wc +. wb
|
|
in
|
|
if (Property.is_scalar current.property) then
|
|
let vc, vb =
|
|
Sample.to_float current.value,
|
|
Sample.to_float block.value
|
|
in Some
|
|
{ property = current.property ;
|
|
weight = Weight.of_float sw ;
|
|
value = Sample.of_float ((wc *. vc +. wb *. vb) /. sw);
|
|
block_id = update_block_id block.block_id;
|
|
pid = block.pid ;
|
|
compute_node = block.compute_node;
|
|
}
|
|
else
|
|
let vc, vb =
|
|
Sample.to_float_array current.value,
|
|
Sample.to_float_array block.value
|
|
and dim =
|
|
Sample.dimension current.value
|
|
in Some
|
|
{ property = current.property ;
|
|
weight = Weight.of_float sw ;
|
|
value =
|
|
Array.init dim ~f:(fun i -> ((wc *. vc.(i) +. wb *. vb.(i)) /. sw))
|
|
|> Sample.of_float_array ~dim ;
|
|
block_id = update_block_id block.block_id;
|
|
pid = block.pid ;
|
|
compute_node = block.compute_node;
|
|
}
|
|
| None -> Some
|
|
{ property = block.property ;
|
|
weight = block.weight;
|
|
value = block.value ;
|
|
block_id = update_block_id block.block_id;
|
|
pid = block.pid ;
|
|
compute_node = block.compute_node;
|
|
}
|
|
)
|
|
);
|
|
table
|
|
|
|
|
|
|
|
(** Genergic merge function *)
|
|
let merge ~hashable ~create_key ?update_block_id t =
|
|
let table = create_hash ~hashable:hashable ~create_key:create_key
|
|
?update_block_id:update_block_id t
|
|
in
|
|
{ property = t.property ;
|
|
data = Hashtbl.to_alist table
|
|
|> List.sort ~cmp:(fun x y ->
|
|
if (x>y) then 1
|
|
else if (x<y) then -1
|
|
else 0)
|
|
|> List.map ~f:(fun (x,y) -> y)
|
|
}
|
|
|
|
|
|
|
|
(** Merge per block id *)
|
|
let merge_per_block_id =
|
|
merge
|
|
~hashable:Int.hashable
|
|
~create_key:(fun block -> Block_id.to_int block.Block.block_id)
|
|
|
|
|
|
(** Merge per compute_node *)
|
|
let merge_per_compute_node =
|
|
merge
|
|
~hashable:String.hashable
|
|
~create_key:(fun block ->
|
|
Printf.sprintf "%s"
|
|
(Compute_node.to_string block.Block.compute_node) )
|
|
|
|
|
|
|
|
(** Merge per Compute_node and PID *)
|
|
let merge_per_compute_node_and_pid =
|
|
merge
|
|
~hashable:String.hashable
|
|
~create_key:(fun block ->
|
|
Printf.sprintf "%s %10.10d"
|
|
(Compute_node.to_string block.Block.compute_node)
|
|
(Pid.to_int block.Block.pid) )
|
|
|
|
|
|
|
|
(** Merge per Compute_node and BlockId *)
|
|
let merge_per_compute_node_and_block_id =
|
|
merge
|
|
~hashable:String.hashable
|
|
~create_key:(fun block ->
|
|
Printf.sprintf "%s %10.10d"
|
|
(Compute_node.to_string block.Block.compute_node)
|
|
(Block_id.to_int block.Block.block_id) )
|
|
|
|
|
|
|
|
|
|
(** Merge two consecutive blocks *)
|
|
let compress =
|
|
merge
|
|
~hashable:String.hashable
|
|
~create_key:(fun block ->
|
|
Printf.sprintf "%s %10.10d" (Compute_node.to_string block.Block.compute_node)
|
|
(((Block_id.to_int block.Block.block_id)+1)/2))
|
|
~update_block_id:(fun block_id ->
|
|
((Block_id.to_int block_id)+1)/2
|
|
|> Block_id.of_int )
|
|
|
|
|
|
|
|
|
|
(** Last value on each compute node (for wall_time) *)
|
|
let max_value_per_compute_node t =
|
|
let table = Hashtbl.create ~hashable:String.hashable ()
|
|
in
|
|
let create_key block =
|
|
Printf.sprintf "%s %10.10d"
|
|
(Compute_node.to_string block.Block.compute_node)
|
|
(Pid.to_int block.Block.pid)
|
|
in
|
|
List.iter t.data ~f:(fun block ->
|
|
let key = create_key block
|
|
in
|
|
let open Block in
|
|
Hashtbl.change table key (function
|
|
| Some current ->
|
|
let vc = Sample.to_float current.value
|
|
and vb = Sample.to_float block.value
|
|
in
|
|
if (vc > vb) then
|
|
Some current
|
|
else
|
|
Some block
|
|
| None -> Some block
|
|
)
|
|
);
|
|
{ property = t.property ;
|
|
data = Hashtbl.to_alist table
|
|
|> List.sort ~cmp:(fun x y ->
|
|
if (x>y) then 1
|
|
else if (x<y) then -1
|
|
else 0)
|
|
|> List.map ~f:(fun (x,y) -> y)
|
|
}
|
|
|
|
|
|
|
|
|
|
(** String representation *)
|
|
let to_string p =
|
|
match p.property with
|
|
| Property.Cpu -> Printf.sprintf "%s" (Time.Span.to_string (Time.Span.of_sec (sum p)))
|
|
| Property.Wall -> Printf.sprintf "%s" (Time.Span.to_string (Time.Span.of_sec (sum (max_value_per_compute_node p))))
|
|
| Property.Accep -> Printf.sprintf "%16.10f" (average p |> Average.to_float)
|
|
| _ ->
|
|
begin
|
|
if Property.is_scalar p.property then
|
|
match ave_error p with
|
|
| (ave, Some error) ->
|
|
let (ave, error) =
|
|
Average.to_float ave,
|
|
Error.to_float error
|
|
in
|
|
Printf.sprintf "%16.10f +/- %16.10f" ave error
|
|
| (ave, None) ->
|
|
let ave =
|
|
Average.to_float ave
|
|
in
|
|
Printf.sprintf "%16.10f" ave
|
|
else
|
|
match ave_error p with
|
|
| (ave, Some error) ->
|
|
let idxmax =
|
|
Average.dimension ave
|
|
in
|
|
let rec f accu idx =
|
|
if (idx < idxmax) then
|
|
let (ave, error) =
|
|
Average.to_float ~idx ave,
|
|
Error.to_float ~idx error
|
|
in
|
|
let s =
|
|
Printf.sprintf "%8d : %16.10f +/- %16.10f ;\n" (idx+1) ave error
|
|
in
|
|
f (accu ^ s) (idx+1)
|
|
else
|
|
accu
|
|
in
|
|
(f "[ \n" 0) ^ " ]"
|
|
| (ave, None) ->
|
|
Average.to_float ave
|
|
|> Printf.sprintf "%16.10f"
|
|
end
|
|
|
|
|
|
|
|
|
|
(** Compress block files : Merge all the blocks computed on the same host *)
|
|
let compress_files () =
|
|
|
|
Block._raw_data := None;
|
|
|
|
let properties =
|
|
Lazy.force Block.properties
|
|
in
|
|
|
|
(* Create temporary file *)
|
|
let dir_name =
|
|
Block.dir_name
|
|
in
|
|
|
|
let dir_name =
|
|
Lazy.force dir_name
|
|
in
|
|
let files =
|
|
Sys.ls_dir dir_name
|
|
|> List.filter ~f:(fun x ->
|
|
match String.substr_index ~pattern:"locked" x with
|
|
| Some x -> false
|
|
| None -> true
|
|
)
|
|
|> List.map ~f:(fun x -> dir_name^x)
|
|
in
|
|
|
|
let out_channel_dir =
|
|
Filename.temp_dir ~in_dir:(!Ezfio.ezfio_filename ^ "/blocks/") "qmc" ""
|
|
in
|
|
|
|
let out_channel_name =
|
|
let hostname =
|
|
Lazy.force Qmcchem_config.hostname
|
|
and suffix =
|
|
Unix.getpid ()
|
|
|> Pid.to_string
|
|
in
|
|
String.concat [ hostname ; "." ; suffix ]
|
|
in
|
|
|
|
let block_channel =
|
|
Out_channel.create (out_channel_dir ^ out_channel_name)
|
|
in
|
|
|
|
List.iter properties ~f:(fun p ->
|
|
let l =
|
|
match p with
|
|
| Property.Cpu
|
|
| Property.Accep ->
|
|
of_raw_data ~locked:false ~range:(0.,100.) p
|
|
|> merge_per_compute_node
|
|
| Property.Wall ->
|
|
of_raw_data ~locked:false ~range:(0.,100.) p
|
|
|> max_value_per_compute_node
|
|
| _ ->
|
|
of_raw_data ~locked:false ~range:(0.,100.) p
|
|
|> merge_per_compute_node_and_block_id
|
|
in
|
|
List.iter l.data ~f:(fun x ->
|
|
Out_channel.output_string block_channel (Block.to_string x);
|
|
Out_channel.output_char block_channel '\n';
|
|
);
|
|
);
|
|
Out_channel.close block_channel;
|
|
|
|
List.iter files ~f:Unix.remove ;
|
|
Unix.rename ~src:(out_channel_dir^out_channel_name) ~dst:(dir_name^out_channel_name);
|
|
Unix.rmdir out_channel_dir
|
|
|
|
|
|
|
|
(** Autocovariance function (not weighted) *)
|
|
let autocovariance { property ; data } =
|
|
let ave =
|
|
average { property ; data }
|
|
|> Average.to_float
|
|
and data =
|
|
match (merge_per_block_id { property ; data })
|
|
with { property ; data } -> Array.of_list data
|
|
in
|
|
let x_t =
|
|
Array.map ~f:(fun x -> (Sample.to_float x.Block.value) -. ave) data
|
|
in
|
|
let f i =
|
|
let denom =
|
|
if (i > 1) then (Float.of_int i) else 1.
|
|
in
|
|
let r =
|
|
Array.sub ~pos:0 ~len:i x_t
|
|
|> Array.fold ~init:0. ~f:(fun accu x ->
|
|
accu +. x *. x_t.(i))
|
|
in
|
|
r /. denom
|
|
in
|
|
Array.init ~f (Array.length data)
|
|
|> Array.to_list
|
|
|
|
|
|
|
|
(** Computes the first 4 centered cumulants (zero mean) *)
|
|
let centered_cumulants { property ; data } =
|
|
let ave =
|
|
average { property ; data }
|
|
|> Average.to_float
|
|
in
|
|
let centered_data =
|
|
List.map ~f:(fun x ->
|
|
( (Weight.to_float x.Block.weight),
|
|
(Sample.to_float x.Block.value) -. ave )
|
|
)
|
|
data
|
|
in
|
|
let var =
|
|
let (num, denom) =
|
|
List.fold ~init:(0., 0.) ~f:(fun (a2, ad) (w,x) ->
|
|
let x2 = x *. x
|
|
in
|
|
let var = w *. x2
|
|
and den = w
|
|
in (a2 +. var, ad +. den)
|
|
) centered_data
|
|
in num /. denom
|
|
in
|
|
let centered_data =
|
|
let sigma_inv =
|
|
1. /. (sqrt var)
|
|
in
|
|
List.map ~f:(fun x ->
|
|
( (Weight.to_float x.Block.weight),
|
|
( (Sample.to_float x.Block.value) -. ave ) *. sigma_inv )
|
|
)
|
|
data
|
|
in
|
|
let (cum3,cum4) =
|
|
let (cum3, cum4, denom) =
|
|
List.fold ~init:(0., 0., 0.) ~f:(fun (a3, a4, ad) (w,x) ->
|
|
let x2 = x *. x
|
|
in
|
|
let cum3 = w *. x2 *. x
|
|
and cum4 = w *. x2 *. x2
|
|
and den = w
|
|
in (a3 +. cum3, a4 +. cum4, ad +. den)
|
|
) centered_data
|
|
in
|
|
( cum3 /. denom, cum4 /. denom -. 3. )
|
|
in
|
|
[| ave ; var ; cum3 ; cum4 |]
|
|
|
|
|
|
|
|
|
|
(** Computes a histogram *)
|
|
let histogram { property ; data } =
|
|
let min, max =
|
|
(min_block { property ; data }),
|
|
(max_block { property ; data })
|
|
in
|
|
let length =
|
|
max -. min
|
|
and n =
|
|
List.length data
|
|
|> Float.of_int
|
|
|> sqrt
|
|
in
|
|
let delta_x =
|
|
length /. (n-.1.)
|
|
and result =
|
|
Array.init ~f:(fun _ -> 0.) (Int.of_float (n +. 1.))
|
|
in
|
|
List.iter ~f:(fun x ->
|
|
let w =
|
|
(Weight.to_float x.Block.weight)
|
|
and x =
|
|
(Sample.to_float x.Block.value)
|
|
in
|
|
let i =
|
|
(x -. min) /. delta_x +. 0.5
|
|
|> Float.to_int
|
|
in
|
|
result.(i) <- result.(i) +. w
|
|
) data
|
|
;
|
|
let norm =
|
|
1. /. ( delta_x *. (
|
|
Array.fold ~init:0. ~f:(fun accu x -> accu +. x) result
|
|
) )
|
|
in
|
|
Array.mapi ~f:(fun i x -> (min +. (Float.of_int i)*.delta_x, x *. norm) ) result
|
|
|> Array.to_list
|
|
|
|
|