Improved parallelism in F12

This commit is contained in:
Anthony Scemama 2020-02-05 23:32:55 +01:00
parent 6e6ef8df2d
commit daa408fc9c
5 changed files with 117 additions and 22 deletions

View File

@ -666,7 +666,8 @@ let make ?(n_states=1) ?(algo=`Direct) det_space =
in
let matrix_prod psi =
let result =
Matrix.mm ~transa:`T m_H psi
Matrix.parallel_mm ~transa:`T psi m_H
|> Matrix.transpose
in
Parallel.broadcast (lazy result)
in

View File

@ -25,7 +25,7 @@ let eigensystem t = Lazy.force t.eigensystem
let dressing_vector ~frozen_core hf12_integrals f12_amplitudes ci =
if Parallel.master then
Printf.printf "Building matrix\n%!";
Printf.printf "Building dressing\n%!";
let det_space =
ci.CI.det_space
in
@ -50,11 +50,20 @@ let dressing_vector ~frozen_core hf12_integrals f12_amplitudes ci =
| 3 -> f_3 ki kj
| _ -> assert false
) det_space
in
let m_HF =
Lazy.force m_HF
in
Matrix.mm (Lazy.force m_HF) (Matrix.dense_of_mat f12_amplitudes)
let result =
Matrix.parallel_mm m_HF (Matrix.dense_of_mat f12_amplitudes)
in
if Parallel.master then
Printf.printf "dressing done\n%!";
Parallel.broadcast (lazy result)
let sum l f = List.fold_left (fun accu i -> accu +. f i) 0. l
@ -134,7 +143,8 @@ Format.printf "%a@." Matrix.pp_matrix @@ Matrix.dense_of_mat psi;
let matrix_prod c =
let w =
Matrix.mm ~transa:`T m_H c
Matrix.mm ~transa:`T c m_H
|> Matrix.transpose
|> Matrix.to_mat
in
let c = Matrix.to_mat c in

View File

@ -15,7 +15,7 @@ type t = {
}
let sum l f = List.fold_left (fun accu i -> accu +. f i) 0. l
let sum l f = Array.fold_left (fun accu i -> accu +. f i) 0. l
let array_3_init d1 d2 d3 fx =
let f k =
@ -225,6 +225,7 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let mos_cabs =
Util.list_range (mo_num+1) aux_num
|> Array.of_list
in
let n_core =
@ -235,18 +236,21 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let mos_in =
Util.list_range (n_core+1) mo_num
|> Array.of_list
in
let mos_a k =
Determinant.alfa k
|> Spindeterminant.to_list
|> List.filter (fun i -> i > n_core)
|> Array.of_list
in
let mos_b k =
Determinant.beta k
|> Spindeterminant.to_list
|> List.filter (fun i -> i > n_core)
|> Array.of_list
in
let h_one =
@ -643,11 +647,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
Bitstring.to_list (Bitstring.logor i j)
|> Array.of_list
in
let beta =
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
Bitstring.to_list (Bitstring.logor i j)
|> Array.of_list
in
match s with
| Spin.Alfa -> alfa, beta
@ -659,11 +665,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
Bitstring.to_list (Bitstring.logand i j)
|> Array.of_list
in
let beta =
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
Bitstring.to_list (Bitstring.logand i j)
|> Array.of_list
in
match s with
| Spin.Alfa -> alfa, beta
@ -906,11 +914,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
Bitstring.to_list (Bitstring.logand i j)
|> Array.of_list
in
let beta =
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
Bitstring.to_list (Bitstring.logand i j)
|> Array.of_list
in
match s with
| Spin.Alfa -> alfa, beta
@ -922,11 +932,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
Bitstring.to_list (Bitstring.logor i j)
|> Array.of_list
in
let beta =
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
Bitstring.to_list (Bitstring.logor i j)
|> Array.of_list
in
match s with
| Spin.Alfa -> alfa, beta
@ -961,6 +973,10 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
in
let ncabs = Array.length mos_cabs in
let tmp_h = Mat.make0 ncabs 9 in
let tmp_f = Mat.make0 ncabs 9 in
let f_3 ki kj =
let i, j, m, k, l, n, s1, s2, s3, phase =
match Excitation.of_det ki kj with
@ -977,6 +993,7 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
match s1, s2, s3 with
| Alfa, Alfa, Alfa
| Beta, Beta, Beta ->
(*
sum mos_cabs (fun a -> h_two a k i j s1 s2 *. f_two a m n l s3 s3)
+. sum mos_cabs (fun a -> h_two a n i j s1 s2 *. f_two a m l k s2 s3)
-. sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m n k s3 s3)
@ -986,8 +1003,29 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
+. sum mos_cabs (fun a -> h_two a n j m s2 s3 *. f_two a i l k s2 s1)
+. sum mos_cabs (fun a -> h_two a k j m s2 s3 *. f_two a i n l s3 s1)
-. sum mos_cabs (fun a -> h_two a l j m s2 s3 *. f_two a i n k s3 s1)
*)
Array.iteri (fun idx a -> tmp_f.{idx+1,7} <- f_two a i l k s2 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,5} <- f_two a j l k s2 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,2} <- f_two a m l k s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,9} <- f_two a i n k s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,4} <- f_two a j n k s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,3} <- f_two a m n k s3 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,8} <- f_two a i n l s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,6} <- f_two a j n l s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,1} <- f_two a m n l s3 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,3} <- if tmp_f.{idx+1,3} = 0. then 0. else -. h_two a l i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,1} <- if tmp_f.{idx+1,1} = 0. then 0. else h_two a k i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,2} <- if tmp_f.{idx+1,2} = 0. then 0. else h_two a n i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,5} <- if tmp_f.{idx+1,5} = 0. then 0. else -. h_two a n i m s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,6} <- if tmp_f.{idx+1,6} = 0. then 0. else -. h_two a k i m s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,4} <- if tmp_f.{idx+1,4} = 0. then 0. else h_two a l i m s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,9} <- if tmp_f.{idx+1,9} = 0. then 0. else -. h_two a l j m s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,7} <- if tmp_f.{idx+1,7} = 0. then 0. else h_two a n j m s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,8} <- if tmp_f.{idx+1,8} = 0. then 0. else h_two a k j m s2 s3) mos_cabs;
dot (Mat.as_vec tmp_h) (Mat.as_vec tmp_f)
| Alfa, Alfa, Beta
| Beta, Beta, Alfa ->
(*
sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m k n s1 s3)
-. sum mos_cabs (fun a -> h_two a k i j s1 s2 *. f_two a m l n s1 s3)
+. sum mos_cabs (fun a -> h_two a l m j s3 s2 *. f_two a i n k s3 s1)
@ -996,8 +1034,27 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
-. sum mos_cabs (fun a -> h_two a l m i s3 s1 *. f_two a j n k s3 s2)
+. sum mos_cabs (fun a -> h_two a n j m s2 s3 *. f_two a i l k s2 s1)
-. sum mos_cabs (fun a -> h_two a n i m s1 s3 *. f_two a j l k s2 s2)
*)
Array.iteri (fun idx a -> tmp_f.{idx+1,1} <- f_two a m k n s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,2} <- f_two a m l n s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,4} <- f_two a i n l s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,5} <- f_two a j n l s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,3} <- f_two a i n k s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,6} <- f_two a j n k s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,7} <- f_two a i l k s2 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,8} <- f_two a j l k s2 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,1} <- if tmp_f.{idx+1,1} = 0. then 0. else h_two a l i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,2} <- if tmp_f.{idx+1,2} = 0. then 0. else -. h_two a k i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,3} <- if tmp_f.{idx+1,3} = 0. then 0. else +. h_two a l m j s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,4} <- if tmp_f.{idx+1,4} = 0. then 0. else -. h_two a k m j s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,5} <- if tmp_f.{idx+1,5} = 0. then 0. else +. h_two a k m i s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,6} <- if tmp_f.{idx+1,6} = 0. then 0. else -. h_two a l m i s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,7} <- if tmp_f.{idx+1,7} = 0. then 0. else +. h_two a n j m s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,8} <- if tmp_f.{idx+1,8} = 0. then 0. else -. h_two a n i m s1 s3) mos_cabs;
dot ~n:(8*ncabs) (Mat.as_vec tmp_h) (Mat.as_vec tmp_f)
| Alfa, Beta, Beta
| Beta, Alfa, Alfa ->
(*
sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m k n s1 s3)
-. sum mos_cabs (fun a -> h_two a n i j s1 s2 *. f_two a m k l s1 s2)
+. sum mos_cabs (fun a -> h_two a n i m s1 s3 *. f_two a j k l s1 s2)
@ -1006,6 +1063,24 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
-. sum mos_cabs (fun a -> h_two a l j m s2 s3 *. f_two a i n k s3 s1)
+. sum mos_cabs (fun a -> h_two a k m i s3 s1 *. f_two a j n l s3 s2)
-. sum mos_cabs (fun a -> h_two a k j i s2 s1 *. f_two a m n l s3 s2)
*)
Array.iteri (fun idx a -> tmp_f.{idx+1,6} <- f_two a i n k s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,4} <- f_two a i l k s2 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,5} <- f_two a j k n s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,1} <- f_two a m k n s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,2} <- f_two a m k l s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,3} <- f_two a j k l s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,7} <- f_two a j n l s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,8} <- f_two a m n l s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,1} <- if tmp_f.{idx+1,1} = 0. then 0. else h_two a l i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,2} <- if tmp_f.{idx+1,2} = 0. then 0. else -. h_two a n i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,3} <- if tmp_f.{idx+1,3} = 0. then 0. else +. h_two a n i m s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,4} <- if tmp_f.{idx+1,4} = 0. then 0. else +. h_two a n j m s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,5} <- if tmp_f.{idx+1,5} = 0. then 0. else -. h_two a l i m s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,6} <- if tmp_f.{idx+1,6} = 0. then 0. else -. h_two a l j m s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,7} <- if tmp_f.{idx+1,7} = 0. then 0. else +. h_two a k m i s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,8} <- if tmp_f.{idx+1,8} = 0. then 0. else -. h_two a k j i s2 s1) mos_cabs;
dot ~n:(8*ncabs) (Mat.as_vec tmp_h) (Mat.as_vec tmp_f)
| Beta, Alfa, Beta
| Alfa, Beta, Alfa -> assert false
in

View File

@ -364,7 +364,7 @@ let rec mm ?(transa=`N) ?(transb=`N) ?(threshold=epsilon) a b =
if a <> 0. then
begin
for i = 1 to m' do
Bigarray.Array1.unsafe_set v i (f' i (k+1));
v.{i} <- (f' i (k+1));
done;
axpy ~alpha:a v accu
end
@ -595,31 +595,39 @@ let rec ax_eq_b ?(trans=`N) a b =
(* ------- Parallel routines ---------- *)
let parallel_mm ?(transa=`N) ?(transb=`N) ?(threshold=epsilon) a b =
let rec parallel_mm ?(transa=`N) ?(transb=`N) ?(threshold=epsilon) a b =
let n =
let m =
match transa with
| `N -> dim2 a
| `T -> dim1 a
in
let n = n / (Parallel.size * 7) in
let b =
match transb with
| `T -> transpose b
| `N -> b
in
split_cols n b
|> Stream.of_list
|> Farm.run ~ordered:true ~f:(fun b ->
match a, b with
| Computed _, Computed _ ->
mm ~transa ~threshold a b
|> sparse_of_computed ~threshold
| _ ->
mm ~transa ~threshold a b
)
|> Util.stream_to_list
|> join_cols
assert (m = dim1 b);
let n = 1 + (m / (Parallel.size * 7)) in
let chunks =
split_cols n b
in
let result =
Stream.of_list chunks
|> Farm.run ~ordered:true ~f:(fun b ->
match a, b with
| Computed _, Computed _ ->
mm ~transa ~threshold a b
|> sparse_of_computed ~threshold
| _ ->
mm ~transa ~threshold a b
)
|> Util.stream_to_list
|> join_cols
in
result
(* ------------ Printers ------------ *)

View File

@ -220,6 +220,7 @@ let list_range first last =
let list_pack n l =
assert (n>=0);
let rec aux i accu1 accu2 = function
| [] -> if accu1 = [] then
List.rev accu2