10
1
mirror of https://gitlab.com/scemama/QCaml.git synced 2024-12-22 12:23:31 +01:00

Improved parallelism in F12

This commit is contained in:
Anthony Scemama 2020-02-05 23:32:55 +01:00
parent 6e6ef8df2d
commit daa408fc9c
5 changed files with 117 additions and 22 deletions

View File

@ -666,7 +666,8 @@ let make ?(n_states=1) ?(algo=`Direct) det_space =
in in
let matrix_prod psi = let matrix_prod psi =
let result = let result =
Matrix.mm ~transa:`T m_H psi Matrix.parallel_mm ~transa:`T psi m_H
|> Matrix.transpose
in in
Parallel.broadcast (lazy result) Parallel.broadcast (lazy result)
in in

View File

@ -25,7 +25,7 @@ let eigensystem t = Lazy.force t.eigensystem
let dressing_vector ~frozen_core hf12_integrals f12_amplitudes ci = let dressing_vector ~frozen_core hf12_integrals f12_amplitudes ci =
if Parallel.master then if Parallel.master then
Printf.printf "Building matrix\n%!"; Printf.printf "Building dressing\n%!";
let det_space = let det_space =
ci.CI.det_space ci.CI.det_space
in in
@ -50,11 +50,20 @@ let dressing_vector ~frozen_core hf12_integrals f12_amplitudes ci =
| 3 -> f_3 ki kj | 3 -> f_3 ki kj
| _ -> assert false | _ -> assert false
) det_space ) det_space
in
let m_HF =
Lazy.force m_HF
in in
Matrix.mm (Lazy.force m_HF) (Matrix.dense_of_mat f12_amplitudes) let result =
Matrix.parallel_mm m_HF (Matrix.dense_of_mat f12_amplitudes)
in
if Parallel.master then
Printf.printf "dressing done\n%!";
Parallel.broadcast (lazy result)
let sum l f = List.fold_left (fun accu i -> accu +. f i) 0. l let sum l f = List.fold_left (fun accu i -> accu +. f i) 0. l
@ -134,7 +143,8 @@ Format.printf "%a@." Matrix.pp_matrix @@ Matrix.dense_of_mat psi;
let matrix_prod c = let matrix_prod c =
let w = let w =
Matrix.mm ~transa:`T m_H c Matrix.mm ~transa:`T c m_H
|> Matrix.transpose
|> Matrix.to_mat |> Matrix.to_mat
in in
let c = Matrix.to_mat c in let c = Matrix.to_mat c in

View File

@ -15,7 +15,7 @@ type t = {
} }
let sum l f = List.fold_left (fun accu i -> accu +. f i) 0. l let sum l f = Array.fold_left (fun accu i -> accu +. f i) 0. l
let array_3_init d1 d2 d3 fx = let array_3_init d1 d2 d3 fx =
let f k = let f k =
@ -225,6 +225,7 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let mos_cabs = let mos_cabs =
Util.list_range (mo_num+1) aux_num Util.list_range (mo_num+1) aux_num
|> Array.of_list
in in
let n_core = let n_core =
@ -235,18 +236,21 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let mos_in = let mos_in =
Util.list_range (n_core+1) mo_num Util.list_range (n_core+1) mo_num
|> Array.of_list
in in
let mos_a k = let mos_a k =
Determinant.alfa k Determinant.alfa k
|> Spindeterminant.to_list |> Spindeterminant.to_list
|> List.filter (fun i -> i > n_core) |> List.filter (fun i -> i > n_core)
|> Array.of_list
in in
let mos_b k = let mos_b k =
Determinant.beta k Determinant.beta k
|> Spindeterminant.to_list |> Spindeterminant.to_list
|> List.filter (fun i -> i > n_core) |> List.filter (fun i -> i > n_core)
|> Array.of_list
in in
let h_one = let h_one =
@ -643,11 +647,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
Bitstring.to_list (Bitstring.logor i j) Bitstring.to_list (Bitstring.logor i j)
|> Array.of_list
in in
let beta = let beta =
let i = Spindeterminant.bitstring @@ Determinant.beta ki in let i = Spindeterminant.bitstring @@ Determinant.beta ki in
let j = Spindeterminant.bitstring @@ Determinant.beta kj in let j = Spindeterminant.bitstring @@ Determinant.beta kj in
Bitstring.to_list (Bitstring.logor i j) Bitstring.to_list (Bitstring.logor i j)
|> Array.of_list
in in
match s with match s with
| Spin.Alfa -> alfa, beta | Spin.Alfa -> alfa, beta
@ -659,11 +665,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
Bitstring.to_list (Bitstring.logand i j) Bitstring.to_list (Bitstring.logand i j)
|> Array.of_list
in in
let beta = let beta =
let i = Spindeterminant.bitstring @@ Determinant.beta ki in let i = Spindeterminant.bitstring @@ Determinant.beta ki in
let j = Spindeterminant.bitstring @@ Determinant.beta kj in let j = Spindeterminant.bitstring @@ Determinant.beta kj in
Bitstring.to_list (Bitstring.logand i j) Bitstring.to_list (Bitstring.logand i j)
|> Array.of_list
in in
match s with match s with
| Spin.Alfa -> alfa, beta | Spin.Alfa -> alfa, beta
@ -906,11 +914,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
Bitstring.to_list (Bitstring.logand i j) Bitstring.to_list (Bitstring.logand i j)
|> Array.of_list
in in
let beta = let beta =
let i = Spindeterminant.bitstring @@ Determinant.beta ki in let i = Spindeterminant.bitstring @@ Determinant.beta ki in
let j = Spindeterminant.bitstring @@ Determinant.beta kj in let j = Spindeterminant.bitstring @@ Determinant.beta kj in
Bitstring.to_list (Bitstring.logand i j) Bitstring.to_list (Bitstring.logand i j)
|> Array.of_list
in in
match s with match s with
| Spin.Alfa -> alfa, beta | Spin.Alfa -> alfa, beta
@ -922,11 +932,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
Bitstring.to_list (Bitstring.logor i j) Bitstring.to_list (Bitstring.logor i j)
|> Array.of_list
in in
let beta = let beta =
let i = Spindeterminant.bitstring @@ Determinant.beta ki in let i = Spindeterminant.bitstring @@ Determinant.beta ki in
let j = Spindeterminant.bitstring @@ Determinant.beta kj in let j = Spindeterminant.bitstring @@ Determinant.beta kj in
Bitstring.to_list (Bitstring.logor i j) Bitstring.to_list (Bitstring.logor i j)
|> Array.of_list
in in
match s with match s with
| Spin.Alfa -> alfa, beta | Spin.Alfa -> alfa, beta
@ -961,6 +973,10 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
in in
let ncabs = Array.length mos_cabs in
let tmp_h = Mat.make0 ncabs 9 in
let tmp_f = Mat.make0 ncabs 9 in
let f_3 ki kj = let f_3 ki kj =
let i, j, m, k, l, n, s1, s2, s3, phase = let i, j, m, k, l, n, s1, s2, s3, phase =
match Excitation.of_det ki kj with match Excitation.of_det ki kj with
@ -977,6 +993,7 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
match s1, s2, s3 with match s1, s2, s3 with
| Alfa, Alfa, Alfa | Alfa, Alfa, Alfa
| Beta, Beta, Beta -> | Beta, Beta, Beta ->
(*
sum mos_cabs (fun a -> h_two a k i j s1 s2 *. f_two a m n l s3 s3) sum mos_cabs (fun a -> h_two a k i j s1 s2 *. f_two a m n l s3 s3)
+. sum mos_cabs (fun a -> h_two a n i j s1 s2 *. f_two a m l k s2 s3) +. sum mos_cabs (fun a -> h_two a n i j s1 s2 *. f_two a m l k s2 s3)
-. sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m n k s3 s3) -. sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m n k s3 s3)
@ -986,8 +1003,29 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
+. sum mos_cabs (fun a -> h_two a n j m s2 s3 *. f_two a i l k s2 s1) +. sum mos_cabs (fun a -> h_two a n j m s2 s3 *. f_two a i l k s2 s1)
+. sum mos_cabs (fun a -> h_two a k j m s2 s3 *. f_two a i n l s3 s1) +. sum mos_cabs (fun a -> h_two a k j m s2 s3 *. f_two a i n l s3 s1)
-. sum mos_cabs (fun a -> h_two a l j m s2 s3 *. f_two a i n k s3 s1) -. sum mos_cabs (fun a -> h_two a l j m s2 s3 *. f_two a i n k s3 s1)
*)
Array.iteri (fun idx a -> tmp_f.{idx+1,7} <- f_two a i l k s2 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,5} <- f_two a j l k s2 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,2} <- f_two a m l k s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,9} <- f_two a i n k s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,4} <- f_two a j n k s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,3} <- f_two a m n k s3 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,8} <- f_two a i n l s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,6} <- f_two a j n l s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,1} <- f_two a m n l s3 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,3} <- if tmp_f.{idx+1,3} = 0. then 0. else -. h_two a l i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,1} <- if tmp_f.{idx+1,1} = 0. then 0. else h_two a k i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,2} <- if tmp_f.{idx+1,2} = 0. then 0. else h_two a n i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,5} <- if tmp_f.{idx+1,5} = 0. then 0. else -. h_two a n i m s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,6} <- if tmp_f.{idx+1,6} = 0. then 0. else -. h_two a k i m s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,4} <- if tmp_f.{idx+1,4} = 0. then 0. else h_two a l i m s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,9} <- if tmp_f.{idx+1,9} = 0. then 0. else -. h_two a l j m s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,7} <- if tmp_f.{idx+1,7} = 0. then 0. else h_two a n j m s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,8} <- if tmp_f.{idx+1,8} = 0. then 0. else h_two a k j m s2 s3) mos_cabs;
dot (Mat.as_vec tmp_h) (Mat.as_vec tmp_f)
| Alfa, Alfa, Beta | Alfa, Alfa, Beta
| Beta, Beta, Alfa -> | Beta, Beta, Alfa ->
(*
sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m k n s1 s3) sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m k n s1 s3)
-. sum mos_cabs (fun a -> h_two a k i j s1 s2 *. f_two a m l n s1 s3) -. sum mos_cabs (fun a -> h_two a k i j s1 s2 *. f_two a m l n s1 s3)
+. sum mos_cabs (fun a -> h_two a l m j s3 s2 *. f_two a i n k s3 s1) +. sum mos_cabs (fun a -> h_two a l m j s3 s2 *. f_two a i n k s3 s1)
@ -996,8 +1034,27 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
-. sum mos_cabs (fun a -> h_two a l m i s3 s1 *. f_two a j n k s3 s2) -. sum mos_cabs (fun a -> h_two a l m i s3 s1 *. f_two a j n k s3 s2)
+. sum mos_cabs (fun a -> h_two a n j m s2 s3 *. f_two a i l k s2 s1) +. sum mos_cabs (fun a -> h_two a n j m s2 s3 *. f_two a i l k s2 s1)
-. sum mos_cabs (fun a -> h_two a n i m s1 s3 *. f_two a j l k s2 s2) -. sum mos_cabs (fun a -> h_two a n i m s1 s3 *. f_two a j l k s2 s2)
*)
Array.iteri (fun idx a -> tmp_f.{idx+1,1} <- f_two a m k n s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,2} <- f_two a m l n s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,4} <- f_two a i n l s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,5} <- f_two a j n l s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,3} <- f_two a i n k s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,6} <- f_two a j n k s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,7} <- f_two a i l k s2 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,8} <- f_two a j l k s2 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,1} <- if tmp_f.{idx+1,1} = 0. then 0. else h_two a l i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,2} <- if tmp_f.{idx+1,2} = 0. then 0. else -. h_two a k i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,3} <- if tmp_f.{idx+1,3} = 0. then 0. else +. h_two a l m j s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,4} <- if tmp_f.{idx+1,4} = 0. then 0. else -. h_two a k m j s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,5} <- if tmp_f.{idx+1,5} = 0. then 0. else +. h_two a k m i s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,6} <- if tmp_f.{idx+1,6} = 0. then 0. else -. h_two a l m i s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,7} <- if tmp_f.{idx+1,7} = 0. then 0. else +. h_two a n j m s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,8} <- if tmp_f.{idx+1,8} = 0. then 0. else -. h_two a n i m s1 s3) mos_cabs;
dot ~n:(8*ncabs) (Mat.as_vec tmp_h) (Mat.as_vec tmp_f)
| Alfa, Beta, Beta | Alfa, Beta, Beta
| Beta, Alfa, Alfa -> | Beta, Alfa, Alfa ->
(*
sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m k n s1 s3) sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m k n s1 s3)
-. sum mos_cabs (fun a -> h_two a n i j s1 s2 *. f_two a m k l s1 s2) -. sum mos_cabs (fun a -> h_two a n i j s1 s2 *. f_two a m k l s1 s2)
+. sum mos_cabs (fun a -> h_two a n i m s1 s3 *. f_two a j k l s1 s2) +. sum mos_cabs (fun a -> h_two a n i m s1 s3 *. f_two a j k l s1 s2)
@ -1006,6 +1063,24 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
-. sum mos_cabs (fun a -> h_two a l j m s2 s3 *. f_two a i n k s3 s1) -. sum mos_cabs (fun a -> h_two a l j m s2 s3 *. f_two a i n k s3 s1)
+. sum mos_cabs (fun a -> h_two a k m i s3 s1 *. f_two a j n l s3 s2) +. sum mos_cabs (fun a -> h_two a k m i s3 s1 *. f_two a j n l s3 s2)
-. sum mos_cabs (fun a -> h_two a k j i s2 s1 *. f_two a m n l s3 s2) -. sum mos_cabs (fun a -> h_two a k j i s2 s1 *. f_two a m n l s3 s2)
*)
Array.iteri (fun idx a -> tmp_f.{idx+1,6} <- f_two a i n k s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,4} <- f_two a i l k s2 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,5} <- f_two a j k n s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,1} <- f_two a m k n s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,2} <- f_two a m k l s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,3} <- f_two a j k l s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,7} <- f_two a j n l s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_f.{idx+1,8} <- f_two a m n l s3 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,1} <- if tmp_f.{idx+1,1} = 0. then 0. else h_two a l i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,2} <- if tmp_f.{idx+1,2} = 0. then 0. else -. h_two a n i j s1 s2) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,3} <- if tmp_f.{idx+1,3} = 0. then 0. else +. h_two a n i m s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,4} <- if tmp_f.{idx+1,4} = 0. then 0. else +. h_two a n j m s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,5} <- if tmp_f.{idx+1,5} = 0. then 0. else -. h_two a l i m s1 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,6} <- if tmp_f.{idx+1,6} = 0. then 0. else -. h_two a l j m s2 s3) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,7} <- if tmp_f.{idx+1,7} = 0. then 0. else +. h_two a k m i s3 s1) mos_cabs;
Array.iteri (fun idx a -> tmp_h.{idx+1,8} <- if tmp_f.{idx+1,8} = 0. then 0. else -. h_two a k j i s2 s1) mos_cabs;
dot ~n:(8*ncabs) (Mat.as_vec tmp_h) (Mat.as_vec tmp_f)
| Beta, Alfa, Beta | Beta, Alfa, Beta
| Alfa, Beta, Alfa -> assert false | Alfa, Beta, Alfa -> assert false
in in

View File

@ -364,7 +364,7 @@ let rec mm ?(transa=`N) ?(transb=`N) ?(threshold=epsilon) a b =
if a <> 0. then if a <> 0. then
begin begin
for i = 1 to m' do for i = 1 to m' do
Bigarray.Array1.unsafe_set v i (f' i (k+1)); v.{i} <- (f' i (k+1));
done; done;
axpy ~alpha:a v accu axpy ~alpha:a v accu
end end
@ -595,31 +595,39 @@ let rec ax_eq_b ?(trans=`N) a b =
(* ------- Parallel routines ---------- *) (* ------- Parallel routines ---------- *)
let parallel_mm ?(transa=`N) ?(transb=`N) ?(threshold=epsilon) a b = let rec parallel_mm ?(transa=`N) ?(transb=`N) ?(threshold=epsilon) a b =
let n = let m =
match transa with match transa with
| `N -> dim2 a | `N -> dim2 a
| `T -> dim1 a | `T -> dim1 a
in in
let n = n / (Parallel.size * 7) in
let b = let b =
match transb with match transb with
| `T -> transpose b | `T -> transpose b
| `N -> b | `N -> b
in in
split_cols n b assert (m = dim1 b);
|> Stream.of_list let n = 1 + (m / (Parallel.size * 7)) in
|> Farm.run ~ordered:true ~f:(fun b -> let chunks =
match a, b with split_cols n b
| Computed _, Computed _ -> in
mm ~transa ~threshold a b
|> sparse_of_computed ~threshold let result =
| _ -> Stream.of_list chunks
mm ~transa ~threshold a b |> Farm.run ~ordered:true ~f:(fun b ->
) match a, b with
|> Util.stream_to_list | Computed _, Computed _ ->
|> join_cols mm ~transa ~threshold a b
|> sparse_of_computed ~threshold
| _ ->
mm ~transa ~threshold a b
)
|> Util.stream_to_list
|> join_cols
in
result
(* ------------ Printers ------------ *) (* ------------ Printers ------------ *)

View File

@ -220,6 +220,7 @@ let list_range first last =
let list_pack n l = let list_pack n l =
assert (n>=0);
let rec aux i accu1 accu2 = function let rec aux i accu1 accu2 = function
| [] -> if accu1 = [] then | [] -> if accu1 = [] then
List.rev accu2 List.rev accu2