mirror of
https://gitlab.com/scemama/QCaml.git
synced 2024-12-22 20:33:36 +01:00
Improved parallelism in F12
This commit is contained in:
parent
6e6ef8df2d
commit
daa408fc9c
3
CI/CI.ml
3
CI/CI.ml
@ -666,7 +666,8 @@ let make ?(n_states=1) ?(algo=`Direct) det_space =
|
|||||||
in
|
in
|
||||||
let matrix_prod psi =
|
let matrix_prod psi =
|
||||||
let result =
|
let result =
|
||||||
Matrix.mm ~transa:`T m_H psi
|
Matrix.parallel_mm ~transa:`T psi m_H
|
||||||
|
|> Matrix.transpose
|
||||||
in
|
in
|
||||||
Parallel.broadcast (lazy result)
|
Parallel.broadcast (lazy result)
|
||||||
in
|
in
|
||||||
|
18
CI/F12CI.ml
18
CI/F12CI.ml
@ -25,7 +25,7 @@ let eigensystem t = Lazy.force t.eigensystem
|
|||||||
let dressing_vector ~frozen_core hf12_integrals f12_amplitudes ci =
|
let dressing_vector ~frozen_core hf12_integrals f12_amplitudes ci =
|
||||||
|
|
||||||
if Parallel.master then
|
if Parallel.master then
|
||||||
Printf.printf "Building matrix\n%!";
|
Printf.printf "Building dressing\n%!";
|
||||||
let det_space =
|
let det_space =
|
||||||
ci.CI.det_space
|
ci.CI.det_space
|
||||||
in
|
in
|
||||||
@ -50,11 +50,20 @@ let dressing_vector ~frozen_core hf12_integrals f12_amplitudes ci =
|
|||||||
| 3 -> f_3 ki kj
|
| 3 -> f_3 ki kj
|
||||||
| _ -> assert false
|
| _ -> assert false
|
||||||
) det_space
|
) det_space
|
||||||
|
|
||||||
in
|
in
|
||||||
|
|
||||||
Matrix.mm (Lazy.force m_HF) (Matrix.dense_of_mat f12_amplitudes)
|
let m_HF =
|
||||||
|
Lazy.force m_HF
|
||||||
|
in
|
||||||
|
|
||||||
|
let result =
|
||||||
|
Matrix.parallel_mm m_HF (Matrix.dense_of_mat f12_amplitudes)
|
||||||
|
in
|
||||||
|
|
||||||
|
if Parallel.master then
|
||||||
|
Printf.printf "dressing done\n%!";
|
||||||
|
|
||||||
|
Parallel.broadcast (lazy result)
|
||||||
|
|
||||||
let sum l f = List.fold_left (fun accu i -> accu +. f i) 0. l
|
let sum l f = List.fold_left (fun accu i -> accu +. f i) 0. l
|
||||||
|
|
||||||
@ -134,7 +143,8 @@ Format.printf "%a@." Matrix.pp_matrix @@ Matrix.dense_of_mat psi;
|
|||||||
|
|
||||||
let matrix_prod c =
|
let matrix_prod c =
|
||||||
let w =
|
let w =
|
||||||
Matrix.mm ~transa:`T m_H c
|
Matrix.mm ~transa:`T c m_H
|
||||||
|
|> Matrix.transpose
|
||||||
|> Matrix.to_mat
|
|> Matrix.to_mat
|
||||||
in
|
in
|
||||||
let c = Matrix.to_mat c in
|
let c = Matrix.to_mat c in
|
||||||
|
@ -15,7 +15,7 @@ type t = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let sum l f = List.fold_left (fun accu i -> accu +. f i) 0. l
|
let sum l f = Array.fold_left (fun accu i -> accu +. f i) 0. l
|
||||||
|
|
||||||
let array_3_init d1 d2 d3 fx =
|
let array_3_init d1 d2 d3 fx =
|
||||||
let f k =
|
let f k =
|
||||||
@ -225,6 +225,7 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
|
|
||||||
let mos_cabs =
|
let mos_cabs =
|
||||||
Util.list_range (mo_num+1) aux_num
|
Util.list_range (mo_num+1) aux_num
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
|
|
||||||
let n_core =
|
let n_core =
|
||||||
@ -235,18 +236,21 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
|
|
||||||
let mos_in =
|
let mos_in =
|
||||||
Util.list_range (n_core+1) mo_num
|
Util.list_range (n_core+1) mo_num
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
|
|
||||||
let mos_a k =
|
let mos_a k =
|
||||||
Determinant.alfa k
|
Determinant.alfa k
|
||||||
|> Spindeterminant.to_list
|
|> Spindeterminant.to_list
|
||||||
|> List.filter (fun i -> i > n_core)
|
|> List.filter (fun i -> i > n_core)
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
|
|
||||||
let mos_b k =
|
let mos_b k =
|
||||||
Determinant.beta k
|
Determinant.beta k
|
||||||
|> Spindeterminant.to_list
|
|> Spindeterminant.to_list
|
||||||
|> List.filter (fun i -> i > n_core)
|
|> List.filter (fun i -> i > n_core)
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
|
|
||||||
let h_one =
|
let h_one =
|
||||||
@ -643,11 +647,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
|
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
|
||||||
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
|
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
|
||||||
Bitstring.to_list (Bitstring.logor i j)
|
Bitstring.to_list (Bitstring.logor i j)
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
let beta =
|
let beta =
|
||||||
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
|
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
|
||||||
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
|
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
|
||||||
Bitstring.to_list (Bitstring.logor i j)
|
Bitstring.to_list (Bitstring.logor i j)
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
match s with
|
match s with
|
||||||
| Spin.Alfa -> alfa, beta
|
| Spin.Alfa -> alfa, beta
|
||||||
@ -659,11 +665,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
|
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
|
||||||
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
|
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
|
||||||
Bitstring.to_list (Bitstring.logand i j)
|
Bitstring.to_list (Bitstring.logand i j)
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
let beta =
|
let beta =
|
||||||
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
|
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
|
||||||
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
|
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
|
||||||
Bitstring.to_list (Bitstring.logand i j)
|
Bitstring.to_list (Bitstring.logand i j)
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
match s with
|
match s with
|
||||||
| Spin.Alfa -> alfa, beta
|
| Spin.Alfa -> alfa, beta
|
||||||
@ -906,11 +914,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
|
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
|
||||||
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
|
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
|
||||||
Bitstring.to_list (Bitstring.logand i j)
|
Bitstring.to_list (Bitstring.logand i j)
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
let beta =
|
let beta =
|
||||||
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
|
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
|
||||||
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
|
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
|
||||||
Bitstring.to_list (Bitstring.logand i j)
|
Bitstring.to_list (Bitstring.logand i j)
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
match s with
|
match s with
|
||||||
| Spin.Alfa -> alfa, beta
|
| Spin.Alfa -> alfa, beta
|
||||||
@ -922,11 +932,13 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
|
let i = Spindeterminant.bitstring @@ Determinant.alfa ki in
|
||||||
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
|
let j = Spindeterminant.bitstring @@ Determinant.alfa kj in
|
||||||
Bitstring.to_list (Bitstring.logor i j)
|
Bitstring.to_list (Bitstring.logor i j)
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
let beta =
|
let beta =
|
||||||
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
|
let i = Spindeterminant.bitstring @@ Determinant.beta ki in
|
||||||
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
|
let j = Spindeterminant.bitstring @@ Determinant.beta kj in
|
||||||
Bitstring.to_list (Bitstring.logor i j)
|
Bitstring.to_list (Bitstring.logor i j)
|
||||||
|
|> Array.of_list
|
||||||
in
|
in
|
||||||
match s with
|
match s with
|
||||||
| Spin.Alfa -> alfa, beta
|
| Spin.Alfa -> alfa, beta
|
||||||
@ -961,6 +973,10 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
|
|
||||||
in
|
in
|
||||||
|
|
||||||
|
let ncabs = Array.length mos_cabs in
|
||||||
|
let tmp_h = Mat.make0 ncabs 9 in
|
||||||
|
let tmp_f = Mat.make0 ncabs 9 in
|
||||||
|
|
||||||
let f_3 ki kj =
|
let f_3 ki kj =
|
||||||
let i, j, m, k, l, n, s1, s2, s3, phase =
|
let i, j, m, k, l, n, s1, s2, s3, phase =
|
||||||
match Excitation.of_det ki kj with
|
match Excitation.of_det ki kj with
|
||||||
@ -977,6 +993,7 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
match s1, s2, s3 with
|
match s1, s2, s3 with
|
||||||
| Alfa, Alfa, Alfa
|
| Alfa, Alfa, Alfa
|
||||||
| Beta, Beta, Beta ->
|
| Beta, Beta, Beta ->
|
||||||
|
(*
|
||||||
sum mos_cabs (fun a -> h_two a k i j s1 s2 *. f_two a m n l s3 s3)
|
sum mos_cabs (fun a -> h_two a k i j s1 s2 *. f_two a m n l s3 s3)
|
||||||
+. sum mos_cabs (fun a -> h_two a n i j s1 s2 *. f_two a m l k s2 s3)
|
+. sum mos_cabs (fun a -> h_two a n i j s1 s2 *. f_two a m l k s2 s3)
|
||||||
-. sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m n k s3 s3)
|
-. sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m n k s3 s3)
|
||||||
@ -986,8 +1003,29 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
+. sum mos_cabs (fun a -> h_two a n j m s2 s3 *. f_two a i l k s2 s1)
|
+. sum mos_cabs (fun a -> h_two a n j m s2 s3 *. f_two a i l k s2 s1)
|
||||||
+. sum mos_cabs (fun a -> h_two a k j m s2 s3 *. f_two a i n l s3 s1)
|
+. sum mos_cabs (fun a -> h_two a k j m s2 s3 *. f_two a i n l s3 s1)
|
||||||
-. sum mos_cabs (fun a -> h_two a l j m s2 s3 *. f_two a i n k s3 s1)
|
-. sum mos_cabs (fun a -> h_two a l j m s2 s3 *. f_two a i n k s3 s1)
|
||||||
|
*)
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,7} <- f_two a i l k s2 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,5} <- f_two a j l k s2 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,2} <- f_two a m l k s2 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,9} <- f_two a i n k s3 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,4} <- f_two a j n k s3 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,3} <- f_two a m n k s3 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,8} <- f_two a i n l s3 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,6} <- f_two a j n l s3 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,1} <- f_two a m n l s3 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,3} <- if tmp_f.{idx+1,3} = 0. then 0. else -. h_two a l i j s1 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,1} <- if tmp_f.{idx+1,1} = 0. then 0. else h_two a k i j s1 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,2} <- if tmp_f.{idx+1,2} = 0. then 0. else h_two a n i j s1 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,5} <- if tmp_f.{idx+1,5} = 0. then 0. else -. h_two a n i m s1 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,6} <- if tmp_f.{idx+1,6} = 0. then 0. else -. h_two a k i m s1 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,4} <- if tmp_f.{idx+1,4} = 0. then 0. else h_two a l i m s1 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,9} <- if tmp_f.{idx+1,9} = 0. then 0. else -. h_two a l j m s2 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,7} <- if tmp_f.{idx+1,7} = 0. then 0. else h_two a n j m s2 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,8} <- if tmp_f.{idx+1,8} = 0. then 0. else h_two a k j m s2 s3) mos_cabs;
|
||||||
|
dot (Mat.as_vec tmp_h) (Mat.as_vec tmp_f)
|
||||||
| Alfa, Alfa, Beta
|
| Alfa, Alfa, Beta
|
||||||
| Beta, Beta, Alfa ->
|
| Beta, Beta, Alfa ->
|
||||||
|
(*
|
||||||
sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m k n s1 s3)
|
sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m k n s1 s3)
|
||||||
-. sum mos_cabs (fun a -> h_two a k i j s1 s2 *. f_two a m l n s1 s3)
|
-. sum mos_cabs (fun a -> h_two a k i j s1 s2 *. f_two a m l n s1 s3)
|
||||||
+. sum mos_cabs (fun a -> h_two a l m j s3 s2 *. f_two a i n k s3 s1)
|
+. sum mos_cabs (fun a -> h_two a l m j s3 s2 *. f_two a i n k s3 s1)
|
||||||
@ -996,8 +1034,27 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
-. sum mos_cabs (fun a -> h_two a l m i s3 s1 *. f_two a j n k s3 s2)
|
-. sum mos_cabs (fun a -> h_two a l m i s3 s1 *. f_two a j n k s3 s2)
|
||||||
+. sum mos_cabs (fun a -> h_two a n j m s2 s3 *. f_two a i l k s2 s1)
|
+. sum mos_cabs (fun a -> h_two a n j m s2 s3 *. f_two a i l k s2 s1)
|
||||||
-. sum mos_cabs (fun a -> h_two a n i m s1 s3 *. f_two a j l k s2 s2)
|
-. sum mos_cabs (fun a -> h_two a n i m s1 s3 *. f_two a j l k s2 s2)
|
||||||
|
*)
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,1} <- f_two a m k n s1 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,2} <- f_two a m l n s1 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,4} <- f_two a i n l s3 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,5} <- f_two a j n l s3 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,3} <- f_two a i n k s3 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,6} <- f_two a j n k s3 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,7} <- f_two a i l k s2 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,8} <- f_two a j l k s2 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,1} <- if tmp_f.{idx+1,1} = 0. then 0. else h_two a l i j s1 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,2} <- if tmp_f.{idx+1,2} = 0. then 0. else -. h_two a k i j s1 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,3} <- if tmp_f.{idx+1,3} = 0. then 0. else +. h_two a l m j s3 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,4} <- if tmp_f.{idx+1,4} = 0. then 0. else -. h_two a k m j s3 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,5} <- if tmp_f.{idx+1,5} = 0. then 0. else +. h_two a k m i s3 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,6} <- if tmp_f.{idx+1,6} = 0. then 0. else -. h_two a l m i s3 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,7} <- if tmp_f.{idx+1,7} = 0. then 0. else +. h_two a n j m s2 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,8} <- if tmp_f.{idx+1,8} = 0. then 0. else -. h_two a n i m s1 s3) mos_cabs;
|
||||||
|
dot ~n:(8*ncabs) (Mat.as_vec tmp_h) (Mat.as_vec tmp_f)
|
||||||
| Alfa, Beta, Beta
|
| Alfa, Beta, Beta
|
||||||
| Beta, Alfa, Alfa ->
|
| Beta, Alfa, Alfa ->
|
||||||
|
(*
|
||||||
sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m k n s1 s3)
|
sum mos_cabs (fun a -> h_two a l i j s1 s2 *. f_two a m k n s1 s3)
|
||||||
-. sum mos_cabs (fun a -> h_two a n i j s1 s2 *. f_two a m k l s1 s2)
|
-. sum mos_cabs (fun a -> h_two a n i j s1 s2 *. f_two a m k l s1 s2)
|
||||||
+. sum mos_cabs (fun a -> h_two a n i m s1 s3 *. f_two a j k l s1 s2)
|
+. sum mos_cabs (fun a -> h_two a n i m s1 s3 *. f_two a j k l s1 s2)
|
||||||
@ -1006,6 +1063,24 @@ let make ~frozen_core ~simulation ~mo_basis ~aux_basis_filename () =
|
|||||||
-. sum mos_cabs (fun a -> h_two a l j m s2 s3 *. f_two a i n k s3 s1)
|
-. sum mos_cabs (fun a -> h_two a l j m s2 s3 *. f_two a i n k s3 s1)
|
||||||
+. sum mos_cabs (fun a -> h_two a k m i s3 s1 *. f_two a j n l s3 s2)
|
+. sum mos_cabs (fun a -> h_two a k m i s3 s1 *. f_two a j n l s3 s2)
|
||||||
-. sum mos_cabs (fun a -> h_two a k j i s2 s1 *. f_two a m n l s3 s2)
|
-. sum mos_cabs (fun a -> h_two a k j i s2 s1 *. f_two a m n l s3 s2)
|
||||||
|
*)
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,6} <- f_two a i n k s3 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,4} <- f_two a i l k s2 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,5} <- f_two a j k n s1 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,1} <- f_two a m k n s1 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,2} <- f_two a m k l s1 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,3} <- f_two a j k l s1 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,7} <- f_two a j n l s3 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_f.{idx+1,8} <- f_two a m n l s3 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,1} <- if tmp_f.{idx+1,1} = 0. then 0. else h_two a l i j s1 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,2} <- if tmp_f.{idx+1,2} = 0. then 0. else -. h_two a n i j s1 s2) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,3} <- if tmp_f.{idx+1,3} = 0. then 0. else +. h_two a n i m s1 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,4} <- if tmp_f.{idx+1,4} = 0. then 0. else +. h_two a n j m s2 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,5} <- if tmp_f.{idx+1,5} = 0. then 0. else -. h_two a l i m s1 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,6} <- if tmp_f.{idx+1,6} = 0. then 0. else -. h_two a l j m s2 s3) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,7} <- if tmp_f.{idx+1,7} = 0. then 0. else +. h_two a k m i s3 s1) mos_cabs;
|
||||||
|
Array.iteri (fun idx a -> tmp_h.{idx+1,8} <- if tmp_f.{idx+1,8} = 0. then 0. else -. h_two a k j i s2 s1) mos_cabs;
|
||||||
|
dot ~n:(8*ncabs) (Mat.as_vec tmp_h) (Mat.as_vec tmp_f)
|
||||||
| Beta, Alfa, Beta
|
| Beta, Alfa, Beta
|
||||||
| Alfa, Beta, Alfa -> assert false
|
| Alfa, Beta, Alfa -> assert false
|
||||||
in
|
in
|
||||||
|
@ -364,7 +364,7 @@ let rec mm ?(transa=`N) ?(transb=`N) ?(threshold=epsilon) a b =
|
|||||||
if a <> 0. then
|
if a <> 0. then
|
||||||
begin
|
begin
|
||||||
for i = 1 to m' do
|
for i = 1 to m' do
|
||||||
Bigarray.Array1.unsafe_set v i (f' i (k+1));
|
v.{i} <- (f' i (k+1));
|
||||||
done;
|
done;
|
||||||
axpy ~alpha:a v accu
|
axpy ~alpha:a v accu
|
||||||
end
|
end
|
||||||
@ -595,31 +595,39 @@ let rec ax_eq_b ?(trans=`N) a b =
|
|||||||
|
|
||||||
(* ------- Parallel routines ---------- *)
|
(* ------- Parallel routines ---------- *)
|
||||||
|
|
||||||
let parallel_mm ?(transa=`N) ?(transb=`N) ?(threshold=epsilon) a b =
|
let rec parallel_mm ?(transa=`N) ?(transb=`N) ?(threshold=epsilon) a b =
|
||||||
|
|
||||||
let n =
|
let m =
|
||||||
match transa with
|
match transa with
|
||||||
| `N -> dim2 a
|
| `N -> dim2 a
|
||||||
| `T -> dim1 a
|
| `T -> dim1 a
|
||||||
in
|
in
|
||||||
let n = n / (Parallel.size * 7) in
|
|
||||||
let b =
|
let b =
|
||||||
match transb with
|
match transb with
|
||||||
| `T -> transpose b
|
| `T -> transpose b
|
||||||
| `N -> b
|
| `N -> b
|
||||||
in
|
in
|
||||||
split_cols n b
|
assert (m = dim1 b);
|
||||||
|> Stream.of_list
|
let n = 1 + (m / (Parallel.size * 7)) in
|
||||||
|> Farm.run ~ordered:true ~f:(fun b ->
|
let chunks =
|
||||||
match a, b with
|
split_cols n b
|
||||||
| Computed _, Computed _ ->
|
in
|
||||||
mm ~transa ~threshold a b
|
|
||||||
|> sparse_of_computed ~threshold
|
let result =
|
||||||
| _ ->
|
Stream.of_list chunks
|
||||||
mm ~transa ~threshold a b
|
|> Farm.run ~ordered:true ~f:(fun b ->
|
||||||
)
|
match a, b with
|
||||||
|> Util.stream_to_list
|
| Computed _, Computed _ ->
|
||||||
|> join_cols
|
mm ~transa ~threshold a b
|
||||||
|
|> sparse_of_computed ~threshold
|
||||||
|
| _ ->
|
||||||
|
mm ~transa ~threshold a b
|
||||||
|
)
|
||||||
|
|> Util.stream_to_list
|
||||||
|
|> join_cols
|
||||||
|
in
|
||||||
|
|
||||||
|
result
|
||||||
|
|
||||||
|
|
||||||
(* ------------ Printers ------------ *)
|
(* ------------ Printers ------------ *)
|
||||||
|
@ -220,6 +220,7 @@ let list_range first last =
|
|||||||
|
|
||||||
|
|
||||||
let list_pack n l =
|
let list_pack n l =
|
||||||
|
assert (n>=0);
|
||||||
let rec aux i accu1 accu2 = function
|
let rec aux i accu1 accu2 = function
|
||||||
| [] -> if accu1 = [] then
|
| [] -> if accu1 = [] then
|
||||||
List.rev accu2
|
List.rev accu2
|
||||||
|
Loading…
Reference in New Issue
Block a user