10
1
mirror of https://gitlab.com/scemama/QCaml.git synced 2024-06-14 09:15:19 +02:00
QCaml/Notebooks/SpVec.ipynb

32 KiB

None <html> <head> </head>
In [1]:
#use "topfind";;
#require "jupyter.notebook";;
#require "lacaml.top" ;;
- : unit = ()
Findlib has been successfully loaded. Additional directives:
  #require "package";;      to load a package
  #list;;                   to list the available packages
  #camlp4o;;                to load camlp4 (standard syntax)
  #camlp4r;;                to load camlp4 (revised syntax)
  #predicates "p,q,...";;   to set these predicates
  Topfind.reset();;         to force that packages will be reloaded
  #thread;;                 to enable threads

- : unit = ()
/home/scemama/qp2/external/opam/default/lib/bytes: added to search path
/home/scemama/qp2/external/opam/default/lib/base64: added to search path
/home/scemama/qp2/external/opam/default/lib/base64/base64.cma: loaded
/home/scemama/qp2/external/opam/default/lib/ocaml/compiler-libs: added to search path
/home/scemama/qp2/external/opam/default/lib/ocaml/compiler-libs/ocamlcommon.cma: loaded
/home/scemama/qp2/external/opam/default/lib/result: added to search path
/home/scemama/qp2/external/opam/default/lib/result/result.cma: loaded
/home/scemama/qp2/external/opam/default/lib/ppx_deriving/runtime: added to search path
/home/scemama/qp2/external/opam/default/lib/ppx_deriving/runtime/ppx_deriving_runtime.cma: loaded
/home/scemama/qp2/external/opam/default/lib/ppx_deriving_yojson/runtime: added to search path
/home/scemama/qp2/external/opam/default/lib/ppx_deriving_yojson/runtime/ppx_deriving_yojson_runtime.cma: loaded
/home/scemama/qp2/external/opam/default/lib/ocaml/unix.cma: loaded
/home/scemama/qp2/external/opam/default/lib/uuidm: added to search path
/home/scemama/qp2/external/opam/default/lib/uuidm/uuidm.cma: loaded
/home/scemama/qp2/external/opam/default/lib/easy-format: added to search path
/home/scemama/qp2/external/opam/default/lib/easy-format/easy_format.cma: loaded
/home/scemama/qp2/external/opam/default/lib/biniou: added to search path
/home/scemama/qp2/external/opam/default/lib/biniou/biniou.cma: loaded
/home/scemama/qp2/external/opam/default/lib/yojson: added to search path
/home/scemama/qp2/external/opam/default/lib/yojson/yojson.cma: loaded
/home/scemama/qp2/external/opam/default/lib/jupyter: added to search path
/home/scemama/qp2/external/opam/default/lib/jupyter/jupyter.cma: loaded
/home/scemama/qp2/external/opam/default/lib/jupyter/notebook: added to search path
/home/scemama/qp2/external/opam/default/lib/jupyter/notebook/jupyter_notebook.cma: loaded
/home/scemama/qp2/external/opam/default/lib/ocaml/compiler-libs/ocamlbytecomp.cma: loaded
/home/scemama/qp2/external/opam/default/lib/ocaml/compiler-libs/ocamltoplevel.cma: loaded
/home/scemama/qp2/external/opam/default/lib/ocaml/bigarray.cma: loaded
/home/scemama/qp2/external/opam/default/lib/lacaml: added to search path
/home/scemama/qp2/external/opam/default/lib/lacaml/lacaml.cma: loaded
/home/scemama/qp2/external/opam/default/lib/lacaml/top: added to search path
/home/scemama/qp2/external/opam/default/lib/lacaml/top/lacaml_top.cma: loaded

Sparse Vector module


A sparse vector is a structure made of:

  • The dimension of the vector space
  • The number of non-zeros
  • An array of indices
  • An array of values

The indices are stored in an int Bigarray and the values are stored in a Lacaml.Vec.t

Types

In [2]:
module L = Lacaml.D

type t =
  {
    dim: int ;
    nnz: int ;
    indices: (int, Bigarray.int_elt, Bigarray.fortran_layout) Bigarray.Array1.t ; (* Indices *)
    values: L.Vec.t
  }
Out[2]:
module L = Lacaml.D
Out[2]:
type t = {
  dim : int;
  nnz : int;
  indices :
    (int, Bigarray.int_elt, Bigarray.fortran_layout) Bigarray.Array1.t;
  values : L.Vec.t;
}

Printers

In [3]:
let pp ppf t =
  let pp_data ppf t =
    for i=1 to t.nnz do
      Format.fprintf ppf "@[(%d,@ %f)@]@;" t.indices.{i} t.values.{i}
    done
  in
  Format.fprintf ppf "@[{@[dim:@ %d@]@;@[%a@]}@]" t.dim pp_data t
Out[3]:
val pp : Format.formatter -> t -> unit = <fun>

Creators

In [4]:
let of_vec ?(threshold=0.) v =
  let dim = L.Vec.dim v in
  let buffer_idx = Bigarray.(Array1.create int     fortran_layout) dim in
  let buffer_val = Bigarray.(Array1.create float64 fortran_layout) dim in
  let check = 
    if threshold = 0. then
      fun x -> x <> 0.
    else
      fun x -> (abs_float x) > 0.
  in
  let rec aux k i =
    if i > dim then
      k-1
    else if check v.{i} then
      ( buffer_idx.{k} <- i ;
        buffer_val.{k} <- v.{i} ;
        aux (k+1) (i+1)
      )
    else
      aux k (i+1)
  in
  let nnz = aux 1 1 in
  let indices = Bigarray.(Array1.create int fortran_layout) nnz in
  let values  = L.Vec.create nnz in
  for i=1 to nnz do
    indices.{i} <- buffer_idx.{i};
    values.{i} <- buffer_val.{i};
  done ;
  { dim ; nnz ; indices ; values }
Out[4]:
val of_vec : ?threshold:float -> Lacaml.D.vec -> t = <fun>
In [5]:
let make0 dim =
  { dim ;
    nnz = 0;
    indices = Bigarray.(Array1.create int fortran_layout) 32 ;
    values = L.Vec.create 32;
  }
Out[5]:
val make0 : int -> t = <fun>
In [77]:
let of_vec ?(threshold=0.) v =
  let dim = L.Vec.dim v in
  let buffer_idx = Bigarray.(Array1.create int     fortran_layout) dim in
  let buffer_val = Bigarray.(Array1.create float64 fortran_layout) dim in
  let check = 
    if threshold = 0. then
      fun x -> x <> 0.
    else
      fun x -> (abs_float x) > 0.
  in
  let rec aux k i =
    if i > dim then
      k-1
    else if check v.{i} then
      ( buffer_idx.{k} <- i ;
        buffer_val.{k} <- v.{i} ;
        aux (k+1) (i+1)
      )
    else
      aux k (i+1)
  in
  let nnz = aux 1 1 in
  let indices = Bigarray.(Array1.create int fortran_layout) nnz in
  let values  = L.Vec.create nnz in
  for i=1 to nnz do
    indices.{i} <- buffer_idx.{i};
    values.{i} <- buffer_val.{i};
  done ;
  { dim ; nnz ; indices ; values }


let of_array ?(threshold=0.) a =
  L.Vec.of_array a
  |> of_vec ~threshold
Out[77]:
val of_vec : ?threshold:float -> Lacaml.D.vec -> t = <fun>
Out[77]:
val of_array : ?threshold:float -> float array -> t = <fun>
In [78]:
let copy t =
  let indices = 
    Bigarray.(Array1.create int fortran_layout) t.nnz
  in
  Bigarray.Array1.blit t.indices indices ;
  let values = L.copy t.values in
  { dim = t.dim ;
    nnz = t.nnz ;
    indices ; values }
Out[78]:
val copy : t -> t = <fun>

Test

In [79]:
let x = make0 10 

let dense_a = 
   of_array [| 1. ; -2. ; 0. ; 0. ; 0.5 ; 1.e-8 ; 0. ; 3. ; 0. |]
   
let sparse_a = of_vec dense_a 

let _ =
    copy sparse_a = sparse_a
    
let _ =
    copy sparse_a == sparse_a

let () = 
    Format.printf "@.@[%a@]@." pp sparse_a
Out[79]:
val x : t =
  {dim = 10; nnz = 0; indices = <abstr>;
   values =
             R1          R2 R3             R30         R31         R32
    6.9331E-310 6.9331E-310  0 ... 6.9331E-310 6.9331E-310 6.9331E-310}
Out[79]:
val dense_a : t =
  {dim = 9; nnz = 5; indices = <abstr>;
   values = R1 R2  R3    R4 R5
             1 -2 0.5 1E-08  3}
File "[79]", line 6, characters 22-29:
Error: This expression has type t but an expression was expected of type
         Lacaml.D.vec =
           (float, Bigarray.float64_elt, Bigarray.fortran_layout)
           Bigarray.Array1.t
   5:    
   6: let sparse_a = of_vec dense_a 
   7: 

aX + Y

Run along all the entries of X and Y simultaneously with indices k and l. m is the index of the new array.

if k<l, update using a*x[k].

if k>l, update using y[l].

if k=l, update using a*x[k] + y[l].

In [105]:
let axpy ?(threshold=0.) ?(alpha=1.) x y =

  if dim x <> dim y then                                                                      
    invalid_arg "Inconsistent dimensions";

  let check = (* Test if value should be added wrt threshold *)
    if threshold = 0. then
      fun x -> x <> 0.
    else
      fun x -> abs_float x > 0.
    in
    
  let f = (* if a=1 in ax+y, then do x+y. If a=0 then do y *)
    if alpha = 1. then
        fun x y -> x +. y
    else if alpha = 0. then
        fun _ y -> y
    else
        fun x y -> alpha *. x +. y
  in
  
  let dim = dim x in
  let nnz = x.nnz + y.nnz in
  let new_indices =  Bigarray.(Array1.create int fortran_layout) nnz in
  let new_values = L.Vec.create nnz in
  
  let rec aux k l m =
    match k <= x.nnz, l <= y.nnz with
    | true , true  -> (* Both arrays are running *)
      begin
        if x.indices.{k} < y.indices.{l} then (
          let w = f x.values.{k} 0. in
          if check w then (
              new_indices.{m} <- x.indices.{k};
              new_values.{m} <- w
          );
          (aux [@tailcall]) (k+1) l (m+1)
        )
        else if x.indices.{k} > y.indices.{l} then (
          let w = y.values.{l} in
          if check w then (
              new_indices.{m} <- y.indices.{l};
              new_values.{m} <- w
          );
          (aux [@tailcall]) k (l+1) (m+1)
        )
        else (
          let w = f x.values.{k} y.values.{l} in
          if check w then (
              new_indices.{m} <- x.indices.{k};
              new_values.{m} <- w
          );
          (aux [@tailcall]) (k+1) (l+1) (m+1)
        )
      end
    | false, true  -> (* Array x is done running *)
      begin
        let m = ref m in
        for i=l to y.nnz do
          let w = y.values.{i} in
          if check w then (
              new_indices.{!m} <- y.indices.{i};
              new_values.{!m}  <- w;
              incr m;
          )
        done; !m
      end
    | true, false  -> (* Array y is done running *)
      begin
        let m = ref m in
        for i=k to x.nnz do
          let w = alpha *. x.values.{i} in
          if check w then (
              new_indices.{!m} <- x.indices.{i};
              new_values.{!m}  <- w;
              incr m;
          )
        done; !m
      end
    | false, false -> (* Both arrays are done    *)
      m
  in
  let nnz = (aux 1 1 1) - 1 in
  { dim ; nnz ;
    indices = new_indices ; values = new_values }
Out[105]:
val axpy : ?threshold:float -> ?alpha:float -> t -> t -> t = <fun>

Test

In [108]:
let m_A = L.Vec.of_array [| 1. ; 2. ; 0. ; 0. ; 0.01 ; -2. ; 0. ; -1.e-3 ; 0. ; 0.|]
let m_B = L.Vec.of_array [| 0. ; 1. ; 2. ; 0. ; 0. ; 0.01 ; -2. ; 0. ; -1.e-3 ; 2. |]

let m_As = of_vec m_A
let m_Bs = of_vec m_B

let m_C = L.copy m_B ;;
L.axpy ~alpha:2. m_A m_C;;
m_C;;

let m_D = L.copy m_A;;
L.axpy ~alpha:2. m_B m_D;;
m_D;;

let m_Cs = axpy ~alpha:2. m_As m_Bs


let _ =  of_vec m_C = m_Cs;;

let m_Ds = axpy ~alpha:2. m_Bs m_As

let _ = of_vec m_D = m_Ds;;

L.Vec.iteri (fun i x -> Format.printf "%d %f %f\n%!" i x (get m_Cs i)) m_C;;
L.Vec.iteri (fun i x -> Format.printf "%d %f %f\n%!" i x (get m_Ds i)) m_D;;
Out[108]:
val m_A : Lacaml.D.vec = R1 R2 R3         R8 R9 R10
                          1  2  0 ... -0.001  0   0
Out[108]:
val m_B : Lacaml.D.vec = R1 R2 R3     R8     R9 R10
                          0  1  2 ...  0 -0.001   2
Out[108]:
val m_As : t =
  {dim = 10; nnz = 5; indices = <abstr>;
   values = R1 R2   R3 R4     R5
             1  2 0.01 -2 -0.001}
Out[108]:
val m_Bs : t =
  {dim = 10; nnz = 6; indices = <abstr>;
   values = R1 R2   R3 R4     R5 R6
             1  2 0.01 -2 -0.001  2}
Out[108]:
val m_C : L.vec = R1 R2 R3     R8     R9 R10
                   0  1  2 ...  0 -0.001   2
Out[108]:
- : unit = ()
Out[108]:
- : L.vec = R1 R2 R3         R8     R9 R10
             2  5  2 ... -0.002 -0.001   2
Out[108]:
val m_D : L.vec = R1 R2 R3         R8 R9 R10
                   1  2  0 ... -0.001  0   0
Out[108]:
- : unit = ()
Out[108]:
- : L.vec = R1 R2 R3         R8     R9 R10
             1  4  4 ... -0.001 -0.002   4
Out[108]:
val m_Cs : t =
  {dim = 10; nnz = 9; indices = <abstr>;
   values =
    R1 R2 R3     R9         R10          R11
     2  5  2 ...  2 1.04136E-71 8.95166E+271}
Out[108]:
- : bool = false
Out[108]:
val m_Ds : t =
  {dim = 10; nnz = 9; indices = <abstr>;
   values =
    R1 R2 R3     R9         R10         R11
     1  4  4 ...  4 6.82475E-38 8.26993E-72}
Out[108]:
- : bool = false
1 2.000000 2.000000
2 5.000000 5.000000
3 2.000000 2.000000
4 0.000000 0.000000
5 0.020000 0.020000
6 -3.990000 -3.990000
7 -2.000000 -2.000000
8 -0.002000 -0.002000
9 -0.001000 -0.001000
10 2.000000 2.000000
Out[108]:
- : unit = ()
1 1.000000 1.000000
2 4.000000 4.000000
3 4.000000 4.000000
4 0.000000 0.000000
5 0.010000 0.010000
6 -1.980000 -1.980000
7 -4.000000 -4.000000
8 -0.001000 -0.001000
9 -0.002000 -0.002000
10 4.000000 4.000000
Out[108]:
- : unit = ()

Accessors

In [24]:
let dim t = t.dim
let nnz t = t.nnz
let indices t = t.indices
let values  t = t.values
Out[24]:
val dim : t -> int = <fun>
Out[24]:
val nnz : t -> int = <fun>
Out[24]:
val indices :
  t -> (int, Bigarray.int_elt, Bigarray.fortran_layout) Bigarray.Array1.t =
  <fun>
Out[24]:
val values : t -> L.Vec.t = <fun>
In [25]:
let get t i =
  if i < 1 || i > dim t then invalid_arg "index out of bounds";
                                                                                              
  let rec binary_search index value low high =
    if high = low then
      if index.{low} = value then
        low
      else
        raise Not_found
    else let mid = (low + high) / 2 in
      if index.{mid} > value then
        binary_search index value low (mid - 1)
      else if index.{mid} < value then
        binary_search index value (mid + 1) high
      else
        mid
  in
  try
    let k = 
      let id = indices t in
      binary_search id i id.{1} (nnz t)
    in
    t.values.{k}
  with Not_found -> 0.
Out[25]:
val get : t -> int -> float = <fun>
In [26]:
let iter f t =
  for k=1 to nnz t do
    f t.indices.{k} t.values.{k}
  done
Out[26]:
val iter : (int -> float -> 'a) -> t -> unit = <fun>

Test

In [28]:
dense_a = L.Vec.init (dim sparse_a) (get sparse_a);;
iter (fun i v -> Printf.printf "%d %f\n%!" i v) sparse_a;;
Out[28]:
- : bool = true
1 1.000000
2 -2.000000
5 0.500000
6 0.000000
8 3.000000
Out[28]:
- : unit = ()

Converters

In [30]:
let to_assoc_list t = 
  let rec aux k accu =
    if k = 0 then
      accu
    else
      aux (k-1) ( (t.indices.{k}, t.values.{k})::accu )
  in
  aux (nnz t) []
  
  
let to_vec t =
  let result = L.Vec.make0 (dim t) in
  iter (fun k v -> result.{k} <- v) t;
  result
Out[30]:
val to_assoc_list : t -> (int * float) list = <fun>
Out[30]:
val to_vec : t -> Lacaml.D.vec = <fun>

Test

In [31]:
to_assoc_list sparse_a;;
to_vec sparse_a = dense_a;;
Out[31]:
- : (int * float) list = [(1, 1.); (2, -2.); (5, 0.5); (6, 1e-08); (8, 3.)]
Out[31]:
- : bool = true

Operations

One-vector operations

In [64]:
let immutable f t =
  let result = copy t in
  f result;
  result
  
let scale_mut x t = 
  L.scal x t.values 

let scale x = immutable @@ scale_mut x
  
let neg t =
  { t with values = L.Vec.neg t.values }
Out[64]:
val immutable : (t -> 'a) -> t -> t = <fun>
Out[64]:
val scale_mut : float -> t -> unit = <fun>
Out[64]:
val scale : float -> t -> t = <fun>
Out[64]:
val neg : t -> t = <fun>

Test

In [67]:
let sparse_b = copy sparse_a;;
scale_mut 0.5 sparse_b;;
let sparse_c = scale 0.5 sparse_a;;
Format.printf "%a@." pp sparse_a;;
Format.printf "%a@." pp sparse_b;;
Format.printf "%a@." pp sparse_c;;
Format.printf "%a@." pp (neg sparse_a);;
    
let _ =
    let n1 = neg sparse_a in
    neg n1 = sparse_a
Out[67]:
val sparse_b : t =
  {dim = 9; nnz = 5; indices = <abstr>;
   values = R1 R2  R3    R4 R5
             1 -2 0.5 1E-08  3}
Out[67]:
- : unit = ()
Out[67]:
val sparse_c : t =
  {dim = 9; nnz = 5; indices = <abstr>;
   values =  R1 R2   R3    R4  R5
            0.5 -1 0.25 5E-09 1.5}
{dim: 9
(1, 1.000000) (2, -2.000000) (5, 0.500000) (6, 0.000000) (8, 3.000000) }
Out[67]:
- : unit = ()
{dim: 9
(1, 0.500000) (2, -1.000000) (5, 0.250000) (6, 0.000000) (8, 1.500000) }
Out[67]:
- : unit = ()
{dim: 9
(1, 0.500000) (2, -1.000000) (5, 0.250000) (6, 0.000000) (8, 1.500000) }
Out[67]:
- : unit = ()
{dim: 9
(1, -1.000000) (2, 2.000000) (5, -0.500000) (6, -0.000000) (8, -3.000000) }
Out[67]:
- : unit = ()
Out[67]:
- : bool = true
In [ ]:

</html>