From 249c145d0c6c525175e0ea5f67a50446a8ea3980 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 1 Apr 2021 10:59:36 +0200 Subject: [PATCH] Added tile --- src/qmckl_tile.org | 496 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 496 insertions(+) create mode 100644 src/qmckl_tile.org diff --git a/src/qmckl_tile.org b/src/qmckl_tile.org new file mode 100644 index 0000000..189944b --- /dev/null +++ b/src/qmckl_tile.org @@ -0,0 +1,496 @@ +#+TITLE: Tiled arrays +#+SETUPFILE: ../docs/theme.setup + +To increase performance, matrices may be stored as tiled +arrays. Instead of storing a matrix in a two-dimensional array, it may +be stored as a two dimensional array of small matrices (a rank 4 +tensor). This improves the locality of the data in matrix +multiplications, and also enables the possibility to use BLAS3 +while also exploiting part of the sparse structure of the matrices. + + Tile + │ ┌──────┬──────┬──────┐ + │ │1 4 7 │ │ │ + └───────►│2 5 8 │ T_12 │ T_13 │ + │3 6 9 │ │ │ + ├──────┼──────┼──────┤ + │ │ │ │ + │ T_21 │ T_22 │ T_23 │ + │ │ │ │ + ├──────┼──────┼──────┤ + │ │ │ │ + │ T_31 │ T_32 │ T_33 │ + │ │ │ │ + └──────┴──────┴──────┘ + + In this file, tiled matrice will be produced for the following + types: + + #+NAME: types + | float | + | double | + +* Headers :noexport: + + #+NAME: filename + #+begin_src elisp :tangle no +(file-name-nondirectory (substring buffer-file-name 0 -4)) + #+end_src + + #+begin_src c :tangle (eval h_private_type) +#ifndef QMCKL_TILE_HPT +#define QMCKL_TILE_HPT + #+end_src + + #+begin_src c :tangle (eval c_test) :noweb yes +#include "qmckl.h" +#include "munit.h" +MunitResult test_<>() { + qmckl_context context; + context = qmckl_context_create(); + #+end_src + + #+begin_src c :tangle (eval c) +#include +#include +#include +#include +#include + +#include "qmckl_context_type.h" +#include "qmckl_error_type.h" +#include "qmckl_memory_private_type.h" +#include "qmckl_tile_private_type.h" + +#include "qmckl_context_func.h" +#include "qmckl_error_func.h" +#include "qmckl_memory_private_func.h" +#include "qmckl_tile_private_func.h" + #+end_src + +* Data structures + +** Tile + + A tile is a small matrix of fixed size. The dimensions of the + tiles is fixed at compile-time to increase performance. It is + defined as $2^s$: + + | s | tile size | + |----+-----------| + | 2 | 4 | + | 3 | 8 | + | 4 | 16 | + | 55 | 32 | + | 6 | 64 | + | 7 | 128 | + + + #+begin_src c :tangle (eval h_private_type) +#define TILE_SIZE_SHIFT 3 +#define TILE_SIZE 8 + #+end_src + + + #+NAME: tile_hpt + #+begin_src c +typedef struct $T$_tile_struct { + $T$ element[TILE_SIZE][TILE_SIZE]; + int32_t is_null; + int32_t padding; +} $T$_tile_struct; + #+end_src + +** Tiled matrix + + A tiled matrix is a two-dimensional array of tiles. + + #+NAME: matrix_hpt + #+begin_src c +typedef struct $T$_tiled_matrix { + $T$_tile_struct** tile; + size_t rows; + size_t cols; +} $T$_tiled_matrix; + #+end_src + + When a tiled matrix is initialized, it is set to zero. + + #+NAME: init_hpf + #+begin_src c +qmckl_exit_code $T$_tiled_matrix_init (qmckl_context context, + $T$_tiled_matrix* m, + size_t rows, + size_t cols); + #+end_src + + #+NAME: init_c + #+begin_src c +qmckl_exit_code $T$_tiled_matrix_init (qmckl_context context, + $T$_tiled_matrix* m, + size_t rows, + size_t cols) { + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (m == NULL) { + return qmckl_failwith(context, + QMCKL_INVALID_ARG_2, + "$T$_tiled_matrix_init", + NULL); + } + + if (rows == (size_t) 0) { + return qmckl_failwith(context, + QMCKL_INVALID_ARG_3, + "$T$_tiled_matrix_init", + NULL); + } + + if (cols == (size_t) 0) { + return qmckl_failwith(context, + QMCKL_INVALID_ARG_4, + "$T$_tiled_matrix_init", + NULL); + } + + qmckl_memory_info_struct info = qmckl_memory_info_struct_zero; + size_t n = rows * cols; + + /* Check overflow */ + if (n/cols != rows + || n > SIZE_MAX / sizeof($T$_tile_struct) ) { + return qmckl_failwith(context, + QMCKL_ALLOCATION_FAILED, + "$T$_tiled_matrix_init", + "rows * cols overflows" ); + } + + /* Allocate array of column pointers */ + info.size = cols * sizeof($T$_tile_struct*) ; + m->tile = ($T$_tile_struct**) qmckl_malloc(context, info); + + if (m->tile == NULL) { + return qmckl_failwith(context, + QMCKL_ALLOCATION_FAILED, + "$T$_tiled_matrix_init", + NULL); + } + + + /* Allocate array of tiles */ + info.size = n * sizeof($T$_tile_struct) ; + m->tile[0] = ($T$_tile_struct*) qmckl_malloc(context, info); + + if (m->tile[0] == NULL) { + return qmckl_failwith(context, + QMCKL_ALLOCATION_FAILED, + "$T$_tiled_matrix_init", + NULL); + } + + /* Compute array of pointers to the 1st element of columns */ + for (size_t i=1 ; itile[i] = m->tile[i-1] + rows; + } + + m->rows = rows; + m->cols = cols; + return QMCKL_SUCCESS; +} + + #+end_src + +* Write templates + + #+begin_src python :noweb yes :results drawer :var types=types :exports results +def generate(f, text): + result = [ f"#+begin_src c :tangle (eval {f})" ] + for t in types: + t=t[0] + result += [ text.replace("$T$",t), "" ] + + result += [ "#+end_src" ] + return '\n'.join(result) + +return '\n'.join( [ "" + +, generate("h_private_type", """ +<> + +<> +""") + +, "" + +, generate("h_private_func", """ +<> +""") + +, "" + +, generate("c", """ +<> +""") + +] ) + #+end_src + + #+RESULTS: + :results: + + #+begin_src c :tangle (eval h_private_type) + + typedef struct float_tile_struct { + float element[TILE_SIZE][TILE_SIZE]; + int32_t is_null; + int32_t padding; + } float_tile_struct; + + typedef struct float_tiled_matrix { + float_tile_struct** tile; + size_t rows; + size_t cols; + } float_tiled_matrix; + + + + typedef struct double_tile_struct { + double element[TILE_SIZE][TILE_SIZE]; + int32_t is_null; + int32_t padding; + } double_tile_struct; + + typedef struct double_tiled_matrix { + double_tile_struct** tile; + size_t rows; + size_t cols; + } double_tiled_matrix; + + + #+end_src + + #+begin_src c :tangle (eval h_private_func) + + qmckl_exit_code float_tiled_matrix_init (qmckl_context context, + float_tiled_matrix* m, + size_t rows, + size_t cols); + + + + qmckl_exit_code double_tiled_matrix_init (qmckl_context context, + double_tiled_matrix* m, + size_t rows, + size_t cols); + + + #+end_src + + #+begin_src c :tangle (eval c) + + qmckl_exit_code float_tiled_matrix_init (qmckl_context context, + float_tiled_matrix* m, + size_t rows, + size_t cols) { + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (m == NULL) { + return qmckl_failwith(context, + QMCKL_INVALID_ARG_2, + "float_tiled_matrix_init", + NULL); + } + + if (rows == (size_t) 0) { + return qmckl_failwith(context, + QMCKL_INVALID_ARG_3, + "float_tiled_matrix_init", + NULL); + } + + if (cols == (size_t) 0) { + return qmckl_failwith(context, + QMCKL_INVALID_ARG_4, + "float_tiled_matrix_init", + NULL); + } + + qmckl_memory_info_struct info = qmckl_memory_info_struct_zero; + size_t n = rows * cols; + + /* Check overflow */ + if (n/cols != rows + || n > SIZE_MAX / sizeof(float_tile_struct) ) { + return qmckl_failwith(context, + QMCKL_ALLOCATION_FAILED, + "float_tiled_matrix_init", + "rows * cols overflows" ); + } + + /* Allocate array of column pointers */ + info.size = cols * sizeof(float_tile_struct*) ; + m->tile = (float_tile_struct**) qmckl_malloc(context, info); + + if (m->tile == NULL) { + return qmckl_failwith(context, + QMCKL_ALLOCATION_FAILED, + "float_tiled_matrix_init", + NULL); + } + + + /* Allocate array of tiles */ + info.size = n * sizeof(float_tile_struct) ; + m->tile[0] = (float_tile_struct*) qmckl_malloc(context, info); + + if (m->tile[0] == NULL) { + return qmckl_failwith(context, + QMCKL_ALLOCATION_FAILED, + "float_tiled_matrix_init", + NULL); + } + + /* Compute array of pointers to the 1st element of columns */ + for (size_t i=1 ; itile[i] = m->tile[i-1] + rows; + } + + m->rows = rows; + m->cols = cols; + return QMCKL_SUCCESS; + } + + + + + qmckl_exit_code double_tiled_matrix_init (qmckl_context context, + double_tiled_matrix* m, + size_t rows, + size_t cols) { + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (m == NULL) { + return qmckl_failwith(context, + QMCKL_INVALID_ARG_2, + "double_tiled_matrix_init", + NULL); + } + + if (rows == (size_t) 0) { + return qmckl_failwith(context, + QMCKL_INVALID_ARG_3, + "double_tiled_matrix_init", + NULL); + } + + if (cols == (size_t) 0) { + return qmckl_failwith(context, + QMCKL_INVALID_ARG_4, + "double_tiled_matrix_init", + NULL); + } + + qmckl_memory_info_struct info = qmckl_memory_info_struct_zero; + size_t n = rows * cols; + + /* Check overflow */ + if (n/cols != rows + || n > SIZE_MAX / sizeof(double_tile_struct) ) { + return qmckl_failwith(context, + QMCKL_ALLOCATION_FAILED, + "double_tiled_matrix_init", + "rows * cols overflows" ); + } + + /* Allocate array of column pointers */ + info.size = cols * sizeof(double_tile_struct*) ; + m->tile = (double_tile_struct**) qmckl_malloc(context, info); + + if (m->tile == NULL) { + return qmckl_failwith(context, + QMCKL_ALLOCATION_FAILED, + "double_tiled_matrix_init", + NULL); + } + + + /* Allocate array of tiles */ + info.size = n * sizeof(double_tile_struct) ; + m->tile[0] = (double_tile_struct*) qmckl_malloc(context, info); + + if (m->tile[0] == NULL) { + return qmckl_failwith(context, + QMCKL_ALLOCATION_FAILED, + "double_tiled_matrix_init", + NULL); + } + + /* Compute array of pointers to the 1st element of columns */ + for (size_t i=1 ; itile[i] = m->tile[i-1] + rows; + } + + m->rows = rows; + m->cols = cols; + return QMCKL_SUCCESS; + } + + + + #+end_src + :end: + +* End of files :noexport: + + #+begin_src c :tangle (eval h_private_type) +#endif + #+end_src + +*** Test + #+begin_src c :tangle (eval c_test) + if (qmckl_context_destroy(context) != QMCKL_SUCCESS) + return QMCKL_FAILURE; + return MUNIT_OK; +} + #+end_src + +**✸ Compute file names + #+begin_src emacs-lisp +; The following is required to compute the file names + +(setq pwd (file-name-directory buffer-file-name)) +(setq name (file-name-nondirectory (substring buffer-file-name 0 -4))) +(setq f (concat pwd name "_f.f90")) +(setq fh (concat pwd name "_fh.f90")) +(setq c (concat pwd name ".c")) +(setq h (concat name ".h")) +(setq h_private (concat name "_private.h")) +(setq c_test (concat pwd "test_" name ".c")) +(setq f_test (concat pwd "test_" name "_f.f90")) + +; Minted +(require 'ox-latex) +(setq org-latex-listings 'minted) +(add-to-list 'org-latex-packages-alist '("" "listings")) +(add-to-list 'org-latex-packages-alist '("" "color")) + + #+end_src + + #+RESULTS: + | | color | + | | listings | + + +# -*- mode: org -*- +# vim: syntax=c + +