mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 12:23:56 +01:00
Added tile
This commit is contained in:
parent
aa8a1fd3b1
commit
249c145d0c
496
src/qmckl_tile.org
Normal file
496
src/qmckl_tile.org
Normal file
@ -0,0 +1,496 @@
|
||||
#+TITLE: Tiled arrays
|
||||
#+SETUPFILE: ../docs/theme.setup
|
||||
|
||||
To increase performance, matrices may be stored as tiled
|
||||
arrays. Instead of storing a matrix in a two-dimensional array, it may
|
||||
be stored as a two dimensional array of small matrices (a rank 4
|
||||
tensor). This improves the locality of the data in matrix
|
||||
multiplications, and also enables the possibility to use BLAS3
|
||||
while also exploiting part of the sparse structure of the matrices.
|
||||
|
||||
Tile
|
||||
│ ┌──────┬──────┬──────┐
|
||||
│ │1 4 7 │ │ │
|
||||
└───────►│2 5 8 │ T_12 │ T_13 │
|
||||
│3 6 9 │ │ │
|
||||
├──────┼──────┼──────┤
|
||||
│ │ │ │
|
||||
│ T_21 │ T_22 │ T_23 │
|
||||
│ │ │ │
|
||||
├──────┼──────┼──────┤
|
||||
│ │ │ │
|
||||
│ T_31 │ T_32 │ T_33 │
|
||||
│ │ │ │
|
||||
└──────┴──────┴──────┘
|
||||
|
||||
In this file, tiled matrice will be produced for the following
|
||||
types:
|
||||
|
||||
#+NAME: types
|
||||
| float |
|
||||
| double |
|
||||
|
||||
* Headers :noexport:
|
||||
|
||||
#+NAME: filename
|
||||
#+begin_src elisp :tangle no
|
||||
(file-name-nondirectory (substring buffer-file-name 0 -4))
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle (eval h_private_type)
|
||||
#ifndef QMCKL_TILE_HPT
|
||||
#define QMCKL_TILE_HPT
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle (eval c_test) :noweb yes
|
||||
#include "qmckl.h"
|
||||
#include "munit.h"
|
||||
MunitResult test_<<filename()>>() {
|
||||
qmckl_context context;
|
||||
context = qmckl_context_create();
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle (eval c)
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "qmckl_context_type.h"
|
||||
#include "qmckl_error_type.h"
|
||||
#include "qmckl_memory_private_type.h"
|
||||
#include "qmckl_tile_private_type.h"
|
||||
|
||||
#include "qmckl_context_func.h"
|
||||
#include "qmckl_error_func.h"
|
||||
#include "qmckl_memory_private_func.h"
|
||||
#include "qmckl_tile_private_func.h"
|
||||
#+end_src
|
||||
|
||||
* Data structures
|
||||
|
||||
** Tile
|
||||
|
||||
A tile is a small matrix of fixed size. The dimensions of the
|
||||
tiles is fixed at compile-time to increase performance. It is
|
||||
defined as $2^s$:
|
||||
|
||||
| s | tile size |
|
||||
|----+-----------|
|
||||
| 2 | 4 |
|
||||
| 3 | 8 |
|
||||
| 4 | 16 |
|
||||
| 55 | 32 |
|
||||
| 6 | 64 |
|
||||
| 7 | 128 |
|
||||
|
||||
|
||||
#+begin_src c :tangle (eval h_private_type)
|
||||
#define TILE_SIZE_SHIFT 3
|
||||
#define TILE_SIZE 8
|
||||
#+end_src
|
||||
|
||||
|
||||
#+NAME: tile_hpt
|
||||
#+begin_src c
|
||||
typedef struct $T$_tile_struct {
|
||||
$T$ element[TILE_SIZE][TILE_SIZE];
|
||||
int32_t is_null;
|
||||
int32_t padding;
|
||||
} $T$_tile_struct;
|
||||
#+end_src
|
||||
|
||||
** Tiled matrix
|
||||
|
||||
A tiled matrix is a two-dimensional array of tiles.
|
||||
|
||||
#+NAME: matrix_hpt
|
||||
#+begin_src c
|
||||
typedef struct $T$_tiled_matrix {
|
||||
$T$_tile_struct** tile;
|
||||
size_t rows;
|
||||
size_t cols;
|
||||
} $T$_tiled_matrix;
|
||||
#+end_src
|
||||
|
||||
When a tiled matrix is initialized, it is set to zero.
|
||||
|
||||
#+NAME: init_hpf
|
||||
#+begin_src c
|
||||
qmckl_exit_code $T$_tiled_matrix_init (qmckl_context context,
|
||||
$T$_tiled_matrix* m,
|
||||
size_t rows,
|
||||
size_t cols);
|
||||
#+end_src
|
||||
|
||||
#+NAME: init_c
|
||||
#+begin_src c
|
||||
qmckl_exit_code $T$_tiled_matrix_init (qmckl_context context,
|
||||
$T$_tiled_matrix* m,
|
||||
size_t rows,
|
||||
size_t cols) {
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_INVALID_CONTEXT;
|
||||
}
|
||||
|
||||
if (m == NULL) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_INVALID_ARG_2,
|
||||
"$T$_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (rows == (size_t) 0) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_INVALID_ARG_3,
|
||||
"$T$_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (cols == (size_t) 0) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_INVALID_ARG_4,
|
||||
"$T$_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
qmckl_memory_info_struct info = qmckl_memory_info_struct_zero;
|
||||
size_t n = rows * cols;
|
||||
|
||||
/* Check overflow */
|
||||
if (n/cols != rows
|
||||
|| n > SIZE_MAX / sizeof($T$_tile_struct) ) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_ALLOCATION_FAILED,
|
||||
"$T$_tiled_matrix_init",
|
||||
"rows * cols overflows" );
|
||||
}
|
||||
|
||||
/* Allocate array of column pointers */
|
||||
info.size = cols * sizeof($T$_tile_struct*) ;
|
||||
m->tile = ($T$_tile_struct**) qmckl_malloc(context, info);
|
||||
|
||||
if (m->tile == NULL) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_ALLOCATION_FAILED,
|
||||
"$T$_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
||||
/* Allocate array of tiles */
|
||||
info.size = n * sizeof($T$_tile_struct) ;
|
||||
m->tile[0] = ($T$_tile_struct*) qmckl_malloc(context, info);
|
||||
|
||||
if (m->tile[0] == NULL) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_ALLOCATION_FAILED,
|
||||
"$T$_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
/* Compute array of pointers to the 1st element of columns */
|
||||
for (size_t i=1 ; i<cols ; ++i) {
|
||||
m->tile[i] = m->tile[i-1] + rows;
|
||||
}
|
||||
|
||||
m->rows = rows;
|
||||
m->cols = cols;
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
#+end_src
|
||||
|
||||
* Write templates
|
||||
|
||||
#+begin_src python :noweb yes :results drawer :var types=types :exports results
|
||||
def generate(f, text):
|
||||
result = [ f"#+begin_src c :tangle (eval {f})" ]
|
||||
for t in types:
|
||||
t=t[0]
|
||||
result += [ text.replace("$T$",t), "" ]
|
||||
|
||||
result += [ "#+end_src" ]
|
||||
return '\n'.join(result)
|
||||
|
||||
return '\n'.join( [ ""
|
||||
|
||||
, generate("h_private_type", """
|
||||
<<tile_hpt>>
|
||||
|
||||
<<matrix_hpt>>
|
||||
""")
|
||||
|
||||
, ""
|
||||
|
||||
, generate("h_private_func", """
|
||||
<<init_hpf>>
|
||||
""")
|
||||
|
||||
, ""
|
||||
|
||||
, generate("c", """
|
||||
<<init_c>>
|
||||
""")
|
||||
|
||||
] )
|
||||
#+end_src
|
||||
|
||||
#+RESULTS:
|
||||
:results:
|
||||
|
||||
#+begin_src c :tangle (eval h_private_type)
|
||||
|
||||
typedef struct float_tile_struct {
|
||||
float element[TILE_SIZE][TILE_SIZE];
|
||||
int32_t is_null;
|
||||
int32_t padding;
|
||||
} float_tile_struct;
|
||||
|
||||
typedef struct float_tiled_matrix {
|
||||
float_tile_struct** tile;
|
||||
size_t rows;
|
||||
size_t cols;
|
||||
} float_tiled_matrix;
|
||||
|
||||
|
||||
|
||||
typedef struct double_tile_struct {
|
||||
double element[TILE_SIZE][TILE_SIZE];
|
||||
int32_t is_null;
|
||||
int32_t padding;
|
||||
} double_tile_struct;
|
||||
|
||||
typedef struct double_tiled_matrix {
|
||||
double_tile_struct** tile;
|
||||
size_t rows;
|
||||
size_t cols;
|
||||
} double_tiled_matrix;
|
||||
|
||||
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle (eval h_private_func)
|
||||
|
||||
qmckl_exit_code float_tiled_matrix_init (qmckl_context context,
|
||||
float_tiled_matrix* m,
|
||||
size_t rows,
|
||||
size_t cols);
|
||||
|
||||
|
||||
|
||||
qmckl_exit_code double_tiled_matrix_init (qmckl_context context,
|
||||
double_tiled_matrix* m,
|
||||
size_t rows,
|
||||
size_t cols);
|
||||
|
||||
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle (eval c)
|
||||
|
||||
qmckl_exit_code float_tiled_matrix_init (qmckl_context context,
|
||||
float_tiled_matrix* m,
|
||||
size_t rows,
|
||||
size_t cols) {
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_INVALID_CONTEXT;
|
||||
}
|
||||
|
||||
if (m == NULL) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_INVALID_ARG_2,
|
||||
"float_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (rows == (size_t) 0) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_INVALID_ARG_3,
|
||||
"float_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (cols == (size_t) 0) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_INVALID_ARG_4,
|
||||
"float_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
qmckl_memory_info_struct info = qmckl_memory_info_struct_zero;
|
||||
size_t n = rows * cols;
|
||||
|
||||
/* Check overflow */
|
||||
if (n/cols != rows
|
||||
|| n > SIZE_MAX / sizeof(float_tile_struct) ) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_ALLOCATION_FAILED,
|
||||
"float_tiled_matrix_init",
|
||||
"rows * cols overflows" );
|
||||
}
|
||||
|
||||
/* Allocate array of column pointers */
|
||||
info.size = cols * sizeof(float_tile_struct*) ;
|
||||
m->tile = (float_tile_struct**) qmckl_malloc(context, info);
|
||||
|
||||
if (m->tile == NULL) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_ALLOCATION_FAILED,
|
||||
"float_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
||||
/* Allocate array of tiles */
|
||||
info.size = n * sizeof(float_tile_struct) ;
|
||||
m->tile[0] = (float_tile_struct*) qmckl_malloc(context, info);
|
||||
|
||||
if (m->tile[0] == NULL) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_ALLOCATION_FAILED,
|
||||
"float_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
/* Compute array of pointers to the 1st element of columns */
|
||||
for (size_t i=1 ; i<cols ; ++i) {
|
||||
m->tile[i] = m->tile[i-1] + rows;
|
||||
}
|
||||
|
||||
m->rows = rows;
|
||||
m->cols = cols;
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
qmckl_exit_code double_tiled_matrix_init (qmckl_context context,
|
||||
double_tiled_matrix* m,
|
||||
size_t rows,
|
||||
size_t cols) {
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_INVALID_CONTEXT;
|
||||
}
|
||||
|
||||
if (m == NULL) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_INVALID_ARG_2,
|
||||
"double_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (rows == (size_t) 0) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_INVALID_ARG_3,
|
||||
"double_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (cols == (size_t) 0) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_INVALID_ARG_4,
|
||||
"double_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
qmckl_memory_info_struct info = qmckl_memory_info_struct_zero;
|
||||
size_t n = rows * cols;
|
||||
|
||||
/* Check overflow */
|
||||
if (n/cols != rows
|
||||
|| n > SIZE_MAX / sizeof(double_tile_struct) ) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_ALLOCATION_FAILED,
|
||||
"double_tiled_matrix_init",
|
||||
"rows * cols overflows" );
|
||||
}
|
||||
|
||||
/* Allocate array of column pointers */
|
||||
info.size = cols * sizeof(double_tile_struct*) ;
|
||||
m->tile = (double_tile_struct**) qmckl_malloc(context, info);
|
||||
|
||||
if (m->tile == NULL) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_ALLOCATION_FAILED,
|
||||
"double_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
||||
/* Allocate array of tiles */
|
||||
info.size = n * sizeof(double_tile_struct) ;
|
||||
m->tile[0] = (double_tile_struct*) qmckl_malloc(context, info);
|
||||
|
||||
if (m->tile[0] == NULL) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_ALLOCATION_FAILED,
|
||||
"double_tiled_matrix_init",
|
||||
NULL);
|
||||
}
|
||||
|
||||
/* Compute array of pointers to the 1st element of columns */
|
||||
for (size_t i=1 ; i<cols ; ++i) {
|
||||
m->tile[i] = m->tile[i-1] + rows;
|
||||
}
|
||||
|
||||
m->rows = rows;
|
||||
m->cols = cols;
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#+end_src
|
||||
:end:
|
||||
|
||||
* End of files :noexport:
|
||||
|
||||
#+begin_src c :tangle (eval h_private_type)
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
*** Test
|
||||
#+begin_src c :tangle (eval c_test)
|
||||
if (qmckl_context_destroy(context) != QMCKL_SUCCESS)
|
||||
return QMCKL_FAILURE;
|
||||
return MUNIT_OK;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
**✸ Compute file names
|
||||
#+begin_src emacs-lisp
|
||||
; The following is required to compute the file names
|
||||
|
||||
(setq pwd (file-name-directory buffer-file-name))
|
||||
(setq name (file-name-nondirectory (substring buffer-file-name 0 -4)))
|
||||
(setq f (concat pwd name "_f.f90"))
|
||||
(setq fh (concat pwd name "_fh.f90"))
|
||||
(setq c (concat pwd name ".c"))
|
||||
(setq h (concat name ".h"))
|
||||
(setq h_private (concat name "_private.h"))
|
||||
(setq c_test (concat pwd "test_" name ".c"))
|
||||
(setq f_test (concat pwd "test_" name "_f.f90"))
|
||||
|
||||
; Minted
|
||||
(require 'ox-latex)
|
||||
(setq org-latex-listings 'minted)
|
||||
(add-to-list 'org-latex-packages-alist '("" "listings"))
|
||||
(add-to-list 'org-latex-packages-alist '("" "color"))
|
||||
|
||||
#+end_src
|
||||
|
||||
#+RESULTS:
|
||||
| | color |
|
||||
| | listings |
|
||||
|
||||
|
||||
# -*- mode: org -*-
|
||||
# vim: syntax=c
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user