mirror of
https://github.com/TREX-CoE/trexio.git
synced 2024-12-22 04:14:40 +01:00
Added notes for sparse data structures
This commit is contained in:
parent
87bb6fcb6e
commit
cd8332a3e5
131
Sparse.org
Normal file
131
Sparse.org
Normal file
@ -0,0 +1,131 @@
|
||||
|
||||
* JSON
|
||||
"ao_2e_int" : {
|
||||
"eri_num" : [ "int", [ ] ],
|
||||
"eri" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ]
|
||||
},
|
||||
|
||||
* Front end
|
||||
|
||||
For example, the integral $\langle ij | kl \rangle = x$ can be
|
||||
represented as
|
||||
- a quartet of integers $(i,j,k,l)$
|
||||
- a floating point value $x$
|
||||
|
||||
To store $N$ integrals in the file, we can store
|
||||
- An array of quartets of integers
|
||||
- An array of values (floats)
|
||||
|
||||
These two arrays have the same size, $N$.
|
||||
|
||||
As the number of integrals to store can be prohibitively large, it
|
||||
is important to be able to read/write the integrals in chunks. So we
|
||||
need to give two extra parameters to the functions:
|
||||
- ~offset~ : the index of the 1st integral we want to read. An
|
||||
offset of zero means that reading starts at the first integral
|
||||
- ~num~ : the number of integrals to read
|
||||
|
||||
We need to provide one function to read a chunk of indices, and one
|
||||
function to read a chunk of values, because some users might want to
|
||||
read only the values of the integrals, or only the indices.
|
||||
|
||||
Here is an example for the indices:
|
||||
|
||||
#+BEGIN_SRC c
|
||||
trexio_exit_code
|
||||
trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file,
|
||||
const int64_t offset,
|
||||
const int64_t num,
|
||||
int32_t* buffer)
|
||||
{
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if (offset < 0L) return TREXIO_INVALID_ARG_2;
|
||||
if (num < 0L) return TREXIO_INVALID_ARG_3;
|
||||
|
||||
const uint32_t rank = 4; // To be set by generator : number of indices
|
||||
|
||||
int64_t nmax; // Max number of integrals
|
||||
trexio_exit_code rc;
|
||||
|
||||
rc = trexio_read_ao_2e_int_eri_num(const file, &nmax);
|
||||
if (rc != TREXIO_SUCCESS) return rc;
|
||||
|
||||
switch (file->back_end) {
|
||||
|
||||
case TREXIO_TEXT:
|
||||
return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
|
||||
break;
|
||||
|
||||
case TREXIO_HDF5:
|
||||
return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
|
||||
break;
|
||||
/*
|
||||
case TREXIO_JSON:
|
||||
return trexio_json_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
|
||||
break;
|
||||
,*/
|
||||
default:
|
||||
return TREXIO_FAILURE; /* Impossible case */
|
||||
}
|
||||
}
|
||||
#+END_SRC
|
||||
|
||||
For the values,
|
||||
|
||||
#+BEGIN_SRC c
|
||||
trexio_exit_code
|
||||
trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file,
|
||||
const int64_t offset,
|
||||
const int64_t num,
|
||||
double* buffer)
|
||||
{
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if (offset < 0L) return TREXIO_INVALID_ARG_2;
|
||||
if (num < 0L) return TREXIO_INVALID_ARG_3;
|
||||
|
||||
int64_t nmax; // Max number of integrals
|
||||
trexio_exit_code rc;
|
||||
|
||||
rc = trexio_read_ao_2e_int_eri_num(const file, &nmax);
|
||||
if (rc != TREXIO_SUCCESS) return rc;
|
||||
|
||||
switch (file->back_end) {
|
||||
|
||||
case TREXIO_TEXT:
|
||||
return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);
|
||||
break;
|
||||
|
||||
case TREXIO_HDF5:
|
||||
return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax);
|
||||
break;
|
||||
/*
|
||||
case TREXIO_JSON:
|
||||
return trexio_json_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax);
|
||||
break;
|
||||
,*/
|
||||
default:
|
||||
return TREXIO_FAILURE; /* Impossible case */
|
||||
}
|
||||
}
|
||||
#+END_SRC
|
||||
|
||||
* Text back end
|
||||
As the size of the dataset should be extensible, the simplest
|
||||
solution is to use one file for each sparse data set, and store
|
||||
the name of this file in the group.
|
||||
Each integral can be a line in the file:
|
||||
i j k l x
|
||||
which can be written with the fixed-width format "%10ld %10ld %10ld %10ld %24.16e" (68 characters plus a newline, i.e. 69 bytes per line).
|
||||
The offset can be applied with ~fseek(f, 69L*offset, SEEK_SET)~, where ~f~ is the open file stream
|
||||
|
||||
* HDF5 Back end
|
||||
|
||||
We need to declare the number of rows of the dataset as
|
||||
~UNLIMITED~. This requires using ~Chunked~ storage, and the
|
||||
chunks should absolutely not be larger than 1MB.
|
||||
|
||||
To extend the storage, see :
|
||||
https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html
|
||||
(figure 17)
|
||||
|
||||
If the offset+num > nmax, we need to extend the dataset.
|
@ -1,4 +1,4 @@
|
||||
d+TITLE: Front end API
|
||||
#+TITLE: Front end API
|
||||
#+PROPERTY: comments org
|
||||
#+SETUPFILE: ../../docs/theme.setup
|
||||
# -*- mode: org -*-
|
||||
@ -106,6 +106,7 @@ typedef int32_t trexio_exit_code;
|
||||
#+begin_src c :tangle trexio_private.h
|
||||
#define TREXIO_MAX_FILENAME_LENGTH 4096
|
||||
#+end_src
|
||||
|
||||
* Front end
|
||||
|
||||
All calls to TREXIO are thread-safe.
|
||||
@ -694,69 +695,76 @@ end interface
|
||||
}
|
||||
#+end_src
|
||||
|
||||
~TREXIO~ is generated automatically by the ~generator.py~ Python script
|
||||
based on the tree-like configuration provided in the ~trex.json~ file.
|
||||
Because of that, generalized templates can be implemented and re-used.
|
||||
This approach minimizes the number of bugs as compared with manual copy-paste-modify scheme.
|
||||
~TREXIO~ is generated automatically by the ~generator.py~ Python
|
||||
script based on the tree-like configuration provided in the
|
||||
~trex.json~ file. Because of that, generalized templates can be
|
||||
implemented and re-used. This approach minimizes the number of bugs
|
||||
as compared with manual copy-paste-modify scheme.
|
||||
|
||||
All templates presented below use the ~$var$~ notation to indicate the variable,
|
||||
which will be replaced by the ~generator.py~. Sometimes the upper case is used, i.e.
|
||||
~$VAR$~ (for example, in ~#define~ statements).
|
||||
More detailed description of each variable can be found below:
|
||||
All templates presented below use the ~$var$~ notation to indicate
|
||||
the variable, which will be replaced by the
|
||||
~generator.py~. Sometimes the upper case is used, i.e. ~$VAR$~ (for
|
||||
example, in ~#define~ statements). More detailed description of
|
||||
each variable can be found below:
|
||||
|
||||
| Template variable | Description | Example |
|
||||
|-------------------------------+--------------------------------------------------+--------------------|
|
||||
| ~$group$~ | 'Name of the group' | ~nucleus~ |
|
||||
| ~$group_num$~ | 'Name of the dimensioning variable (scalar)' | ~nucleus_num~ |
|
||||
| ~$group_dset$~ | 'Name of the dataset (vector/matrix/tensor)' | ~nucleus_coord~ |
|
||||
| ~$group_dset_rank$~ | 'Rank of the dataset' | ~2~ |
|
||||
| ~$group_dset_dim$~ | 'Selected dimension of the dataset' | ~nucleus_num~ |
|
||||
| ~$group_dset_dim_list$~ | 'All dimensions of the dataset' | ~{nucleus_num, 3}~ |
|
||||
| ~$group_dset_dtype$~ | 'Basic type of the dataset (int/float/char)' | ~float~ |
|
||||
| ~$group_dset_h5_dtype$~ | 'Type of the dataset in HDF5' | ~double~ |
|
||||
| ~$group_dset_std_dtype_in$~ | 'Input type of the dataset in TEXT [fscanf] ' | ~%lf~ |
|
||||
| ~$group_dset_std_dtype_out$~ | 'Output type of the dataset in TEXT [fprintf]' | ~%24.16e~ |
|
||||
| ~$group_dset_dtype_single$~ | 'Single precision type of the dataset [C]' | ~float~ |
|
||||
| ~$group_dset_dtype_double$~ | 'Double precision type of the dataset [C]' | ~double~ |
|
||||
| ~$group_dset_f_dtype_single$~ | 'Single precision type of the dataset [Fortran]' | ~real(4)~ |
|
||||
| ~$group_dset_f_dtype_double$~ | 'Double precision type of the dataset [Fortran]' | ~real(8)~ |
|
||||
| Template variable | Description | Example |
|
||||
|-------------------------------+------------------------------------------------+--------------------|
|
||||
| ~$group$~ | Name of the group | ~nucleus~ |
|
||||
| ~$group_num$~ | Name of the dimensioning variable (scalar) | ~nucleus_num~ |
|
||||
| ~$group_dset$~ | Name of the dataset (vector/matrix/tensor) | ~nucleus_coord~ |
|
||||
| ~$group_dset_rank$~ | Rank of the dataset | ~2~ |
|
||||
| ~$group_dset_dim$~ | Selected dimension of the dataset | ~nucleus_num~ |
|
||||
| ~$group_dset_dim_list$~ | All dimensions of the dataset | ~{nucleus_num, 3}~ |
|
||||
| ~$group_dset_dtype$~ | Basic type of the dataset (int/float/char) | ~float~ |
|
||||
| ~$group_dset_h5_dtype$~ | Type of the dataset in HDF5 | ~double~ |
|
||||
| ~$group_dset_std_dtype_in$~ | Input type of the dataset in TEXT [fscanf] | ~%lf~ |
|
||||
| ~$group_dset_std_dtype_out$~ | Output type of the dataset in TEXT [fprintf] | ~%24.16e~ |
|
||||
| ~$group_dset_dtype_single$~ | Single precision type of the dataset [C] | ~float~ |
|
||||
| ~$group_dset_dtype_double$~ | Double precision type of the dataset [C] | ~double~ |
|
||||
| ~$group_dset_f_dtype_single$~ | Single precision type of the dataset [Fortran] | ~real(4)~ |
|
||||
| ~$group_dset_f_dtype_double$~ | Double precision type of the dataset [Fortran] | ~real(8)~ |
|
||||
|
||||
Note: parent group name is always added to the child objects upon construction of TREXIO
|
||||
(e.g. ~num~ of ~nucleus~ group becomes ~nucleus_num~ and should be accessed accordingly within TREXIO).
|
||||
Note: parent group name is always added to the child objects upon
|
||||
construction of TREXIO (e.g. ~num~ of ~nucleus~ group becomes
|
||||
~nucleus_num~ and should be accessed accordingly within TREXIO).
|
||||
|
||||
TREXIO generator parses the ~trex.json~ file. TREXIO operates with names of variables
|
||||
based on the 1-st (parent group) and 2-nd (child object) levels of ~trex.json~ .
|
||||
The parsed data is divided in 2 parts:
|
||||
TREXIO generator parses the ~trex.json~ file. TREXIO operates with
|
||||
names of variables based on the 1-st (parent group) and 2-nd (child
|
||||
object) levels of ~trex.json~ . The parsed data is divided in 2
|
||||
parts:
|
||||
|
||||
1) Dimensioning variables (contain ~num~ in their names). These are always scalar integers.
|
||||
2) Datasets. These can be vectors, matrices or tensors. The types are indicated in ~trex.json~.
|
||||
Currently supported types: int, float. TODO: strings.
|
||||
|
||||
For each of the aforementioned objects, TREXIO provides *has*, *read* and *write* functionality.
|
||||
TREXIO supports I/O with single or double precision for integer and floating point numbers.
|
||||
|
||||
For each of the aforementioned objects, TREXIO provides *has*,
|
||||
*read* and *write* functionality. TREXIO supports I/O with single
|
||||
or double precision for integer and floating point numbers.
|
||||
|
||||
** Templates for front end has/read/write a dimension
|
||||
|
||||
This section concerns API calls related to dimensioning variables.
|
||||
|
||||
| Function name | Description | Precision |
|
||||
|-------------------------------+-----------------------------------------------------+-----------|
|
||||
| ~trexio_has_$group_num$~ | 'Check if a dimensioning variable exists in a file' | --- |
|
||||
| ~trexio_read_$group_num$~ | 'Read a dimensioning variable ' | Single |
|
||||
| ~trexio_write_$group_num$~ | 'Write a dimensioning variable' | Single |
|
||||
| ~trexio_read_$group_num$_32~ | 'Read a dimensioning variable ' | Single |
|
||||
| ~trexio_write_$group_num$_32~ | 'Write a dimensioning variable' | Single |
|
||||
| ~trexio_read_$group_num$_64~ | 'Read a dimensioning variable ' | Double |
|
||||
| ~trexio_write_$group_num$_64~ | 'Write a dimensioning variable' | Double |
|
||||
| Function name | Description | Precision |
|
||||
|-------------------------------+---------------------------------------------------+-----------|
|
||||
| ~trexio_has_$group_num$~ | Check if a dimensioning variable exists in a file | --- |
|
||||
| ~trexio_read_$group_num$~ | Read a dimensioning variable | Single |
|
||||
| ~trexio_write_$group_num$~ | Write a dimensioning variable | Single |
|
||||
| ~trexio_read_$group_num$_32~ | Read a dimensioning variable | Single |
|
||||
| ~trexio_write_$group_num$_32~ | Write a dimensioning variable | Single |
|
||||
| ~trexio_read_$group_num$_64~ | Read a dimensioning variable | Double |
|
||||
| ~trexio_write_$group_num$_64~ | Write a dimensioning variable | Double |
|
||||
|
||||
*** C templates for front end
|
||||
|
||||
The ~C~ templates that correspond to each of the abovementioned functions can be found below.
|
||||
First parameter is the ~TREXIO~ file handle. Second parameter is the variable to be written/read
|
||||
The ~C~ templates that correspond to each of the abovementioned
|
||||
functions can be found below. First parameter is the ~TREXIO~ file
|
||||
handle. Second parameter is the variable to be written/read
|
||||
to/from the ~TREXIO~ file (except for ~trexio_has_~ functions).
|
||||
Suffixes ~_32~ and ~_64~ correspond to API calls dealing with single and double precision, respectively.
|
||||
The basic (non-suffixed) API call on dimensioning variables deals with single precision (see Table above).
|
||||
Suffixes ~_32~ and ~_64~ correspond to API calls dealing with
|
||||
single and double precision, respectively. The basic
|
||||
(non-suffixed) API call on dimensioning variables deals with single
|
||||
precision (see Table above).
|
||||
|
||||
|
||||
#+begin_src c :tangle hrw_num_front.h :exports none
|
||||
@ -1016,15 +1024,15 @@ end interface
|
||||
|
||||
This section concerns API calls related to datasets.
|
||||
|
||||
| Function name | Description | Precision |
|
||||
|----------------------------------------+---------------------------------------+-----------|
|
||||
| ~trexio_has_$group$_$group_dset$~ | 'Check if a dataset exists in a file' | --- |
|
||||
| ~trexio_read_$group$_$group_dset$~ | 'Read a dataset ' | Double |
|
||||
| ~trexio_write_$group$_$group_dset$~ | 'Write a dataset' | Double |
|
||||
| ~trexio_read_$group$_$group_dset$_32~ | 'Read a dataset' | Single |
|
||||
| ~trexio_write_$group$_$group_dset$_32~ | 'Write a dataset' | Single |
|
||||
| ~trexio_read_$group$_$group_dset$_64~ | 'Read a dataset' | Double |
|
||||
| ~trexio_write_$group$_$group_dset$_64~ | 'Write a dataset' | Double |
|
||||
| Function name | Description | Precision |
|
||||
|----------------------------------------+-------------------------------------+-----------|
|
||||
| ~trexio_has_$group$_$group_dset$~ | Check if a dataset exists in a file | --- |
|
||||
| ~trexio_read_$group$_$group_dset$~ | Read a dataset | Double |
|
||||
| ~trexio_write_$group$_$group_dset$~ | Write a dataset | Double |
|
||||
| ~trexio_read_$group$_$group_dset$_32~ | Read a dataset | Single |
|
||||
| ~trexio_write_$group$_$group_dset$_32~ | Write a dataset | Single |
|
||||
| ~trexio_read_$group$_$group_dset$_64~ | Read a dataset | Double |
|
||||
| ~trexio_write_$group$_$group_dset$_64~ | Write a dataset | Double |
|
||||
|
||||
*** C templates for front end
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user