diff --git a/Sparse.org b/Sparse.org new file mode 100644 index 0000000..727cd24 --- /dev/null +++ b/Sparse.org @@ -0,0 +1,131 @@ + +* JSON + "ao_2e_int" : { + "eri_num" : [ "int", [ ] ], + "eri" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ] + }, + +* Front end + + For example, the integral $\langle ij | kl \rangle = x$ can be + represented as + - a quartet of integers $(i,j,k,l)$ + - a floating point value $x$ + + To store $N$ integrals in the file, we can store + - An array of quartets of integers + - An array of values (floats) + + These two arrays have the same size, $N$. + + As the number of integrals to store can be prohibitively large, it + is important to be able to read/write the integrals in chunks. So we + need to give two extra parameters to the functions: + - ~offset~ : the index of the 1st integral we want to read. An + offset of zero means reading starts at the first integral + - ~num~ : the number of integrals to read + + We need to provide one function to read a chunk of indices, and one + function to read a chunk of values, because some users might want to + read only the values of the integrals, or only the indices. 
+
+ Here is an example for the indices:
+
+#+BEGIN_SRC c
+/* Read a chunk of integral indices: validate the arguments, fetch the number of
+   stored integrals, then dispatch to the back end selected by file->back_end.
+   NOTE(review): buffer is never NULL-checked here; confirm the back ends do it. */
+trexio_exit_code
+trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file,   // TREXIO file handle
+                                         const int64_t offset,   // index of the 1st integral to read (0 = first)
+                                         const int64_t num,      // number of integrals to read
+                                         int32_t* buffer)        // receives the indices (presumably rank*num values; TODO confirm)
+{
+  if (file   == NULL) return TREXIO_INVALID_ARG_1;
+  if (offset <  0L)   return TREXIO_INVALID_ARG_2;
+  if (num    <  0L)   return TREXIO_INVALID_ARG_3;
+
+  const uint32_t rank = 4;  // To be set by generator : number of indices
+
+  int64_t nmax;             // Max number of integrals
+  trexio_exit_code rc;
+
+  rc = trexio_read_ao_2e_int_eri_num_64(file, &nmax);  // _64 variant: nmax is an int64_t
+  if (rc != TREXIO_SUCCESS) return rc;
+
+  switch (file->back_end) {
+
+  case TREXIO_TEXT:
+    return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
+
+  case TREXIO_HDF5:
+    return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
+/*
+  case TREXIO_JSON:
+    return trexio_json_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
+,*/
+  default:
+    return TREXIO_FAILURE;  /* Impossible case */
+  }
+}
+#+END_SRC
+
+For the values,
+
+#+BEGIN_SRC c
+/* Read a chunk of integral values: validate the arguments, fetch the number of
+   stored integrals, then dispatch to the back end selected by file->back_end.
+   NOTE(review): buffer is never NULL-checked here; confirm the back ends do it. */
+trexio_exit_code
+trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file,   // TREXIO file handle
+                                         const int64_t offset,   // index of the 1st integral to read (0 = first)
+                                         const int64_t num,      // number of integrals to read
+                                         double* buffer)         // receives the values (presumably num doubles; TODO confirm)
+{
+  if (file   == NULL) return TREXIO_INVALID_ARG_1;
+  if (offset <  0L)   return TREXIO_INVALID_ARG_2;
+  if (num    <  0L)   return TREXIO_INVALID_ARG_3;
+
+  int64_t nmax;             // Max number of integrals
+  trexio_exit_code rc;
+
+  rc = trexio_read_ao_2e_int_eri_num_64(file, &nmax);  // _64 variant: nmax is an int64_t
+  if (rc != TREXIO_SUCCESS) return rc;
+
+  switch (file->back_end) {
+
+  case TREXIO_TEXT:
+    return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);
+
+  case TREXIO_HDF5:
+    return trexio_hdf5_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);
+/*
+  case TREXIO_JSON:
+    return trexio_json_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);
+,*/
+  default:
+    return TREXIO_FAILURE;  /* Impossible case */
+  }
+}
+#+END_SRC
+
+* Text back end
+ As the size of the dataset should be extensible, the simplest
+ solution is to use one file for each sparse data set, and store
+ the name of this file in the group.
+ Each integral can be a line in the file:
+ i j k l x
+ which can be written with "%10ld %10ld %10ld %10ld %24.16e".
+ The offset can be used with ~fseek(f, 69L*offset, SEEK_SET)~
+
+* HDF5 Back end
+
+ We need to declare the number of rows of the dataset as
+ ~UNLIMITED~. This requires using the ~Chunked~ storage, and the
+ chunks should absolutely not be larger than 1MB.
+
+ To extend the storage, see :
+ https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html
+ (figure 17)
+
+ If the offset+num > nmax, we need to extend the dataset.
diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org
index 1aff31f..dd8d503 100644
--- a/src/templates_front/templator_front.org
+++ b/src/templates_front/templator_front.org
@@ -1,4 +1,4 @@
-d+TITLE: Front end API
+#+TITLE: Front end API
 #+PROPERTY: comments org
 #+SETUPFILE: ../../docs/theme.setup
 # -*- mode: org -*-
@@ -106,6 +106,7 @@ typedef int32_t trexio_exit_code;
 #+begin_src c :tangle trexio_private.h
 #define TREXIO_MAX_FILENAME_LENGTH 4096
 #+end_src
+
 * Front end
 
 All calls to TREXIO are thread-safe.
@@ -694,69 +695,76 @@ end interface
 }
 #+end_src
 
- ~TREXIO~ is generated automatically by the ~generator.py~ Python script
- based on the tree-like configuration provided in the ~trex.json~ file.
- Because of that, generalized templates can be implemented and re-used.
- This approach minimizes the number of bugs as compared with manual copy-paste-modify scheme.
+ ~TREXIO~ is generated automatically by the ~generator.py~ Python
+ script based on the tree-like configuration provided in the
+ ~trex.json~ file. Because of that, generalized templates can be
+ implemented and re-used.
This approach minimizes the number of bugs + as compared with manual copy-paste-modify scheme. - All templates presented below use the ~$var$~ notation to indicate the variable, - which will be replaced by the ~generator.py~. Sometimes the upper case is used, i.e. - ~$VAR$~ (for example, in ~#define~ statements). - More detailed description of each variable can be found below: + All templates presented below use the ~$var$~ notation to indicate + the variable, which will be replaced by the + ~generator.py~. Sometimes the upper case is used, i.e. ~$VAR$~ (for + example, in ~#define~ statements). More detailed description of + each variable can be found below: - | Template variable | Description | Example | - |-------------------------------+--------------------------------------------------+--------------------| - | ~$group$~ | 'Name of the group' | ~nucleus~ | - | ~$group_num$~ | 'Name of the dimensioning variable (scalar)' | ~nucleus_num~ | - | ~$group_dset$~ | 'Name of the dataset (vector/matrix/tensor)' | ~nucleus_coord~ | - | ~$group_dset_rank$~ | 'Rank of the dataset' | ~2~ | - | ~$group_dset_dim$~ | 'Selected dimension of the dataset' | ~nucleus_num~ | - | ~$group_dset_dim_list$~ | 'All dimensions of the dataset' | ~{nucleus_num, 3}~ | - | ~$group_dset_dtype$~ | 'Basic type of the dataset (int/float/char)' | ~float~ | - | ~$group_dset_h5_dtype$~ | 'Type of the dataset in HDF5' | ~double~ | - | ~$group_dset_std_dtype_in$~ | 'Input type of the dataset in TEXT [fscanf] ' | ~%lf~ | - | ~$group_dset_std_dtype_out$~ | 'Output type of the dataset in TEXT [fprintf]' | ~%24.16e~ | - | ~$group_dset_dtype_single$~ | 'Single precision type of the dataset [C]' | ~float~ | - | ~$group_dset_dtype_double$~ | 'Double precision type of the dataset [C]' | ~double~ | - | ~$group_dset_f_dtype_single$~ | 'Single precision type of the dataset [Fortran]' | ~real(4)~ | - | ~$group_dset_f_dtype_double$~ | 'Double precision type of the dataset [Fortran]' | ~real(8)~ | + | Template 
variable | Description | Example | + |-------------------------------+------------------------------------------------+--------------------| + | ~$group$~ | Name of the group | ~nucleus~ | + | ~$group_num$~ | Name of the dimensioning variable (scalar) | ~nucleus_num~ | + | ~$group_dset$~ | Name of the dataset (vector/matrix/tensor) | ~nucleus_coord~ | + | ~$group_dset_rank$~ | Rank of the dataset | ~2~ | + | ~$group_dset_dim$~ | Selected dimension of the dataset | ~nucleus_num~ | + | ~$group_dset_dim_list$~ | All dimensions of the dataset | ~{nucleus_num, 3}~ | + | ~$group_dset_dtype$~ | Basic type of the dataset (int/float/char) | ~float~ | + | ~$group_dset_h5_dtype$~ | Type of the dataset in HDF5 | ~double~ | + | ~$group_dset_std_dtype_in$~ | Input type of the dataset in TEXT [fscanf] | ~%lf~ | + | ~$group_dset_std_dtype_out$~ | Output type of the dataset in TEXT [fprintf] | ~%24.16e~ | + | ~$group_dset_dtype_single$~ | Single precision type of the dataset [C] | ~float~ | + | ~$group_dset_dtype_double$~ | Double precision type of the dataset [C] | ~double~ | + | ~$group_dset_f_dtype_single$~ | Single precision type of the dataset [Fortran] | ~real(4)~ | + | ~$group_dset_f_dtype_double$~ | Double precision type of the dataset [Fortran] | ~real(8)~ | - Note: parent group name is always added to the child objects upon construction of TREXIO - (e.g. ~num~ of ~nucleus~ group becomes ~nucleus_num~ and should be accessed accordingly within TREXIO). + Note: parent group name is always added to the child objects upon + construction of TREXIO (e.g. ~num~ of ~nucleus~ group becomes + ~nucleus_num~ and should be accessed accordingly within TREXIO). - TREXIO generator parses the ~trex.json~ file. TREXIO operates with names of variables - based on the 1-st (parent group) and 2-nd (child object) levels of ~trex.json~ . - The parsed data is divided in 2 parts: + TREXIO generator parses the ~trex.json~ file. 
TREXIO operates with + names of variables based on the 1-st (parent group) and 2-nd (child + object) levels of ~trex.json~ . The parsed data is divided in 2 + parts: 1) Dimensioning variables (contain ~num~ in their names). These are always scalar integers. 2) Datasets. These can be vectors, matrices or tensors. The types are indicated in ~trex.json~. Currently supported types: int, float. TODO: strings. - For each of the aforementioned objects, TREXIO provides *has*, *read* and *write* functionality. - TREXIO supports I/O with single or double precision for integer and floating point numbers. - + For each of the aforementioned objects, TREXIO provides *has*, + *read* and *write* functionality. TREXIO supports I/O with single + or double precision for integer and floating point numbers. ** Templates for front end has/read/write a dimension This section concerns API calls related to dimensioning variables. - | Function name | Description | Precision | - |-------------------------------+-----------------------------------------------------+-----------| - | ~trexio_has_$group_num$~ | 'Check if a dimensioning variable exists in a file' | --- | - | ~trexio_read_$group_num$~ | 'Read a dimensioning variable ' | Single | - | ~trexio_write_$group_num$~ | 'Write a dimensioning variable' | Single | - | ~trexio_read_$group_num$_32~ | 'Read a dimensioning variable ' | Single | - | ~trexio_write_$group_num$_32~ | 'Write a dimensioning variable' | Single | - | ~trexio_read_$group_num$_64~ | 'Read a dimensioning variable ' | Double | - | ~trexio_write_$group_num$_64~ | 'Write a dimensioning variable' | Double | + | Function name | Description | Precision | + |-------------------------------+---------------------------------------------------+-----------| + | ~trexio_has_$group_num$~ | Check if a dimensioning variable exists in a file | --- | + | ~trexio_read_$group_num$~ | Read a dimensioning variable | Single | + | ~trexio_write_$group_num$~ | Write a dimensioning variable | 
Single | + | ~trexio_read_$group_num$_32~ | Read a dimensioning variable | Single | + | ~trexio_write_$group_num$_32~ | Write a dimensioning variable | Single | + | ~trexio_read_$group_num$_64~ | Read a dimensioning variable | Double | + | ~trexio_write_$group_num$_64~ | Write a dimensioning variable | Double | *** C templates for front end - The ~C~ templates that correspond to each of the abovementioned functions can be found below. - First parameter is the ~TREXIO~ file handle. Second parameter is the variable to be written/read + The ~C~ templates that correspond to each of the abovementioned + functions can be found below. First parameter is the ~TREXIO~ file + handle. Second parameter is the variable to be written/read to/from the ~TREXIO~ file (except for ~trexio_has_~ functions). - Suffixes ~_32~ and ~_64~ correspond to API calls dealing with single and double precision, respectively. - The basic (non-suffixed) API call on dimensioning variables deals with single precision (see Table above). + Suffixes ~_32~ and ~_64~ correspond to API calls dealing with + single and double precision, respectively. The basic + (non-suffixed) API call on dimensioning variables deals with single + precision (see Table above). #+begin_src c :tangle hrw_num_front.h :exports none @@ -1016,15 +1024,15 @@ end interface This section concerns API calls related to datasets. 
- | Function name | Description | Precision | - |----------------------------------------+---------------------------------------+-----------| - | ~trexio_has_$group$_$group_dset$~ | 'Check if a dataset exists in a file' | --- | - | ~trexio_read_$group$_$group_dset$~ | 'Read a dataset ' | Double | - | ~trexio_write_$group$_$group_dset$~ | 'Write a dataset' | Double | - | ~trexio_read_$group$_$group_dset$_32~ | 'Read a dataset' | Single | - | ~trexio_write_$group$_$group_dset$_32~ | 'Write a dataset' | Single | - | ~trexio_read_$group$_$group_dset$_64~ | 'Read a dataset' | Double | - | ~trexio_write_$group$_$group_dset$_64~ | 'Write a dataset' | Double | + | Function name | Description | Precision | + |----------------------------------------+-------------------------------------+-----------| + | ~trexio_has_$group$_$group_dset$~ | Check if a dataset exists in a file | --- | + | ~trexio_read_$group$_$group_dset$~ | Read a dataset | Double | + | ~trexio_write_$group$_$group_dset$~ | Write a dataset | Double | + | ~trexio_read_$group$_$group_dset$_32~ | Read a dataset | Single | + | ~trexio_write_$group$_$group_dset$_32~ | Write a dataset | Single | + | ~trexio_read_$group$_$group_dset$_64~ | Read a dataset | Double | + | ~trexio_write_$group$_$group_dset$_64~ | Write a dataset | Double | *** C templates for front end